Spaces:
Paused
Paused
update
Browse files
api.py
CHANGED
|
@@ -294,19 +294,24 @@ def transcribe_audio_gradio(audio, language="auto"):
|
|
| 294 |
return "请上传音频文件"
|
| 295 |
|
| 296 |
# 读取音频数据
|
| 297 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
|
| 299 |
# 转换为单声道
|
| 300 |
-
if
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
# 转换为numpy array并归一化
|
| 304 |
-
input_wav = waveform.numpy().astype(np.float32)
|
| 305 |
|
| 306 |
# 重采样到16kHz
|
| 307 |
-
if
|
| 308 |
-
resampler = torchaudio.transforms.Resample(
|
| 309 |
-
|
|
|
|
| 310 |
|
| 311 |
# 模型推理
|
| 312 |
text = model.generate(
|
|
|
|
| 294 |
return "请上传音频文件"
|
| 295 |
|
| 296 |
# 读取音频数据
|
| 297 |
+
fs, input_wav = audio
|
| 298 |
+
|
| 299 |
+
print('------------------------------')
|
| 300 |
+
print(fs, type(fs))
|
| 301 |
+
print(input_wav, type(input_wav))
|
| 302 |
+
print('------------------------------')
|
| 303 |
+
|
| 304 |
+
input_wav = input_wav.astype(np.float32) / np.iinfo(np.int16).max
|
| 305 |
|
| 306 |
# 转换为单声道
|
| 307 |
+
if len(input_wav.shape) > 1:
|
| 308 |
+
input_wav = input_wav.mean(-1)
|
|
|
|
|
|
|
|
|
|
| 309 |
|
| 310 |
# 重采样到16kHz
|
| 311 |
+
if fs != 16000:
|
| 312 |
+
resampler = torchaudio.transforms.Resample(fs, 16000)
|
| 313 |
+
input_wav_t = torch.from_numpy(input_wav).to(torch.float32)
|
| 314 |
+
input_wav = resampler(input_wav_t[None, :])[0, :].numpy()
|
| 315 |
|
| 316 |
# 模型推理
|
| 317 |
text = model.generate(
|