# Hugging Face Space source. Page status when this file was captured: "Runtime error".
from functools import lru_cache
from typing import Tuple

import gradio as gr
import numpy as np
import torch
from huggingface_hub import hf_hub_download
from torchaudio.transforms import Resample

from pipeline import PreTrainedPipeline
# Hugging Face Hub repository id; used both as the download source for the
# language model file and as the model path handed to the pipeline.
HF_HUB_URL = 'ales/wav2vec2-cv-be'

# Path of the language model file inside the HF_HUB_URL repository.
LM_HUB_FP = 'language_model/cv8be_5gram.bin'
# Sampling rate (Hz) the audio is resampled to before recognition.
_TARGET_SAMPLING_RATE = 16_000


@lru_cache(maxsize=1)
def _load_pipeline() -> "PreTrainedPipeline":
    """Download the language model and build the recognition pipeline once.

    The result is cached so repeated requests reuse the same pipeline
    instead of re-downloading the language model and re-initialising the
    model on every call.
    """
    lm_fp = hf_hub_download(repo_id=HF_HUB_URL, filename=LM_HUB_FP)
    return PreTrainedPipeline(model_path=HF_HUB_URL, language_model_fp=lm_fp)


def _to_mono_float32(audio: np.ndarray) -> np.ndarray:
    """Return *audio* as a 1-D float32 waveform."""
    samples = np.asarray(audio)
    if np.issubdtype(samples.dtype, np.integer):
        # Integer PCM is scaled by the dtype's range so it matches the usual
        # floating-point waveform convention; the resampler only accepts
        # floating-point tensors.
        scale = float(np.iinfo(samples.dtype).max) + 1.0
        samples = samples.astype(np.float32) / scale
    else:
        samples = samples.astype(np.float32)
    if samples.ndim > 1:
        # NOTE(review): assumes a (samples, channels) layout, as Gradio
        # documents for multi-channel audio — confirm against the installed
        # Gradio version. Channels are averaged into a single mono track.
        samples = samples.mean(axis=-1)
    return samples


def main(rate_audio_tuple: Tuple[int, np.ndarray]) -> str:
    """Recognise speech in a recorded audio clip.

    Args:
        rate_audio_tuple: ``(sampling_rate, audio)`` pair. ``None`` is also
            tolerated (e.g. when nothing was recorded).

    Returns:
        The first entry of the pipeline's ``'text'`` output, or an empty
        string when there is no audio to process.
    """
    if rate_audio_tuple is None:
        return ""

    sampling_rate, audio = rate_audio_tuple
    waveform = _to_mono_float32(audio)
    if waveform.size == 0:
        return ""

    # resample audio to 16kHz
    resampler = Resample(orig_freq=sampling_rate, new_freq=_TARGET_SAMPLING_RATE)
    audio_resampled = resampler(torch.from_numpy(waveform)).numpy().flatten()

    # recognize speech with the (cached) pipeline
    recognizer = _load_pipeline()
    text_recognized = recognizer(inputs=audio_resampled)['text'][0]
    return text_recognized
# Gradio UI: audio from the 'microphone' input is passed to `main`, and the
# string it returns is shown in a text output.
iface = gr.Interface(
    fn=main,
    inputs='microphone',
    outputs="text"
)

# Start the app.
iface.launch()