# Page header captured with this file -- Spaces status: Runtime error
import gradio as gr
import librosa
import numpy as np
import soundfile as sf
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
# Load the Wav2Vec2 processor and CTC model once at import time so every
# transcription request reuses the same objects. Both are loaded from the
# same checkpoint id, so it is named once here.
MODEL_ID = "aditii09/facebook_english_asr"
processor = Wav2Vec2Processor.from_pretrained(MODEL_ID)
model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
def speech2text(audio):
    """Transcribe a recorded audio clip to lower-case text.

    Args:
        audio: A ``(sample_rate, samples)`` pair, or ``None`` when nothing
            was recorded. ``samples`` is array-like; presumably shaped
            ``(n,)`` for mono or ``(n, channels)`` for multi-channel input
            (confirm against the Gradio audio component in use).

    Returns:
        The lower-cased transcription of the clip, or ``""`` when there is
        no audio to transcribe.
    """
    # Nothing recorded: return an empty transcript instead of failing on
    # tuple unpacking.
    if audio is None:
        return ""

    sr, data = audio
    samples = np.asarray(data)
    if samples.size == 0:
        return ""

    # Keep only the first channel of multi-channel audio; 1-D (mono) input
    # is used as-is rather than being indexed with a second axis.
    if samples.ndim > 1:
        samples = samples[:, 0]
    samples = samples.astype(np.float32)

    # Resample to 16 kHz. The rates are passed by keyword because recent
    # librosa releases no longer accept them positionally.
    data_16k = librosa.resample(samples, orig_sr=sr, target_sr=16000)

    # Build the model input (batch size 1).
    input_values = processor(
        [data_16k], return_tensors="pt", padding="longest"
    ).input_values

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        logits = model(input_values).logits

    # Greedy CTC decoding: most likely token per frame, then decode.
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)
    return transcription[0].lower()  # batch size 1
# Build the web UI: microphone input is passed to speech2text and the
# returned string is shown as text. The app is launched as soon as the
# module runs.
iface = gr.Interface(fn=speech2text, inputs="microphone", outputs="text")
iface.launch()