import functools

import gradio as gr
import torch
from datasets import load_dataset
from transformers import pipeline
# Inference device: the first CUDA GPU when PyTorch reports one, else the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
@functools.lru_cache(maxsize=None)
def _load_asr_pipeline():
    """Build the Whisper speech-recognition pipeline once and reuse it.

    The result is memoized so the model is not re-instantiated for every
    request served by the UI.
    """
    return pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-small",
        chunk_length_s=30,
        device=device,
    )


@functools.lru_cache(maxsize=None)
def _load_sample_dataset():
    """Load the dummy LibriSpeech validation split once and reuse it."""
    return load_dataset(
        "hf-internal-testing/librispeech_asr_dummy",
        "clean",
        split="validation",
    )


def convert_audio():
    """Transcribe the predefined audio sample and return the recognized text.

    The first record of the dummy LibriSpeech validation split is fed to the
    Whisper pipeline. The transcription is printed to stdout as well as
    returned.

    Returns:
        The ``"text"`` field of the pipeline output for the sample.
    """
    pipe = _load_asr_pipeline()
    ds = _load_sample_dataset()

    sample = ds[0]["audio"]
    print("Using predefined audio sample:")
    # Only the raw waveform is passed on; NOTE(review): this relies on the
    # sample already being at the sampling rate the model expects.
    audio_data = sample["array"]

    prediction = pipe(audio_data)["text"]
    print(prediction)
    return prediction
# Gradio UI: no input components, a single text output showing the
# transcription returned by convert_audio.
demo = gr.Interface(
    fn=convert_audio,
    inputs=None,
    outputs="text",
)

# share=True asks Gradio to also create a public share link.
demo.launch(share=True)