# app.py — speech-to-speech translation demo (English audio → Hindi audio)
# Author: Chandranshu Jain — "Update app.py", commit f6c5e0e (verified)
from transformers import pipeline
import os
import gradio as gr
import torch
from IPython.display import Audio as IPythonAudio
from gtts import gTTS
import IPython.display as ipd
# --- Model setup (module-level; weights download on first run) ---

# Audio to text: English-only ASR (Distil-Whisper small).
asr = pipeline(task="automatic-speech-recognition",
model="distil-whisper/distil-small.en")
# Text to text: NLLB-200 multilingual translation, loaded in bfloat16
# to halve memory versus float32.
translator = pipeline(task="translation",
model="facebook/nllb-200-distilled-600M",
torch_dtype=torch.bfloat16)
# Text to audio: Bark small TTS, also in bfloat16.
pipe = pipeline("text-to-speech", model="suno/bark-small",
torch_dtype=torch.bfloat16)
# Top-level Gradio container; tabs are attached to it further below.
demo = gr.Blocks()
def transcribe_speech(filepath):
    """Transcribe English speech, translate it to Hindi, and synthesize Hindi audio.

    Pipeline: ASR (Distil-Whisper) -> translation (NLLB, eng_Latn -> hin_Deva)
    -> TTS (Bark).

    Parameters
    ----------
    filepath : str | None
        Path to the recorded/uploaded audio file (Gradio ``type="filepath"``).

    Returns
    -------
    tuple[int, numpy.ndarray] | None
        ``(sampling_rate, waveform)`` — the format Gradio's ``Audio`` output
        component renders — or ``None`` when no audio was supplied.
    """
    if filepath is None:
        gr.Warning("No audio found, please retry.")
        # Return None, not "": an empty string is not a valid value for the
        # "audio" output component.
        return None
    # Speech -> English text -> Hindi text.
    output = translator(asr(filepath)["text"],
                        src_lang="eng_Latn",
                        tgt_lang="hin_Deva")
    # Hindi text -> speech. The TTS pipeline returns a dict
    # {"audio": ndarray, "sampling_rate": int}; returning that dict directly
    # (as the original code did) cannot be rendered by the Audio output —
    # Gradio expects a (sampling_rate, waveform) tuple.
    narrated = pipe(output[0]['translation_text'])
    waveform = narrated["audio"]
    # Bark typically emits shape (1, n_samples); flatten to 1-D for Gradio.
    if getattr(waveform, "ndim", 1) > 1:
        waveform = waveform.squeeze()
    return narrated["sampling_rate"], waveform
# Tab 1: record from the microphone; the recording is passed to
# transcribe_speech as a temporary file path, and the result is rendered
# by an "audio" output component.
mic_transcribe = gr.Interface(
fn=transcribe_speech,
inputs=gr.Audio(sources="microphone",
type="filepath"),
outputs="audio",
#outputs=gr.Audio(label="Translated Message"),
allow_flagging="never")  # disable the Gradio "Flag" button
# Tab 2: same pipeline as the microphone tab, but fed from an uploaded
# audio file instead of a live recording.
file_transcribe = gr.Interface(
fn=transcribe_speech,
inputs=gr.Audio(sources="upload",
type="filepath"),
outputs="audio",
#outputs=gr.Audio(label="Translated Message"),
allow_flagging="never"
)
# Assemble the two interfaces as tabs inside the Blocks container.
with demo:
gr.TabbedInterface(
[mic_transcribe,
file_transcribe],
["Transcribe Microphone",
"Transcribe Audio File"],
)
# share=True creates a public gradio.live URL; launch() blocks until the
# server is stopped, after which close() releases the port/resources.
demo.launch(share=True)
demo.close()