Update app.py
Browse files
app.py
CHANGED
@@ -1,37 +1,34 @@
|
|
1 |
-
import gradio as gr
|
2 |
from transformers import pipeline
|
3 |
-
import numpy as np
|
4 |
-
|
5 |
-
asr_model = "distil-whisper/distil-medium.en"
|
6 |
-
|
7 |
-
asr_pipe = pipeline("automatic-speech-recognition", model=asr_model)
|
8 |
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
-
|
15 |
-
stream = np.concatenate([stream, y])
|
16 |
-
else:
|
17 |
-
stream = y
|
18 |
-
return stream, asr_pipe({"sampling_rate": sr, "raw": stream})["text"]
|
19 |
|
20 |
demo = gr.Blocks()
|
21 |
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
"state", gr.Audio(sources=["microphone"], streaming=True)],
|
27 |
-
outputs = ["state", "text"],
|
28 |
-
theme="huggingface",
|
29 |
-
title="Whisper & BERT demo - Intent Classification",
|
30 |
-
description=(
|
31 |
-
"Transcribe audio inputs with Whisper ASR model and detect intention from the text. Use BERT NLP model to classify the intention as one of the commands to command a light."
|
32 |
-
),
|
33 |
-
live=True,
|
34 |
)
|
35 |
|
36 |
-
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
1 |
from transformers import pipeline
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
+
# Checkpoint to load for speech recognition.
model_id = "sanchit-gandhi/whisper-small-dv"  # update with your model id

# Construct the ASR pipeline once at import time so every request
# reuses the already-loaded model.
pipe = pipeline(task="automatic-speech-recognition", model=model_id)
5 |
+
|
6 |
+
def transcribe_speech(filepath, language="sinhalese", task="transcribe"):
    """Transcribe an audio file with the module-level Whisper ``pipe``.

    Parameters
    ----------
    filepath : str
        Path to the audio file to transcribe (Gradio passes a temp-file path).
    language : str, optional
        Language the checkpoint was fine-tuned on; update to match your model.
        Defaults to ``"sinhalese"`` to preserve the original behavior.
    task : str, optional
        Whisper generation task (``"transcribe"`` or ``"translate"``).

    Returns
    -------
    str
        The transcribed text.
    """
    output = pipe(
        filepath,
        max_new_tokens=256,
        # Forwarded to model.generate(); selects the decoding language/task.
        generate_kwargs={
            "task": task,
            "language": language,
        },
        chunk_length_s=30,  # split long audio into 30 s chunks
        batch_size=8,       # transcribe chunks in batches of 8
    )
    return output["text"]
18 |
|
19 |
+
import gradio as gr
|
|
|
|
|
|
|
|
|
20 |
|
21 |
# Top-level Blocks container; the interface defined below is rendered
# inside it via the `with demo:` context further down.
demo = gr.Blocks()
22 |
|
23 |
+
# Microphone-input transcription UI: records audio, hands the temp-file
# path to `transcribe_speech`, and shows the returned text.
mic_transcribe = gr.Interface(
    fn=transcribe_speech,
    # `sources` is documented as a list of input sources.
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    # gr.outputs.Textbox() is the deprecated pre-3.x namespace (removed in
    # Gradio 4.x); gr.Textbox() is the supported output component.
    outputs=gr.Textbox(),
)
28 |
|
29 |
+
with demo:
    # Render the microphone interface as a (single) tab inside the Blocks app.
    gr.TabbedInterface(
        [mic_transcribe],
        ["Transcribe Microphone"],
    )  # NOTE(review): closing paren is missing in the garbled diff view; restored here

# debug=True surfaces tracebacks in the console while developing.
demo.launch(debug=True)