Spaces:

Teapack1
/

Assistant-Audio-Intent-Classification

Sleeping

File size: 1,063 Bytes

a95b4f8
db9a501
 
a95b4f8
64843fe
 
1f6c393
a95b4f8
7e193fe
 
db9a501
 
a95b4f8
7e193fe
 
 
 
1f6c393
a95b4f8
1f6c393
a95b4f8
1f6c393
 
 
 
 
 
 
 
 
 
75818ad
1f6c393
 
7e193fe
db9a501
a95b4f8
819c345

import gradio as gr
from transformers import pipeline
import numpy as np

# Hub model id handed to the speech-recognition pipeline below.
asr_model = "distil-whisper/distil-medium.en"

# Built once at import time so every transcription call reuses the same
# loaded pipeline instead of re-creating it per request.
asr_pipe = pipeline(task="automatic-speech-recognition", model=asr_model)

def transcribe(stream, new_chunk):
    """Append a streamed audio chunk to the running audio and transcribe it.

    Args:
        stream: Audio accumulated by earlier calls (float32 NumPy array), or
            ``None`` on the first call.
        new_chunk: ``(sampling_rate, samples)`` tuple for the newest chunk,
            where ``samples`` is a NumPy array.
            NOTE(review): assumes multi-channel input is shaped
            ``(samples, channels)`` as Gradio delivers it -- confirm.

    Returns:
        A ``(stream, text)`` tuple: the updated accumulated audio and the
        transcription of the whole accumulated audio.
    """
    sr, y = new_chunk

    # Down-mix multi-channel audio so the accumulated stream stays 1-D.
    if y.ndim > 1:
        y = y.mean(axis=1)

    y = y.astype(np.float32)

    # Peak-normalise, but skip empty or all-silent chunks: dividing by a zero
    # peak would fill the chunk with NaNs and corrupt the stream permanently.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y /= peak

    stream = y if stream is None else np.concatenate([stream, y])

    text = asr_pipe({"sampling_rate": sr, "raw": stream})["text"]
    return stream, text

# Top-level app container; this is the object the entry point launches.
demo = gr.Blocks()

# Live microphone interface. The "state" input/output pair carries the
# accumulated audio between successive streaming calls to `transcribe`.
mic = gr.Interface(
    fn=transcribe,
    inputs=["state", gr.Audio(sources=["microphone"], streaming=True)],
    outputs=["state", "text"],
    layout="horizontal",
    theme="huggingface",
    title="Whisper & BERT demo - Intent Classification",
    description=(
        "Transcribe audio inputs with Whisper ASR model and detect intention from the text. Use BERT NLP model to classify the intention as one of the commands to command a light."
    ),
    allow_flagging="never",
    live=True,
)

# Attach the interface to the container. Without this, `demo` stays an empty
# Blocks app and launching it serves a blank page.
with demo:
    mic.render()

# Start the Gradio server only when this file is run as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    demo.launch()