Teapack1 committed
Commit
22ba507
1 Parent(s): 51a2f53

Update app.py

Files changed (1):
  app.py +61 -27
app.py CHANGED
@@ -1,35 +1,69 @@
+import gradio as gr
 from transformers import pipeline
+import numpy as np
+import time
 
-model_id = "sanchit-gandhi/whisper-small-dv"  # update with your model id
-pipe = pipeline("automatic-speech-recognition", model=model_id)
-
-def transcribe_speech(filepath):
-    output = pipe(
-        filepath,
-        max_new_tokens=256,
-        generate_kwargs={
-            "task": "transcribe",
-            "language": "sinhalese",
-        },  # update with the language you've fine-tuned on
-        chunk_length_s=30,
-        batch_size=8,
-    )
-    return output["text"]
+# Initialize the pipelines
+transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-tiny.en")
+classifier = pipeline("zero-shot-classification", model="MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli")
 
-import gradio as gr
+candidate_labels = ["dim the light", "turn on light fully", "turn off light fully", "raise the light", "not about lighting"]
+last_update_time = time.time() - 5  # Start 5 s in the past so the first update fires immediately
 
-demo = gr.Blocks()
+# Buffers to hold the last updated values
+last_transcription = ""
+last_classification = ""
 
-mic_transcribe = gr.Interface(
-    fn=transcribe_speech,
-    inputs=gr.Audio(sources="microphone", type="filepath"),
-    outputs=gr.outputs.Textbox(),
-)
+def transcribe_and_classify(stream, new_chunk):
+    global last_update_time, last_transcription, last_classification
+    sr, y = new_chunk
+    y = y.astype(np.float32)
+    y /= max(np.max(np.abs(y)), 1e-10)  # normalize, guarding against silent (all-zero) chunks
+
+    # Concatenate the new audio chunk to the stream
+    if stream is not None:
+        stream = np.concatenate([stream, y])
+    else:
+        stream = y
 
-with demo:
-    gr.TabbedInterface(
-        [mic_transcribe],
-        ["Transcribe Microphone"],
-    )
+    # Keep only the last 5 seconds of audio (matches the update interval below)
+    num_samples_to_keep = 5 * sr
+    if len(stream) > num_samples_to_keep:
+        stream = stream[-num_samples_to_keep:]
+
+    current_time = time.time()
+
+    # Update every 5 seconds
+    if current_time - last_update_time >= 5:
+        last_update_time = current_time
+        # Transcribe the buffered audio; whisper-tiny.en is English-only, so no task/language arguments are needed
+        transcription = transcriber({"sampling_rate": sr, "raw": stream})["text"]
+        last_transcription = transcription  # Update the buffer
+
+        # Classify the transcribed text
+        if transcription.strip():
+            output = classifier(transcription, candidate_labels, multi_label=False)
+            top_label = output['labels'][0]
+            top_score = output['scores'][0]
+            last_classification = f"{top_label.upper()}, score: {top_score:.2f}"
+
+    # Return the last updated transcription and classification
+    return stream, last_transcription, last_classification
+
+# Define the Gradio interface
+demo = gr.Interface(
+    fn=transcribe_and_classify,
+    inputs=[
+        "state",
+        gr.Audio(sources=["microphone"], streaming=True)
+    ],
+    outputs=[
+        "state",
+        "text",
+        "text"
+    ],
+    live=True
+)
 
+# Launch the demo
 demo.launch(debug=True)
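
As a quick local check of the new streaming callback, the sketch below is a hypothetical smoke test, not part of the commit. It assumes transcribe_and_classify and its pipelines are already defined in the session (e.g. by pasting app.py up to, but not including, demo.launch(), since importing app.py as-is would launch the server). It feeds one chunk shaped the way Gradio's streaming Audio component delivers audio: a (sample_rate, int16 samples) tuple alongside the running state.

import numpy as np

# Hypothetical smoke test: one second of a 440 Hz tone at 16 kHz
sr = 16000
t = np.linspace(0, 1, sr, endpoint=False)
chunk = (0.5 * np.sin(2 * np.pi * 440 * t) * 32767).astype(np.int16)

stream = None  # the "state" input is empty on the first call
stream, transcription, classification = transcribe_and_classify(stream, (sr, chunk))
print(repr(transcription), classification)  # a pure tone usually yields empty or noise-like text

Guarding the last line of app.py with if __name__ == "__main__": demo.launch(debug=True) would let the module be imported for tests like this without side effects.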