Files changed (1) hide show
  1. app.py +11 -20
app.py CHANGED
@@ -26,26 +26,16 @@ def transcribe_and_classify(stream, new_chunk):
26
  else:
27
  stream = y
28
 
29
- # Keep only the last 10 seconds of audio
30
- num_samples_last_10_seconds = 5 * sr
31
- if len(stream) > num_samples_last_10_seconds:
32
- stream = stream[-num_samples_last_10_seconds:]
33
-
34
- current_time = time.time()
35
 
36
- # Update every 5 seconds
37
- if current_time - last_update_time >= 5:
38
- last_update_time = current_time
39
- # Transcribe the last 10 seconds of audio
40
- transcription = transcriber({"sampling_rate": sr, "task": "transcribe", "language": "english", "raw": stream})["text"]
41
- last_transcription = transcription # Update the buffer
42
-
43
- # Classify the transcribed text
44
- if transcription.strip():
45
- output = classifier(transcription, candidate_labels, multi_label=False)
46
- top_label = output['labels'][0]
47
- top_score = output['scores'][0]
48
- last_classification = f"{top_label.upper()}, score: {top_score:.2f}"
49
 
50
  # Return the last updated transcription and classification
51
  return stream, last_transcription, last_classification
@@ -55,13 +45,14 @@ demo = gr.Interface(
55
  fn=transcribe_and_classify,
56
  inputs=[
57
  "state",
58
- gr.Audio(sources=["microphone"], streaming=True)
59
  ],
60
  outputs=[
61
  "state",
62
  "text",
63
  "text"
64
  ],
 
65
  )
66
 
67
  # Launch the demo
 
26
  else:
27
  stream = y
28
 
29
+ # Transcribe the audio currently held in stream
30
+ transcription = transcriber({"sampling_rate": sr, "task": "transcribe", "language": "english", "raw": stream})["text"]
31
+ last_transcription = transcription # Update the buffer
 
 
 
32
 
33
+ # Classify the transcribed text
34
+ if transcription.strip():
35
+ output = classifier(transcription, candidate_labels, multi_label=False)
36
+ top_label = output['labels'][0]
37
+ top_score = output['scores'][0]
38
+ last_classification = f"{top_label.upper()}, score: {top_score:.2f}"
 
 
 
 
 
 
 
39
 
40
  # Return the last updated transcription and classification
41
  return stream, last_transcription, last_classification
 
45
  fn=transcribe_and_classify,
46
  inputs=[
47
  "state",
48
+ gr.Audio(sources=["microphone"])
49
  ],
50
  outputs=[
51
  "state",
52
  "text",
53
  "text"
54
  ],
55
+
56
  )
57
 
58
  # Launch the demo