Spaces:

Teapack1
/

Assistant-Audio-Intent-Classification

Sleeping

Update app.py

by Teapack1 - opened Nov 24, 2023

←

Files changed (1) hide show

app.py CHANGED Viewed

@@ -26,26 +26,16 @@ def transcribe_and_classify(stream, new_chunk):
     else:
         stream = y
-    # Keep only the last 5 seconds of audio (5 * sr samples)
-    num_samples_last_10_seconds = 5 * sr
-    if len(stream) > num_samples_last_10_seconds:
-        stream = stream[-num_samples_last_10_seconds:]
-    current_time = time.time()
-    # Update every 5 seconds
-    if current_time - last_update_time >= 5:
-        last_update_time = current_time
-        # Transcribe the buffered audio (at most the last 5 seconds)
-        transcription = transcriber({"sampling_rate": sr, "task": "transcribe", "language": "english", "raw": stream})["text"]
-        last_transcription = transcription  # Update the buffer
-        # Classify the transcribed text
-        if transcription.strip():
-            output = classifier(transcription, candidate_labels, multi_label=False)
-            top_label = output['labels'][0]
-            top_score = output['scores'][0]
-            last_classification = f"{top_label.upper()}, score: {top_score:.2f}"
     # Return the last updated transcription and classification
     return stream, last_transcription, last_classification
@@ -55,13 +45,14 @@ demo = gr.Interface(
     fn=transcribe_and_classify,
     inputs=[
         "state",
-        gr.Audio(sources=["microphone"], streaming=True)
     ],
     outputs=[
         "state",
         "text",
         "text"
     ],
 )
 # Launch the demo

     else:
         stream = y
+    # Transcribe the audio accumulated in the stream so far
+    transcription = transcriber({"sampling_rate": sr, "task": "transcribe", "language": "english", "raw": stream})["text"]
+    last_transcription = transcription  # Update the buffer
+    # Classify the transcribed text
+    if transcription.strip():
+        output = classifier(transcription, candidate_labels, multi_label=False)
+        top_label = output['labels'][0]
+        top_score = output['scores'][0]
+        last_classification = f"{top_label.upper()}, score: {top_score:.2f}"
     # Return the last updated transcription and classification
     return stream, last_transcription, last_classification
     fn=transcribe_and_classify,
     inputs=[
         "state",
+        gr.Audio(sources=["microphone"])
     ],
     outputs=[
         "state",
         "text",
         "text"
     ],
 )
 # Launch the demo