|
import gradio as gr |
|
from transformers import pipeline |
|
import numpy as np |
|
import time |
|
|
|
|
|
# Speech-to-text model (tiny English-only Whisper) and zero-shot text
# classifier used to map transcripts onto lighting-control intents.
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-tiny.en")
classifier = pipeline("zero-shot-classification", model="MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli")

# Candidate intents for the zero-shot classifier; the last label is the
# "none of the above" bucket.
candidate_labels = ["dim the light", "turn on light fully", "turn off light fully", "raise the light", "not about lighting"]

# Rolling state shared with transcribe_and_classify via `global`.
# NOTE(review): last_update_time is declared global in the handler but is
# never read or written there — it looks like leftover throttling state;
# kept for backward compatibility. Confirm before removing.
last_update_time = time.time() - 5

last_transcription = ""
last_classification = ""
|
|
|
def transcribe_and_classify(stream, new_chunk):
    """Accumulate a microphone chunk, transcribe the stream, classify intent.

    Parameters
    ----------
    stream : np.ndarray | None
        Audio accumulated over previous calls (float32 mono), or None on
        the first invocation (Gradio's initial state).
    new_chunk : tuple[int, np.ndarray]
        ``(sampling_rate, samples)`` pair as delivered by ``gr.Audio``.

    Returns
    -------
    tuple[np.ndarray, str, str]
        The updated audio stream (fed back into state), the latest
        transcription, and the latest classification summary string.
    """
    global last_update_time, last_transcription, last_classification

    sr, y = new_chunk
    y = y.astype(np.float32)

    # Peak-normalize to [-1, 1]. Guard against an all-zero (silent) chunk:
    # the original unconditional division produced NaNs via 0/0, which then
    # poisoned the whole accumulated stream.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    # Grow the rolling buffer so Whisper always sees the full utterance.
    stream = y if stream is None else np.concatenate([stream, y])

    transcription = transcriber(
        {"sampling_rate": sr, "task": "transcribe", "language": "english", "raw": stream}
    )["text"]
    last_transcription = transcription

    # Only reclassify when there is actual text; on an empty transcript the
    # previous classification is deliberately kept (best-effort display).
    if transcription.strip():
        output = classifier(transcription, candidate_labels, multi_label=False)
        # Pipeline returns labels/scores sorted best-first.
        top_label = output['labels'][0]
        top_score = output['scores'][0]
        last_classification = f"{top_label.upper()}, score: {top_score:.2f}"

    return stream, last_transcription, last_classification
|
|
|
|
|
# UI wiring: the "state" input/output pair carries the accumulated audio
# buffer between successive microphone submissions.
# NOTE(review): the handler accumulates chunks as if streaming, but
# gr.Audio is created without streaming=True, so each submission is a
# discrete recording — confirm whether live streaming was intended.
demo = gr.Interface(
    fn=transcribe_and_classify,
    inputs=[
        "state",
        gr.Audio(sources=["microphone"]),
    ],
    outputs=[
        "state",
        "text",  # transcription
        "text",  # classification summary
    ],
)

# debug=True surfaces server-side tracebacks in the console while testing.
demo.launch(debug=True)