from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_microphone_live
import torch
import gradio as gr
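
# Note: ffmpeg_microphone_live relies on the ffmpeg binary, so ffmpeg must be
# installed and on the PATH for the microphone streaming below to work.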

# Model checkpoints: a small English-only Whisper model for speech
# recognition and an NLI model for zero-shot intent classification.
asr_model = "openai/whisper-tiny.en"
nlp_model = "MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli"

# Run the ASR model on the GPU when one is available, otherwise on the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

pipe = pipeline("automatic-speech-recognition", model=asr_model, device=device)
sampling_rate = pipe.feature_extractor.sampling_rate

chunk_length_s = 10  # seconds of audio that make up one complete command
stream_chunk_s = 1  # how often a new partial transcription is yielded


def create_mic():
    # Transcribing one command consumes the stream up to its final chunk,
    # so a fresh microphone generator is created for every command.
    return ffmpeg_microphone_live(
        sampling_rate=sampling_rate,
        chunk_length_s=chunk_length_s,
        stream_chunk_s=stream_chunk_s,
    )


def listen_print_loop(responses):
    # Print partial transcriptions in place, then return the final text once
    # the pipeline marks the current chunk as complete.
    for response in responses:
        if response["text"]:
            print(response["text"], end="\r")
        if not response["partial"][0]:
            break
    print("")
    return response["text"]


classifier = pipeline("zero-shot-classification", model=nlp_model)
candidate_labels = [
    "dim the light",
    "turn on light fully",
    "turn off light fully",
    "raise the light",
    "nothing about light",
]
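
# With multi_label=False the classifier returns labels sorted by score, e.g.
# for a hypothetical transcript "please dim the lights" something like:
# {"sequence": "please dim the lights", "labels": ["dim the light", ...],
#  "scores": [0.91, ...]}
# so index 0 below is always the top prediction.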


def run_console_demo():
    # Continuously listen for a command, transcribe it, and classify it
    # against the candidate intents.
    while True:
        context = listen_print_loop(pipe(create_mic()))
        print(context)
        output = classifier(context, candidate_labels, multi_label=False)
        top_label = output["labels"][0]
        top_score = output["scores"][0]
        print(f"Top Prediction: {top_label} with a score of {top_score:.2f}")


def transcribe(audio_path):
    # Gradio passes the recording as a file path (type="filepath"); the ASR
    # pipeline accepts a path directly.
    if audio_path is None:
        return ""
    return pipe(audio_path)["text"]


iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs="text",
    title="Real-Time ASR Transcription",
    description="Speak into the microphone and get the real-time transcription.",
)


if __name__ == "__main__":
    # run_console_demo() never returns, so run either the terminal loop or
    # the web demo, not both:
    # run_console_demo()
    iface.launch()