Spaces:

Kameshr
/

Speech-to-Text

Build error

App Files Files Community

Speech-to-Text / app.py

Kameshr

Create app.py

7910b12 verified 27 days ago

raw

history blame contribute delete

3.35 kB

	import gradio as gr
	import requests
	import wave
	import pyaudio
	import soundfile as sf
	import os



	# Hugging Face Inference API endpoint for the Whisper large-v3 model.
	API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"

	# HF_TOKEN was referenced below without ever being defined, which raises
	# NameError as soon as the module is imported. Read it from the environment
	# so the secret stays out of the source; an unset variable yields an empty
	# token rather than a crash at import time.
	HF_TOKEN = os.environ.get("HF_TOKEN", "")

	# Request headers sent with every call to API_URL.
	headers = {"Authorization": f"Bearer {HF_TOKEN}"}

	# Capture settings shared by the recorder's input stream and the WAV writer.
	RATE = 16000  # Whisper models expect 16kHz
	CHANNELS = 1  # single (mono) input channel
	CHUNK = 1024  # frames requested per buffer
	FORMAT = pyaudio.paInt16  # PyAudio sample-format constant

	class AudioRecorder:
	    """Record microphone audio through PyAudio and export it as FLAC.

	    Audio is captured by a PyAudio stream callback, so frames accumulate in
	    ``self.frames`` from the moment ``start_recording`` returns until
	    ``stop_recording`` is called. The original blocking-mode stream relied
	    on callers polling ``record_chunk``, which nothing in this file did, so
	    every recording came out empty.
	    """

	    def __init__(self):
	        self.is_recording = False
	        self.frames = []
	        # No stream exists until start_recording() opens one; keeping the
	        # attribute defined lets stop_recording()/close() run safely first.
	        self.stream = None
	        self.audio = pyaudio.PyAudio()

	    def _on_audio(self, in_data, frame_count, time_info, status):
	        """Stream callback: buffer each captured chunk while recording.

	        PyAudio invokes this with the raw input bytes for every buffer.
	        Returning ``paContinue`` keeps the stream delivering audio.
	        """
	        if self.is_recording:
	            self.frames.append(in_data)
	        return (None, pyaudio.paContinue)

	    def _close_stream(self):
	        """Stop and close the current stream, if one is open."""
	        stream, self.stream = self.stream, None
	        if stream is not None:
	            stream.stop_stream()
	            stream.close()

	    def start_recording(self):
	        """Starts audio recording."""
	        # A second start without a stop would otherwise leak the old stream.
	        self._close_stream()
	        self.frames = []
	        # Set the flag before opening so the first callback is not dropped.
	        self.is_recording = True
	        self.stream = self.audio.open(
	            format=FORMAT,
	            channels=CHANNELS,
	            rate=RATE,
	            input=True,
	            frames_per_buffer=CHUNK,
	            stream_callback=self._on_audio,
	        )

	    def record_chunk(self):
	        """Retained for backward compatibility; intentionally does nothing.

	        Chunks are appended by the stream callback, and PyAudio does not
	        allow ``Stream.read`` on a callback-mode stream.
	        """

	    def stop_recording(self):
	        """Stops the audio recording.

	        Safe to call even when no recording was started.
	        """
	        self.is_recording = False
	        self._close_stream()

	    def save_audio(self, filename="output.wav"):
	        """Saves the recorded audio to a WAV file and converts it to FLAC.

	        Args:
	            filename: Path of the intermediate WAV file to write.

	        Returns:
	            The path of the FLAC file, which is always ``"output.flac"``.
	        """
	        with wave.open(filename, 'wb') as wf:
	            wf.setnchannels(CHANNELS)
	            wf.setsampwidth(self.audio.get_sample_size(FORMAT))
	            wf.setframerate(RATE)
	            wf.writeframes(b''.join(self.frames))

	        # Convert to FLAC by re-reading the WAV that was just written.
	        flac_filename = "output.flac"
	        data, samplerate = sf.read(filename)
	        sf.write(flac_filename, data, samplerate, format='FLAC')
	        return flac_filename

	    def close(self):
	        """Release any open stream and terminate the PyAudio instance."""
	        self.is_recording = False
	        self._close_stream()
	        self.audio.terminate()

	# Single module-level recorder used by the callback functions defined below.
	# Constructing it creates the underlying PyAudio instance at import time.
	recorder = AudioRecorder()

	def start_recording():
	    """Start capturing on the shared recorder and return a status message."""
	    status = "Recording started."
	    recorder.start_recording()
	    return status

	def record_audio():
	    """Ask the shared recorder to record a chunk and return a progress message."""
	    message = "Recording in progress..."
	    recorder.record_chunk()
	    return message

	def stop_and_transcribe():
	    """Stop recording, send the audio to the Whisper API, and return the text.

	    Returns:
	        The ``"text"`` field of the API's JSON reply, or
	        ``"No transcription available."`` when that field is absent. A
	        non-200 reply yields ``"API error: <status>"``. Any exception raised
	        while stopping, saving, uploading, or parsing is returned as
	        ``"Error: <message>"`` instead of propagating to the UI.
	    """
	    try:
	        recorder.stop_recording()
	        flac_file = recorder.save_audio()

	        with open(flac_file, "rb") as f:
	            audio_bytes = f.read()

	        # requests waits indefinitely by default; bound the call so a stalled
	        # inference endpoint cannot hang the UI handler forever.
	        response = requests.post(
	            API_URL,
	            headers=headers,
	            data=audio_bytes,
	            timeout=120,
	        )

	        if response.status_code != 200:
	            return f"API error: {response.status_code}"
	        return response.json().get("text", "No transcription available.")
	    except Exception as e:
	        # UI boundary: report the failure as text rather than raising.
	        return f"Error: {e}"
	    finally:
	        # Always remove the temporary audio files, whether or not they exist.
	        for leftover in ("output.wav", "output.flac"):
	            try:
	                os.remove(leftover)
	            except FileNotFoundError:
	                pass

	# Gradio UI definition
	def build_interface():
	    """Assemble and return the Gradio Blocks app.

	    The layout is a title, a row with the two control buttons, and a textbox
	    that receives the result of ``stop_and_transcribe``.
	    """
	    demo = gr.Blocks()
	    with demo:
	        gr.Markdown("# Speech-to-Text Transcription with Whisper")

	        with gr.Row():
	            record_btn = gr.Button("Start Recording")
	            transcribe_btn = gr.Button("Stop and Transcribe")

	        result_box = gr.Textbox(label="Transcription")

	        # The start handler's return value is not displayed (no outputs).
	        record_btn.click(fn=start_recording, outputs=None)
	        transcribe_btn.click(fn=stop_and_transcribe, outputs=result_box)

	    return demo

	if __name__ == "__main__":
	    # Build the UI and serve it only when run as a script.
	    build_interface().launch()