Spaces: Build error
import gradio as gr
import requests
import wave
import pyaudio
import soundfile as sf
import os
import threading

# API URL and headers; read the access token from the environment so the
# app does not fail with a NameError when building the Authorization header
HF_TOKEN = os.getenv("HF_TOKEN")
API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
headers = {"Authorization": f"Bearer {HF_TOKEN}"}
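# Note: on Hugging Face Spaces, HF_TOKEN is usually stored as a Space secret,
# which is exposed to the running app as an environment variable.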
# Audio configuration
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000  # Whisper models expect 16 kHz audio
CHUNK = 1024
class AudioRecorder:
    def __init__(self):
        self.is_recording = False
        self.frames = []
        self.audio = pyaudio.PyAudio()
        self.stream = None
        self.thread = None

    def start_recording(self):
        """Starts audio recording on a background thread."""
        self.is_recording = True
        self.frames = []
        self.stream = self.audio.open(
            format=FORMAT,
            channels=CHANNELS,
            rate=RATE,
            input=True,
            frames_per_buffer=CHUNK
        )
        # Keep reading chunks until stop_recording() is called; without a
        # loop, no frames are captured between the Start and Stop clicks.
        self.thread = threading.Thread(target=self._record_loop, daemon=True)
        self.thread.start()

    def _record_loop(self):
        while self.is_recording:
            self.record_chunk()
    def record_chunk(self):
        """Records a single chunk of audio."""
        if self.is_recording:
            data = self.stream.read(CHUNK, exception_on_overflow=False)
            self.frames.append(data)

    def stop_recording(self):
        """Stops the audio recording."""
        self.is_recording = False
        if self.thread is not None:
            self.thread.join()
        self.stream.stop_stream()
        self.stream.close()
    def save_audio(self, filename="output.wav"):
        """Saves the recorded audio to a WAV file."""
        with wave.open(filename, 'wb') as wf:
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(self.audio.get_sample_size(FORMAT))
            wf.setframerate(RATE)
            wf.writeframes(b''.join(self.frames))
        # Convert to FLAC
        flac_filename = "output.flac"
        data, samplerate = sf.read(filename)
        sf.write(flac_filename, data, samplerate, format='FLAC')
        return flac_filename

    def close(self):
        self.audio.terminate()
recorder = AudioRecorder()

def start_recording():
    recorder.start_recording()
    return "Recording started."

def record_audio():
    recorder.record_chunk()
    return "Recording in progress..."
def stop_and_transcribe():
    try:
        recorder.stop_recording()
        flac_file = recorder.save_audio()
        with open(flac_file, "rb") as f:
            response = requests.post(
                API_URL,
                headers=headers,
                data=f.read()
            )
        if response.status_code == 200:
            result = response.json()
            return result.get("text", "No transcription available.")
        else:
            return f"API error: {response.status_code}"
    except Exception as e:
        return f"Error: {str(e)}"
    finally:
        if os.path.exists("output.wav"):
            os.remove("output.wav")
        if os.path.exists("output.flac"):
            os.remove("output.flac")
# Define Gradio interface
def build_interface():
    with gr.Blocks() as demo:
        gr.Markdown("# Speech-to-Text Transcription with Whisper")
        with gr.Row():
            start_button = gr.Button("Start Recording")
            stop_button = gr.Button("Stop and Transcribe")
        transcription_output = gr.Textbox(label="Transcription")
        start_button.click(start_recording, outputs=None)
        stop_button.click(stop_and_transcribe, outputs=transcription_output)
    return demo

if __name__ == "__main__":
    interface = build_interface()
    interface.launch()
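If the Space build fails while dependencies are being installed, the usual suspect in this code is pyaudio, which needs the PortAudio C library in order to compile. Assuming that is the cause, a minimal sketch of the two dependency files the Space would need (packages.txt lists apt packages, requirements.txt lists Python packages; soundfile may additionally need libsndfile1 on some images):

packages.txt:
    portaudio19-dev

requirements.txt:
    gradio
    requests
    pyaudio
    soundfile

Even with the build fixed, pyaudio records from the machine the app runs on, so a hosted Space has no microphone for it to open; a Gradio microphone input (gr.Audio) is the usual way to capture audio from the visitor's browser instead. HF_TOKEN also needs to be set as a Space secret so the environment variable read above is populated.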