# Speech-to-Text / app.py
# Author: Kameshr
# Commit: "Create app.py" (7910b12, verified)
import gradio as gr
import requests
import wave
import pyaudio
import soundfile as sf
import os
# Hugging Face Inference API endpoint for the Whisper large-v3 model.
API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
# The access token comes from the HF_TOKEN environment variable. It has to be
# defined here: nothing else in this file provides the name, and referencing
# it undefined raises NameError as soon as the module is imported.
HF_TOKEN = os.environ.get("HF_TOKEN", "").strip()
# Only send an Authorization header when a token is configured, so a missing
# token does not produce a malformed "Bearer " header.
headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
# Audio capture configuration, shared by the PyAudio input stream and the WAV writer.
FORMAT = pyaudio.paInt16  # sample format handed to PyAudio when opening the stream
CHANNELS = 1  # number of input channels (mono)
RATE = 16000  # sample rate in Hz; Whisper models expect 16 kHz
CHUNK = 1024  # frames per buffer for the input stream
class AudioRecorder:
    """Capture microphone audio with PyAudio and export it as WAV and FLAC.

    Audio is collected by a PyAudio stream callback, so frames accumulate in
    the background between ``start_recording()`` and ``stop_recording()``
    without the caller having to poll ``record_chunk()``.
    """

    def __init__(self):
        self.is_recording = False
        self.frames = []
        # No stream exists until start_recording() opens one.
        self.stream = None
        self.audio = pyaudio.PyAudio()

    def _on_audio(self, in_data, frame_count, time_info, status):
        """PyAudio stream callback: store each captured buffer while recording."""
        if self.is_recording and in_data:
            self.frames.append(in_data)
        # Input-only stream: there is no output data, just keep the stream going.
        return (None, pyaudio.paContinue)

    def _close_stream(self):
        """Stop and close the current input stream, if there is one."""
        stream, self.stream = self.stream, None
        if stream is None:
            return
        try:
            stream.stop_stream()
        finally:
            # Always release the device, even if stopping the stream failed.
            stream.close()

    def start_recording(self):
        """Starts audio recording.

        Any stream left open by a previous recording is closed first, and the
        frame buffer is reset.

        Raises:
            Exception: whatever ``PyAudio.open`` raises when the input device
                cannot be opened; ``is_recording`` is reset in that case.
        """
        self._close_stream()
        self.frames = []
        self.is_recording = True
        try:
            self.stream = self.audio.open(
                format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK,
                stream_callback=self._on_audio,
            )
        except Exception:
            self.is_recording = False
            raise

    def record_chunk(self):
        """Kept for backward compatibility; does nothing.

        Buffers are appended to ``frames`` by the stream callback. A
        callback-driven PyAudio stream must not be read with ``Stream.read``,
        so there is nothing to do here.
        """
        return None

    def stop_recording(self):
        """Stops the audio recording.

        Safe to call when no recording was ever started.
        """
        self.is_recording = False
        self._close_stream()

    def save_audio(self, filename="output.wav"):
        """Saves the recorded audio to a WAV file and converts it to FLAC.

        Args:
            filename: Path of the WAV file to write.

        Returns:
            Path of the FLAC file, which is ``filename`` with its extension
            replaced by ``.flac`` ("output.flac" for the default).
        """
        with wave.open(filename, "wb") as wf:
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(self.audio.get_sample_size(FORMAT))
            wf.setframerate(RATE)
            wf.writeframes(b"".join(self.frames))
        # Convert to FLAC, naming the output after the WAV file.
        flac_filename = os.path.splitext(filename)[0] + ".flac"
        data, samplerate = sf.read(filename)
        sf.write(flac_filename, data, samplerate, format="FLAC")
        return flac_filename

    def close(self):
        """Release the input stream (if open) and terminate PyAudio."""
        self.stop_recording()
        self.audio.terminate()
# Module-level recorder instance used by the handler functions in this file
# (start_recording, record_audio, stop_and_transcribe).
recorder = AudioRecorder()
def start_recording():
    """Begin capturing on the module-level recorder and return a status message."""
    status_message = "Recording started."
    recorder.start_recording()
    return status_message
def record_audio():
    """Ask the module-level recorder to record one chunk and return a status message."""
    status_message = "Recording in progress..."
    recorder.record_chunk()
    return status_message
def stop_and_transcribe():
    """Stop recording, upload the audio for transcription, and return the text.

    Returns:
        The transcription text on success; otherwise a human-readable message
        ("No audio recorded.", "API error: <status>", or "Error: <details>").
        Failures are returned as strings rather than raised so they can be
        shown to the user.
    """
    wav_file = "output.wav"
    flac_file = "output.flac"
    try:
        recorder.stop_recording()
        # Nothing captured: skip the upload instead of sending an empty file.
        if not recorder.frames:
            return "No audio recorded."
        flac_file = recorder.save_audio(wav_file)
        with open(flac_file, "rb") as f:
            audio_bytes = f.read()
        # Without a timeout a stalled connection would block the handler forever.
        response = requests.post(
            API_URL,
            headers=headers,
            data=audio_bytes,
            timeout=60,
        )
        if response.status_code != 200:
            return f"API error: {response.status_code}"
        result = response.json()
        if isinstance(result, dict):
            return result.get("text", "No transcription available.")
        # Unexpected payload shape (e.g. a list): no text to extract.
        return "No transcription available."
    except Exception as e:
        return f"Error: {str(e)}"
    finally:
        # Best-effort cleanup of the temporary audio files.
        for path in (wav_file, flac_file):
            try:
                os.remove(path)
            except FileNotFoundError:
                pass
# Define Gradio interface
def build_interface():
    """Build the Gradio Blocks UI for recording and transcription.

    Returns:
        The ``gr.Blocks`` app containing the start/stop buttons, a status box
        and the transcription box.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# Speech-to-Text Transcription with Whisper")
        with gr.Row():
            start_button = gr.Button("Start Recording")
            stop_button = gr.Button("Stop and Transcribe")
        # start_recording returns a status string; give it somewhere to go
        # instead of discarding it with outputs=None.
        status_output = gr.Textbox(label="Status", interactive=False)
        transcription_output = gr.Textbox(label="Transcription")
        start_button.click(start_recording, outputs=status_output)
        stop_button.click(stop_and_transcribe, outputs=transcription_output)
    return demo
if __name__ == "__main__":
    # Build the UI and start the Gradio server when run as a script.
    build_interface().launch()