# Gooya v1 Persian speech-recognition demo (Hugging Face Space).
import gradio as gr
from transformers import pipeline  # NOTE(review): unused in this file — candidate for removal
from faster_whisper import WhisperModel
import librosa

# Load the faster-whisper model once at startup.
# CPU with int8 quantization keeps memory and compute requirements low.
model = WhisperModel(
    "navidved/faster-gooya-v1",
    device="cpu",
    compute_type="int8",
    local_files_only=False,
)

# Maximum accepted audio clip length, in seconds.
MAX_AUDIO_LENGTH = 30
# Define the inference function
def transcribe_audio(audio):
    """Transcribe an uploaded audio clip to text.

    Parameters
    ----------
    audio : str | None
        Filesystem path to the audio file (Gradio ``type="filepath"``),
        or ``None`` when nothing was uploaded.

    Returns
    -------
    str
        The transcription, or a human-readable error message.
    """
    if audio is None:
        return "No audio file uploaded. Please try again."
    try:
        # Decode at the native sample rate only to measure duration.
        audio_data, sr = librosa.load(audio, sr=None)
        duration = librosa.get_duration(y=audio_data, sr=sr)
        # Reject clips longer than the allowed duration.
        if duration > MAX_AUDIO_LENGTH:
            return f"Audio is too long. Please upload an audio file shorter than {MAX_AUDIO_LENGTH} seconds."
        # Transcribe; VAD filtering skips non-speech segments.
        segments, _ = model.transcribe(audio, vad_filter=True)
        # join at C speed instead of quadratic `results +=` concatenation
        return "".join(seg.text for seg in segments)
    except Exception as e:
        # UI boundary: surface any failure as a message instead of crashing.
        return f"Error during transcription: {str(e)}"
# Create a Gradio interface for uploading audio or using the microphone
with gr.Blocks() as interface:
    gr.Markdown("# Gooya v1 Persian Speech Recognition")
    gr.Markdown("Upload an audio file or use your microphone to transcribe speech to text.")
    # Input and output components: a filepath audio source and a text box.
    audio_input = gr.Audio(type="filepath", label="Input Audio")
    output_text = gr.Textbox(label="Transcription")
    # Button that triggers the transcription.
    transcribe_button = gr.Button("Transcribe")
    # Bind the transcribe_audio function to the button click.
    transcribe_button.click(fn=transcribe_audio, inputs=audio_input, outputs=output_text)

# Launch the Gradio app.
interface.launch()