File size: 1,811 Bytes
25ca65d
 
0166f6b
 
25ca65d
 
f8e2f48
0166f6b
 
f8e2f48
25ca65d
 
 
 
ad4be01
 
25ca65d
0166f6b
 
 
 
 
 
 
25ca65d
0166f6b
ad4be01
 
57e87ea
25ca65d
 
 
 
 
ec85040
25ca65d
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import gradio as gr
from transformers import pipeline
from faster_whisper import WhisperModel
import librosa

# Instantiate the speech-recognition model once, at import time.
# It runs on the CPU with int8 compute; local_files_only=False means loading
# is not restricted to files that are already available locally.
model = WhisperModel(
    "navidved/faster-gooya-v1",
    device="cpu",
    compute_type="int8",
    local_files_only=False,
)

# Upper bound, in seconds, on the duration of audio accepted for transcription
MAX_AUDIO_LENGTH = 30

# Define the inference function
def transcribe_audio(audio) -> str:
    """Transcribe an audio file and return the text or a user-facing message.

    Args:
        audio: Path to the audio file to transcribe, or ``None`` when no
            audio was provided.

    Returns:
        The concatenated text of all transcribed segments on success.
        Otherwise a human-readable message: when ``audio`` is ``None``, when
        the clip is longer than ``MAX_AUDIO_LENGTH`` seconds, or when any
        exception is raised while loading or transcribing the audio. This
        function never raises for those failures; it reports them as text.
    """
    if audio is None:
        return "No audio file uploaded. Please try again."
    try:
        # sr=None asks librosa not to resample; the samples are only used
        # here to measure how long the clip is.
        audio_data, sr = librosa.load(audio, sr=None)
        duration = librosa.get_duration(y=audio_data, sr=sr)

        # Reject clips longer than the allowed duration
        if duration > MAX_AUDIO_LENGTH:
            return f"Audio is too long. Please upload an audio file shorter than {MAX_AUDIO_LENGTH} seconds."

        # Perform transcription. The segments are joined inside the ``try``
        # so that any error raised while iterating them is reported too.
        segments, _ = model.transcribe(audio, vad_filter=True)
        return "".join(seg.text for seg in segments)
    except Exception as e:
        # UI boundary: surface the failure as text instead of raising.
        return f"Error during transcription: {e}"

# Build the Gradio UI: two headings, an audio input, a text output and a button
with gr.Blocks() as interface:
    gr.Markdown("# Gooya v1 Persian Speech Recognition")
    gr.Markdown("Upload an audio file or use your microphone to transcribe speech to text.")

    # type="filepath" makes the audio component hand a file path to the callback
    recording = gr.Audio(type="filepath", label="Input Audio")
    transcript_box = gr.Textbox(label="Transcription")

    # Clicking the button runs transcribe_audio on the audio input and
    # writes the returned string into the text box
    run_button = gr.Button("Transcribe")
    run_button.click(transcribe_audio, recording, transcript_box)

# Start serving the app
interface.launch()