# Gooya v1 Persian speech-recognition demo (Hugging Face Space).
import gradio as gr
from transformers import pipeline  # NOTE(review): unused in this file — candidate for removal
from faster_whisper import WhisperModel
import librosa

# Load the faster-whisper model once at startup.
# CPU with int8 quantization keeps memory and compute requirements low.
model = WhisperModel(
    "navidved/faster-gooya-v1",
    device="cpu",
    compute_type="int8",
    local_files_only=False,
)

# Maximum accepted audio clip length, in seconds.
MAX_AUDIO_LENGTH = 30
# Define the inference function
def transcribe_audio(audio):
    """Transcribe an uploaded audio clip to text.

    Parameters
    ----------
    audio : str | None
        Filesystem path to the audio file (Gradio ``type="filepath"``),
        or ``None`` when nothing was uploaded.

    Returns
    -------
    str
        The transcription, or a human-readable error message.
    """
    if audio is None:
        return "No audio file uploaded. Please try again."
    try:
        # Decode at the native sample rate only to measure duration.
        audio_data, sr = librosa.load(audio, sr=None)
        duration = librosa.get_duration(y=audio_data, sr=sr)
        # Reject clips longer than the allowed duration.
        if duration > MAX_AUDIO_LENGTH:
            return f"Audio is too long. Please upload an audio file shorter than {MAX_AUDIO_LENGTH} seconds."
        # Transcribe; VAD filtering skips non-speech segments.
        segments, _ = model.transcribe(audio, vad_filter=True)
        # join at C speed instead of quadratic `results +=` concatenation
        return "".join(seg.text for seg in segments)
    except Exception as e:
        # UI boundary: surface any failure as a message instead of crashing.
        return f"Error during transcription: {str(e)}"
# Create a Gradio interface for uploading audio or using the microphone
with gr.Blocks() as interface:
    gr.Markdown("# Gooya v1 Persian Speech Recognition")
    gr.Markdown("Upload an audio file or use your microphone to transcribe speech to text.")
    # Input and output components: a filepath audio source and a text box.
    audio_input = gr.Audio(type="filepath", label="Input Audio")
    output_text = gr.Textbox(label="Transcription")
    # Button that triggers the transcription.
    transcribe_button = gr.Button("Transcribe")
    # Bind the transcribe_audio function to the button click.
    transcribe_button.click(fn=transcribe_audio, inputs=audio_input, outputs=output_text)

# Launch the Gradio app.
interface.launch()