import gradio as gr
from transformers import pipeline
from faster_whisper import WhisperModel
import librosa

# Load the CTranslate2-converted Whisper model with faster-whisper (CPU, int8 quantization)
# Earlier transformers-based approach, kept for reference:
# pipe = pipeline("automatic-speech-recognition", model="navidved/persian-whisper-large-v3-ct2")
model = WhisperModel("navidved/persian-whisper-large-v3-ct2", device="cpu", compute_type="int8")
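
# If a GPU is available, faster-whisper can also run on CUDA (an assumption
# about the deployment environment, not used by this Space):
# model = WhisperModel("navidved/persian-whisper-large-v3-ct2", device="cuda", compute_type="float16")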

# Maximum accepted audio length, in seconds
MAX_AUDIO_LENGTH = 40

# Define the inference function
def transcribe_audio(audio):
    if audio is None:
        return "No audio file uploaded. Please try again."
    try:
        # Measure the clip length without resampling
        audio_data, sr = librosa.load(audio, sr=None)
        duration = librosa.get_duration(y=audio_data, sr=sr)
        # Reject clips longer than the allowed duration
        if duration > MAX_AUDIO_LENGTH:
            return f"Audio is too long. Please upload an audio file shorter than {MAX_AUDIO_LENGTH} seconds."
        # Perform transcription; faster-whisper returns a generator of segments,
        # not an object with a .text attribute, so join the segment texts
        segments, _ = model.transcribe(audio, vad_filter=True)
        result = " ".join(segment.text.strip() for segment in segments)
        return result
    except Exception as e:
        return f"Error during transcription: {str(e)}"

# Create a Gradio interface for uploading audio or using the microphone
with gr.Blocks() as interface:
    gr.Markdown("# Whisper Large V3 Speech Recognition")
    gr.Markdown("Upload an audio file or use your microphone to transcribe speech to text.")
    # Create the input and output components
    audio_input = gr.Audio(type="filepath", label="Input Audio")
    output_text = gr.Textbox(label="Transcription")
    # Add a button to trigger the transcription
    transcribe_button = gr.Button("Transcribe")
    # Bind the transcribe_audio function to the button click
    transcribe_button.click(fn=transcribe_audio, inputs=audio_input, outputs=output_text)
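    # The default gr.Audio component accepts both uploads and the microphone;
    # to pin that behavior explicitly (assuming Gradio 4.x), the input could be:
    # audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Input Audio")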
# Launch the Gradio app
interface.launch()
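
# For temporary public access during local testing, Gradio can also create a
# share link (optional, not enabled here):
# interface.launch(share=True)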