Spaces:

tomekstor9
/

openai-whisper-large-v3-turbo

Sleeping

App Files Files Community

openai-whisper-large-v3-turbo / app.py

tomekstor9

Update app.py

8c2dbdd verified about 2 months ago

raw

history blame contribute delete

3.28 kB

	import gradio as gr
	from transformers import pipeline
	from pydub import AudioSegment
	import os

	# Load the Whisper speech-recognition model. Decoding is pinned to Polish
	# transcription so the model does not have to auto-detect the language of
	# every segment (the rest of the app assumes Polish input).
	transcriber = pipeline(
	    "automatic-speech-recognition",
	    model="openai/whisper-medium",
	    generate_kwargs={"language": "polish", "task": "transcribe"},
	)

	# Load the Polish -> English translation model.
	translator = pipeline("translation", model="Helsinki-NLP/opus-mt-pl-en")

	# Audio normalisation: 16 kHz, mono, 16-bit WAV
	def reduce_audio_quality(input_path):
	    """Convert a media file to a 16 kHz, mono, 16-bit WAV file.

	    The result is written to a uniquely named temporary file so that
	    simultaneous requests cannot overwrite each other's output.

	    Args:
	        input_path: Path of the audio/video file to convert.

	    Returns:
	        Path of the converted WAV file, or ``None`` when the conversion
	        failed (the error is printed). The caller owns the returned file
	        and is responsible for deleting it.
	    """
	    import tempfile

	    reduced_path = None
	    try:
	        audio = AudioSegment.from_file(input_path)
	        reduced_audio = audio.set_frame_rate(16000).set_channels(1).set_sample_width(2)

	        # Reserve a unique file name; close the descriptor right away
	        # because the exporter opens the path on its own.
	        fd, reduced_path = tempfile.mkstemp(prefix="reduced_audio_", suffix=".wav")
	        os.close(fd)

	        reduced_audio.export(reduced_path, format="wav", bitrate="128k")
	        return reduced_path
	    except Exception as e:
	        print(f"Błąd podczas zmniejszania jakości pliku: {e}")
	        # Do not leave a half-written temporary file behind.
	        if reduced_path is not None:
	            try:
	                os.remove(reduced_path)
	            except OSError:
	                pass
	        return None

	# Split audio into fixed-length segments (30 seconds by default)
	def split_audio_to_segments(input_path, segment_length=30):
	    """Split an audio file into consecutive WAV segments.

	    Each segment is exported to its own uniquely named temporary file, so
	    concurrent calls never write to the same path.

	    Args:
	        input_path: Path of the audio file to split.
	        segment_length: Length of one segment in seconds. Must be positive.
	            The last segment may be shorter.

	    Returns:
	        List of segment file paths in playback order. The caller owns the
	        files and is responsible for deleting them.

	    Raises:
	        ValueError: If ``segment_length`` is not positive.
	    """
	    import tempfile

	    step_ms = int(segment_length * 1000)
	    if step_ms <= 0:
	        raise ValueError(f"segment_length must be positive, got {segment_length!r}")

	    audio = AudioSegment.from_file(input_path)
	    segments = []
	    try:
	        # len(audio) is used as the total duration in milliseconds.
	        for start_ms in range(0, len(audio), step_ms):
	            fd, segment_path = tempfile.mkstemp(
	                prefix=f"segment_{start_ms // 1000}_", suffix=".wav"
	            )
	            os.close(fd)
	            # Track the path before exporting so it is cleaned up on failure.
	            segments.append(segment_path)
	            audio[start_ms:start_ms + step_ms].export(segment_path, format="wav")
	    except Exception:
	        # Remove whatever was already written, then let the error propagate.
	        for path in segments:
	            try:
	                os.remove(path)
	            except OSError:
	                pass
	        raise
	    return segments

	# Streaming transcription of an uploaded file
	def transcribe_audio_stream(file):
	    """Transcribe an uploaded file and stream the growing transcript.

	    The file is normalised with ``reduce_audio_quality``, split with
	    ``split_audio_to_segments`` and each segment is passed to
	    ``transcriber``. After every segment the transcript accumulated so far
	    is yielded, so the UI can update while work is still in progress.

	    Args:
	        file: The uploaded file. Either an object exposing the path as
	            ``.name`` or a plain path string is accepted (which one Gradio
	            passes depends on its version/configuration -- TODO confirm).
	            ``None`` means nothing was uploaded.

	    Yields:
	        The partial transcript after each segment, or a single
	        user-facing error message if something goes wrong.
	    """
	    if file is None:
	        yield "Nie przesłano pliku."
	        return

	    input_path = getattr(file, "name", file)
	    temp_paths = []  # temporary files that still have to be deleted
	    try:
	        reduced_audio = reduce_audio_quality(input_path)
	        if not reduced_audio:
	            yield "Nie udało się zmniejszyć rozmiaru pliku."
	            return
	        temp_paths.append(reduced_audio)

	        segments = split_audio_to_segments(reduced_audio, segment_length=30)
	        temp_paths.extend(segments)

	        texts = []
	        for segment in segments:
	            result = transcriber(segment)
	            texts.append(result['text'])
	            # Delete each segment as soon as it is transcribed.
	            os.remove(segment)
	            temp_paths.remove(segment)
	            yield " ".join(texts).strip()  # show the partial transcript
	    except Exception as e:
	        yield f"Błąd: {e}"
	    finally:
	        # Runs on success, on error, and when the consumer stops iterating
	        # early, so no temporary file is left behind.
	        for path in temp_paths:
	            try:
	                os.remove(path)
	            except OSError:
	                pass

	# Text translation (Polish -> English)

	# Maximum number of characters sent to the translation model in one call.
	# NOTE(review): chosen conservatively so a chunk stays well below the
	# model's input limit (presumably 512 tokens -- confirm in the model config).
	_MAX_TRANSLATION_CHARS = 400


	def _split_for_translation(text, max_chars=_MAX_TRANSLATION_CHARS):
	    """Split text into sentence-aligned chunks of roughly max_chars characters."""
	    import re

	    # Short text is passed through untouched, exactly as it was entered.
	    if len(text) <= max_chars:
	        return [text]

	    pieces = []
	    for sentence in re.split(r"(?<=[.!?…])\s+", text.strip()):
	        # A sentence that is too long on its own is broken into words so it
	        # can still be packed into bounded chunks below.
	        pieces.extend([sentence] if len(sentence) <= max_chars else sentence.split())

	    chunks = []
	    current = ""
	    for piece in pieces:
	        if not piece:
	            continue
	        candidate = f"{current} {piece}" if current else piece
	        if current and len(candidate) > max_chars:
	            chunks.append(current)
	            current = piece
	        else:
	            current = candidate
	    if current:
	        chunks.append(current)
	    return chunks


	def translate_text(text):
	    """Translate text to English using ``translator``.

	    Long text is translated chunk by chunk (split on sentence boundaries)
	    and the partial translations are joined with single spaces, so that a
	    full transcript does not have to fit into one model call.

	    Args:
	        text: The text to translate. Empty or whitespace-only input
	            (or ``None``) is returned as an empty string without calling
	            the model.

	    Returns:
	        The English translation, or a user-facing error message if the
	        translation failed.
	    """
	    if not text or not text.strip():
	        return ""
	    try:
	        translated = [
	            translator(chunk)[0]['translation_text'].strip()
	            for chunk in _split_for_translation(text)
	        ]
	        return " ".join(translated).strip()
	    except Exception as e:
	        return f"Błąd podczas tłumaczenia: {e}"

	# Gradio user interface
	with gr.Blocks() as app:
	    # Page header and short usage instructions
	    gr.Markdown("## Whisper Medium - Transkrypcja i Tłumaczenie")
	    gr.Markdown("Prześlij plik audio/wideo, wygeneruj transkrypcję, popraw ją ręcznie i przetłumacz na angielski.")

	    # Upload widget and the button that starts transcription share a row
	    with gr.Row():
	        file_input = gr.File(label="Prześlij plik audio lub wideo (MOV, MP4, WAV, MP3)")
	        transcribe_button = gr.Button("Wykonaj transkrypcję")

	    # The transcript box doubles as the (user-editable) translation input
	    transcription_output = gr.Textbox(lines=10, label="Transkrypcja tekstowa (edytowalna)")
	    translate_button = gr.Button("Przetłumacz na angielski")
	    translation_output = gr.Textbox(lines=10, label="Tłumaczenie na angielski")

	    # Stream the transcript into the text box while segments are processed
	    transcribe_button.click(fn=transcribe_audio_stream, inputs=file_input, outputs=transcription_output)

	    # Translate whatever is currently in the transcript box, including manual edits
	    translate_button.click(fn=translate_text, inputs=transcription_output, outputs=translation_output)

	app.launch()