|
import os
import re
import tempfile
import textwrap

import gradio as gr
from pydub import AudioSegment
from transformers import pipeline
|
|
|
|
|
# Speech-to-text pipeline backed by the Whisper "medium" checkpoint.
# Created once at import time and shared by every request.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-medium",
)
|
|
|
|
|
# Text translation pipeline using the Helsinki-NLP opus-mt-pl-en checkpoint.
# Created once at import time and shared by every request.
translator = pipeline(
    task="translation",
    model="Helsinki-NLP/opus-mt-pl-en",
)
|
|
|
|
|
def reduce_audio_quality(input_path):
    """Convert an audio/video file to a 16 kHz, mono, 16-bit WAV file.

    The converted audio is written to a uniquely named temporary file so that
    concurrent requests never overwrite each other's output.

    Args:
        input_path: Path of the media file to convert (any format pydub can
            decode).

    Returns:
        Path of the newly created WAV file, or ``None`` if the input could not
        be decoded or the output could not be written. The caller owns the
        returned file and is responsible for deleting it.
    """
    reduced_path = None
    try:
        audio = AudioSegment.from_file(input_path)
        reduced_audio = audio.set_frame_rate(16000).set_channels(1).set_sample_width(2)

        # mkstemp gives a collision-free name; only the path is needed, so
        # the descriptor is closed right away and pydub reopens the file.
        fd, reduced_path = tempfile.mkstemp(prefix="reduced_audio_", suffix=".wav")
        os.close(fd)

        # export() hands back the open output handle; close it explicitly.
        # No bitrate is passed: it has no effect on uncompressed PCM WAV.
        reduced_audio.export(reduced_path, format="wav").close()
        return reduced_path
    except Exception as e:  # boundary: any decode/encode failure maps to None
        print(f"Błąd podczas zmniejszania jakości pliku: {e}")
        # Do not leave a partially written temp file behind.
        if reduced_path is not None:
            try:
                os.remove(reduced_path)
            except OSError:
                pass
        return None
|
|
|
|
|
def split_audio_to_segments(input_path, segment_length=30):
    """Cut an audio file into consecutive fixed-length WAV segments.

    Each segment is written to its own uniquely named temporary file, so
    concurrent requests cannot clobber one another's segments.

    Args:
        input_path: Path of the audio file to split.
        segment_length: Length of each segment in seconds. The final segment
            may be shorter. Must be positive.

    Returns:
        List of segment file paths in playback order. The caller owns the
        files and is responsible for deleting them.

    Raises:
        ValueError: If ``segment_length`` is not positive.
    """
    # pydub indexes audio in milliseconds.
    step_ms = int(segment_length * 1000)
    if step_ms <= 0:
        raise ValueError(f"segment_length must be positive, got {segment_length!r}")

    audio = AudioSegment.from_file(input_path)
    segments = []
    try:
        for start_ms in range(0, len(audio), step_ms):
            fd, segment_path = tempfile.mkstemp(
                prefix=f"segment_{start_ms // 1000}_", suffix=".wav"
            )
            os.close(fd)
            # Register the path before exporting so it is cleaned up below
            # if the export itself fails.
            segments.append(segment_path)
            audio[start_ms:start_ms + step_ms].export(segment_path, format="wav").close()
    except Exception:
        # The caller never receives the list on failure, so remove what was
        # already written before propagating the error.
        for path in segments:
            try:
                os.remove(path)
            except OSError:
                pass
        raise
    return segments
|
|
|
|
|
def transcribe_audio_stream(file):
    """Transcribe an uploaded media file, streaming the text as it grows.

    The upload is converted with ``reduce_audio_quality``, split into
    30-second segments, and each segment is passed to ``transcriber``. After
    every segment the transcription accumulated so far is yielded.

    Args:
        file: The uploaded file: either a filesystem path (``str`` or
            ``os.PathLike``) or an object exposing the path as ``.name``.
            ``None`` means nothing was uploaded.

    Yields:
        The cumulative transcription after each processed segment, or a single
        user-facing error message if processing fails.
    """
    if file is None:
        yield "Nie przesłano pliku."
        return

    reduced_audio = None
    segments = []
    try:
        # Accept both a plain path and a tempfile-like wrapper with .name.
        input_path = file if isinstance(file, (str, os.PathLike)) else file.name

        reduced_audio = reduce_audio_quality(input_path)
        if not reduced_audio:
            yield "Nie udało się zmniejszyć rozmiaru pliku."
            return

        segments = split_audio_to_segments(reduced_audio, segment_length=30)
        texts = []
        for segment in segments:
            result = transcriber(segment)
            texts.append(result['text'])
            # Free disk space as soon as a segment has been consumed.
            os.remove(segment)
            yield " ".join(texts).strip()
    except Exception as e:  # boundary: surface any failure in the UI textbox
        yield f"Błąd: {e}"
    finally:
        # Runs on success, on error, and when the generator is closed early,
        # so no temporary file is leaked.
        for leftover in (*segments, reduced_audio):
            if leftover and os.path.exists(leftover):
                try:
                    os.remove(leftover)
                except OSError:
                    pass
|
|
|
|
|
def _split_for_translation(text, max_chars=500):
    """Split *text* into chunks of at most about *max_chars* characters, preferring sentence ends."""
    if len(text) <= max_chars:
        return [text]

    chunks = []
    current = ""
    for sentence in re.split(r"(?<=[.!?…])\s+", text.strip()):
        # A single over-long sentence is broken at word boundaries instead.
        pieces = textwrap.wrap(sentence, max_chars) if len(sentence) > max_chars else [sentence]
        for piece in pieces:
            if current and len(current) + 1 + len(piece) > max_chars:
                chunks.append(current)
                current = piece
            else:
                current = f"{current} {piece}" if current else piece
    if current:
        chunks.append(current)
    return chunks


def translate_text(text):
    """Translate text with the module-level ``translator`` pipeline.

    Blank input is returned as an empty string without invoking the model.
    Long input is translated in sentence-sized chunks, because the model only
    accepts a bounded number of tokens per call (NOTE(review): 512 for this
    checkpoint as far as I know; confirm against the model config).

    Args:
        text: The text to translate; may be empty or ``None``.

    Returns:
        The translated text, or a user-facing error message if translation
        fails.
    """
    if not text or not text.strip():
        return ""

    try:
        translated = [
            translator(chunk)[0]['translation_text'].strip()
            for chunk in _split_for_translation(text)
        ]
        return " ".join(translated).strip()
    except Exception as e:  # boundary: surface any failure in the UI textbox
        return f"Błąd podczas tłumaczenia: {e}"
|
|
|
|
|
# Gradio UI: upload a file, stream its transcription into an editable textbox,
# then translate the (possibly hand-corrected) text on demand.
with gr.Blocks() as app:
    gr.Markdown("## Whisper Medium - Transkrypcja i Tłumaczenie")
    gr.Markdown("Prześlij plik audio/wideo, wygeneruj transkrypcję, popraw ją ręcznie i przetłumacz na angielski.")

    with gr.Row():
        file_input = gr.File(label="Prześlij plik audio lub wideo (MOV, MP4, WAV, MP3)")
        transcribe_button = gr.Button("Wykonaj transkrypcję")

    transcription_output = gr.Textbox(label="Transkrypcja tekstowa (edytowalna)", lines=10)
    translate_button = gr.Button("Przetłumacz na angielski")
    translation_output = gr.Textbox(label="Tłumaczenie na angielski", lines=10)

    # Event listeners must be registered inside the Blocks context.
    transcribe_button.click(
        transcribe_audio_stream,
        inputs=file_input,
        outputs=transcription_output,
    )

    translate_button.click(
        translate_text,
        inputs=transcription_output,
        outputs=translation_output,
    )

# transcribe_audio_stream is a generator; Gradio 3.x only streams generator
# output when the request queue is enabled (newer releases enable it by
# default, where this call is harmless).
app.queue()
app.launch()
|
|