Spaces:

tomekstor9
/

openai-whisper-large-v3-turbo

Sleeping

File size: 3,282 Bytes

10fe697
9fad0db
9da5321
2ff7a4a
10fe697
b13244b
8c2dbdd
42295c8
10dbd97
eef635f
 
3c41d0b
da62944
933bac2
 
da62944
 
b13244b
da62944
933bac2
3c41d0b
933bac2
9fad0db
69610ab
 
da62944
 
6838e5c
da62944
 
 
 
 
 
b26b92f
 
42295c8
da62944
933bac2
b26b92f
 
 
69610ab
da62944
b26b92f
da62944
647ae53
da62944
6838e5c
3c41d0b
6838e5c
647ae53
42295c8
b26b92f
647ae53
3c41d0b
647ae53
 
 
 
 
 
 
 
 
3c41d0b
b26b92f
 
647ae53
 
 
b26b92f
647ae53
 
 
 
b26b92f
647ae53
b26b92f
647ae53
 
 
42295c8
b26b92f
647ae53
 
 
 
 
9fad0db
647ae53

import os
import re
import tempfile

import gradio as gr
from pydub import AudioSegment
from transformers import pipeline

# Load the Whisper speech-recognition model. The translation model loaded
# below is Polish -> English, so decoding is pinned to Polish transcription
# rather than leaving the spoken language to be auto-detected.
transcriber = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-medium",
    generate_kwargs={"language": "polish", "task": "transcribe"},
)

# Load the Polish -> English translation model.
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-pl-en")

def reduce_audio_quality(input_path):
    """Convert an audio/video file to 16 kHz, mono, 16-bit WAV.

    The converted audio is written to a uniquely named temporary file so that
    concurrent requests cannot overwrite each other's intermediate output.
    The caller owns the returned file and is responsible for deleting it.

    Args:
        input_path: Path of the source media file, in any format
            ``AudioSegment.from_file`` can decode.

    Returns:
        Path of the converted WAV file, or ``None`` if loading or exporting
        failed (the error is printed and no temporary file is left behind).
    """
    reduced_path = None
    try:
        audio = AudioSegment.from_file(input_path)
        reduced_audio = audio.set_frame_rate(16000).set_channels(1).set_sample_width(2)

        # mkstemp creates the file atomically with a unique name; only the
        # path is needed here, so the descriptor is closed right away.
        fd, reduced_path = tempfile.mkstemp(prefix="reduced_audio_", suffix=".wav")
        os.close(fd)

        # NOTE(review): bitrate probably has no effect on uncompressed PCM
        # WAV output -- confirm against pydub before relying on it.
        reduced_audio.export(reduced_path, format="wav", bitrate="128k")
        return reduced_path
    except Exception as e:
        print(f"Błąd podczas zmniejszania jakości pliku: {e}")
        # Do not leave a partially written temporary file behind.
        if reduced_path is not None and os.path.exists(reduced_path):
            os.remove(reduced_path)
        return None

def split_audio_to_segments(input_path, segment_length=30):
    """Split an audio file into consecutive WAV segments of fixed length.

    Each segment is exported to its own uniquely named temporary file, so
    parallel requests never share or overwrite segment files. The caller owns
    the returned files and is responsible for deleting them.

    Args:
        input_path: Path of the audio file to split.
        segment_length: Length of each segment in seconds (the last segment
            may be shorter). Must be positive; fractional values are allowed.

    Returns:
        List of segment file paths in playback order.

    Raises:
        ValueError: If ``segment_length`` does not amount to at least one
            millisecond.
    """
    # Validate before touching the file so a bad length fails fast and clearly.
    step_ms = int(segment_length * 1000)
    if step_ms <= 0:
        raise ValueError("segment_length must be a positive number of seconds")

    audio = AudioSegment.from_file(input_path)
    segments = []
    try:
        # len(audio) is the duration in milliseconds; slices use milliseconds too.
        for start_ms in range(0, len(audio), step_ms):
            fd, segment_path = tempfile.mkstemp(
                prefix=f"segment_{start_ms // 1000}_", suffix=".wav"
            )
            os.close(fd)
            segments.append(segment_path)
            audio[start_ms:start_ms + step_ms].export(segment_path, format="wav")
    except Exception:
        # The caller never learns about these paths, so clean them up here.
        for path in segments:
            if os.path.exists(path):
                os.remove(path)
        raise
    return segments

def transcribe_audio_stream(file):
    """Transcribe an uploaded media file, yielding the text as it grows.

    The file is converted with ``reduce_audio_quality``, split into 30-second
    segments, and each segment is passed to ``transcriber``. After every
    segment the transcription accumulated so far is yielded, so the consumer
    can display partial results.

    Args:
        file: The uploaded file, either an object exposing the path as
            ``.name`` or the path itself; ``None`` when nothing was uploaded.

    Yields:
        The cumulative transcription after each segment, or a single
        user-facing error message if processing fails.
    """
    if file is None:
        yield "Nie przesłano pliku."
        return

    # NOTE(review): presumably the upload arrives as a tempfile-like object
    # or as a plain path string depending on the Gradio version -- both work.
    input_path = getattr(file, "name", file)

    reduced_audio = None
    segments = []
    try:
        reduced_audio = reduce_audio_quality(input_path)
        if not reduced_audio:
            yield "Nie udało się zmniejszyć rozmiaru pliku."
            return

        segments = split_audio_to_segments(reduced_audio, segment_length=30)
        texts = []

        for segment in segments:
            result = transcriber(segment)
            texts.append(result['text'])
            os.remove(segment)  # free disk space as soon as a segment is done
            yield " ".join(texts).strip()  # show the partial transcription
    except Exception as e:
        yield f"Błąd: {e}"
    finally:
        # Runs on success, on error, and when the consumer closes the
        # generator early, so no intermediate file outlives the request.
        for leftover in (*segments, reduced_audio):
            if leftover:
                try:
                    os.remove(leftover)
                except OSError:
                    pass  # already removed above, or not removable

def _split_text_for_translation(text, max_chars=400):
    """Pack the sentences of *text* into chunks of roughly *max_chars* characters."""
    # Split after sentence-ending punctuation; the punctuation stays attached.
    sentences = re.split(r"(?<=[.!?…])\s+", text)
    chunks = []
    current = ""
    for sentence in sentences:
        if not sentence:
            continue
        candidate = f"{current} {sentence}" if current else sentence
        if current and len(candidate) > max_chars:
            chunks.append(current)
            current = sentence
        else:
            current = candidate
    if current:
        chunks.append(current)
    return chunks


def translate_text(text):
    """Translate text with the module-level ``translator`` pipeline.

    The text is translated in sentence-aligned chunks rather than in a single
    call, so a long transcript does not have to fit into one model input.

    Args:
        text: Text to translate; ``None`` or blank text is allowed.

    Returns:
        The translated text, an empty string for empty input, or a
        user-facing error message if translation fails.
    """
    # Nothing to translate: skip the model call entirely.
    if not text or not text.strip():
        return ""
    try:
        # NOTE(review): the 400-character budget is a heuristic meant to stay
        # well below the translation model's input limit -- tune if needed.
        chunks = _split_text_for_translation(text.strip())
        translated = [
            translator(chunk)[0]['translation_text'].strip() for chunk in chunks
        ]
        return " ".join(translated).strip()
    except Exception as e:
        return f"Błąd podczas tłumaczenia: {e}"

# Gradio interface: upload a file, stream its transcription into an editable
# textbox, then translate the (possibly hand-corrected) text.
with gr.Blocks() as app:
    gr.Markdown("## Whisper Medium - Transkrypcja i Tłumaczenie")
    gr.Markdown("Prześlij plik audio/wideo, wygeneruj transkrypcję, popraw ją ręcznie i przetłumacz na angielski.")

    with gr.Row():
        file_input = gr.File(label="Prześlij plik audio lub wideo (MOV, MP4, WAV, MP3)")
        transcribe_button = gr.Button("Wykonaj transkrypcję")

    transcription_output = gr.Textbox(label="Transkrypcja tekstowa (edytowalna)", lines=10)
    translate_button = gr.Button("Przetłumacz na angielski")
    translation_output = gr.Textbox(label="Tłumaczenie na angielski", lines=10)

    # Streamed transcription: the handler is a generator that yields the
    # cumulative text after each processed segment.
    transcribe_button.click(
        transcribe_audio_stream,
        inputs=file_input,
        outputs=transcription_output
    )

    # Translate whatever is currently in the transcription box, including
    # any manual corrections.
    translate_button.click(
        translate_text,
        inputs=transcription_output,
        outputs=translation_output
    )

# NOTE(review): older Gradio releases only accept generator handlers when the
# queue is enabled; enabling it explicitly keeps streaming working there --
# confirm against the pinned Gradio version.
app.queue()
app.launch()