Spaces:

tomekstor9
/

openai-whisper-large-v3-turbo

Sleeping

App Files Files Community

tomekstor9 committed on Dec 18, 2024

Commit

647ae53

verified ·

1 Parent(s): 69610ab

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -33

app.py CHANGED Viewed

@@ -5,7 +5,7 @@ import os
 import re
 # Załaduj mniejszy model Whisper do transkrypcji
-transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
 # Załaduj model do tłumaczenia na angielski
 translator = pipeline("translation", model="Helsinki-NLP/opus-mt-pl-en")
@@ -42,49 +42,64 @@ def clean_text(text):
     text = text.strip()  # Usuń białe znaki na początku i końcu
     return text
-# Funkcja przetwarzania pliku z użyciem Gradio Streaming
-def transcribe_and_translate_stream(file):
     try:
-        # Zmniejszenie jakości audio
         reduced_audio = reduce_audio_quality(file.name)
         if not reduced_audio:
-            yield "Nie udało się zmniejszyć rozmiaru pliku.", ""
-            return
-        # Podziel plik na segmenty 30-sekundowe
         segments = split_audio_to_segments(reduced_audio, segment_length=30)
         full_transcription = ""
-        # Przetwarzanie każdego segmentu
         for segment in segments:
-            result = transcriber(segment, return_timestamps=True)
             full_transcription += result['text'] + " "
             os.remove(segment)  # Usuń segment po przetworzeniu
-            # Wyświetl częściowy wynik na bieżąco
-            cleaned_transcription = clean_text(full_transcription.strip())
-            yield cleaned_transcription, ""
-        os.remove(reduced_audio)  # Usuń zmniejszony plik
-        # Przetłumacz całą oczyszczoną transkrypcję
-        cleaned_transcription = clean_text(full_transcription.strip())
-        translation = translator(cleaned_transcription)[0]['translation_text']
-        yield cleaned_transcription, translation.strip()
     except Exception as e:
-        yield f"Błąd: {e}", ""
-# Interfejs Gradio ze streamingiem
-iface = gr.Interface(
-    fn=transcribe_and_translate_stream,
-    inputs=gr.File(label="Prześlij plik audio lub wideo (MOV, MP4, WAV, MP3)"),
-    outputs=[
-        gr.Textbox(label="Transkrypcja tekstowa"),
-        gr.Textbox(label="Tłumaczenie na angielski")
-    ],
-    title="Whisper Tiny - Transkrypcja i Tłumaczenie",
-    description="Aplikacja konwertuje pliki MOV/MP4 do mniejszej jakości audio, dzieli je na segmenty 60-sekundowe i wykonuje transkrypcję oraz tłumaczenie na angielski."
-)
-iface.launch()

 import re
 # Załaduj mniejszy model Whisper do transkrypcji
+transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-small")
 # Załaduj model do tłumaczenia na angielski
 translator = pipeline("translation", model="Helsinki-NLP/opus-mt-pl-en")
     text = text.strip()  # Usuń białe znaki na początku i końcu
     return text
def _remove_quietly(path):
    """Delete *path* on a best-effort basis.

    Cleanup of temporary audio files must never hide the transcription
    result (or the original error), so a failed removal is ignored.
    """
    try:
        os.remove(path)
    except OSError:
        # Deliberate best-effort: the file may already be gone or locked.
        pass


# Transcribe an uploaded audio/video file
def transcribe_audio(file):
    """Transcribe an uploaded audio/video file and return the cleaned text.

    The upload is first passed to ``reduce_audio_quality``, the result is
    split into 30-second segments with ``split_audio_to_segments``, each
    segment is run through ``transcriber``, and the joined text is passed
    through ``clean_text``.

    Args:
        file: The value delivered by the ``gr.File`` input. Either an object
            exposing the path as ``.name`` or a plain ``str`` path; ``None``
            when nothing was uploaded.

    Returns:
        The cleaned transcription, or a user-facing (Polish) error message.
        Exceptions are not propagated: this function is the UI boundary and
        reports failures as text in the output box.
    """
    if file is None:
        # Clear message instead of the AttributeError text for ``None.name``.
        return "Błąd: nie przesłano pliku."

    # Accept both file-like objects (``.name``) and plain string paths.
    source_path = getattr(file, "name", file)

    reduced_audio = None
    segments = []
    try:
        reduced_audio = reduce_audio_quality(source_path)
        if not reduced_audio:
            return "Nie udało się zmniejszyć rozmiaru pliku."

        # Materialise the segment paths up front so that every one of them
        # can be removed in ``finally`` even if transcription fails midway.
        segments = list(split_audio_to_segments(reduced_audio, segment_length=30))

        texts = [transcriber(segment)['text'] for segment in segments]
        return clean_text(" ".join(texts).strip())
    except Exception as e:
        return f"Błąd: {e}"
    finally:
        # Always remove temporary files, on success and on failure alike.
        for segment in segments:
            _remove_quietly(segment)
        if reduced_audio:
            _remove_quietly(reduced_audio)
# Translate the (possibly user-edited) transcription
def translate_text(text):
    """Translate *text* with the module-level ``translator`` pipeline.

    Args:
        text: The text to translate, as delivered by the transcription
            textbox. ``None`` or blank text is treated as "nothing to do".

    Returns:
        The stripped translation, an empty string for empty/blank input, or
        a user-facing (Polish) error message if the translation call fails.
    """
    # Do not send empty input to the model: there is nothing to translate,
    # and the output box should simply stay empty.
    if not text or not text.strip():
        return ""

    # NOTE(review): very long transcripts may exceed the model's input
    # limit; chunking by sentence may be needed — confirm against the model.
    try:
        return translator(text)[0]['translation_text'].strip()
    except Exception as e:
        return f"Błąd podczas tłumaczenia: {e}"
# Gradio interface: upload -> transcribe -> (optionally edit) -> translate
with gr.Blocks() as app:
    # Page title followed by a short description of the workflow
    gr.Markdown("## Whisper Small - Transkrypcja i Tłumaczenie")
    gr.Markdown(
        "Aplikacja wykonuje transkrypcję plików audio/wideo za pomocą Whisper Small. "
        "Użytkownik może poprawić wygenerowany tekst przed jego przetłumaczeniem na język angielski."
    )

    # Top row: file upload next to the button that starts transcription
    with gr.Row():
        file_input = gr.File(
            label="Prześlij plik audio lub wideo (MOV, MP4, WAV, MP3)",
        )
        transcribe_button = gr.Button("Wykonaj transkrypcję")

    # Transcription box, the translate button, then the translation box
    transcription_output = gr.Textbox(
        label="Transkrypcja tekstowa (edytowalna)",
        lines=10,
    )
    translate_button = gr.Button("Przetłumacz na angielski")
    translation_output = gr.Textbox(
        label="Tłumaczenie na angielski",
        lines=10,
    )

    # Uploaded file -> transcription textbox
    transcribe_button.click(
        fn=transcribe_audio,
        inputs=[file_input],
        outputs=[transcription_output],
    )

    # Current content of the transcription textbox -> translation textbox
    translate_button.click(
        fn=translate_text,
        inputs=[transcription_output],
        outputs=[translation_output],
    )

app.launch()