Spaces:

tomekstor9
/

openai-whisper-large-v3-turbo

Sleeping

App Files Files Community

tomekstor9 committed on Dec 17, 2024

Commit

2e3e70e

verified ·

1 Parent(s): 2ff7a4a

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -19

app.py CHANGED Viewed

@@ -9,56 +9,49 @@ transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-sma
 # Załaduj model do tłumaczenia na angielski
 translator = pipeline("translation", model="Helsinki-NLP/opus-mt-pl-en")
-# Funkcja konwersji audio do 16 kHz, obsługuje MOV, MP4 i inne formaty
 def convert_audio(audio):
     """Convert a media file to a 16 kHz mono WAV file.

     Args:
         audio: Path to the input file. ``.mov`` files are decoded with the
             "mov" format hint, ``.mp4``/``.m4a`` with "mp4"; any other
             extension is opened without a format hint.

     Returns:
         The path of the written WAV file ("converted.wav"), or ``None`` if
         anything went wrong (the error is printed).
     """
     try:
         _, ext = os.path.splitext(audio)
         ext = ext.lower()
         # Pick the decoder hint from the file extension.
         if ext == ".mov":
             segment = AudioSegment.from_file(audio, format="mov")
         elif ext in (".mp4", ".m4a"):
             segment = AudioSegment.from_file(audio, format="mp4")
         else:
             segment = AudioSegment.from_file(audio)
         # Resample to 16 kHz and downmix to a single channel.
         resampled = segment.set_frame_rate(16000).set_channels(1)
         wav_path = "converted.wav"
         resampled.export(wav_path, format="wav")
         return wav_path
     except Exception as e:
         print(f"Błąd konwersji pliku: {e}")
         return None
 # Funkcja transkrypcji i tłumaczenia
 def transcribe_and_translate(audio):
     """Transcribe an audio file and translate the transcript.

     Args:
         audio: Path of the uploaded file, forwarded to ``convert_audio``.

     Returns:
         A ``(transcription, translation)`` tuple. If conversion fails or an
         exception is raised, the first element is an error message and the
         second is an empty string.
     """
     try:
         wav_path = convert_audio(audio)
         if wav_path:
             transcription = transcriber(wav_path)['text']
             translated = translator(transcription)
             return transcription, translated[0]['translation_text']
         # Conversion reported failure by returning a falsy value.
         return "Nie udało się przetworzyć pliku audio.", ""
     except Exception as e:
         return f"Błąd: {e}", ""
-# Interfejs Gradio
 # Build the Gradio UI: one upload-only audio input, two text outputs.
 _audio_input = gr.Audio(sources=["upload"], type="filepath")
 _result_boxes = [
     gr.Textbox(label="Transkrypcja tekstowa"),    # original-language transcript
     gr.Textbox(label="Tłumaczenie na angielski"),  # English translation
 ]
 iface = gr.Interface(
     fn=transcribe_and_translate,
     inputs=_audio_input,
     outputs=_result_boxes,
     title="Whisper Small - Transkrypcja i Tłumaczenie",
     description="Aplikacja obsługująca pliki MOV, MP4 i inne formaty audio/wideo.",
 )
 iface.launch()

 # Załaduj model do tłumaczenia na angielski
 translator = pipeline("translation", model="Helsinki-NLP/opus-mt-pl-en")
+# Funkcja konwersji plików MOV, MP4 i innych formatów do audio 16 kHz
 def convert_audio(audio):
     """Convert a media file to a 16 kHz mono WAV file.

     Args:
         audio: Path to the input file. The extension selects the decoder
             hint: "mov" for ``.mov``, "mp4" for ``.mp4`` and ``.m4a``;
             other extensions are opened without a hint.

     Returns:
         The path of the written WAV file ("converted.wav"), or ``None`` if
         any step raised (the error is printed).
     """
     # Extension -> format hint passed to AudioSegment.from_file.
     container_formats = {".mov": "mov", ".mp4": "mp4", ".m4a": "mp4"}
     try:
         suffix = os.path.splitext(audio)[1].lower()
         container = container_formats.get(suffix)
         if container is None:
             clip = AudioSegment.from_file(audio)
         else:
             clip = AudioSegment.from_file(audio, format=container)
         # Resample to 16 kHz, then downmix to mono.
         clip = clip.set_frame_rate(16000)
         clip = clip.set_channels(1)
         output_path = "converted.wav"
         clip.export(output_path, format="wav")
         return output_path
     except Exception as e:
         print(f"Błąd konwersji: {e}")
         return None
 # Funkcja transkrypcji i tłumaczenia
 def transcribe_and_translate(audio):
     """Transcribe an uploaded media file and translate the transcript.

     Args:
         audio: The uploaded-file value handed over by the UI. Either an
             object exposing the file path as ``.name`` or a plain path
             string is accepted; ``None`` means nothing was uploaded.

     Returns:
         A ``(transcription, translation)`` tuple. If no file was given,
         conversion fails, or an exception is raised, the first element is
         an error message and the second is an empty string.
     """
     # Nothing uploaded: answer with the regular failure message instead of
     # surfacing an AttributeError text from ``None.name``.
     if audio is None:
         return "Nie udało się przetworzyć pliku.", ""
     try:
         # Use ``.name`` when present, otherwise treat the value as the path.
         source_path = getattr(audio, "name", audio)
         converted_audio = convert_audio(source_path)
         if not converted_audio:
             return "Nie udało się przetworzyć pliku.", ""
         result = transcriber(converted_audio)
         transcription = result['text']
         translation = translator(transcription)[0]['translation_text']
         return transcription, translation
     except Exception as e:
         # UI boundary: report the failure as text rather than crashing.
         return f"Błąd: {e}", ""
+# Interfejs Gradio z gr.File do przesyłania plików
 # Build the Gradio UI: a generic file upload feeding two text outputs.
 _file_input = gr.File(label="Prześlij plik audio lub wideo (MOV, MP4, WAV, MP3)")
 _result_boxes = [
     gr.Textbox(label="Transkrypcja tekstowa"),
     gr.Textbox(label="Tłumaczenie na angielski"),
 ]
 iface = gr.Interface(
     fn=transcribe_and_translate,
     inputs=_file_input,
     outputs=_result_boxes,
     title="Whisper Small - Transkrypcja i Tłumaczenie",
     description="Aplikacja obsługująca pliki MOV, MP4 i audio. Konwertuje plik na WAV i przetwarza treść.",
 )
 iface.launch()