Spaces:

tomekstor9
/

openai-whisper-large-v3-turbo

Sleeping

App Files Community

tomekstor9 committed on Dec 17, 2024

Commit

aa713df

verified ·

1 Parent(s): 2e3e70e

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -26

app.py CHANGED Viewed

@@ -9,40 +9,38 @@ transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-sma
 # Załaduj model do tłumaczenia na angielski
 translator = pipeline("translation", model="Helsinki-NLP/opus-mt-pl-en")
-# Funkcja konwersji plików MOV, MP4 i innych formatów do audio 16 kHz
-def convert_audio(audio):
-    try:
-        extension = os.path.splitext(audio)[1].lower()
-        if extension in [".mov", ".mp4", ".m4a"]:
-            sound = AudioSegment.from_file(audio, format="mov" if extension == ".mov" else "mp4")
-        else:
-            sound = AudioSegment.from_file(audio)
-        # Konwersja do WAV 16 kHz mono
-        sound = sound.set_frame_rate(16000).set_channels(1)
-        temp_file = "converted.wav"
-        sound.export(temp_file, format="wav")
-        return temp_file
-    except Exception as e:
-        print(f"Błąd konwersji: {e}")
-        return None
 # Funkcja transkrypcji i tłumaczenia
 def transcribe_and_translate(audio):
     try:
-        converted_audio = convert_audio(audio.name)  # Ścieżka do pliku
-        if not converted_audio:
-            return "Nie udało się przetworzyć pliku.", ""
-        result = transcriber(converted_audio)
-        transcription = result['text']
-        translation = translator(transcription)[0]['translation_text']
-        return transcription, translation
     except Exception as e:
         return f"Błąd: {e}", ""
-# Interfejs Gradio z gr.File do przesyłania plików
 iface = gr.Interface(
     fn=transcribe_and_translate,
     inputs=gr.File(label="Prześlij plik audio lub wideo (MOV, MP4, WAV, MP3)"),
@@ -51,7 +49,7 @@ iface = gr.Interface(
         gr.Textbox(label="Tłumaczenie na angielski")
     ],
     title="Whisper Small - Transkrypcja i Tłumaczenie",
-    description="Aplikacja obsługująca pliki MOV, MP4 i audio. Konwertuje plik na WAV i przetwarza treść."
 )
 iface.launch()

 # Załaduj model do tłumaczenia na angielski
 translator = pipeline("translation", model="Helsinki-NLP/opus-mt-pl-en")
+# Funkcja podziału pliku audio na segmenty 30-sekundowe
+def split_audio(audio_path, segment_length=30):
+    audio = AudioSegment.from_file(audio_path)
+    segments = []
+    for i in range(0, len(audio), segment_length * 1000):  # segment_length w milisekundach
+        segment = audio[i:i + segment_length * 1000]
+        temp_file = f"segment_{i // 1000}.wav"
+        segment.export(temp_file, format="wav")
+        segments.append(temp_file)
+    return segments
 # Funkcja transkrypcji i tłumaczenia
 def transcribe_and_translate(audio):
     try:
+        # Podziel plik na 30-sekundowe segmenty
+        segments = split_audio(audio.name)
+        full_transcription = ""
+        # Przetwarzanie każdego segmentu
+        for segment in segments:
+            result = transcriber(segment)
+            full_transcription += result['text'] + " "
+            os.remove(segment)  # Usuń segment po przetworzeniu
+        # Tłumaczenie na angielski
+        translation = translator(full_transcription)[0]['translation_text']
+        return full_transcription.strip(), translation.strip()
     except Exception as e:
         return f"Błąd: {e}", ""
+# Interfejs Gradio
 iface = gr.Interface(
     fn=transcribe_and_translate,
     inputs=gr.File(label="Prześlij plik audio lub wideo (MOV, MP4, WAV, MP3)"),
         gr.Textbox(label="Tłumaczenie na angielski")
     ],
     title="Whisper Small - Transkrypcja i Tłumaczenie",
+    description="Aplikacja obsługująca długie pliki MOV, MP4 i audio. Pliki są dzielone na segmenty 30-sekundowe."
 )
 iface.launch()