Spaces:

tomekstor9
/

openai-whisper-large-v3-turbo

Sleeping

App Files Files Community

tomekstor9 committed on Dec 17, 2024

Commit

da62944

verified ·

1 Parent(s): 933bac2

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -24

app.py CHANGED Viewed

@@ -9,42 +9,53 @@ transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-sma
 # Załaduj model do tłumaczenia na angielski
 translator = pipeline("translation", model="Helsinki-NLP/opus-mt-pl-en")
-# Funkcja konwersji pliku do mniejszego audio WAV (16 kHz, mono)
-def preprocess_and_reduce_audio(input_path):
     try:
-        # Wczytaj plik za pomocą Pydub
         audio = AudioSegment.from_file(input_path)
-        # Zmniejsz jakość dźwięku: 16 kHz, mono, mniejsza głośność (opcjonalnie)
-        audio = audio.set_frame_rate(16000).set_channels(1).set_sample_width(2)
-        # Zapisz zmniejszoną ścieżkę audio
-        output_path = "reduced_audio.wav"
-        audio.export(output_path, format="wav", bitrate="64k")  # Zmniejszona jakość
-        return output_path
     except Exception as e:
-        print(f"Błąd konwersji audio: {e}")
         return None
-# Funkcja transkrypcji i tłumaczenia
 def transcribe_and_translate(file):
     try:
-        # Konwersja pliku wejściowego na zoptymalizowaną ścieżkę audio
-        reduced_audio = preprocess_and_reduce_audio(file.name)
         if not reduced_audio:
-            return "Nie udało się przetworzyć pliku.", ""
-        # Transkrypcja audio
-        result = transcriber(reduced_audio)
-        transcription = result['text']
-        # Tłumaczenie na angielski
-        translation = translator(transcription)[0]['translation_text']
-        # Usuń tymczasowy plik po zakończeniu
-        os.remove(reduced_audio)
-        return transcription, translation
     except Exception as e:
         return f"Błąd: {e}", ""
@@ -57,7 +68,7 @@ iface = gr.Interface(
         gr.Textbox(label="Tłumaczenie na angielski")
     ],
     title="Whisper Small - Transkrypcja i Tłumaczenie",
-    description="Aplikacja konwertuje pliki MOV/MP4 do mniejszej ścieżki audio i wykonuje transkrypcję oraz tłumaczenie."
 )
 iface.launch()

 # Załaduj model do tłumaczenia na angielski
 translator = pipeline("translation", model="Helsinki-NLP/opus-mt-pl-en")
def reduce_audio_quality(input_path):
    """Convert a media file to a 16 kHz, mono, 16-bit PCM WAV file.

    The converted audio is written to a uniquely named temporary file so
    that concurrent requests cannot overwrite each other's output. The
    caller owns the returned file and is responsible for deleting it.

    Args:
        input_path: Path of the audio/video file to convert.

    Returns:
        The path of the converted WAV file, or ``None`` if loading or
        converting the input failed (the error is printed).
    """
    import os
    import tempfile

    reduced_path = None
    try:
        audio = AudioSegment.from_file(input_path)
        # 16 kHz sample rate, one channel, 2 bytes (16 bits) per sample.
        reduced_audio = audio.set_frame_rate(16000).set_channels(1).set_sample_width(2)

        fd, reduced_path = tempfile.mkstemp(prefix="reduced_audio_", suffix=".wav")
        os.close(fd)  # export() reopens the file by path

        # No bitrate argument: WAV output is uncompressed PCM, so a target
        # bitrate has no effect on it. export() returns the open output
        # file; close it so the handle is not left dangling.
        reduced_audio.export(reduced_path, format="wav").close()
        return reduced_path
    except Exception as e:
        print(f"Błąd podczas zmniejszania jakości pliku: {e}")
        # Do not leave a partially written temp file behind.
        if reduced_path is not None:
            try:
                os.remove(reduced_path)
            except OSError:
                pass
        return None
def split_audio_to_segments(input_path, segment_length=30):
    """Split an audio file into consecutive WAV segments of fixed duration.

    Each segment is written to its own uniquely named temporary file so
    that concurrent requests cannot clobber each other's segments. The
    caller owns the returned files and is responsible for deleting them.

    Args:
        input_path: Path of the audio file to split.
        segment_length: Duration of each segment in seconds; the last
            segment may be shorter.

    Returns:
        A list of segment file paths in playback order.

    Raises:
        ValueError: If ``segment_length`` is not positive.
    """
    import os
    import tempfile

    segment_ms = int(segment_length * 1000)  # audio is indexed in milliseconds
    if segment_ms <= 0:
        # Fail before decoding the file instead of silently returning [].
        raise ValueError("segment_length must be a positive number of seconds")

    audio = AudioSegment.from_file(input_path)
    segments = []
    try:
        for start_ms in range(0, len(audio), segment_ms):
            fd, segment_path = tempfile.mkstemp(
                prefix=f"segment_{start_ms // 1000}_", suffix=".wav"
            )
            os.close(fd)  # export() reopens the file by path
            segments.append(segment_path)
            chunk = audio[start_ms:start_ms + segment_ms]
            # export() returns the open output file; close it explicitly.
            chunk.export(segment_path, format="wav").close()
    except Exception:
        # Remove whatever was already written, then let the caller see
        # the original error.
        for path in segments:
            try:
                os.remove(path)
            except OSError:
                pass
        raise
    return segments
def _remove_quietly(path):
    """Delete *path*; cleanup is best-effort, so a failed delete is ignored."""
    try:
        os.remove(path)
    except OSError:
        pass


def transcribe_and_translate(file):
    """Transcribe an uploaded media file and translate the text to English.

    The input is converted to reduced-quality WAV, split into segments,
    and each segment is transcribed and translated separately.

    Args:
        file: Uploaded file object; its ``name`` attribute is used as the
            path of the input file.

    Returns:
        A ``(transcription, translation)`` tuple of strings. On failure the
        first element carries the error message and the second is empty.
    """
    reduced_audio = None
    segments = []
    try:
        reduced_audio = reduce_audio_quality(file.name)
        if not reduced_audio:
            return "Nie udało się zmniejszyć rozmiaru pliku.", ""

        segments = split_audio_to_segments(reduced_audio)

        transcripts = []
        translations = []
        for segment in segments:
            text = transcriber(segment)['text']
            transcripts.append(text)
            # Translate per segment rather than the whole transcript at
            # once: translation models accept only a limited input length
            # (NOTE(review): confirm the exact limit for this model), so a
            # long recording would not fit in a single call.
            if text.strip():
                translations.append(translator(text)[0]['translation_text'].strip())

        return " ".join(transcripts).strip(), " ".join(translations).strip()
    except Exception as e:
        return f"Błąd: {e}", ""
    finally:
        # Always remove the temporary files, including when a step failed.
        for path in segments:
            _remove_quietly(path)
        if reduced_audio:
            _remove_quietly(reduced_audio)
         gr.Textbox(label="Tłumaczenie na angielski")
     ],
     title="Whisper Small - Transkrypcja i Tłumaczenie",
+    description="Aplikacja konwertuje pliki MOV/MP4 do mniejszej jakości audio, dzieli je na segmenty 30-sekundowe i wykonuje transkrypcję oraz tłumaczenie."
 )
 iface.launch()