Spaces:

adnaniqbal001
/

Translation_app

Build error

App Files Files Community

adnaniqbal001 committed on Dec 21, 2024

Commit

054bba0

verified ·

1 Parent(s): 92f1a16

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -31

app.py CHANGED Viewed

@@ -1,57 +1,71 @@
-# app.py
 import streamlit as st
 import torch
-from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, MarianMTModel, MarianTokenizer
 import soundfile as sf
 import tempfile
 # Load models and tokenizers
 @st.cache_resource
 def load_models():
-    # Load ASR model (Wav2Vec2 for Urdu)
-    asr_processor = Wav2Vec2Processor.from_pretrained("m3hrdadfi/wav2vec2-large-xlsr-ur")
-    asr_model = Wav2Vec2ForCTC.from_pretrained("m3hrdadfi/wav2vec2-large-xlsr-ur")
-    # Load translation model (Urdu to German)
-    translation_tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-ur-de")
-    translation_model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-ur-de")
-    return asr_processor, asr_model, translation_tokenizer, translation_model
 asr_processor, asr_model, translation_tokenizer, translation_model = load_models()
-# Streamlit App UI
 st.title("Real-Time Urdu to German Voice Translator")
-st.markdown("Upload an Urdu audio file, and the app will translate it to German.")
-uploaded_file = st.file_uploader("Upload an audio file (in .wav format)", type=["wav"])
 if uploaded_file is not None:
     with tempfile.NamedTemporaryFile(delete=False) as temp_file:
         temp_file.write(uploaded_file.read())
         temp_file_path = temp_file.name
-    # Load audio file
-    audio_input, sample_rate = sf.read(temp_file_path)
-    # Ensure proper sampling rate
-    if sample_rate != 16000:
-        st.error("Please upload a .wav file with a sampling rate of 16kHz.")
-    else:
-        st.info("Processing the audio...")
-        # Convert speech to text (ASR)
-        input_values = asr_processor(audio_input, return_tensors="pt", sampling_rate=16000).input_values
-        with torch.no_grad():
-            logits = asr_model(input_values).logits
-            predicted_ids = torch.argmax(logits, dim=-1)
-            transcription = asr_processor.batch_decode(predicted_ids)[0]
-        st.text(f"Transcribed Urdu Text: {transcription}")
-        # Translate Urdu text to German
-        translated = translation_model.generate(**translation_tokenizer(transcription, return_tensors="pt", padding=True))
-        german_translation = translation_tokenizer.decode(translated[0], skip_special_tokens=True)
-        st.success(f"Translated German Text: {german_translation}")

 import streamlit as st
 import torch
+from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, MarianMTModel, MarianTokenizer, Wav2Vec2CTCTokenizer
 import soundfile as sf
+import os
 import tempfile
+import numpy as np
 # Load models and tokenizers
 @st.cache_resource
 def load_models():
+    """Load the speech-recognition and translation models used by the app.
+
+    Returns:
+        A 4-tuple ``(asr_processor, asr_model, translation_tokenizer,
+        translation_model)``. If any component fails to load, the error is
+        reported with ``st.error`` and ``(None, None, None, None)`` is
+        returned instead, so callers must check for ``None`` before use.
+    """
+    # Imported here so the function stays self-contained and does not rely on
+    # the file-level import list being extended.
+    from transformers import Wav2Vec2FeatureExtractor
+
+    asr_checkpoint = "facebook/wav2vec2-large-xlsr-53"
+    translation_checkpoint = "Helsinki-NLP/opus-mt-ur-de"
+    try:
+        # Load Wav2Vec2 for ASR (multilingual checkpoint).
+        # NOTE(review): this looks like the pretrained base checkpoint; it may
+        # not ship a CTC vocabulary for the tokenizer - confirm, or switch to
+        # a checkpoint fine-tuned for Urdu.
+        tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(asr_checkpoint)
+        # The feature extractor must be loaded from the checkpoint. The earlier
+        # code read it from `asr_processor` before that name was assigned,
+        # which raised UnboundLocalError on every call.
+        feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(asr_checkpoint)
+        asr_processor = Wav2Vec2Processor(feature_extractor=feature_extractor, tokenizer=tokenizer)
+        asr_model = Wav2Vec2ForCTC.from_pretrained(asr_checkpoint)
+        # Load MarianMT for translation (Urdu to German)
+        translation_tokenizer = MarianTokenizer.from_pretrained(translation_checkpoint)
+        translation_model = MarianMTModel.from_pretrained(translation_checkpoint)
+        return asr_processor, asr_model, translation_tokenizer, translation_model
+    except Exception as e:
+        # Top-level boundary for the UI: surface the failure to the user and
+        # hand back placeholders rather than crashing the script.
+        st.error(f"Error loading models: {e}")
+        return None, None, None, None
+# Initialize models
 asr_processor, asr_model, translation_tokenizer, translation_model = load_models()
+# load_models() returns None placeholders when loading fails, so record whether
+# the full pipeline is usable before any audio is processed.
+models_ready = all(
+    model is not None
+    for model in (asr_processor, asr_model, translation_tokenizer, translation_model)
+)
+# Streamlit app interface
 st.title("Real-Time Urdu to German Voice Translator")
+st.markdown("Upload an Urdu audio file in `.wav` format, and the app will transcribe and translate it.")
+# File uploader
+uploaded_file = st.file_uploader("Upload your Urdu audio file (16kHz .wav)", type=["wav"])
 if uploaded_file is not None:
     with tempfile.NamedTemporaryFile(delete=False) as temp_file:
         temp_file.write(uploaded_file.read())
         temp_file_path = temp_file.name
+    try:
+        if not models_ready:
+            st.error("Models could not be loaded, so the audio cannot be processed.")
+        else:
+            # Load and validate audio file
+            audio_input, sample_rate = sf.read(temp_file_path)
+            # soundfile returns a (frames, channels) array for multi-channel
+            # files; a 2-D array would be read as a batch by the processor,
+            # so down-mix to a single 1-D waveform first.
+            if audio_input.ndim > 1:
+                audio_input = np.mean(audio_input, axis=1)
+            if sample_rate != 16000:
+                st.error("Audio file must have a sampling rate of 16kHz.")
+            else:
+                st.info("Processing the audio...")
+                # Step 1: Speech-to-Text (ASR)
+                input_values = asr_processor(audio_input, return_tensors="pt", sampling_rate=16000).input_values
+                with torch.no_grad():
+                    logits = asr_model(input_values).logits
+                    predicted_ids = torch.argmax(logits, dim=-1)
+                    transcription = asr_processor.batch_decode(predicted_ids)[0]
+                st.text(f"Transcribed Urdu Text: {transcription}")
+                # Step 2: Translate Text (Urdu to German)
+                translated = translation_model.generate(**translation_tokenizer(transcription, return_tensors="pt", padding=True))
+                german_translation = translation_tokenizer.decode(translated[0], skip_special_tokens=True)
+                st.success(f"Translated German Text: {german_translation}")
+    except Exception as e:
+        st.error(f"An error occurred: {e}")
+    finally:
+        # The temp file was created with delete=False, so it must be removed
+        # explicitly or every upload leaves a file behind.
+        os.remove(temp_file_path)