import streamlit as st
import torch
from transformers import Wav2Vec2Processor, Wav2Vec2FeatureExtractor, Wav2Vec2ForCTC, MarianMTModel, MarianTokenizer, Wav2Vec2CTCTokenizer
import soundfile as sf
import tempfile
import numpy as np

# Load models and tokenizers
@st.cache_resource
def load_models():
    try:
        # Load Wav2Vec2 for ASR (multilingual XLSR model, used here for Urdu).
        # Note: facebook/wav2vec2-large-xlsr-53 is the pre-trained base checkpoint;
        # in practice a checkpoint fine-tuned for Urdu CTC (with its own vocabulary)
        # is needed for usable transcriptions, but the loading pattern is the same.
        tokenizer = Wav2Vec2CTCTokenizer.from_pretrained("facebook/wav2vec2-large-xlsr-53")
        # Load the feature extractor separately, then combine both into a processor.
        feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("facebook/wav2vec2-large-xlsr-53")
        asr_processor = Wav2Vec2Processor(feature_extractor=feature_extractor, tokenizer=tokenizer)
        asr_model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-xlsr-53")

        # Load MarianMT for translation (Urdu to German)
        translation_tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-ur-de")
        translation_model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-ur-de")

        return asr_processor, asr_model, translation_tokenizer, translation_model

    except Exception as e:
        st.error(f"Error loading models: {e}")
        return None, None, None, None


# Initialize models
asr_processor, asr_model, translation_tokenizer, translation_model = load_models()
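
# Optional guard (a sketch): load_models() returns None values on failure,
# so stop the Streamlit script here rather than failing later with attribute errors.
if asr_model is None or translation_model is None:
    st.stop()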

# Streamlit app interface
st.title("Real-Time Urdu to German Voice Translator")
st.markdown("Upload an Urdu audio file in `.wav` format, and the app will transcribe and translate it.")

# File uploader
uploaded_file = st.file_uploader("Upload your Urdu audio file (16kHz .wav)", type=["wav"])

if uploaded_file is not None:
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        temp_file.write(uploaded_file.read())
        temp_file_path = temp_file.name

    try:
        # Load and validate audio file
        audio_input, sample_rate = sf.read(temp_file_path)
        if sample_rate != 16000:
            st.error("Audio file must have a sampling rate of 16kHz.")
        else:
            st.info("Processing the audio...")
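
            # Optional pre-processing (sketch): soundfile returns a 2-D array for
            # stereo files, while Wav2Vec2 expects a mono waveform, so average
            # the channels if necessary.
            if audio_input.ndim > 1:
                audio_input = np.mean(audio_input, axis=1)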

            # Step 1: Speech-to-Text (ASR)
            input_values = asr_processor(audio_input, return_tensors="pt", sampling_rate=16000).input_values
            with torch.no_grad():
                logits = asr_model(input_values).logits
                predicted_ids = torch.argmax(logits, dim=-1)
                transcription = asr_processor.batch_decode(predicted_ids)[0]

            st.text(f"Transcribed Urdu Text: {transcription}")

            # Step 2: Translate Text (Urdu to German)
            translated = translation_model.generate(**translation_tokenizer(transcription, return_tensors="pt", padding=True))
            german_translation = translation_tokenizer.decode(translated[0], skip_special_tokens=True)

            st.success(f"Translated German Text: {german_translation}")

    except Exception as e:
        st.error(f"An error occurred: {e}")