# Spaces status: Build error
# File size: 2,987 Bytes
# Commits shown in the page gutter: c38543b, 054bba0
# (The page gutter numbered the original file's lines 1-71.)
import streamlit as st
import torch
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, MarianMTModel, MarianTokenizer, Wav2Vec2CTCTokenizer
import soundfile as sf
import tempfile
import numpy as np
# Load models and tokenizers
@st.cache_resource
def load_models():
    """Load the speech-recognition (ASR) and translation models for the app.

    Decorated with ``st.cache_resource`` so the loaded objects are reused
    instead of being rebuilt on every call.

    Returns:
        A 4-tuple ``(asr_processor, asr_model, translation_tokenizer,
        translation_model)``. If anything fails to load, the error is shown
        with ``st.error`` and ``(None, None, None, None)`` is returned, so
        callers must check for ``None`` before using the models.

    NOTE(review): the failure path returns normally, so the all-``None``
    tuple is presumably cached as well; confirm against the Streamlit
    caching docs and restart the app after fixing a load failure.
    """
    asr_checkpoint = "facebook/wav2vec2-large-xlsr-53"
    translation_checkpoint = "Helsinki-NLP/opus-mt-ur-de"

    try:
        # Imported here because this class is not among the file's top-level
        # transformers imports; keeping it inside the try block means an
        # import failure is reported like any other load failure.
        from transformers import Wav2Vec2FeatureExtractor

        # Load Wav2Vec2 for ASR (multilingual model for Urdu support).
        # NOTE(review): confirm this checkpoint ships a CTC vocabulary and a
        # fine-tuned CTC head; if it is only a pretrained base checkpoint the
        # tokenizer load will fail or the transcriptions will be meaningless.
        tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(asr_checkpoint)

        # The processor pairs a feature extractor with the tokenizer. The
        # extractor has to be loaded on its own: the previous code read it
        # from ``asr_processor`` before that name was assigned, which raised
        # UnboundLocalError on every call.
        feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(asr_checkpoint)
        asr_processor = Wav2Vec2Processor(
            feature_extractor=feature_extractor,
            tokenizer=tokenizer,
        )
        asr_model = Wav2Vec2ForCTC.from_pretrained(asr_checkpoint)

        # Load MarianMT for translation (Urdu to German).
        translation_tokenizer = MarianTokenizer.from_pretrained(translation_checkpoint)
        translation_model = MarianMTModel.from_pretrained(translation_checkpoint)

        return asr_processor, asr_model, translation_tokenizer, translation_model
    except Exception as e:
        # Top-level boundary for model loading: surface the problem in the UI
        # and hand back placeholders instead of crashing the script.
        st.error(f"Error loading models: {e}")
        return None, None, None, None
# Initialize models. load_models() returns four None placeholders when loading
# fails, so record whether the pipeline is usable before touching the models.
asr_processor, asr_model, translation_tokenizer, translation_model = load_models()
models_ready = all(
    model is not None
    for model in (asr_processor, asr_model, translation_tokenizer, translation_model)
)

# Streamlit app interface
st.title("Real-Time Urdu to German Voice Translator")
st.markdown("Upload an Urdu audio file in `.wav` format, and the app will transcribe and translate it.")

# File uploader
uploaded_file = st.file_uploader("Upload your Urdu audio file (16kHz .wav)", type=["wav"])

if uploaded_file is not None and not models_ready:
    # Without this guard the user would only see a confusing
    # "'NoneType' object is not callable" message from the generic handler.
    st.error("The models could not be loaded, so the audio cannot be processed.")
elif uploaded_file is not None:
    try:
        # Load and validate the audio. The upload is read in memory instead
        # of through a NamedTemporaryFile(delete=False), which left one stray
        # temp file on disk per upload. Rewind first in case the buffer was
        # already consumed.
        uploaded_file.seek(0)
        audio_input, sample_rate = sf.read(uploaded_file, dtype="float32")

        if sample_rate != 16000:
            st.error("Audio file must have a sampling rate of 16kHz.")
        else:
            # soundfile returns a (frames, channels) array for multi-channel
            # files; average the channels so the model gets a 1-D waveform.
            if audio_input.ndim > 1:
                audio_input = audio_input.mean(axis=1)

            st.info("Processing the audio...")

            # Step 1: Speech-to-Text (ASR)
            input_values = asr_processor(
                audio_input, return_tensors="pt", sampling_rate=16000
            ).input_values
            with torch.no_grad():
                logits = asr_model(input_values).logits
            predicted_ids = torch.argmax(logits, dim=-1)
            transcription = asr_processor.batch_decode(predicted_ids)[0]
            st.text(f"Transcribed Urdu Text: {transcription}")

            if not transcription.strip():
                # Nothing was recognized; translating an empty string would
                # only produce noise.
                st.warning("No speech was recognized in the audio, so there is nothing to translate.")
            else:
                # Step 2: Translate Text (Urdu to German)
                translation_inputs = translation_tokenizer(
                    transcription, return_tensors="pt", padding=True
                )
                translated = translation_model.generate(**translation_inputs)
                german_translation = translation_tokenizer.decode(
                    translated[0], skip_special_tokens=True
                )
                st.success(f"Translated German Text: {german_translation}")
    except Exception as e:
        # UI boundary: report any decoding or inference failure to the user
        # instead of showing a raw traceback.
        st.error(f"An error occurred: {e}")