|
import streamlit as st |
|
from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_audioclips |
|
import whisper |
|
from transformers import MBartForConditionalGeneration, MBartTokenizer |
|
from gtts import gTTS |
|
import torch |
|
import tempfile |
|
import os |
|
import numpy as np |
|
from pydub import AudioSegment |
|
import librosa |
|
import warnings |
|
warnings.filterwarnings('ignore') |
|
|
|
|
|
@st.cache_resource
def load_models():
    """Load the speech-recognition and translation models.

    The function is wrapped in ``st.cache_resource`` so the (large) models are
    created once and reused on later calls instead of being reloaded on every
    Streamlit rerun.

    Returns:
        tuple: ``(whisper_model, tokenizer, model)`` where ``whisper_model`` is
        the Whisper "large" checkpoint, and ``tokenizer`` / ``model`` are the
        tokenizer and seq2seq model for
        ``facebook/mbart-large-50-many-to-many-mmt``.
    """
    # The checkpoint below is an MBart-50 model. The plain MBartTokenizer only
    # registers the original 25 mBART language codes, so looking up "ta_IN" in
    # its ``lang_code_to_id`` fails; the MBart-50 tokenizer knows all of the
    # checkpoint's languages, Tamil included. Imported locally so this block
    # does not depend on a change to the file-level import list.
    from transformers import MBart50Tokenizer

    mbart_checkpoint = "facebook/mbart-large-50-many-to-many-mmt"

    whisper_model = whisper.load_model("large")
    tokenizer = MBart50Tokenizer.from_pretrained(mbart_checkpoint)
    model = MBartForConditionalGeneration.from_pretrained(mbart_checkpoint)
    return whisper_model, tokenizer, model
|
|
|
|
|
# Language identifiers and audio defaults for the Tamil target, one entry per
# library that needs its own spelling of the language.
TAMIL_CONFIG = dict(
    code="ta",               # generic two-letter language code
    whisper_code="tamil",    # language name in Whisper's vocabulary
    mbart_code="ta_IN",      # MBart language tag
    gtts_code="ta",          # gTTS language code
    voice_speed=1.1,         # default value of the "Voice Speed" slider
    sample_rate=22050,       # rate (Hz) used when extracting the audio track
)
|
|
|
|
|
# Configure the browser tab before any page element is rendered.
# "\U0001F3A5" is the movie-camera emoji; the previous literal was the same
# character after an encoding round-trip had garbled it. It is written as an
# escape so it survives being saved in a non-UTF-8 encoding.
st.set_page_config(
    page_title="Tamil Video Dubbing AI",
    page_icon="\U0001F3A5",
    layout="wide",
)
|
|
|
def create_custom_style():
    """Inject the app's CSS overrides into the current page.

    Sets the app background colour, pads the main container and restyles
    buttons. The stylesheet is passed to ``st.markdown`` with
    ``unsafe_allow_html=True`` so the ``<style>`` tag is emitted as HTML.
    """
    stylesheet = """
    <style>
    .stApp {
        background-color: #f5f5f5;
    }
    .main {
        padding: 2rem;
    }
    .stButton>button {
        background-color: #FF4B4B;
        color: white;
        font-weight: bold;
    }
    </style>
    """
    st.markdown(stylesheet, unsafe_allow_html=True)
|
|
|
# Apply the CSS overrides at module level, i.e. on every script run and before
# main() draws any widgets.
create_custom_style()
|
|
|
def _split_for_translation(text, max_chars):
    """Split *text* into sentence-aligned chunks of at most ~*max_chars* characters."""
    import re
    import textwrap

    max_chars = max(1, int(max_chars))

    pieces = []
    # Break after sentence-ending punctuation that is followed by whitespace.
    for sentence in re.split(r"(?<=[.!?])\s+", text.strip()):
        if not sentence:
            continue
        if len(sentence) > max_chars:
            # An unpunctuated run longer than the budget is cut on word
            # boundaries so no single chunk grows without bound.
            pieces.extend(
                textwrap.wrap(
                    sentence,
                    max_chars,
                    break_long_words=False,
                    break_on_hyphens=False,
                )
            )
        else:
            pieces.append(sentence)

    # Greedily pack consecutive pieces so each model call gets as much context
    # as fits in the budget.
    chunks = []
    current = ""
    for piece in pieces:
        if not current:
            current = piece
        elif len(current) + 1 + len(piece) <= max_chars:
            current = f"{current} {piece}"
        else:
            chunks.append(current)
            current = piece
    if current:
        chunks.append(current)
    return chunks


def translate_text(text, tokenizer, model, max_chunk_chars=400):
    """Translate *text* into Tamil with an MBart model.

    The text is translated chunk by chunk rather than in a single call: one
    call is capped at 512 input tokens, so a long transcript passed in whole
    would lose everything after the cap without any error.

    Args:
        text: Source text to translate. Empty or whitespace-only text yields
            an empty string and the models are not invoked.
        tokenizer: MBart tokenizer; must expose ``lang_code_to_id`` with a
            ``"ta_IN"`` entry and ``batch_decode``.
        model: MBart seq2seq model providing ``generate``.
        max_chunk_chars: Approximate upper bound, in characters, for the text
            sent to the model in one call.

    Returns:
        str: The translated chunks joined with single spaces.

    Raises:
        KeyError: If the tokenizer has no ``"ta_IN"`` language code.
    """
    if not text or not text.strip():
        return ""

    target_lang_id = tokenizer.lang_code_to_id["ta_IN"]

    translated_parts = []
    for chunk in _split_for_translation(text, max_chunk_chars):
        # truncation stays on as a safety net for a pathological chunk (for
        # example one enormous "word") that still exceeds the token limit.
        inputs = tokenizer(
            chunk,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512,
        )
        translated_tokens = model.generate(
            **inputs,
            forced_bos_token_id=target_lang_id,
            num_beams=5,
            length_penalty=1.0,
            max_length=512,
            min_length=0,
            # Plain beam search: sampling made the same video produce a
            # different script on every run.
            do_sample=False,
        )
        decoded = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
        translated_parts.append(decoded.strip())

    return " ".join(part for part in translated_parts if part)
|
|
|
def process_audio_for_sync(audio_path, target_speed=1.0):
    """Load an audio file and optionally change its playback speed.

    The speed change relabels the raw samples with a frame rate scaled by
    ``target_speed`` and then converts the result back to the file's original
    frame rate, so the returned segment is faster or slower but keeps the
    original rate. NOTE(review): this kind of resampling is expected to shift
    pitch along with tempo.

    Args:
        audio_path: Path of the audio file to load.
        target_speed: Playback-speed multiplier; ``1.0`` returns the audio
            unchanged.

    Returns:
        AudioSegment: The loaded (and possibly re-timed) audio.

    Raises:
        ValueError: If ``target_speed`` is not a positive number.
    """
    # Reject bad multipliers before touching the file. ``not x > 0`` also
    # catches NaN; a zero or negative value would otherwise yield an invalid
    # frame rate and fail later with an unrelated-looking error.
    if not target_speed > 0:
        raise ValueError(
            f"target_speed must be a positive number, got {target_speed!r}"
        )

    audio = AudioSegment.from_file(audio_path)
    if target_speed == 1.0:
        return audio

    original_rate = audio.frame_rate
    retimed = audio._spawn(
        audio.raw_data,
        overrides={"frame_rate": int(original_rate * target_speed)},
    )
    return retimed.set_frame_rate(original_rate)
|
|
|
def _dub_video(video_file, voice_speed, whisper_model, tokenizer, translation_model):
    """Run the dubbing pipeline for one upload and return ``(mp4_bytes, tamil_script)``.

    Steps: extract the audio track, transcribe it, translate the transcript,
    synthesise Tamil speech, adjust its speed and mux it onto the video.
    Progress is reported through a Streamlit progress bar and status line.

    Raises:
        ValueError: If the video has no audio track or no speech is found.
    """
    progress_bar = st.progress(0)
    status_text = st.empty()

    # A single private scratch directory replaces the individual
    # tempfile.mktemp() paths: mktemp only returns a name (another process can
    # claim it first), and the directory is removed on exit even when a step
    # fails, so nothing is left behind.
    with tempfile.TemporaryDirectory(prefix="tamil_dub_") as work_dir:
        extension = os.path.splitext(video_file.name)[1].lower() or ".mp4"
        source_path = os.path.join(work_dir, f"source{extension}")
        with open(source_path, "wb") as f:
            # getvalue() returns the whole upload regardless of where the
            # stream position currently is.
            f.write(video_file.getvalue())

        status_text.text("Extracting audio...")
        video = VideoFileClip(source_path)
        dubbed_track = None
        try:
            if video.audio is None:
                raise ValueError("The uploaded video has no audio track to dub.")
            audio_path = os.path.join(work_dir, "original.wav")
            video.audio.write_audiofile(audio_path, fps=TAMIL_CONFIG['sample_rate'])
            progress_bar.progress(20)

            status_text.text("Transcribing audio...")
            # No `language` argument: Whisper detects the spoken language.
            # That option describes the language of the *input* audio, so
            # forcing Tamil (the dubbing target) mis-described the source.
            # NOTE(review): translate_text does not set a source language on
            # the tokenizer, so the tokenizer's default applies - confirm that
            # matches the videos you expect.
            result = whisper_model.transcribe(audio_path)
            original_text = result["text"].strip()
            if not original_text:
                raise ValueError("No speech was detected in the video.")
            progress_bar.progress(40)

            status_text.text("Translating to Tamil...")
            translated_text = translate_text(original_text, tokenizer, translation_model)
            progress_bar.progress(60)

            status_text.text("Generating Tamil speech...")
            speech_path = os.path.join(work_dir, "speech.mp3")
            gTTS(text=translated_text, lang=TAMIL_CONFIG['gtts_code']).save(speech_path)
            progress_bar.progress(80)

            status_text.text("Creating final video...")
            dubbed_audio = process_audio_for_sync(speech_path, voice_speed)
            final_audio_path = os.path.join(work_dir, "dubbed.wav")
            dubbed_audio.export(final_audio_path, format="wav")

            final_video_path = os.path.join(work_dir, "dubbed.mp4")
            dubbed_track = AudioFileClip(final_audio_path)
            final_video = video.set_audio(dubbed_track)
            final_video.write_videofile(
                final_video_path,
                codec='libx264',
                audio_codec='aac',
                # Keep moviepy's intermediate audio file inside the scratch
                # directory rather than the working directory, where parallel
                # sessions would share one file name.
                temp_audiofile=os.path.join(work_dir, "mux_audio.m4a"),
            )
            progress_bar.progress(100)

            # Load the result into memory so it outlives the scratch directory.
            with open(final_video_path, "rb") as f:
                video_bytes = f.read()
        finally:
            # Release the media readers before the directory is deleted; open
            # handles can block file removal on some platforms.
            if dubbed_track is not None:
                dubbed_track.close()
            video.close()

    return video_bytes, translated_text


def main():
    """Render the page and run the Tamil dubbing workflow.

    Loads the models, lets the user upload a video and tweak settings, runs
    the dubbing pipeline when the start button is pressed, and shows the
    dubbed video together with download buttons for the video and the script.
    The latest result is kept in ``st.session_state`` so that it is still
    displayed on the rerun that Streamlit performs after a download click.
    """
    # "\U0001F3A5" is the movie-camera emoji (restored from a garbled literal).
    st.title("\U0001F3A5 Tamil Video Dubbing AI")
    st.markdown("### Advanced Video Translation and Dubbing System")

    try:
        with st.spinner("Loading AI models..."):
            whisper_model, tokenizer, translation_model = load_models()
        # NOTE(review): the original emoji here was lost to an encoding error
        # (only a stray Greek letter remained); party popper is a best guess.
        st.success("Models loaded successfully! \U0001F389")
    except Exception as e:
        st.error(f"Error loading models: {e}")
        return

    video_file = st.file_uploader("Upload your video file", type=["mp4", "mov", "avi"])
    if not video_file:
        return

    st.video(video_file)

    with st.expander("Advanced Settings"):
        voice_speed = st.slider("Voice Speed", 0.5, 1.5, TAMIL_CONFIG['voice_speed'], 0.1)
        # NOTE(review): this control is rendered but its value is not consumed
        # by any pipeline step yet.
        st.select_slider(
            "Translation Quality",
            options=["Draft", "Standard", "High Quality"],
            value="Standard"
        )

    # Identifies the current upload so a stored result is never shown next to
    # a different video.
    upload_id = (video_file.name, video_file.size)

    if st.button("Start Tamil Dubbing", key="start_dubbing"):
        try:
            with st.spinner("Processing your video..."):
                video_bytes, translated_text = _dub_video(
                    video_file, voice_speed, whisper_model, tokenizer, translation_model
                )
        except Exception as e:
            # Drop any earlier result so stale output is not shown under the error.
            st.session_state.pop("dubbing_result", None)
            st.error(f"An error occurred: {e}")
            st.info("Please try again with a different video or check your internet connection.")
            return

        st.session_state["dubbing_result"] = {
            "upload_id": upload_id,
            "video": video_bytes,
            "script": translated_text,
        }
        # NOTE(review): emoji is a best guess, see the note on the message above.
        st.success("Video dubbed successfully! \U0001F389")

    dubbing_result = st.session_state.get("dubbing_result")
    if not dubbing_result or dubbing_result["upload_id"] != upload_id:
        return

    st.video(dubbing_result["video"])

    col1, col2 = st.columns(2)
    with col1:
        st.download_button(
            "Download Dubbed Video",
            dubbing_result["video"],
            file_name="tamil_dubbed_video.mp4",
            mime="video/mp4"
        )
    with col2:
        st.download_button(
            "Download Tamil Script",
            dubbing_result["script"],
            file_name="tamil_script.txt",
            mime="text/plain"
        )
|
|
|
# Start the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()