Artificial-superintelligence committed on
Commit
4b6f416
·
verified ·
1 Parent(s): dc36981

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +345 -117
app.py CHANGED
@@ -1,126 +1,354 @@
1
import streamlit as st
from moviepy.editor import VideoFileClip, AudioFileClip
import whisper
from translate import Translator
from gtts import gTTS
import tempfile
import os
import numpy as np
import time

# Load the Whisper speech-to-text model once at startup.
try:
    whisper_model = whisper.load_model("base")
except Exception as e:
    st.error(f"Error loading Whisper model: {e}")
    st.stop()  # FIX: nothing below can work without a model, so abort here

# Display name -> ISO 639-1 code of supported translation targets.
LANGUAGES = {
    'English': 'en',
    'Tamil': 'ta',
    'Sinhala': 'si',
    'French': 'fr',  # Add more languages as needed
}


def transcribe_audio_in_chunks(audio_path, model, chunk_length=30):
    """Transcribe ``audio_path`` with Whisper in ``chunk_length``-second pieces.

    Chunking bounds memory use on long videos.  Returns the concatenated
    transcription text of all chunks.
    """
    audio_clip = whisper.load_audio(audio_path)
    audio_duration = len(audio_clip) / whisper.audio.SAMPLE_RATE  # seconds
    segments = []
    for start in np.arange(0, audio_duration, chunk_length):
        end = min(start + chunk_length, audio_duration)
        segment = audio_clip[int(start * whisper.audio.SAMPLE_RATE):int(end * whisper.audio.SAMPLE_RATE)]
        result = model.transcribe(segment)
        segments.append(result['text'])
    return ' '.join(segments)


def translate_in_chunks(text, translator, max_length=500):
    """Translate ``text`` in pieces of at most ``max_length`` characters.

    The ``translate`` backend rejects long inputs, so the text is split on
    word boundaries, translated chunk by chunk, and re-joined.
    """
    words = text.split()
    chunks = []
    current_chunk = ""
    for word in words:
        if len(current_chunk) + len(word) + 1 <= max_length:
            current_chunk += " " + word if current_chunk else word
        else:
            chunks.append(current_chunk)
            current_chunk = word
    if current_chunk:
        chunks.append(current_chunk)
    translated_chunks = [translator.translate(chunk) for chunk in chunks]
    return ' '.join(translated_chunks)


st.title("AI Video Translator with Whisper and GTTS")

# Step 1: Upload video file
video_file = st.file_uploader("Upload a video file", type=["mp4", "mov", "avi", "mkv"])

if video_file:
    # Step 2: Select translation language
    target_language = st.selectbox("Select the target language for translation", list(LANGUAGES.keys()))

    # Process when user clicks translate
    if st.button("Translate Video"):
        # FIX: all temp paths are pre-declared so the finally-block below can
        # clean up even when a step fails half-way (the original leaked the
        # TTS/output files on partial failure, and used race-prone mktemp()).
        temp_video_path = None
        audio_path = None
        translated_audio_path = None
        final_video_path = None
        try:
            # Persist the upload to disk so moviepy can open it.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_video:
                temp_video.write(video_file.read())
                temp_video_path = temp_video.name

            # Extract audio from video
            try:
                video = VideoFileClip(temp_video_path)
                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
                    audio_path = f.name
                video.audio.write_audiofile(audio_path)
            except Exception as e:
                st.error(f"Error extracting audio from video: {e}")
                st.stop()  # cleanup happens in the finally-block

            # Transcribe audio using Whisper
            original_text = transcribe_audio_in_chunks(audio_path, whisper_model)
            st.write("Original Transcription:", original_text)

            # Translate text to the target language
            translator = Translator(to_lang=LANGUAGES[target_language])
            translated_text = translate_in_chunks(original_text, translator)
            st.write(f"Translated Text ({target_language}):", translated_text)

            # Convert translated text to speech
            tts = gTTS(text=translated_text, lang=LANGUAGES[target_language])
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
                translated_audio_path = f.name
            tts.save(translated_audio_path)

            # Merge translated audio with the original video.
            # FIX: reuse the clip opened above instead of re-opening the file.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as f:
                final_video_path = f.name
            translated_audio = AudioFileClip(translated_audio_path)
            final_video = video.set_audio(translated_audio)
            final_video.write_videofile(final_video_path, codec='libx264', audio_codec='aac')

            # Display success message and provide download link
            st.success("Translation successful! Download your translated video below:")
            st.video(final_video_path)
            with open(final_video_path, "rb") as f:
                st.download_button("Download Translated Video", f, file_name="translated_video.mp4")
        except Exception as e:
            st.error(f"Error during transcription/translation: {e}")
        finally:
            # Remove every temp file that was actually created.
            for path in (temp_video_path, audio_path, translated_audio_path, final_video_path):
                if path and os.path.exists(path):
                    os.remove(path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Standard library
import io  # FIX: used by AudioSegment.from_wav(io.BytesIO(...)) but was never imported
import json
import os
import tempfile
from datetime import timedelta

# Third-party
import numpy as np
import speech_recognition as sr
import streamlit as st
import whisper
import azure.cognitiveservices.speech as speechsdk
import indic_transliteration
from gtts import gTTS
from indic_transliteration import sanscript
from indic_transliteration.sanscript import SchemeMap, SCHEMES, transliterate
from moviepy.editor import VideoFileClip, AudioFileClip, TextClip, CompositeVideoClip
from pydub import AudioSegment
from translate import Translator
17
+
18
def _voice(gender, style):
    """Build one Tamil voice configuration entry (all voices are adult)."""
    return {'gender': gender, 'age': 'adult', 'style': style}


# UI label -> TTS voice attributes for the Tamil speakers on offer.
TAMIL_VOICES = {
    'Female 1': _voice('female', 'normal'),
    'Female 2': _voice('female', 'formal'),
    'Male 1': _voice('male', 'normal'),
    'Male 2': _voice('male', 'formal'),
}

# Romanized stand-ins for Tamil characters that TTS engines mispronounce.
TAMIL_PRONUNCIATIONS = {
    'zh': 'l',  # special Tamil character ழ
    'L': 'l',   # special Tamil character ள
    'N': 'n',   # special Tamil character ண
    'R': 'r',   # special Tamil character ற
}
33
 
34
class TamilTextProcessor:
    """Stateless text-normalization helpers for Tamil TTS input."""

    # Tamil numerals mapped onto their ASCII digit equivalents.
    _DIGIT_MAP = str.maketrans('௦௧௨௩௪௫௬௭௮௯', '0123456789')

    @staticmethod
    def normalize_tamil_text(text):
        """Normalize Tamil text for better pronunciation.

        Converts Tamil numerals to ASCII digits and rewrites character
        combinations the TTS engines stumble over.
        """
        normalized = text.translate(TamilTextProcessor._DIGIT_MAP)
        # Replace a complex character combination with a pronounceable one.
        return normalized.replace('ஜ்ஞ', 'க்ய')

    @staticmethod
    def split_tamil_sentences(text):
        """Split Tamil text at natural sentence boundaries.

        Sentence-ending punctuation (Indic danda/double danda plus . ! ?)
        closes a sentence; any trailing remainder becomes a final sentence.
        """
        endings = frozenset('।.!?॥')
        sentences = []
        buffer = []
        for ch in text:
            buffer.append(ch)
            if ch in endings:
                sentences.append(''.join(buffer).strip())
                buffer = []
        if buffer:
            sentences.append(''.join(buffer).strip())
        return sentences
66
+
67
class TamilAudioProcessor:
    """Audio post-processing helpers for generated Tamil speech (pydub segments)."""

    @staticmethod
    def adjust_tamil_audio(audio_segment):
        """Adjust audio characteristics for Tamil speech.

        Band-passes the signal (80 Hz – 8 kHz) to sharpen consonants, then
        slows playback ~5% for comprehension.
        """
        # Enhance clarity of Tamil consonants.
        enhanced_audio = audio_segment.high_pass_filter(80)
        enhanced_audio = enhanced_audio.low_pass_filter(8000)

        # FIX: pydub's speedup() only supports playback_speed > 1.0, so the
        # original speedup(playback_speed=0.95) never slowed the audio down.
        # Slow by resampling: lower the frame rate 5%, then restore the rate
        # (note this slightly lowers pitch as a side effect).
        slowed = enhanced_audio._spawn(
            enhanced_audio.raw_data,
            overrides={'frame_rate': int(enhanced_audio.frame_rate * 0.95)},
        )
        return slowed.set_frame_rate(enhanced_audio.frame_rate)

    @staticmethod
    def match_emotion(audio_segment, emotion_type):
        """Adjust gain/filtering based on emotional context.

        Unknown/'neutral' emotions pass the segment through unchanged.
        """
        if emotion_type == 'happy':
            return audio_segment.apply_gain(2).high_pass_filter(100)
        elif emotion_type == 'sad':
            return audio_segment.apply_gain(-1).low_pass_filter(3000)
        elif emotion_type == 'angry':
            return audio_segment.apply_gain(4).high_pass_filter(200)
        return audio_segment
90
+
91
class TamilVideoDubber:
    """Pipeline object: transcribe a video, translate to Tamil, synthesize speech.

    Use as a context manager so temporary files are removed on exit.  Azure
    neural TTS is used when credentials are supplied; otherwise gTTS.
    """

    def __init__(self, azure_key=None, azure_region=None):
        # Whisper "base" model provides transcription plus segment timestamps.
        self.whisper_model = whisper.load_model("base")
        self.temp_files = []  # paths created via create_temp_file(); removed by cleanup()
        self.azure_key = azure_key
        self.azure_region = azure_region

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.cleanup()

    def cleanup(self):
        """Remove every temporary file this instance created."""
        for temp_file in self.temp_files:
            if os.path.exists(temp_file):
                os.remove(temp_file)

    def create_temp_file(self, suffix):
        """Create a tracked temporary file and return its path.

        FIX: tempfile.mktemp() is deprecated and race-prone (the returned
        name can be claimed by another process before use); mkstemp()
        creates the file atomically.  The descriptor is closed so callers
        can reopen/overwrite the path freely.
        """
        fd, temp_file = tempfile.mkstemp(suffix=suffix)
        os.close(fd)
        self.temp_files.append(temp_file)
        return temp_file

    def extract_audio_segments(self, video_path):
        """Transcribe ``video_path`` and return ``(segments, video_duration)``.

        Each segment is a dict with text/start/end/duration plus a coarse
        keyword-based "emotion" label used to steer TTS prosody.
        """
        # FIX: the clip is needed only for its duration — close it instead of
        # leaking the reader VideoFileClip holds open.
        video = VideoFileClip(video_path)
        try:
            video_duration = video.duration
        finally:
            video.close()

        result = self.whisper_model.transcribe(video_path)
        segments = [
            {
                "text": seg["text"],
                "start": seg["start"],
                "end": seg["end"],
                "duration": seg["end"] - seg["start"],
                "emotion": self.detect_emotion(seg["text"]),
            }
            for seg in result["segments"]
        ]
        return segments, video_duration

    def detect_emotion(self, text):
        """Return 'happy', 'sad', 'angry' or 'neutral' via keyword matching.

        English and Tamil keywords are checked; the first matching category
        wins in the order happy -> sad -> angry.
        """
        happy_words = ['happy', 'joy', 'laugh', 'smile', 'மகிழ்ச்சி']
        sad_words = ['sad', 'sorry', 'cry', 'வருத்தம்']
        angry_words = ['angry', 'hate', 'கோபம்']

        text_lower = text.lower()
        if any(word in text_lower for word in happy_words):
            return 'happy'
        elif any(word in text_lower for word in sad_words):
            return 'sad'
        elif any(word in text_lower for word in angry_words):
            return 'angry'
        return 'neutral'

    def translate_to_tamil(self, text):
        """Translate ``text`` to Tamil and normalize it for TTS pronunciation."""
        translator = Translator(to_lang='ta')
        translated = translator.translate(text)
        return TamilTextProcessor.normalize_tamil_text(translated)

    def generate_tamil_audio(self, text, voice_config, emotion='neutral'):
        """Return a pydub AudioSegment speaking ``text`` in Tamil.

        Azure neural TTS when credentials were provided, else gTTS fallback.
        ``voice_config`` is one of the TAMIL_VOICES entries (currently only
        consumed by the Azure path's SSML prosody).
        """
        if self.azure_key and self.azure_region:
            return self._generate_azure_tamil_audio(text, voice_config, emotion)
        return self._generate_gtts_tamil_audio(text, emotion)

    def _generate_azure_tamil_audio(self, text, voice_config, emotion):
        """Synthesize Tamil speech via Azure Cognitive Services with SSML prosody."""
        speech_config = speechsdk.SpeechConfig(
            subscription=self.azure_key, region=self.azure_region)

        # Configure the Tamil neural voice.
        speech_config.speech_synthesis_voice_name = "ta-IN-PallaviNeural"
        speech_synthesizer = speechsdk.SpeechSynthesizer(
            speech_config=speech_config)

        # SSML lets us vary rate/pitch per detected emotion.
        ssml_text = f"""
        <speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis">
            <voice name="ta-IN-PallaviNeural">
                <prosody rate="{self._get_emotion_rate(emotion)}"
                         pitch="{self._get_emotion_pitch(emotion)}">
                    {text}
                </prosody>
            </voice>
        </speak>
        """

        result = speech_synthesizer.speak_ssml_async(ssml_text).get()

        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
            # FIX: the original referenced io.BytesIO without importing io
            # (NameError); requires the module-level ``io`` import.
            return AudioSegment.from_wav(io.BytesIO(result.audio_data))
        raise Exception("Speech synthesis failed")

    def _generate_gtts_tamil_audio(self, text, emotion):
        """Fallback Tamil TTS via gTTS, with emotion-based post-processing."""
        temp_path = self.create_temp_file(".mp3")
        tts = gTTS(text=text, lang='ta')
        tts.save(temp_path)

        audio = AudioSegment.from_mp3(temp_path)
        # Apply emotion-based gain/filter adjustments.
        return TamilAudioProcessor.match_emotion(audio, emotion)

    @staticmethod
    def _get_emotion_rate(emotion):
        """SSML speech rate for an emotion label (default '1.0')."""
        rates = {'happy': '1.1', 'sad': '0.9', 'angry': '1.2', 'neutral': '1.0'}
        return rates.get(emotion, '1.0')

    @staticmethod
    def _get_emotion_pitch(emotion):
        """SSML pitch shift for an emotion label (default '0st')."""
        pitches = {'happy': '+1st', 'sad': '-1st', 'angry': '+2st', 'neutral': '0st'}
        return pitches.get(emotion, '0st')
224
+
225
def main():
    """Streamlit entry point: upload a video, dub it into Tamil, offer a download."""
    st.title("Tamil Movie Dubbing System")
    st.sidebar.header("Settings")

    # Video upload
    video_file = st.file_uploader("Upload your video", type=['mp4', 'mov', 'avi'])
    if not video_file:
        return

    # Voice selection
    selected_voice = st.selectbox("Select Tamil voice", list(TAMIL_VOICES.keys()))

    # Advanced settings
    with st.expander("Advanced Settings"):
        generate_subtitles = st.checkbox("Generate Tamil subtitles", value=True)
        adjust_audio = st.checkbox("Enhance Tamil audio clarity", value=True)
        emotion_detection = st.checkbox("Enable emotion detection", value=True)

        # Tamil font selection for subtitles
        tamil_fonts = ["Latha", "Vijaya", "Mukta Malar"]
        selected_font = st.selectbox("Select Tamil font", tamil_fonts)

        # Audio enhancement options.
        # NOTE(review): these slider values are not yet wired into the
        # processing pipeline — only the adjust_audio checkbox is used.
        if adjust_audio:
            clarity_level = st.slider("Audio clarity level", 1, 5, 3)
            bass_boost = st.slider("Bass boost", 0, 100, 50)

    if st.button("Start Tamil Dubbing"):
        with st.spinner("Processing your video..."):
            try:
                with TamilVideoDubber() as dubber:
                    # Save the uploaded video to a temp file moviepy can open.
                    temp_video_path = dubber.create_temp_file(".mp4")
                    with open(temp_video_path, "wb") as f:
                        f.write(video_file.read())

                    # Process video with progress tracking
                    progress_bar = st.progress(0)
                    status_text = st.empty()

                    # Extract and analyze segments
                    status_text.text("Analyzing video...")
                    segments, duration = dubber.extract_audio_segments(
                        temp_video_path)
                    progress_bar.progress(0.25)

                    # Translation and audio generation
                    status_text.text("Generating Tamil audio...")
                    final_audio = AudioSegment.empty()

                    for i, segment in enumerate(segments):
                        # Translate once and cache on the segment dict; the
                        # subtitle pass below reuses it (the original called
                        # translate_to_tamil twice per segment — each call is
                        # a network round-trip).
                        tamil_text = dubber.translate_to_tamil(segment["text"])
                        segment["tamil_text"] = tamil_text

                        # Generate Tamil audio for this segment.
                        segment_audio = dubber.generate_tamil_audio(
                            tamil_text,
                            TAMIL_VOICES[selected_voice],
                            segment["emotion"] if emotion_detection else 'neutral'
                        )

                        # Apply audio enhancements.
                        if adjust_audio:
                            segment_audio = TamilAudioProcessor.adjust_tamil_audio(
                                segment_audio)

                        # Pad with silence up to this segment's start time so
                        # the dub stays roughly in sync with the video.
                        if len(final_audio) < segment["start"] * 1000:
                            silence_duration = (segment["start"] * 1000 -
                                                len(final_audio))
                            final_audio += AudioSegment.silent(
                                duration=silence_duration)

                        final_audio += segment_audio

                        # Update progress (25% -> 75% across the segments).
                        progress_bar.progress(0.25 + (0.5 * (i + 1) /
                                                      len(segments)))

                    # Generate the final video (optionally with subtitles).
                    status_text.text("Creating final video...")
                    output_path = dubber.create_temp_file(".mp4")

                    # FIX: AudioFileClip requires a file path; the original
                    # passed the pydub AudioSegment object directly, which
                    # fails at runtime.  Export the mixed audio first.
                    dubbed_audio_path = dubber.create_temp_file(".wav")
                    final_audio.export(dubbed_audio_path, format="wav")

                    video = VideoFileClip(temp_video_path)
                    video = video.set_audio(AudioFileClip(dubbed_audio_path))

                    if generate_subtitles:
                        # Overlay one timed Tamil subtitle clip per segment.
                        subtitle_clips = []
                        for segment in segments:
                            subtitle_clip = TextClip(
                                segment["tamil_text"],  # cached translation
                                fontsize=24,
                                font=selected_font,
                                color='white',
                                stroke_color='black',
                                stroke_width=1
                            )
                            subtitle_clip = subtitle_clip.set_position(
                                ('center', 'bottom')
                            ).set_duration(
                                segment["end"] - segment["start"]
                            ).set_start(segment["start"])
                            subtitle_clips.append(subtitle_clip)

                        video = CompositeVideoClip([video] + subtitle_clips)

                    # Write final video
                    video.write_videofile(output_path, codec='libx264',
                                          audio_codec='aac')
                    progress_bar.progress(1.0)

                    # Display result
                    st.success("Tamil dubbing completed!")
                    st.video(output_path)

                    # Provide download button
                    with open(output_path, "rb") as f:
                        st.download_button(
                            "Download Tamil Dubbed Video",
                            f,
                            file_name="tamil_dubbed_video.mp4"
                        )

            except Exception as e:
                st.error(f"An error occurred: {str(e)}")


if __name__ == "__main__":
    main()