Artificial-superintelligence committed on
Commit
aaf4dac
·
verified ·
1 Parent(s): 4016726

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +162 -262
app.py CHANGED
@@ -6,29 +6,19 @@ from gtts import gTTS
6
  import tempfile
7
  import os
8
  import numpy as np
9
- from pydub import AudioSegment
10
- import speech_recognition as sr
11
  from datetime import timedelta
12
  import json
13
- import indic_transliteration
14
  from indic_transliteration import sanscript
15
- from indic_transliteration.sanscript import SchemeMap, SCHEMES, transliterate
16
  import azure.cognitiveservices.speech as speechsdk
 
17
 
18
  # Tamil-specific voice configurations
19
  TAMIL_VOICES = {
20
- 'Female 1': {'gender': 'female', 'age': 'adult', 'style': 'normal'},
21
- 'Female 2': {'gender': 'female', 'age': 'adult', 'style': 'formal'},
22
- 'Male 1': {'gender': 'male', 'age': 'adult', 'style': 'normal'},
23
- 'Male 2': {'gender': 'male', 'age': 'adult', 'style': 'formal'},
24
- }
25
-
26
- # Tamil-specific pronunciations and replacements
27
- TAMIL_PRONUNCIATIONS = {
28
- 'zh': 'l', # Handle special Tamil character ழ
29
- 'L': 'l', # Handle special Tamil character ள
30
- 'N': 'n', # Handle special Tamil character ண
31
- 'R': 'r', # Handle special Tamil character ற
32
  }
33
 
34
  class TamilTextProcessor:
@@ -40,315 +30,225 @@ class TamilTextProcessor:
40
  '௫': '5', '௬': '6', '௭': '7', '௮': '8', '௯': '9'}
41
  for tamil_num, eng_num in tamil_numerals.items():
42
  text = text.replace(tamil_num, eng_num)
43
-
44
- # Handle special characters and combinations
45
- text = text.replace('ஜ்ஞ', 'க்ய') # Replace complex character combinations
46
-
47
  return text
48
 
49
  @staticmethod
50
- def split_tamil_sentences(text):
51
- """Split Tamil text into natural sentence boundaries"""
52
- sentence_endings = ['।', '.', '!', '?', '॥']
53
- sentences = []
54
- current_sentence = ''
55
-
56
- for char in text:
57
- current_sentence += char
58
- if char in sentence_endings:
59
- sentences.append(current_sentence.strip())
60
- current_sentence = ''
61
-
62
- if current_sentence:
63
- sentences.append(current_sentence.strip())
64
-
65
- return sentences
66
-
67
- class TamilAudioProcessor:
68
- @staticmethod
69
- def adjust_tamil_audio(audio_segment):
70
- """Adjust audio characteristics for Tamil speech"""
71
- # Enhance clarity of Tamil consonants
72
- enhanced_audio = audio_segment.high_pass_filter(80)
73
- enhanced_audio = enhanced_audio.low_pass_filter(8000)
74
-
75
- # Adjust speed slightly for better comprehension
76
- enhanced_audio = enhanced_audio.speedup(playback_speed=0.95)
77
-
78
- return enhanced_audio
79
-
80
- @staticmethod
81
- def match_emotion(audio_segment, emotion_type):
82
- """Adjust audio based on emotional context"""
83
- if emotion_type == 'happy':
84
- return audio_segment.apply_gain(2).high_pass_filter(100)
85
- elif emotion_type == 'sad':
86
- return audio_segment.apply_gain(-1).low_pass_filter(3000)
87
- elif emotion_type == 'angry':
88
- return audio_segment.apply_gain(4).high_pass_filter(200)
89
- return audio_segment
90
 
91
- class TamilVideoDubber:
92
- def __init__(self, azure_key=None, azure_region=None):
93
- self.whisper_model = whisper.load_model("base")
 
 
 
 
94
  self.temp_files = []
95
- self.azure_key = azure_key
96
- self.azure_region = azure_region
97
-
98
  def __enter__(self):
99
  return self
100
-
101
  def __exit__(self, exc_type, exc_val, exc_tb):
102
  self.cleanup()
103
 
104
  def cleanup(self):
105
  for temp_file in self.temp_files:
106
  if os.path.exists(temp_file):
107
- os.remove(temp_file)
 
 
 
108
 
109
  def create_temp_file(self, suffix):
110
  temp_file = tempfile.mktemp(suffix=suffix)
111
  self.temp_files.append(temp_file)
112
  return temp_file
113
 
114
- def extract_audio_segments(self, video_path):
115
- """Extract audio segments with emotion detection"""
116
- video = VideoFileClip(video_path)
117
- result = self.whisper_model.transcribe(video_path)
118
-
119
- segments = []
120
- for segment in result["segments"]:
121
- # Basic emotion detection based on punctuation and keywords
122
- emotion = self.detect_emotion(segment["text"])
123
- segments.append({
124
- "text": segment["text"],
125
- "start": segment["start"],
126
- "end": segment["end"],
127
- "duration": segment["end"] - segment["start"],
128
- "emotion": emotion
129
- })
130
-
131
- return segments, video.duration
132
-
133
- def detect_emotion(self, text):
134
- """Simple emotion detection based on text analysis"""
135
- happy_words = ['happy', 'joy', 'laugh', 'smile', 'மகிழ்ச்சி']
136
- sad_words = ['sad', 'sorry', 'cry', 'வருத்தம்']
137
- angry_words = ['angry', 'hate', 'கோபம்']
138
-
139
- text_lower = text.lower()
140
- if any(word in text_lower for word in happy_words):
141
- return 'happy'
142
- elif any(word in text_lower for word in sad_words):
143
- return 'sad'
144
- elif any(word in text_lower for word in angry_words):
145
- return 'angry'
146
- return 'neutral'
147
 
148
- def translate_to_tamil(self, text):
149
- """Translate text to Tamil with context preservation"""
150
  translator = Translator(to_lang='ta')
151
- translated = translator.translate(text)
152
- return TamilTextProcessor.normalize_tamil_text(translated)
153
-
154
- def generate_tamil_audio(self, text, voice_config, emotion='neutral'):
155
- """Generate Tamil audio using Azure TTS or gTTS"""
156
- if self.azure_key and self.azure_region:
157
- return self._generate_azure_tamil_audio(text, voice_config, emotion)
158
- else:
159
- return self._generate_gtts_tamil_audio(text, emotion)
160
-
161
- def _generate_azure_tamil_audio(self, text, voice_config, emotion):
162
- """Generate Tamil audio using Azure Cognitive Services"""
163
- speech_config = speechsdk.SpeechConfig(
164
- subscription=self.azure_key, region=self.azure_region)
165
-
166
- # Configure Tamil voice
167
- speech_config.speech_synthesis_voice_name = "ta-IN-PallaviNeural"
168
-
169
- # Create speech synthesizer
170
- speech_synthesizer = speechsdk.SpeechSynthesizer(
171
- speech_config=speech_config)
172
 
173
- # Add SSML for emotion and style
174
- ssml_text = f"""
175
- <speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis">
176
- <voice name="ta-IN-PallaviNeural">
177
- <prosody rate="{self._get_emotion_rate(emotion)}"
178
- pitch="{self._get_emotion_pitch(emotion)}">
179
- {text}
180
- </prosody>
181
- </voice>
182
- </speak>
183
- """
184
-
185
- result = speech_synthesizer.speak_ssml_async(ssml_text).get()
186
-
187
- if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
188
- return AudioSegment.from_wav(io.BytesIO(result.audio_data))
189
- else:
190
- raise Exception("Speech synthesis failed")
191
-
192
- def _generate_gtts_tamil_audio(self, text, emotion):
193
- """Fallback to gTTS for Tamil audio generation"""
194
- temp_path = self.create_temp_file(".mp3")
195
- tts = gTTS(text=text, lang='ta')
196
- tts.save(temp_path)
197
-
198
- audio = AudioSegment.from_mp3(temp_path)
199
- # Apply emotion-based adjustments
200
- audio = TamilAudioProcessor.match_emotion(audio, emotion)
201
- return audio
202
-
203
- @staticmethod
204
- def _get_emotion_rate(emotion):
205
- """Get speech rate based on emotion"""
206
- rates = {
207
- 'happy': '1.1',
208
- 'sad': '0.9',
209
- 'angry': '1.2',
210
- 'neutral': '1.0'
211
- }
212
- return rates.get(emotion, '1.0')
213
-
214
- @staticmethod
215
- def _get_emotion_pitch(emotion):
216
- """Get pitch adjustment based on emotion"""
217
- pitches = {
218
- 'happy': '+1st',
219
- 'sad': '-1st',
220
- 'angry': '+2st',
221
- 'neutral': '0st'
222
- }
223
- return pitches.get(emotion, '0st')
224
 
225
  def main():
226
  st.title("Tamil Movie Dubbing System")
227
- st.sidebar.header("Settings")
228
 
229
- # Video upload
230
- video_file = st.file_uploader("Upload your video", type=['mp4', 'mov', 'avi'])
231
  if not video_file:
232
  return
233
 
234
- # Voice selection
235
- selected_voice = st.selectbox("Select Tamil voice", list(TAMIL_VOICES.keys()))
236
 
237
- # Advanced settings
238
  with st.expander("Advanced Settings"):
239
- generate_subtitles = st.checkbox("Generate Tamil subtitles", value=True)
240
- adjust_audio = st.checkbox("Enhance Tamil audio clarity", value=True)
241
- emotion_detection = st.checkbox("Enable emotion detection", value=True)
242
-
243
- # Tamil font selection for subtitles
244
- tamil_fonts = ["Latha", "Vijaya", "Mukta Malar"]
245
- selected_font = st.selectbox("Select Tamil font", tamil_fonts)
246
-
247
- # Audio enhancement options
248
- if adjust_audio:
249
- clarity_level = st.slider("Audio clarity level", 1, 5, 3)
250
- bass_boost = st.slider("Bass boost", 0, 100, 50)
251
 
252
  if st.button("Start Tamil Dubbing"):
253
- with st.spinner("Processing your video..."):
254
- try:
255
- with TamilVideoDubber() as dubber:
256
  # Save uploaded video
257
  temp_video_path = dubber.create_temp_file(".mp4")
258
  with open(temp_video_path, "wb") as f:
259
  f.write(video_file.read())
260
 
261
- # Process video with progress tracking
262
  progress_bar = st.progress(0)
263
  status_text = st.empty()
264
-
265
- # Extract and analyze segments
266
- status_text.text("Analyzing video...")
267
- segments, duration = dubber.extract_audio_segments(
268
- temp_video_path)
269
  progress_bar.progress(0.25)
270
 
271
- # Translation and audio generation
 
 
 
 
 
272
  status_text.text("Generating Tamil audio...")
273
- final_audio = AudioSegment.empty()
 
 
274
 
275
- for i, segment in enumerate(segments):
276
- # Translate to Tamil
277
- tamil_text = dubber.translate_to_tamil(segment["text"])
278
-
279
- # Generate Tamil audio
280
- segment_audio = dubber.generate_tamil_audio(
281
- tamil_text,
282
- TAMIL_VOICES[selected_voice],
283
- segment["emotion"] if emotion_detection else 'neutral'
284
- )
285
-
286
- # Apply audio enhancements
287
- if adjust_audio:
288
- segment_audio = TamilAudioProcessor.adjust_tamil_audio(
289
- segment_audio)
290
-
291
- # Add to final audio
292
- if len(final_audio) < segment["start"] * 1000:
293
- silence_duration = (segment["start"] * 1000 -
294
- len(final_audio))
295
- final_audio += AudioSegment.silent(
296
- duration=silence_duration)
297
-
298
- final_audio += segment_audio
299
-
300
- # Update progress
301
- progress_bar.progress(0.25 + (0.5 * (i + 1) /
302
- len(segments)))
303
 
304
- # Generate final video with subtitles
305
  status_text.text("Creating final video...")
306
  output_path = dubber.create_temp_file(".mp4")
307
 
308
- video = VideoFileClip(temp_video_path)
309
- video = video.set_audio(AudioFileClip(final_audio))
310
-
311
  if generate_subtitles:
312
- # Add Tamil subtitles
313
- subtitle_clips = []
314
- for segment in segments:
315
- tamil_text = dubber.translate_to_tamil(segment["text"])
316
- subtitle_clip = TextClip(
317
- tamil_text,
318
- fontsize=24,
319
- font=selected_font,
320
- color='white',
321
  stroke_color='black',
322
  stroke_width=1
323
  )
324
- subtitle_clip = subtitle_clip.set_position(
325
- ('center', 'bottom')
326
- ).set_duration(
327
- segment["end"] - segment["start"]
328
- ).set_start(segment["start"])
329
- subtitle_clips.append(subtitle_clip)
330
-
331
- video = CompositeVideoClip([video] + subtitle_clips)
 
 
 
 
332
 
333
  # Write final video
334
- video.write_videofile(output_path, codec='libx264',
335
- audio_codec='aac')
 
 
 
 
336
  progress_bar.progress(1.0)
337
 
338
  # Display result
339
- st.success("Tamil dubbing completed!")
340
  st.video(output_path)
341
-
342
- # Provide download button
343
  with open(output_path, "rb") as f:
344
  st.download_button(
345
- "Download Tamil Dubbed Video",
346
  f,
347
- file_name="tamil_dubbed_video.mp4"
 
348
  )
349
 
350
- except Exception as e:
351
- st.error(f"An error occurred: {str(e)}")
352
 
353
  if __name__ == "__main__":
354
  main()
 
6
  import tempfile
7
  import os
8
  import numpy as np
 
 
9
  from datetime import timedelta
10
  import json
 
11
  from indic_transliteration import sanscript
12
+ from indic_transliteration.sanscript import transliterate
13
  import azure.cognitiveservices.speech as speechsdk
14
+ import ffmpeg
15
 
16
  # Tamil-specific voice configurations
17
  TAMIL_VOICES = {
18
+ 'Female 1': {'name': 'ta-IN-PallaviNeural', 'style': 'normal'},
19
+ 'Female 2': {'name': 'ta-IN-PallaviNeural', 'style': 'formal'},
20
+ 'Male 1': {'name': 'ta-IN-ValluvarNeural', 'style': 'normal'},
21
+ 'Male 2': {'name': 'ta-IN-ValluvarNeural', 'style': 'formal'}
 
 
 
 
 
 
 
 
22
  }
23
 
24
  class TamilTextProcessor:
 
30
  '௫': '5', '௬': '6', '௭': '7', '௮': '8', '௯': '9'}
31
  for tamil_num, eng_num in tamil_numerals.items():
32
  text = text.replace(tamil_num, eng_num)
 
 
 
 
33
  return text
34
 
35
  @staticmethod
36
+ def process_for_tts(text):
37
+ """Process Tamil text for TTS"""
38
+ # Remove any unsupported characters
39
+ text = ''.join(char for char in text if ord(char) < 65535)
40
+ # Normalize whitespace
41
+ text = ' '.join(text.split())
42
+ return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
+ class TamilDubber:
45
+ def __init__(self):
46
+ try:
47
+ self.whisper_model = whisper.load_model("base")
48
+ except Exception as e:
49
+ st.error(f"Error loading Whisper model: {e}")
50
+ raise
51
  self.temp_files = []
52
+
 
 
53
  def __enter__(self):
54
  return self
55
+
56
  def __exit__(self, exc_type, exc_val, exc_tb):
57
  self.cleanup()
58
 
59
  def cleanup(self):
60
  for temp_file in self.temp_files:
61
  if os.path.exists(temp_file):
62
+ try:
63
+ os.remove(temp_file)
64
+ except Exception:
65
+ pass
66
 
67
  def create_temp_file(self, suffix):
68
  temp_file = tempfile.mktemp(suffix=suffix)
69
  self.temp_files.append(temp_file)
70
  return temp_file
71
 
72
+ def extract_audio(self, video_path):
73
+ """Extract audio and transcribe using Whisper"""
74
+ try:
75
+ video = VideoFileClip(video_path)
76
+ audio_path = self.create_temp_file(".wav")
77
+ video.audio.write_audiofile(audio_path)
78
+
79
+ # Transcribe using Whisper
80
+ result = self.whisper_model.transcribe(audio_path)
81
+ return result["segments"], video.duration
82
+
83
+ except Exception as e:
84
+ st.error(f"Error in audio extraction: {e}")
85
+ raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
+ def translate_segments(self, segments):
88
+ """Translate segments to Tamil"""
89
  translator = Translator(to_lang='ta')
90
+ translated_segments = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
+ for segment in segments:
93
+ try:
94
+ translated_text = translator.translate(segment["text"])
95
+ translated_text = TamilTextProcessor.normalize_tamil_text(translated_text)
96
+ translated_text = TamilTextProcessor.process_for_tts(translated_text)
97
+
98
+ translated_segments.append({
99
+ "text": translated_text,
100
+ "start": segment["start"],
101
+ "end": segment["end"],
102
+ "duration": segment["end"] - segment["start"]
103
+ })
104
+ except Exception as e:
105
+ st.warning(f"Translation warning for segment: {str(e)}")
106
+ # Keep original text if translation fails
107
+ translated_segments.append({
108
+ "text": segment["text"],
109
+ "start": segment["start"],
110
+ "end": segment["end"],
111
+ "duration": segment["end"] - segment["start"]
112
+ })
113
+
114
+ return translated_segments
115
+
116
+ def generate_audio(self, text, voice_style="normal"):
117
+ """Generate Tamil audio using gTTS"""
118
+ try:
119
+ temp_path = self.create_temp_file(".mp3")
120
+ tts = gTTS(text=text, lang='ta', slow=False)
121
+ tts.save(temp_path)
122
+ return temp_path
123
+ except Exception as e:
124
+ st.error(f"Error in audio generation: {e}")
125
+ raise
126
+
127
+ def create_subtitles(self, segments, output_path):
128
+ """Generate SRT subtitles"""
129
+ try:
130
+ with open(output_path, 'w', encoding='utf-8') as f:
131
+ for idx, segment in enumerate(segments, 1):
132
+ start_time = str(timedelta(seconds=int(segment["start"])))
133
+ end_time = str(timedelta(seconds=int(segment["end"])))
134
+ f.write(f"{idx}\n")
135
+ f.write(f"{start_time} --> {end_time}\n")
136
+ f.write(f"{segment['text']}\n\n")
137
+ except Exception as e:
138
+ st.error(f"Error creating subtitles: {e}")
139
+ raise
 
 
 
140
 
141
  def main():
142
  st.title("Tamil Movie Dubbing System")
143
+ st.sidebar.header("டப்பிங் அமைப்புகள்") # Dubbing Settings in Tamil
144
 
145
+ # File uploader
146
+ video_file = st.file_uploader("Upload Video File", type=['mp4', 'mov', 'avi'])
147
  if not video_file:
148
  return
149
 
150
+ # Settings
151
+ voice_type = st.selectbox("Select Voice", list(TAMIL_VOICES.keys()))
152
 
 
153
  with st.expander("Advanced Settings"):
154
+ generate_subtitles = st.checkbox("Generate Tamil Subtitles", value=True)
155
+ subtitle_size = st.slider("Subtitle Size", 16, 32, 24)
156
+ subtitle_color = st.color_picker("Subtitle Color", "#FFFFFF")
 
 
 
 
 
 
 
 
 
157
 
158
  if st.button("Start Tamil Dubbing"):
159
+ try:
160
+ with st.spinner("Processing video..."):
161
+ with TamilDubber() as dubber:
162
  # Save uploaded video
163
  temp_video_path = dubber.create_temp_file(".mp4")
164
  with open(temp_video_path, "wb") as f:
165
  f.write(video_file.read())
166
 
167
+ # Progress tracking
168
  progress_bar = st.progress(0)
169
  status_text = st.empty()
170
+
171
+ # Extract audio and transcribe
172
+ status_text.text("Extracting audio and transcribing...")
173
+ segments, video_duration = dubber.extract_audio(temp_video_path)
 
174
  progress_bar.progress(0.25)
175
 
176
+ # Translate segments
177
+ status_text.text("Translating to Tamil...")
178
+ translated_segments = dubber.translate_segments(segments)
179
+ progress_bar.progress(0.50)
180
+
181
+ # Generate Tamil audio
182
  status_text.text("Generating Tamil audio...")
183
+ output_segments = []
184
+ video = VideoFileClip(temp_video_path)
185
+ final_audio_path = dubber.create_temp_file(".mp3")
186
 
187
+ for idx, segment in enumerate(translated_segments):
188
+ audio_path = dubber.generate_audio(segment["text"])
189
+ output_segments.append({
190
+ "audio": audio_path,
191
+ "start": segment["start"],
192
+ "end": segment["end"]
193
+ })
194
+ progress_bar.progress(0.50 + (0.25 * (idx + 1) / len(translated_segments)))
195
+
196
+ # Generate subtitles if requested
197
+ if generate_subtitles:
198
+ subtitle_path = dubber.create_temp_file(".srt")
199
+ dubber.create_subtitles(translated_segments, subtitle_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
 
201
+ # Create final video
202
  status_text.text("Creating final video...")
203
  output_path = dubber.create_temp_file(".mp4")
204
 
205
+ # Add subtitles if enabled
 
 
206
  if generate_subtitles:
207
+ def create_subtitle_clip(txt):
208
+ return TextClip(
209
+ txt=txt,
210
+ fontsize=subtitle_size,
211
+ color=subtitle_color,
 
 
 
 
212
  stroke_color='black',
213
  stroke_width=1
214
  )
215
+
216
+ subtitle_clips = []
217
+ for segment in translated_segments:
218
+ clip = create_subtitle_clip(segment["text"])
219
+ clip = clip.set_position(('center', 'bottom'))
220
+ clip = clip.set_start(segment["start"])
221
+ clip = clip.set_duration(segment["duration"])
222
+ subtitle_clips.append(clip)
223
+
224
+ final_video = CompositeVideoClip([video] + subtitle_clips)
225
+ else:
226
+ final_video = video
227
 
228
  # Write final video
229
+ final_video.write_videofile(
230
+ output_path,
231
+ codec='libx264',
232
+ audio_codec='aac',
233
+ fps=video.fps
234
+ )
235
  progress_bar.progress(1.0)
236
 
237
  # Display result
238
+ st.success("டப்பிங் வெற்றிகரமாக முடிந்தது!") # Dubbing completed successfully in Tamil
239
  st.video(output_path)
240
+
241
+ # Download button
242
  with open(output_path, "rb") as f:
243
  st.download_button(
244
+ "Download Dubbed Video",
245
  f,
246
+ file_name="tamil_dubbed_video.mp4",
247
+ mime="video/mp4"
248
  )
249
 
250
+ except Exception as e:
251
+ st.error(f"An error occurred: {str(e)}")
252
 
253
  if __name__ == "__main__":
254
  main()