Spaces:

Artificial-superintelligence
/

Aita

Running

App Files Files Community

Artificial-superintelligence committed on Nov 12, 2024

Commit

aaf4dac

verified ·

1 Parent(s): 4016726

Update app.py

Browse files

Files changed (1) hide show

app.py +162 -262

app.py CHANGED Viewed

@@ -6,29 +6,19 @@ from gtts import gTTS
 import tempfile
 import os
 import numpy as np
-from pydub import AudioSegment
-import speech_recognition as sr
 from datetime import timedelta
 import json
-import indic_transliteration
 from indic_transliteration import sanscript
-from indic_transliteration.sanscript import SchemeMap, SCHEMES, transliterate
 import azure.cognitiveservices.speech as speechsdk
 # Tamil-specific voice configurations
 TAMIL_VOICES = {
-    'Female 1': {'gender': 'female', 'age': 'adult', 'style': 'normal'},
-    'Female 2': {'gender': 'female', 'age': 'adult', 'style': 'formal'},
-    'Male 1': {'gender': 'male', 'age': 'adult', 'style': 'normal'},
-    'Male 2': {'gender': 'male', 'age': 'adult', 'style': 'formal'},
-}
-# Tamil-specific pronunciations and replacements
-TAMIL_PRONUNCIATIONS = {
-    'zh': 'l',  # Handle special Tamil character ழ
-    'L': 'l',   # Handle special Tamil character ள
-    'N': 'n',   # Handle special Tamil character ண
-    'R': 'r',   # Handle special Tamil character ற
 }
 class TamilTextProcessor:
@@ -40,315 +30,225 @@ class TamilTextProcessor:
                          '௫': '5', '௬': '6', '௭': '7', '௮': '8', '௯': '9'}
         for tamil_num, eng_num in tamil_numerals.items():
             text = text.replace(tamil_num, eng_num)
-        # Handle special characters and combinations
-        text = text.replace('ஜ்ஞ', 'க்ய')  # Replace complex character combinations
         return text
     @staticmethod
-    def split_tamil_sentences(text):
-        """Split Tamil text into natural sentence boundaries"""
-        sentence_endings = ['।', '.', '!', '?', '॥']
-        sentences = []
-        current_sentence = ''
-        for char in text:
-            current_sentence += char
-            if char in sentence_endings:
-                sentences.append(current_sentence.strip())
-                current_sentence = ''
-        if current_sentence:
-            sentences.append(current_sentence.strip())
-        return sentences
-class TamilAudioProcessor:
-    @staticmethod
-    def adjust_tamil_audio(audio_segment):
-        """Adjust audio characteristics for Tamil speech"""
-        # Enhance clarity of Tamil consonants
-        enhanced_audio = audio_segment.high_pass_filter(80)
-        enhanced_audio = enhanced_audio.low_pass_filter(8000)
-        # Adjust speed slightly for better comprehension
-        enhanced_audio = enhanced_audio.speedup(playback_speed=0.95)
-        return enhanced_audio
-    @staticmethod
-    def match_emotion(audio_segment, emotion_type):
-        """Adjust audio based on emotional context"""
-        if emotion_type == 'happy':
-            return audio_segment.apply_gain(2).high_pass_filter(100)
-        elif emotion_type == 'sad':
-            return audio_segment.apply_gain(-1).low_pass_filter(3000)
-        elif emotion_type == 'angry':
-            return audio_segment.apply_gain(4).high_pass_filter(200)
-        return audio_segment
-class TamilVideoDubber:
-    def __init__(self, azure_key=None, azure_region=None):
-        self.whisper_model = whisper.load_model("base")
         self.temp_files = []
-        self.azure_key = azure_key
-        self.azure_region = azure_region
     def __enter__(self):
         return self
     def __exit__(self, exc_type, exc_val, exc_tb):
         self.cleanup()
     def cleanup(self):
         for temp_file in self.temp_files:
             if os.path.exists(temp_file):
-                os.remove(temp_file)
     def create_temp_file(self, suffix):
         temp_file = tempfile.mktemp(suffix=suffix)
         self.temp_files.append(temp_file)
         return temp_file
-    def extract_audio_segments(self, video_path):
-        """Extract audio segments with emotion detection"""
-        video = VideoFileClip(video_path)
-        result = self.whisper_model.transcribe(video_path)
-        segments = []
-        for segment in result["segments"]:
-            # Basic emotion detection based on punctuation and keywords
-            emotion = self.detect_emotion(segment["text"])
-            segments.append({
-                "text": segment["text"],
-                "start": segment["start"],
-                "end": segment["end"],
-                "duration": segment["end"] - segment["start"],
-                "emotion": emotion
-            })
-        return segments, video.duration
-    def detect_emotion(self, text):
-        """Simple emotion detection based on text analysis"""
-        happy_words = ['happy', 'joy', 'laugh', 'smile', 'மகிழ்ச்சி']
-        sad_words = ['sad', 'sorry', 'cry', 'வருத்தம்']
-        angry_words = ['angry', 'hate', 'கோபம்']
-        text_lower = text.lower()
-        if any(word in text_lower for word in happy_words):
-            return 'happy'
-        elif any(word in text_lower for word in sad_words):
-            return 'sad'
-        elif any(word in text_lower for word in angry_words):
-            return 'angry'
-        return 'neutral'
-    def translate_to_tamil(self, text):
-        """Translate text to Tamil with context preservation"""
         translator = Translator(to_lang='ta')
-        translated = translator.translate(text)
-        return TamilTextProcessor.normalize_tamil_text(translated)
-    def generate_tamil_audio(self, text, voice_config, emotion='neutral'):
-        """Generate Tamil audio using Azure TTS or gTTS"""
-        if self.azure_key and self.azure_region:
-            return self._generate_azure_tamil_audio(text, voice_config, emotion)
-        else:
-            return self._generate_gtts_tamil_audio(text, emotion)
-    def _generate_azure_tamil_audio(self, text, voice_config, emotion):
-        """Generate Tamil audio using Azure Cognitive Services"""
-        speech_config = speechsdk.SpeechConfig(
-            subscription=self.azure_key, region=self.azure_region)
-        # Configure Tamil voice
-        speech_config.speech_synthesis_voice_name = "ta-IN-PallaviNeural"
-        # Create speech synthesizer
-        speech_synthesizer = speechsdk.SpeechSynthesizer(
-            speech_config=speech_config)
-        # Add SSML for emotion and style
-        ssml_text = f"""
-        <speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis">
-            <voice name="ta-IN-PallaviNeural">
-                <prosody rate="{self._get_emotion_rate(emotion)}"
-                         pitch="{self._get_emotion_pitch(emotion)}">
-                    {text}
-                </prosody>
-            </voice>
-        </speak>
-        """
-        result = speech_synthesizer.speak_ssml_async(ssml_text).get()
-        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
-            return AudioSegment.from_wav(io.BytesIO(result.audio_data))
-        else:
-            raise Exception("Speech synthesis failed")
-    def _generate_gtts_tamil_audio(self, text, emotion):
-        """Fallback to gTTS for Tamil audio generation"""
-        temp_path = self.create_temp_file(".mp3")
-        tts = gTTS(text=text, lang='ta')
-        tts.save(temp_path)
-        audio = AudioSegment.from_mp3(temp_path)
-        # Apply emotion-based adjustments
-        audio = TamilAudioProcessor.match_emotion(audio, emotion)
-        return audio
-    @staticmethod
-    def _get_emotion_rate(emotion):
-        """Get speech rate based on emotion"""
-        rates = {
-            'happy': '1.1',
-            'sad': '0.9',
-            'angry': '1.2',
-            'neutral': '1.0'
-        }
-        return rates.get(emotion, '1.0')
-    @staticmethod
-    def _get_emotion_pitch(emotion):
-        """Get pitch adjustment based on emotion"""
-        pitches = {
-            'happy': '+1st',
-            'sad': '-1st',
-            'angry': '+2st',
-            'neutral': '0st'
-        }
-        return pitches.get(emotion, '0st')
 def main():
     st.title("Tamil Movie Dubbing System")
-    st.sidebar.header("Settings")
-    # Video upload
-    video_file = st.file_uploader("Upload your video", type=['mp4', 'mov', 'avi'])
     if not video_file:
         return
-    # Voice selection
-    selected_voice = st.selectbox("Select Tamil voice", list(TAMIL_VOICES.keys()))
-    # Advanced settings
     with st.expander("Advanced Settings"):
-        generate_subtitles = st.checkbox("Generate Tamil subtitles", value=True)
-        adjust_audio = st.checkbox("Enhance Tamil audio clarity", value=True)
-        emotion_detection = st.checkbox("Enable emotion detection", value=True)
-        # Tamil font selection for subtitles
-        tamil_fonts = ["Latha", "Vijaya", "Mukta Malar"]
-        selected_font = st.selectbox("Select Tamil font", tamil_fonts)
-        # Audio enhancement options
-        if adjust_audio:
-            clarity_level = st.slider("Audio clarity level", 1, 5, 3)
-            bass_boost = st.slider("Bass boost", 0, 100, 50)
     if st.button("Start Tamil Dubbing"):
-        with st.spinner("Processing your video..."):
-            try:
-                with TamilVideoDubber() as dubber:
                     # Save uploaded video
                     temp_video_path = dubber.create_temp_file(".mp4")
                     with open(temp_video_path, "wb") as f:
                         f.write(video_file.read())
-                    # Process video with progress tracking
                     progress_bar = st.progress(0)
                     status_text = st.empty()
-                    # Extract and analyze segments
-                    status_text.text("Analyzing video...")
-                    segments, duration = dubber.extract_audio_segments(
-                        temp_video_path)
                     progress_bar.progress(0.25)
-                    # Translation and audio generation
                     status_text.text("Generating Tamil audio...")
-                    final_audio = AudioSegment.empty()
-                    for i, segment in enumerate(segments):
-                        # Translate to Tamil
-                        tamil_text = dubber.translate_to_tamil(segment["text"])
-                        # Generate Tamil audio
-                        segment_audio = dubber.generate_tamil_audio(
-                            tamil_text,
-                            TAMIL_VOICES[selected_voice],
-                            segment["emotion"] if emotion_detection else 'neutral'
-                        )
-                        # Apply audio enhancements
-                        if adjust_audio:
-                            segment_audio = TamilAudioProcessor.adjust_tamil_audio(
-                                segment_audio)
-                        # Add to final audio
-                        if len(final_audio) < segment["start"] * 1000:
-                            silence_duration = (segment["start"] * 1000 -
-                                len(final_audio))
-                            final_audio += AudioSegment.silent(
-                                duration=silence_duration)
-                        final_audio += segment_audio
-                        # Update progress
-                        progress_bar.progress(0.25 + (0.5 * (i + 1) /
-                            len(segments)))
-                    # Generate final video with subtitles
                     status_text.text("Creating final video...")
                     output_path = dubber.create_temp_file(".mp4")
-                    video = VideoFileClip(temp_video_path)
-                    video = video.set_audio(AudioFileClip(final_audio))
                     if generate_subtitles:
-                        # Add Tamil subtitles
-                        subtitle_clips = []
-                        for segment in segments:
-                            tamil_text = dubber.translate_to_tamil(segment["text"])
-                            subtitle_clip = TextClip(
-                                tamil_text,
-                                fontsize=24,
-                                font=selected_font,
-                                color='white',
                                 stroke_color='black',
                                 stroke_width=1
                             )
-                            subtitle_clip = subtitle_clip.set_position(
-                                ('center', 'bottom')
-                            ).set_duration(
-                                segment["end"] - segment["start"]
-                            ).set_start(segment["start"])
-                            subtitle_clips.append(subtitle_clip)
-                        video = CompositeVideoClip([video] + subtitle_clips)
                     # Write final video
-                    video.write_videofile(output_path, codec='libx264',
-                        audio_codec='aac')
                     progress_bar.progress(1.0)
                     # Display result
-                    st.success("Tamil dubbing completed!")
                     st.video(output_path)
-                    # Provide download button
                     with open(output_path, "rb") as f:
                         st.download_button(
-                            "Download Tamil Dubbed Video",
                             f,
-                            file_name="tamil_dubbed_video.mp4"
                         )
-            except Exception as e:
-                st.error(f"An error occurred: {str(e)}")
 if __name__ == "__main__":
     main()

 import tempfile
 import os
 import numpy as np
 from datetime import timedelta
 import json
 from indic_transliteration import sanscript
+from indic_transliteration.sanscript import transliterate
 import azure.cognitiveservices.speech as speechsdk
+import ffmpeg
 # Tamil-specific voice configurations
 TAMIL_VOICES = {
+    'Female 1': {'name': 'ta-IN-PallaviNeural', 'style': 'normal'},
+    'Female 2': {'name': 'ta-IN-PallaviNeural', 'style': 'formal'},
+    'Male 1': {'name': 'ta-IN-ValluvarNeural', 'style': 'normal'},
+    'Male 2': {'name': 'ta-IN-ValluvarNeural', 'style': 'formal'}
 }
 class TamilTextProcessor:
                          '௫': '5', '௬': '6', '௭': '7', '௮': '8', '௯': '9'}
         for tamil_num, eng_num in tamil_numerals.items():
             text = text.replace(tamil_num, eng_num)
         return text
     @staticmethod
+    def process_for_tts(text):
+        """Process Tamil text for TTS"""
+        # Remove any unsupported characters
+        text = ''.join(char for char in text if ord(char) < 65535)
+        # Normalize whitespace
+        text = ' '.join(text.split())
+        return text
class TamilDubber:
    """Transcribe a video, translate it to Tamil and synthesise Tamil speech.

    Intended to be used as a context manager: every path handed out by
    ``create_temp_file`` is tracked and removed again when the ``with``
    block exits.
    """

    def __init__(self):
        """Load the Whisper "base" model and start with no tracked temp files.

        Raises:
            Exception: whatever ``whisper.load_model`` raises; the error is
                reported through ``st.error`` and then re-raised.
        """
        # Set up the list first so cleanup() stays safe even when the model
        # fails to load.
        self.temp_files = []
        try:
            self.whisper_model = whisper.load_model("base")
        except Exception as e:
            st.error(f"Error loading Whisper model: {e}")
            raise

    def __enter__(self):
        """Return the dubber itself for use inside a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Remove all tracked temp files; exceptions are not suppressed."""
        self.cleanup()

    def cleanup(self):
        """Delete every tracked temporary file and forget about it.

        Deletion is best effort: a file that is already gone, or that the
        OS refuses to delete, is skipped so that cleanup never masks the
        error that caused the ``with`` block to exit.
        """
        for temp_file in self.temp_files:
            try:
                os.remove(temp_file)
            except FileNotFoundError:
                # Never written, or already removed by someone else.
                continue
            except OSError:
                # Deliberate best effort (e.g. the file is still held open).
                continue
        self.temp_files.clear()

    def create_temp_file(self, suffix):
        """Create an empty temporary file and register it for cleanup.

        ``mkstemp`` creates the file atomically, so the returned name cannot
        be claimed by another process between choosing the name and first
        use (the race that ``mktemp`` is deprecated for).

        Args:
            suffix: File-name suffix including the dot, e.g. ``".mp4"``.

        Returns:
            The path of the (existing, empty) temporary file.
        """
        fd, temp_file = tempfile.mkstemp(suffix=suffix)
        # Only the path is needed; callers reopen or overwrite the file.
        os.close(fd)
        self.temp_files.append(temp_file)
        return temp_file

    def extract_audio(self, video_path):
        """Extract the audio track of a video and transcribe it with Whisper.

        Args:
            video_path: Path of the video file to read.

        Returns:
            A ``(segments, duration)`` tuple: the ``"segments"`` entry of the
            Whisper transcription result and the clip's ``duration``.

        Raises:
            ValueError: if the video has no audio track.
            Exception: any other failure; every error is reported through
                ``st.error`` and then re-raised.
        """
        try:
            video = VideoFileClip(video_path)
            try:
                if video.audio is None:
                    raise ValueError("The video has no audio track to transcribe")
                audio_path = self.create_temp_file(".wav")
                video.audio.write_audiofile(audio_path)
                duration = video.duration
            finally:
                # Release the reader so the source file is not kept open.
                video.close()
            # Transcribe using Whisper
            result = self.whisper_model.transcribe(audio_path)
            return result["segments"], duration
        except Exception as e:
            st.error(f"Error in audio extraction: {e}")
            raise

    def translate_segments(self, segments):
        """Translate transcription segments to Tamil.

        Args:
            segments: Iterable of mappings with ``"text"``, ``"start"`` and
                ``"end"`` keys.

        Returns:
            A list of dicts with ``"text"``, ``"start"``, ``"end"`` and
            ``"duration"`` (``end - start``). When translating a segment
            fails, a warning is shown and its original text is kept.
        """
        translator = Translator(to_lang='ta')
        translated_segments = []
        for segment in segments:
            try:
                text = translator.translate(segment["text"])
                text = TamilTextProcessor.normalize_tamil_text(text)
                text = TamilTextProcessor.process_for_tts(text)
            except Exception as e:
                st.warning(f"Translation warning for segment: {str(e)}")
                # Keep original text if translation fails
                text = segment["text"]
            translated_segments.append({
                "text": text,
                "start": segment["start"],
                "end": segment["end"],
                "duration": segment["end"] - segment["start"],
            })
        return translated_segments

    def generate_audio(self, text, voice_style="normal"):
        """Synthesise Tamil speech for *text* with gTTS.

        Args:
            text: Text to speak.
            voice_style: Accepted for interface compatibility; it is not
                used by this implementation.

        Returns:
            Path of the generated MP3 file (tracked for cleanup).

        Raises:
            Exception: any failure; reported through ``st.error`` and then
                re-raised.
        """
        try:
            temp_path = self.create_temp_file(".mp3")
            tts = gTTS(text=text, lang='ta', slow=False)
            tts.save(temp_path)
            return temp_path
        except Exception as e:
            st.error(f"Error in audio generation: {e}")
            raise

    @staticmethod
    def _format_srt_timestamp(seconds):
        """Format a time offset in seconds as an SRT ``HH:MM:SS,mmm`` stamp."""
        total_ms = max(0, int(round(float(seconds) * 1000)))
        hours, remainder = divmod(total_ms, 3_600_000)
        minutes, remainder = divmod(remainder, 60_000)
        secs, millis = divmod(remainder, 1000)
        return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

    def create_subtitles(self, segments, output_path):
        """Write *segments* to *output_path* as an SRT subtitle file.

        Each cue is numbered from 1 and timed with millisecond precision.

        Args:
            segments: Iterable of mappings with ``"text"``, ``"start"`` and
                ``"end"`` (seconds) keys.
            output_path: Destination file, written as UTF-8.

        Raises:
            Exception: any failure; reported through ``st.error`` and then
                re-raised.
        """
        try:
            with open(output_path, 'w', encoding='utf-8') as f:
                for idx, segment in enumerate(segments, 1):
                    start_time = self._format_srt_timestamp(segment["start"])
                    end_time = self._format_srt_timestamp(segment["end"])
                    f.write(f"{idx}\n")
                    f.write(f"{start_time} --> {end_time}\n")
                    f.write(f"{segment['text']}\n\n")
        except Exception as e:
            st.error(f"Error creating subtitles: {e}")
            raise
 def main():
     """Streamlit entry point: upload a video, dub it into Tamil, show the result.

     Pipeline: save the upload, transcribe it, translate the segments,
     synthesise one Tamil clip per segment, lay those clips over the video
     at the segment start times (replacing the original sound track),
     optionally overlay subtitles, then render, preview and offer the file
     for download. Any failure is reported with ``st.error`` instead of
     propagating.
     """
     st.title("Tamil Movie Dubbing System")
     st.sidebar.header("டப்பிங் அமைப்புகள்")  # "Dubbing settings" in Tamil
     # File uploader
     video_file = st.file_uploader("Upload Video File", type=['mp4', 'mov', 'avi'])
     if not video_file:
         return
     # Settings
     voice_type = st.selectbox("Select Voice", list(TAMIL_VOICES.keys()))
     with st.expander("Advanced Settings"):
         generate_subtitles = st.checkbox("Generate Tamil Subtitles", value=True)
         subtitle_size = st.slider("Subtitle Size", 16, 32, 24)
         subtitle_color = st.color_picker("Subtitle Color", "#FFFFFF")
     if not st.button("Start Tamil Dubbing"):
         return
     try:
         # Imported here so the names are in scope regardless of what the
         # top of the file imports.
         # NOTE(review): assumes MoviePy 1.x (`moviepy.editor`), which the
         # set_start/set_position calls below already imply - confirm.
         from moviepy.editor import AudioFileClip, CompositeAudioClip
         with st.spinner("Processing video..."):
             with TamilDubber() as dubber:
                 # Save uploaded video
                 temp_video_path = dubber.create_temp_file(".mp4")
                 with open(temp_video_path, "wb") as f:
                     f.write(video_file.read())
                 # Progress tracking
                 progress_bar = st.progress(0)
                 status_text = st.empty()
                 # Extract audio and transcribe
                 status_text.text("Extracting audio and transcribing...")
                 segments, _ = dubber.extract_audio(temp_video_path)
                 progress_bar.progress(0.25)
                 # Translate segments
                 status_text.text("Translating to Tamil...")
                 translated_segments = dubber.translate_segments(segments)
                 # A segment without text has nothing to speak or display.
                 translated_segments = [
                     seg for seg in translated_segments if seg["text"]
                 ]
                 progress_bar.progress(0.50)
                 # Generate Tamil audio
                 status_text.text("Generating Tamil audio...")
                 voice_style = TAMIL_VOICES[voice_type]["style"]
                 output_segments = []
                 for idx, segment in enumerate(translated_segments):
                     audio_path = dubber.generate_audio(segment["text"], voice_style)
                     output_segments.append({
                         "audio": audio_path,
                         "start": segment["start"],
                         "end": segment["end"]
                     })
                     progress_bar.progress(0.50 + (0.25 * (idx + 1) / len(translated_segments)))
                 # Generate subtitles if requested
                 if generate_subtitles:
                     subtitle_path = dubber.create_temp_file(".srt")
                     dubber.create_subtitles(translated_segments, subtitle_path)
                 # Create final video
                 status_text.text("Creating final video...")
                 output_path = dubber.create_temp_file(".mp4")
                 video = VideoFileClip(temp_video_path)
                 audio_clips = []
                 try:
                     # Place every synthesised clip at its segment's start
                     # time; without this step the output would keep the
                     # original sound track.
                     for item in output_segments:
                         clip = AudioFileClip(item["audio"])
                         audio_clips.append(clip.set_start(item["start"]))
                     if audio_clips:
                         dubbed_audio = CompositeAudioClip(audio_clips)
                         # Trim so speech cannot run past the end of the video.
                         dubbed_audio = dubbed_audio.set_duration(video.duration)
                         dubbed_video = video.set_audio(dubbed_audio)
                     else:
                         dubbed_video = video
                     # Add subtitles if enabled
                     if generate_subtitles:
                         def create_subtitle_clip(txt):
                             return TextClip(
                                 txt=txt,
                                 fontsize=subtitle_size,
                                 color=subtitle_color,
                                 stroke_color='black',
                                 stroke_width=1
                             )
                         subtitle_clips = []
                         for segment in translated_segments:
                             clip = create_subtitle_clip(segment["text"])
                             clip = clip.set_position(('center', 'bottom'))
                             clip = clip.set_start(segment["start"])
                             clip = clip.set_duration(segment["duration"])
                             subtitle_clips.append(clip)
                         final_video = CompositeVideoClip([dubbed_video] + subtitle_clips)
                     else:
                         final_video = dubbed_video
                     # Write final video
                     final_video.write_videofile(
                         output_path,
                         codec='libx264',
                         audio_codec='aac',
                         fps=video.fps
                     )
                 finally:
                     # Release file handles before the temp files are removed.
                     for clip in audio_clips:
                         clip.close()
                     video.close()
                 progress_bar.progress(1.0)
                 # Display result
                 st.success("டப்பிங் வெற்றிகரமாக முடிந்தது!")  # "Dubbing completed successfully" in Tamil
                 st.video(output_path)
                 # Download button
                 with open(output_path, "rb") as f:
                     st.download_button(
                         "Download Dubbed Video",
                         f,
                         file_name="tamil_dubbed_video.mp4",
                         mime="video/mp4"
                     )
     except Exception as e:
         st.error(f"An error occurred: {str(e)}")
 if __name__ == "__main__":
     main()