Spaces:

AZLABS
/

Comic-2

Running

App Files Files Community

AZLABS committed on Nov 4, 2024

Commit

49f4a30

verified ·

1 Parent(s): bd0d6f3

Update app.py

Browse files

Files changed (1) hide show

app.py +142 -146

app.py CHANGED Viewed

@@ -10,225 +10,221 @@ from hercai import Hercai
 import uuid
 import time
 import gradio as gr
-from typing import Tuple, List, Optional
 import numpy as np
-from concurrent.futures import ThreadPoolExecutor
-from functools import partial
-import tempfile
-import contextlib
-# Configure logging with console output and rotation
 logging.basicConfig(
     level=logging.INFO,
-    format='[%(asctime)s] [%(levelname)s] %(message)s',
     handlers=[
-        logging.FileHandler('app.log', mode='a'),
-        logging.StreamHandler(),
-        logging.handlers.RotatingFileHandler(
-            'app.log', maxBytes=1024*1024, backupCount=5
-        )
     ]
 )
 LOGGER = logging.getLogger(__name__)
-class ResourceManager:
-    """Manage temporary resources and cleanup."""
-    def __init__(self):
-        self.temp_files = set()
-    def add_temp_file(self, filepath: str) -> None:
-        self.temp_files.add(filepath)
-    def cleanup(self) -> None:
-        for file in self.temp_files:
-            try:
-                if os.path.exists(file):
-                    os.remove(file)
-            except Exception as e:
-                LOGGER.warning(f"Failed to remove temporary file {file}: {e}")
-        self.temp_files.clear()
 class Text2Video:
     def __init__(self) -> None:
         """Initialize the Text2Video class."""
         LOGGER.info("Initializing Text2Video application...")
         self.herc = Hercai()
-        self.resource_manager = ResourceManager()
-        self.max_workers = min(os.cpu_count() or 1, 4)  # Limit concurrent tasks
-        LOGGER.info("Initialization complete")
-    def _enhance_prompt(self, prompt: str) -> str:
-        """Enhance the prompt with comic-style elements."""
-        return (
-            f"{prompt}, comic book style, full scene composition, "
-            "vibrant colors, clear speech bubbles with text, "
-            "dramatic lighting, high contrast, detailed backgrounds, "
-            "comic book panel layout, professional illustration"
-        )
-    @staticmethod
-    def _create_unique_filename(prefix: str, suffix: str) -> str:
-        """Create a unique filename with given prefix and suffix."""
-        return f"{prefix}_{uuid.uuid4().hex[:8]}{suffix}"
-    async def get_image(self, img_prompt: str) -> Optional[str]:
         """Generate an image based on the provided text prompt."""
         try:
             LOGGER.info(f"🎨 Starting image generation for prompt: {img_prompt}")
-            enhanced_prompt = self._enhance_prompt(img_prompt)
-            image_result = await self.herc.draw_image(
-                model="v3",
-                prompt=enhanced_prompt,
                 negative_prompt="blurry, cropped, low quality, dark, gloomy"
             )
-            return image_result["url"]
         except Exception as e:
             LOGGER.error(f"❌ Error generating image: {str(e)}")
             raise
     def download_img_from_url(self, image_url: str, image_path: str) -> str:
-        """Download and process image from URL with improved error handling."""
         try:
-            LOGGER.info(f"📥 Downloading image from: {image_url}")
-            with tempfile.NamedTemporaryFile(delete=False) as temp_file:
-                urllib.request.urlretrieve(image_url, temp_file.name)
-                with Image.open(temp_file.name) as img:
-                    # Convert to RGB if necessary
-                    if img.mode != 'RGB':
-                        img = img.convert('RGB')
-                    # Resize with proper aspect ratio
-                    target_size = (1024, 1024)
-                    img.thumbnail(target_size, Image.Resampling.LANCZOS)
-                    # Create new image with padding
-                    new_img = Image.new('RGB', target_size, (255, 255, 255))
-                    offset = ((target_size[0] - img.size[0]) // 2,
-                             (target_size[1] - img.size[1]) // 2)
-                    new_img.paste(img, offset)
-                    # Save with optimization
-                    new_img.save(image_path, 'PNG', optimize=True)
-            self.resource_manager.add_temp_file(image_path)
             return image_path
         except Exception as e:
-            LOGGER.error(f"❌ Error processing image: {str(e)}")
             raise
-        finally:
-            if os.path.exists(temp_file.name):
-                os.unlink(temp_file.name)
-    async def process_scene(self, prompt: str, idx: int) -> Tuple[str, str]:
-        """Process a single scene (image and audio) concurrently."""
         try:
-            image_path = self._create_unique_filename(f"scene_{idx}", ".png")
-            audio_path = self._create_unique_filename(f"audio_{idx}", ".mp3")
-            # Generate image
-            image_url = await self.get_image(prompt)
-            image_path = self.download_img_from_url(image_url, image_path)
-            # Generate audio
-            audio_path = self.text_to_audio(prompt, audio_path)
-            return image_path, audio_path
         except Exception as e:
-            LOGGER.error(f"Error processing scene {idx}: {e}")
             raise
-    async def generate_video(self, text: str) -> str:
-        """Main function to generate video from text with improved concurrency."""
         try:
             LOGGER.info("🎬 Starting video generation process")
-            list_prompts = [s.strip() for s in text.split(",,") if s.strip()]
-            output_path = self._create_unique_filename("comic_video", ".mp4")
-            self.resource_manager.add_temp_file(output_path)
-            # Process scenes concurrently
-            scenes = []
-            async with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
-                for idx, prompt in enumerate(list_prompts, 1):
-                    scene = await self.process_scene(prompt, idx)
-                    scenes.append(scene)
-            # Create video
-            img_list = [scene[0] for scene in scenes]
-            audio_paths = [scene[1] for scene in scenes]
-            await self.create_video_from_images_and_audio(img_list, audio_paths, output_path)
             return output_path
         except Exception as e:
             LOGGER.error(f"❌ Error in video generation: {str(e)}")
             raise
-        finally:
-            self.resource_manager.cleanup()
     def gradio_interface(self):
-        """Create Gradio interface with improved styling."""
         LOGGER.info("🌐 Initializing Gradio interface")
-        css = """
-        .gradio-container {
-            font-family: 'Arial', sans-serif;
-            max-width: 1200px;
-            margin: auto;
-        }
-        .header {
-            text-align: center;
-            padding: 2rem;
-            background: linear-gradient(135deg, #6e8efb, #a777e3);
-            color: white;
-            border-radius: 10px;
-            margin-bottom: 2rem;
-        }
-        """
-        with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
             gr.HTML("""
-                <div class="header">
-                    <h1>🎬 Comic Video Generator</h1>
-                    <p>Transform your story into an animated comic!</p>
-                </div>
             """)
             with gr.Row():
                 input_text = gr.Textbox(
                     label="Comic Script",
-                    placeholder="Enter your story (separate scenes with ,,)",
-                    lines=5
                 )
             with gr.Row():
-                generate_btn = gr.Button("🎬 Generate Video", variant="primary")
             with gr.Row():
                 output = gr.Video(label="Generated Comic Video")
-            example_txt = """Once upon a time in a magical forest,,
-                           A brave knight discovered a mysterious crystal,,
-                           The crystal began to glow with incredible power"""
             gr.Examples([[example_txt]], [input_text])
-            generate_btn.click(
-                fn=self.generate_video,
-                inputs=[input_text],
-                outputs=[output],
-                api_name="generate_video"
-            )
         LOGGER.info("✅ Gradio interface initialized")
-        demo.launch(debug=True, show_error=True)
 if __name__ == "__main__":
     text2video = Text2Video()

 import uuid
 import time
 import gradio as gr
+from typing import Tuple, List
 import numpy as np
+# Configure logging: records at INFO level and above are timestamped and sent
+# to both the 'app.log' file and the console.
 logging.basicConfig(
     level=logging.INFO,
+    format='[%(asctime)s] %(message)s',
     handlers=[
+        logging.FileHandler('app.log'),
+        logging.StreamHandler()  # This will print to console
     ]
 )
+# Module-level logger used throughout this file.
 LOGGER = logging.getLogger(__name__)
 class Text2Video:
     def __init__(self) -> None:
         """Initialize the Text2Video class."""
         LOGGER.info("Initializing Text2Video application...")
+        # Client object whose draw_image() is called by get_image().
         self.herc = Hercai()
+        LOGGER.info("Hercai API initialized successfully")
+    def get_image(self, img_prompt: str) -> str:
         """Generate an image based on the provided text prompt."""
+        # Appends a fixed set of comic-style keywords to img_prompt, asks the
+        # Hercai client to draw it with the "simurg" model, and returns the
+        # "url" entry of the response. Any exception is logged and re-raised.
         try:
             LOGGER.info(f"🎨 Starting image generation for prompt: {img_prompt}")
+            # Enhanced prompt for better comic-style results
+            comic_style_prompt = (
+                f"{img_prompt}, comic book style, full scene composition, "
+                "vibrant colors, clear speech bubbles with text, "
+                "dramatic lighting, high contrast, detailed backgrounds, "
+                "comic book panel layout, professional illustration"
+            )
+            LOGGER.info("📝 Enhanced prompt with comic style elements")
+            # Plain literal: the message has no placeholders, so no f-prefix.
+            LOGGER.info("🔄 Sending request to Hercai API...")
+            image_result = self.herc.draw_image(
+                model="simurg",
+                prompt=comic_style_prompt,
                 negative_prompt="blurry, cropped, low quality, dark, gloomy"
             )
+            image_url = image_result["url"]
+            LOGGER.info(f"✅ Image generated successfully: {image_url}")
+            return image_url
         except Exception as e:
             LOGGER.error(f"❌ Error generating image: {str(e)}")
             raise
     def download_img_from_url(self, image_url: str, image_path: str) -> str:
+        """Download an image and normalise it to the video frame size.
+
+        Fetches image_url into image_path, resizes the picture to 1792x1024
+        (the source aspect ratio is not preserved) and overwrites image_path
+        with the resized version.
+
+        Args:
+            image_url: Location of the source image.
+            image_path: Local file to write the image to.
+
+        Returns:
+            image_path, unchanged.
+
+        Raises:
+            Exception: Any download or image-processing error is logged and
+                re-raised.
+        """
         try:
+            urllib.request.urlretrieve(image_url, image_path)
+            target_size = (1792, 1024)
+            # Open through a context manager so the file handle is released
+            # even if resizing fails; resize() returns a separate image.
+            with Image.open(image_path) as img:
+                resized = img.resize(target_size, Image.Resampling.LANCZOS)
+            # Save only after the source handle is closed, because the output
+            # overwrites the very file that was just read.
+            resized.save(image_path, quality=95)
+            LOGGER.info(f"Successfully downloaded and processed image: {image_path}")
             return image_path
         except Exception as e:
+            LOGGER.error(f"Error downloading image: {e}")
             raise
+    def text_to_audio(self, img_prompt: str, audio_path: str) -> str:
+        """Synthesise English speech for a prompt and write it to audio_path.
+
+        Returns audio_path; any failure is logged and re-raised.
+        """
         try:
+            LOGGER.info(f"🔊 Converting text to audio: {img_prompt}")
+            # English voice, slow mode off.
+            speech = gTTS(text=img_prompt, lang='en', slow=False)
+            LOGGER.info("📝 Audio conversion complete")
+            speech.save(audio_path)
+            LOGGER.info(f"✅ Audio saved to: {audio_path}")
+        except Exception as err:
+            LOGGER.error(f"❌ Error in audio conversion: {str(err)}")
+            raise
+        return audio_path
+    def get_images_and_audio(self, list_prompts: List[str]) -> Tuple[List[str], List[str]]:
+        """Produce one image file and one audio file for every prompt.
+
+        Prompts are handled in order; scene N gets "scene_N_<id>.png" and
+        "audio_N_<id>.mp3", where <id> is a random 8-character hex tag shared
+        by both files. The first failure is logged and re-raised.
+
+        Returns:
+            A pair (image paths, audio paths), index-aligned with list_prompts.
+        """
+        total = len(list_prompts)
+        scene_images = []
+        scene_audio = []
+        LOGGER.info(f"🎬 Starting batch processing of {total} prompts")
+        for number, prompt in enumerate(list_prompts, start=1):
+            try:
+                LOGGER.info(f"📍 Processing prompt {number}/{total}")
+                # One tag per scene keeps its image and audio names paired.
+                tag = uuid.uuid4().hex[:8]
+                # Image first: generate remotely, then download and resize.
+                url = self.get_image(prompt)
+                scene_images.append(
+                    self.download_img_from_url(url, f"scene_{number}_{tag}.png")
+                )
+                # Then the narration for the same prompt.
+                scene_audio.append(
+                    self.text_to_audio(prompt, f"audio_{number}_{tag}.mp3")
+                )
+                LOGGER.info(f"✅ Completed processing prompt {number}")
+            except Exception as err:
+                LOGGER.error(f"❌ Error processing prompt {number}: {str(err)}")
+                raise
+        return scene_images, scene_audio
+    def create_video_from_images_and_audio(self, image_files: List[str],
+                                         audio_files: List[str],
+                                         output_path: str) -> None:
+        """Assemble one video from paired scene images and narration audio.
+
+        Each image is shown for exactly as long as its audio clip lasts, the
+        scenes are concatenated in order, and the result is written to
+        output_path (libx264 video at 24 fps, AAC audio at 192k).
+
+        Args:
+            image_files: Paths of the scene images, in playback order.
+            audio_files: Paths of the narration files; audio_files[i] is
+                paired with image_files[i].
+            output_path: Destination video file.
+
+        Raises:
+            ValueError: If the two lists differ in length or are empty.
+            Exception: Any other failure is logged and re-raised.
+        """
+        # Clips are tracked so the finally block can close them and release
+        # their underlying readers even when rendering fails.
+        audio_clips = []
+        final_clip = None
+        try:
+            LOGGER.info("🎥 Starting video creation process")
+            if len(image_files) != len(audio_files):
+                raise ValueError("Number of images and audio files don't match")
+            # Fail early with a clear message instead of letting the
+            # concatenation step choke on an empty clip list.
+            if not image_files:
+                raise ValueError("No scenes to render: image and audio lists are empty")
+            video_clips = []
+            for idx, (image_file, audio_file) in enumerate(zip(image_files, audio_files), 1):
+                LOGGER.info(f"🔄 Processing scene {idx}/{len(image_files)}")
+                # Load audio and create video clip
+                audio_clip = mp.AudioFileClip(audio_file)
+                audio_clips.append(audio_clip)
+                video_clip = mp.ImageClip(image_file).set_duration(audio_clip.duration)
+                video_clip = video_clip.set_audio(audio_clip)
+                video_clips.append(video_clip)
+                LOGGER.info(f"✅ Scene {idx} processed successfully")
+            LOGGER.info("🔄 Concatenating all scenes")
+            final_clip = mp.concatenate_videoclips(video_clips)
+            LOGGER.info("💾 Writing final video file")
+            final_clip.write_videofile(
+                output_path,
+                codec='libx264',
+                fps=24,
+                audio_codec='aac',
+                audio_bitrate='192k',
+                preset='medium'
+            )
+            LOGGER.info("✅ Video created successfully")
         except Exception as e:
+            LOGGER.error(f"❌ Error in video creation: {str(e)}")
             raise
+        finally:
+            # Best-effort cleanup: a close() failure must not mask the
+            # original error, so it is only logged.
+            for clip in [final_clip, *audio_clips]:
+                if clip is None:
+                    continue
+                try:
+                    clip.close()
+                except Exception as close_err:
+                    LOGGER.warning(f"⚠️ Failed to close clip: {close_err}")
+    def generate_video(self, text: str) -> str:
+        """Turn a comic script into a narrated video file.
+
+        The script is split on ",," into scenes; blank scenes are dropped.
+        Each scene gets an image and an audio file, which are then combined
+        into "comic_video_<id>.mp4" (<id> is a random 8-character hex tag).
+
+        Args:
+            text: The script; scenes are separated by ",,".
+
+        Returns:
+            Path of the generated video file.
+
+        Raises:
+            ValueError: If text contains no non-empty scene.
+            Exception: Any other failure is logged and re-raised.
+        """
         try:
             LOGGER.info("🎬 Starting video generation process")
+            # Split text into prompts; "text or ''" tolerates a None input.
+            stripped_parts = (part.strip() for part in (text or "").split(",,"))
+            list_prompts = [part for part in stripped_parts if part]
+            LOGGER.info(f"📝 Processed {len(list_prompts)} scenes from input text")
+            # Reject an empty script up front rather than failing later
+            # during video assembly with a less helpful error.
+            if not list_prompts:
+                raise ValueError(
+                    "No scenes found: enter at least one scene (separate scenes with ,,)"
+                )
+            output_path = f"comic_video_{uuid.uuid4().hex[:8]}.mp4"
+            # Generate images and audio
+            img_list, audio_paths = self.get_images_and_audio(list_prompts)
+            # Create final video
+            self.create_video_from_images_and_audio(img_list, audio_paths, output_path)
+            LOGGER.info(f"✅ Video generation completed: {output_path}")
             return output_path
         except Exception as e:
             LOGGER.error(f"❌ Error in video generation: {str(e)}")
             raise
     def gradio_interface(self):
+        """Build the Gradio Blocks UI and launch it.
+
+        The page holds a script textbox, a generate button and a video
+        output. Clicking the button passes the textbox value to
+        generate_video and routes its return value to the video component.
+        """
         LOGGER.info("🌐 Initializing Gradio interface")
+        with gr.Blocks(theme='abidlabs/dracula_revamped') as demo:
             gr.HTML("""
+                <center><h1 style="color:#fff">Comic Video Generator</h1></center>
             """)
             with gr.Row():
                 input_text = gr.Textbox(
                     label="Comic Script",
+                    placeholder="Enter your story (separate scenes with ,,)"
                 )
             with gr.Row():
+                generate_btn = gr.Button("🎬 Generate Video")
             with gr.Row():
                 output = gr.Video(label="Generated Comic Video")
+            # Example script: three scenes separated by ",,", the same
+            # delimiter generate_video splits on.
+            example_txt = """Once upon a time in a magical forest,, A brave knight discovered a mysterious crystal,, The crystal began to glow with incredible power"""
             gr.Examples([[example_txt]], [input_text])
+            generate_btn.click(self.generate_video, inputs=[input_text], outputs=[output])
         LOGGER.info("✅ Gradio interface initialized")
+        demo.launch(debug=True)
 if __name__ == "__main__":
     text2video = Text2Video()