import os import json import urllib.request from PIL import Image from gtts import gTTS import cv2 import moviepy.editor as mp import logging from hercai import Hercai import uuid import time import gradio as gr from typing import Tuple, List, Optional import numpy as np from concurrent.futures import ThreadPoolExecutor from functools import partial import tempfile import contextlib # Configure logging with console output and rotation logging.basicConfig( level=logging.INFO, format='[%(asctime)s] [%(levelname)s] %(message)s', handlers=[ logging.FileHandler('app.log', mode='a'), logging.StreamHandler(), logging.handlers.RotatingFileHandler( 'app.log', maxBytes=1024*1024, backupCount=5 ) ] ) LOGGER = logging.getLogger(__name__) class ResourceManager: """Manage temporary resources and cleanup.""" def __init__(self): self.temp_files = set() def add_temp_file(self, filepath: str) -> None: self.temp_files.add(filepath) def cleanup(self) -> None: for file in self.temp_files: try: if os.path.exists(file): os.remove(file) except Exception as e: LOGGER.warning(f"Failed to remove temporary file {file}: {e}") self.temp_files.clear() class Text2Video: def __init__(self) -> None: """Initialize the Text2Video class.""" LOGGER.info("Initializing Text2Video application...") self.herc = Hercai() self.resource_manager = ResourceManager() self.max_workers = min(os.cpu_count() or 1, 4) # Limit concurrent tasks LOGGER.info("Initialization complete") def _enhance_prompt(self, prompt: str) -> str: """Enhance the prompt with comic-style elements.""" return ( f"{prompt}, comic book style, full scene composition, " "vibrant colors, clear speech bubbles with text, " "dramatic lighting, high contrast, detailed backgrounds, " "comic book panel layout, professional illustration" ) @staticmethod def _create_unique_filename(prefix: str, suffix: str) -> str: """Create a unique filename with given prefix and suffix.""" return f"{prefix}_{uuid.uuid4().hex[:8]}{suffix}" async def get_image(self, img_prompt: str) -> Optional[str]: """Generate an image based on the provided text prompt.""" try: LOGGER.info(f"🎨 Starting image generation for prompt: {img_prompt}") enhanced_prompt = self._enhance_prompt(img_prompt) image_result = await self.herc.draw_image( model="v3", prompt=enhanced_prompt, negative_prompt="blurry, cropped, low quality, dark, gloomy" ) return image_result["url"] except Exception as e: LOGGER.error(f"❌ Error generating image: {str(e)}") raise def download_img_from_url(self, image_url: str, image_path: str) -> str: """Download and process image from URL with improved error handling.""" try: LOGGER.info(f"📥 Downloading image from: {image_url}") with tempfile.NamedTemporaryFile(delete=False) as temp_file: urllib.request.urlretrieve(image_url, temp_file.name) with Image.open(temp_file.name) as img: # Convert to RGB if necessary if img.mode != 'RGB': img = img.convert('RGB') # Resize with proper aspect ratio target_size = (1024, 1024) img.thumbnail(target_size, Image.Resampling.LANCZOS) # Create new image with padding new_img = Image.new('RGB', target_size, (255, 255, 255)) offset = ((target_size[0] - img.size[0]) // 2, (target_size[1] - img.size[1]) // 2) new_img.paste(img, offset) # Save with optimization new_img.save(image_path, 'PNG', optimize=True) self.resource_manager.add_temp_file(image_path) return image_path except Exception as e: LOGGER.error(f"❌ Error processing image: {str(e)}") raise finally: if os.path.exists(temp_file.name): os.unlink(temp_file.name) async def process_scene(self, prompt: str, idx: int) -> Tuple[str, str]: """Process a single scene (image and audio) concurrently.""" try: image_path = self._create_unique_filename(f"scene_{idx}", ".png") audio_path = self._create_unique_filename(f"audio_{idx}", ".mp3") # Generate image image_url = await self.get_image(prompt) image_path = self.download_img_from_url(image_url, image_path) # Generate audio audio_path = self.text_to_audio(prompt, audio_path) return image_path, audio_path except Exception as e: LOGGER.error(f"Error processing scene {idx}: {e}") raise async def generate_video(self, text: str) -> str: """Main function to generate video from text with improved concurrency.""" try: LOGGER.info("🎬 Starting video generation process") list_prompts = [s.strip() for s in text.split(",,") if s.strip()] output_path = self._create_unique_filename("comic_video", ".mp4") self.resource_manager.add_temp_file(output_path) # Process scenes concurrently scenes = [] async with ThreadPoolExecutor(max_workers=self.max_workers) as executor: for idx, prompt in enumerate(list_prompts, 1): scene = await self.process_scene(prompt, idx) scenes.append(scene) # Create video img_list = [scene[0] for scene in scenes] audio_paths = [scene[1] for scene in scenes] await self.create_video_from_images_and_audio(img_list, audio_paths, output_path) return output_path except Exception as e: LOGGER.error(f"❌ Error in video generation: {str(e)}") raise finally: self.resource_manager.cleanup() def gradio_interface(self): """Create Gradio interface with improved styling.""" LOGGER.info("🌐 Initializing Gradio interface") css = """ .gradio-container { font-family: 'Arial', sans-serif; max-width: 1200px; margin: auto; } .header { text-align: center; padding: 2rem; background: linear-gradient(135deg, #6e8efb, #a777e3); color: white; border-radius: 10px; margin-bottom: 2rem; } """ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo: gr.HTML("""
Transform your story into an animated comic!