import os
import json
import urllib.request
import logging
import uuid
import time

import requests
import cv2
import gradio as gr
import moviepy.editor as mp
from moviepy.editor import *  # provides CompositeVideoClip used for the bubble overlay
from moviepy.video.tools.subtitles import SubtitlesClip
from PIL import Image
from gtts import gTTS  # text-to-speech for the narration track
from hercai import Hercai  # Hercai client for text-to-image

# --- Logging: one file handler + one console handler, shared format ---
log_dir = os.getenv('LOG_DIRECTORY', './')
LOGGER_FILE_PATH = os.path.join(str(log_dir), 'utils.log')

formatter = logging.Formatter(
    '[%(asctime)s] [%(levelname)s] [%(filename)s] [%(lineno)s:%(funcName)s()] %(message)s',
    datefmt='%Y-%b-%d %H:%M:%S'
)

LOGGER = logging.getLogger(__name__)

# Guard against duplicate handlers: without this, re-importing the module
# added a second pair of handlers and every log line was emitted twice.
if not LOGGER.handlers:
    file_handler = logging.FileHandler(LOGGER_FILE_PATH, mode='a')
    file_handler.setFormatter(formatter)
    LOGGER.addHandler(file_handler)

    console_handler = logging.StreamHandler()
    console_handler.setFormatter(formatter)
    LOGGER.addHandler(console_handler)

# Resolve the log level from the environment, falling back to INFO for
# unset or unrecognized values.
log_level_dict = {
    'DEBUG': logging.DEBUG,
    'INFO': logging.INFO,
    'WARNING': logging.WARNING,
    'ERROR': logging.ERROR,
    'CRITICAL': logging.CRITICAL,
}
LOGGER.setLevel(log_level_dict.get(os.getenv('LOG_LEVEL', 'INFO'), logging.INFO))


class Text2Video:
    """Generate a narrated slideshow video from ",,"-separated text prompts.

    Pipeline: Hercai text-to-image -> download + resize -> gTTS narration ->
    moviepy assembly (one clip per prompt, concatenated) -> Gradio front-end.
    """

    def __init__(self) -> None:
        """Initialize the Hercai client used for image generation."""
        self.hercai_api_key = "YOUR_HERCAI_API_KEY"  # TODO: replace with a real Hercai API key
        self.hercai = Hercai(self.hercai_api_key)
        self.hercai_base_url = "https://hercai.onrender.com/v3/text2image"
        LOGGER.info("🚀 Text2Video class initialized.")

    def generate_text(self, prompt: str) -> str:
        """Placeholder kept for interface compatibility; text generation is
        not needed with Hercai, so this always returns an empty string."""
        LOGGER.info(f"📝 Generating text for prompt: {prompt}")
        # ... (This part is not needed for Hercai) ...
        return ""

    def get_image(self, img_prompt: str) -> str:
        """Generate an image for `img_prompt` via Hercai.

        Returns:
            str: URL of the generated image, or "" on failure.
        """
        LOGGER.info(f"🖼️ Generating image for prompt: {img_prompt}")
        try:
            image_result = self.hercai.draw_image(
                model="simurg",  # Hercai image model
                prompt=img_prompt,
                negative_prompt="Dark and gloomy"
            )
            image_url = image_result['url']
            LOGGER.info(f"✅ Generated image URL: {image_url}")
            return image_url
        except Exception as e:
            # Broadened from requests.exceptions.RequestException: the hercai
            # client raises its own exception types, which previously escaped
            # this handler and crashed the batch.
            LOGGER.error(f"❌ Error generating image: {str(e)}")
            return ""

    def download_img_from_url(self, image_url: str, image_path: str) -> str:
        """Download an image from `image_url` to `image_path` and shrink it.

        Returns:
            str: the local path on success, or "" on failure.
        """
        LOGGER.info(f"⬇️ Downloading image from URL: {image_url} to path: {image_path}")
        try:
            urllib.request.urlretrieve(image_url, image_path)
            # Resize after downloading to keep the final video small.
            with Image.open(image_path) as img:
                img.resize((640, 480)).save(image_path)
            LOGGER.info(f"✅ Image downloaded to: {image_path}")
            return image_path
        except Exception as e:
            LOGGER.error(f"❌ Error downloading image from URL: {e}")
            return ""

    def text_to_audio(self, text: str, audio_path: str) -> str:
        """Convert `text` to an MP3 narration file using gTTS.

        Returns:
            str: the audio path on success, or "" on failure.
        """
        LOGGER.info(f"🔊 Converting text to audio for text: {text}")
        try:
            tts = gTTS(text=text, lang='en')  # 'en' = English narration
            # BUGFIX: gTTS.save() accepts only the path; the previous
            # `bitrate="128k"` keyword raised TypeError on every call, so no
            # audio was ever produced.
            tts.save(audio_path)
            LOGGER.info(f"✅ Audio saved to: {audio_path}")
            return audio_path
        except Exception as e:
            LOGGER.error(f"❌ Error generating speech: {str(e)}")
            return ""

    def get_images_and_audio(self, list_prompts: list) -> tuple:
        """Generate an (image, audio) pair for each prompt.

        Only prompts for which BOTH the image and the audio succeed are kept,
        so the returned lists are always the same length and index-aligned
        (the previous version could append an image without its audio, or an
        empty-string path, desynchronizing the lists downstream).

        Returns:
            tuple[list, list]: (image_paths, audio_paths), index-aligned.
        """
        LOGGER.info(f"🖼️🔊 Generating images and audio for prompts: {list_prompts}")
        img_list = []
        audio_paths = []
        for img_prompt in list_prompts:
            try:
                unique_id = uuid.uuid4().hex
                image_path = f"{img_prompt[:9]}_{unique_id}.png"
                img_url = self.get_image(img_prompt)
                if not img_url:
                    continue  # image generation failed; skip this prompt
                image = self.download_img_from_url(img_url, image_path)
                if not image:
                    continue  # download failed; skip this prompt
                audio_path = f"{img_prompt[:9]}_{unique_id}.mp3"
                audio = self.text_to_audio(img_prompt, audio_path)
                if not audio:
                    continue  # narration failed; skip to keep lists aligned
                img_list.append(image)
                audio_paths.append(audio)
                LOGGER.info(f"✅ Processed prompt: {img_prompt}, Image: {image}, Audio: {audio}")
            except Exception as e:
                LOGGER.error(f"❌ Error processing prompt: {img_prompt}, {e}")
        return img_list, audio_paths

    def create_video_from_images_and_audio(self, image_files: list,
                                           audio_files: list,
                                           output_path: str) -> None:
        """Assemble the final video: one clip per (image, audio) pair, each
        with a small "bubble" overlay of the image in the top-right corner,
        concatenated and written as H.264 at 24 fps."""
        LOGGER.info(f"🎥 Creating video from images: {image_files}, audio files: {audio_files}")
        try:
            if len(image_files) != len(audio_files):
                LOGGER.error("❌ Error: Number of images and audio files don't match.")
                return
            video_clips = []
            for image_file, audio_file in zip(image_files, audio_files):
                if not os.path.exists(audio_file) or not os.path.exists(image_file):
                    LOGGER.error(f"❌ Audio or image file missing for {image_file} or {audio_file}")
                    continue
                # Base clip: still image shown for the narration's duration.
                audio_clip = mp.AudioFileClip(audio_file)
                video_clip = mp.ImageClip(image_file).set_duration(audio_clip.duration)
                video_clip = video_clip.set_audio(audio_clip)
                # Overlay a small copy of the image ("bubble") in the
                # top-right corner. Failure here is non-fatal: the plain
                # clip is used instead.
                try:
                    with Image.open(image_file) as img:
                        width, height = img.size
                    bubble_clip = mp.ImageClip(image_file).resize((150, 150))
                    bubble_clip = bubble_clip.set_position((int(width * 0.90), 0))
                    video_clip = CompositeVideoClip([video_clip, bubble_clip])
                except Exception as ex:
                    LOGGER.error(f"❌ Error adding comic bubble: {str(ex)}")
                video_clips.append(video_clip)
                LOGGER.info(f"✅ Created video clip for image: {image_file}, audio: {audio_file}")
            if video_clips:  # only write a file if at least one clip exists
                final_clip = mp.concatenate_videoclips(video_clips)
                final_clip.write_videofile(output_path, codec='libx264', fps=24)
                LOGGER.info(f"✅ Video created successfully at: {output_path}")
        except Exception as e:
            LOGGER.error(f"❌ Error creating video: {str(e)}")

    def generate_video(self, text: str) -> str:
        """
        Generate a video from a list of text prompts.

        Args:
            text (str): Text prompts separated by double commas.

        Returns:
            str: Path to the generated video file, or "" on failure.
        """
        LOGGER.info(f"🎬 Generating video for text: {text}")
        try:
            list_prompts = [sentence.strip() for sentence in text.split(",,") if sentence.strip()]
            LOGGER.info(f"📝 List of prompts: {list_prompts}")
            # Unique output name so concurrent/repeated runs don't collide.
            output_path = f"output_video_{uuid.uuid4().hex[:8]}.mp4"
            LOGGER.info(f"📁 Output path for video: {output_path}")
            img_list, audio_paths = self.get_images_and_audio(list_prompts)
            self.create_video_from_images_and_audio(img_list, audio_paths, output_path)
            return output_path
        except Exception as e:
            LOGGER.error(f"❌ Error generating video: {str(e)}")
            return ""

    def gradio_interface(self):
        """Create and launch the Gradio interface (blocks until closed)."""
        LOGGER.info("🌐 Launching Gradio interface.")
        with gr.Blocks(css="style.css", theme='abidlabs/dracula_revamped') as demo:
            example_txt = """once upon a time there was a village. It was a nice place to live, except for one thing. people did not like to share.,, One day a visitor came to town. 'Hello. Does anybody have food to share?' He asked. 'No', said everyone.,, That's okay', said the visitor. 'I will make stone soup for everyone'.Then he took a stone and dropped it into a giant pot,,"""
            # NOTE(review): the original HTML markup appears to have been lost
            # in extraction; only the title text survives. Restore real markup
            # from the project's repo if available.
            gr.HTML("""
Comics Video Generator
""")
            with gr.Row(elem_id="col-container"):
                input_text = gr.Textbox(label="Comics Text",
                                        placeholder="Enter the comics by double comma separated")
            with gr.Row(elem_id="col-container"):
                button = gr.Button("Generate Video")
            with gr.Row(elem_id="col-container"):
                output = gr.Video()
            with gr.Row(elem_id="col-container"):
                example = gr.Examples([example_txt], input_text)
            button.click(self.generate_video, [input_text], output)
            demo.launch(debug=True)
        LOGGER.info("✅ Gradio interface launched.")


if __name__ == "__main__":
    LOGGER.info("🚀 Starting Text2Video application.")
    text2video = Text2Video()
    text2video.gradio_interface()