Comic-2 / app.py
AZLABS's picture
Update app.py
7c3a89a verified
raw
history blame
10.8 kB
import os
import json
import urllib.request
from PIL import Image
import cv2
import moviepy.editor as mp
import logging
import requests
import uuid
import time
import gradio as gr
from moviepy.editor import *
from moviepy.video.tools.subtitles import SubtitlesClip
from hercai import Hercai # Import the hercai library
from gtts import gTTS # Import gTTS for text-to-speech
from hercai import Hercai # Import the hercai module
# Configure logging for both file and console.
# LOG_DIRECTORY selects the folder that receives utils.log (default: the
# current working directory); LOG_LEVEL selects the verbosity (default: INFO).
log_dir = os.getenv('LOG_DIRECTORY', './')
if log_dir:
    # logging.FileHandler does not create missing parent directories, so make
    # sure the target folder exists before the handler opens the file.
    os.makedirs(str(log_dir), exist_ok=True)
LOGGER_FILE_PATH = os.path.join(str(log_dir), 'utils.log')

# One formatter shared by both handlers so file and console lines match.
formatter = logging.Formatter(
    '[%(asctime)s] [%(levelname)s] [%(filename)s] [%(lineno)s:%(funcName)s()] %(message)s',
    datefmt='%Y-%b-%d %H:%M:%S'
)

# Module-level logger used throughout this file.
LOGGER = logging.getLogger(__name__)

# File handler: appends to utils.log. The encoding is pinned to UTF-8 because
# the log messages contain non-ASCII symbols that a locale-default encoding
# may be unable to represent.
file_handler = logging.FileHandler(LOGGER_FILE_PATH, mode='a', encoding='utf-8')
file_handler.setFormatter(formatter)

# Console handler: StreamHandler() with no argument writes to sys.stderr.
console_handler = logging.StreamHandler()
console_handler.setFormatter(formatter)

# Attach both handlers to the logger.
LOGGER.addHandler(file_handler)
LOGGER.addHandler(console_handler)

# Resolve the log level. The lookup ignores case and surrounding whitespace,
# and any unrecognised value falls back to INFO.
log_level_env = os.getenv('LOG_LEVEL', 'INFO')
log_level_dict = {
    'DEBUG': logging.DEBUG,
    'INFO': logging.INFO,
    'WARNING': logging.WARNING,
    'ERROR': logging.ERROR,
    'CRITICAL': logging.CRITICAL
}
log_level = log_level_dict.get(log_level_env.strip().upper(), log_level_dict['INFO'])
LOGGER.setLevel(log_level)
# NOTE: Setting the path to the ImageMagick binary is no longer needed; the old override is kept below for reference only.
# os.environ["IMAGE_MAGICK_BINARY"] = "/usr/local/bin/convert"  # Update this path as needed if the override is ever re-enabled
class Text2Video:
    """Generate narrated videos from text prompts.

    Every prompt becomes one scene: an image is requested from Hercai, the
    prompt text is spoken with gTTS, and the image is shown for the length of
    the narration. The scenes are concatenated into a single MP4, and a small
    Gradio UI exposes the pipeline.
    """

    def __init__(self) -> None:
        """Create the Hercai client used for image generation."""
        # The HERCAI_API_KEY environment variable overrides the placeholder so
        # a real key does not have to be written into the source file.
        self.hercai_api_key = os.getenv("HERCAI_API_KEY", "YOUR_HERCAI_API_KEY")
        self.hercai = Hercai(self.hercai_api_key)  # Initialize Hercai
        self.hercai_base_url = "https://hercai.onrender.com/v3/text2image"
        LOGGER.info("πŸš€ Text2Video class initialized.")

    @staticmethod
    def _split_prompts(text: str) -> list:
        """Split ``text`` on double commas and drop blank segments."""
        return [part.strip() for part in (text or "").split(",,") if part.strip()]

    @staticmethod
    def _safe_stem(prompt: str, max_len: int = 9) -> str:
        """Return a file-name-safe stem built from the start of ``prompt``.

        Characters that are not alphanumeric (spaces, quotes, path
        separators, ...) are replaced by underscores so the stem can never
        point into another directory or form an invalid file name.
        """
        stem = "".join(ch if ch.isalnum() else "_" for ch in (prompt or "")[:max_len])
        return stem or "scene"

    @staticmethod
    def _add_bubble(video_clip, image_file: str, bubble_size: tuple = (150, 150)):
        """Overlay a thumbnail of ``image_file`` near the top-right of ``video_clip``."""
        frame_width = video_clip.size[0]
        # Start from the original 90 % offset but clamp it so the whole
        # thumbnail stays inside the frame.
        bubble_x = max(0, min(int(frame_width * 0.90), frame_width - bubble_size[0]))
        bubble_clip = (
            mp.ImageClip(image_file)
            .resize(bubble_size)
            # The overlay needs an explicit duration; without one it has no
            # end time and the composite's duration cannot be derived.
            .set_duration(video_clip.duration)
            .set_position((bubble_x, 0))
        )
        return mp.CompositeVideoClip([video_clip, bubble_clip])

    def generate_text(self, prompt: str) -> str:
        """Placeholder for text generation; currently always returns ""."""
        LOGGER.info(f"πŸ“ Generating text for prompt: {prompt}")
        # ... (This part is not needed for Hercai) ...
        return ""

    def get_image(self, img_prompt: str) -> str:
        """Request an image for ``img_prompt`` from Hercai.

        Returns:
            The URL of the generated image, or "" when the request fails or
            the response carries no usable ``url`` entry.
        """
        LOGGER.info(f"πŸ–ΌοΈ Generating image for prompt: {img_prompt}")
        try:
            image_result = self.hercai.draw_image(
                model="simurg",  # Choose a Hercai model
                prompt=img_prompt,
                negative_prompt="Dark and gloomy"
            )
            image_url = image_result['url']
        except (requests.exceptions.RequestException, KeyError, TypeError) as e:
            # KeyError/TypeError cover a response that is not a mapping with
            # a 'url' key.
            LOGGER.error(f"❌ Error generating image: {str(e)}")
            return ""
        if not image_url:
            LOGGER.error("Image service returned an empty URL.")
            return ""
        LOGGER.info(f"βœ… Generated image URL: {image_url}")
        return image_url

    def download_img_from_url(self, image_url: str, image_path: str) -> str:
        """Download ``image_url`` to ``image_path`` and resize it to 640x480.

        Returns:
            ``image_path`` on success, "" on any failure.
        """
        LOGGER.info(f"⬇️ Downloading image from URL: {image_url} to path: {image_path}")
        try:
            urllib.request.urlretrieve(image_url, image_path)
            # Resize after downloading. The source file is closed before the
            # resized copy is written back to the same path.
            with Image.open(image_path) as img:
                resized = img.resize((640, 480))  # Adjust resolution for reduced size
            resized.save(image_path)
            LOGGER.info(f"βœ… Image downloaded to: {image_path}")
            return image_path
        except Exception as e:
            LOGGER.error(f"❌ Error downloading image from URL: {e}")
            return ""

    def text_to_audio(self, text: str, audio_path: str) -> str:
        """Convert ``text`` to speech with gTTS and save it to ``audio_path``.

        Returns:
            ``audio_path`` on success, "" on any failure.
        """
        LOGGER.info(f"πŸ”Š Converting text to audio for text: {text}")
        try:
            tts = gTTS(text=text, lang='en')  # You can change the language ('en' for English)
            # gTTS.save() accepts only the target file; passing extra keyword
            # arguments such as a bitrate raises TypeError.
            tts.save(audio_path)
            LOGGER.info(f"βœ… Audio saved to: {audio_path}")
            return audio_path
        except Exception as e:
            LOGGER.error(f"❌ Error generating speech: {str(e)}")
            return ""

    # The transcription part has been removed as it's no longer needed
    def get_images_and_audio(self, list_prompts: list) -> tuple:
        """Generate one image and one narration file per prompt.

        A prompt contributes to the result only when BOTH its image and its
        audio were produced, so the two returned lists always have the same
        length and matching order.

        Returns:
            ``(image_paths, audio_paths)``.
        """
        LOGGER.info(f"πŸ–ΌοΈπŸ”Š Generating images and audio for prompts: {list_prompts}")
        img_list = []
        audio_paths = []
        for img_prompt in list_prompts:
            try:
                file_stem = f"{self._safe_stem(img_prompt)}_{uuid.uuid4().hex}"
                img_url = self.get_image(img_prompt)
                if not img_url:
                    LOGGER.warning(f"Skipping prompt without image URL: {img_prompt}")
                    continue
                image = self.download_img_from_url(img_url, f"{file_stem}.png")
                if not image:
                    LOGGER.warning(f"Skipping prompt whose image could not be downloaded: {img_prompt}")
                    continue
                audio = self.text_to_audio(img_prompt, f"{file_stem}.mp3")
                if not audio:
                    LOGGER.warning(f"Skipping prompt whose audio could not be generated: {img_prompt}")
                    continue
                img_list.append(image)
                audio_paths.append(audio)
                LOGGER.info(f"βœ… Processed prompt: {img_prompt}, Image: {image}, Audio: {audio}")
            except Exception as e:
                LOGGER.error(f"❌ Error processing prompt: {img_prompt}, {e}")
        return img_list, audio_paths

    def create_video_from_images_and_audio(self, image_files: list, audio_files: list, output_path: str) -> None:
        """Create a video from images and corresponding audio files with pop-up bubbles.

        Each image is shown for the duration of its audio file; the resulting
        clips are concatenated and written to ``output_path``. Errors are
        logged rather than raised, so callers should check whether
        ``output_path`` exists afterwards.
        """
        LOGGER.info(f"πŸŽ₯ Creating video from images: {image_files}, audio files: {audio_files}")
        closable_clips = []  # clips holding readers that must be released
        try:
            if len(image_files) != len(audio_files):
                LOGGER.error("❌ Error: Number of images and audio files don't match.")
                return
            video_clips = []
            for image_file, audio_file in zip(image_files, audio_files):
                if not (os.path.exists(audio_file) and os.path.exists(image_file)):
                    LOGGER.error(f"❌ Audio or image file missing for {image_file} or {audio_file}")
                    continue
                # Create the base video from the image and audio
                audio_clip = mp.AudioFileClip(audio_file)
                closable_clips.append(audio_clip)
                video_clip = (
                    mp.ImageClip(image_file)
                    .set_duration(audio_clip.duration)
                    .set_audio(audio_clip)
                )
                # The bubble is decorative: if it fails, keep the plain clip.
                try:
                    video_clip = self._add_bubble(video_clip, image_file)
                except Exception as ex:
                    LOGGER.error(f"❌ Error adding comic bubble: {str(ex)}")
                video_clips.append(video_clip)
                LOGGER.info(f"βœ… Created video clip for image: {image_file}, audio: {audio_file}")
            # Combine the generated clips into a single video
            if not video_clips:
                LOGGER.error("No usable clips were produced; video not written.")
                return
            final_clip = mp.concatenate_videoclips(video_clips)
            closable_clips.append(final_clip)
            final_clip.write_videofile(output_path, codec='libx264', fps=24)  # Optimized codec and fps settings
            LOGGER.info(f"βœ… Video created successfully at: {output_path}")
        except Exception as e:
            LOGGER.error(f"❌ Error creating video: {str(e)}")
        finally:
            for clip in closable_clips:
                try:
                    clip.close()
                except Exception as close_err:
                    LOGGER.debug(f"Ignoring error while closing clip: {close_err}")

    def generate_video(self, text: str) -> str:
        """
        Generate a video from a list of text prompts.

        Args:
            text (str): Text prompts separated by double commas.

        Returns:
            str: Path to the generated video file, or "" when no video could
            be produced.
        """
        LOGGER.info(f"🎬 Generating video for text: {text}")
        try:
            list_prompts = self._split_prompts(text)
            LOGGER.info(f"πŸ“ List of prompts: {list_prompts}")
            if not list_prompts:
                LOGGER.error("No prompts found in the input text.")
                return ""
            # Set the output path for the generated video
            output_path = f"output_video_{uuid.uuid4().hex[:8]}.mp4"
            LOGGER.info(f"πŸ“ Output path for video: {output_path}")
            # Generate images and audio
            img_list, audio_paths = self.get_images_and_audio(list_prompts)
            # Create video from images and audio
            self.create_video_from_images_and_audio(img_list, audio_paths, output_path)
            # The step above logs its errors instead of raising, so only
            # report the path when the file was really written.
            if not os.path.exists(output_path):
                LOGGER.error(f"Video file was not created: {output_path}")
                return ""
            return output_path
        except Exception as e:
            LOGGER.error(f"❌ Error generating video: {str(e)}")
            return ""

    def gradio_interface(self):
        """Create and launch the Gradio interface."""
        LOGGER.info("🌐 Launching Gradio interface.")
        with gr.Blocks(css="style.css", theme='abidlabs/dracula_revamped') as demo:
            example_txt = """once upon a time there was a village. It was a nice place to live, except for one thing. people did not like to share.,, One day a visitor came to town.
'Hello. Does anybody have food to share?' He asked. 'No', said everyone.,,
That's okay', said the visitor. 'I will make stone soup for everyone'.Then he took a stone and dropped it into a giant pot,,"""
            gr.HTML("""
<center><h1 style="color:#fff">Comics Video Generator</h1></center>""")
            with gr.Row(elem_id="col-container"):
                input_text = gr.Textbox(label="Comics Text", placeholder="Enter the comics by double comma separated")
            with gr.Row(elem_id="col-container"):
                button = gr.Button("Generate Video")
            with gr.Row(elem_id="col-container"):
                output = gr.Video()
            with gr.Row(elem_id="col-container"):
                gr.Examples([example_txt], input_text)
            # Clicking the button runs the pipeline and shows the video.
            button.click(self.generate_video, [input_text], output)
        demo.launch(debug=True)
        LOGGER.info("βœ… Gradio interface launched.")
if __name__ == "__main__":
    # Script entry point: log the start-up, then build the generator and
    # hand control to its Gradio UI.
    LOGGER.info("πŸš€ Starting Text2Video application.")
    Text2Video().gradio_interface()