|
import os |
|
import json |
|
import urllib.request |
|
from PIL import Image |
|
import cv2 |
|
import moviepy.editor as mp |
|
import logging |
|
import requests |
|
import uuid |
|
import time |
|
import gradio as gr |
|
from moviepy.editor import * |
|
from moviepy.video.tools.subtitles import SubtitlesClip |
|
from hercai import Hercai |
|
from gtts import gTTS |
|
from hercai import Hercai |
|
|
|
|
|
# --- Module-level logging configuration -------------------------------------
# The log file is written to $LOG_DIRECTORY/utils.log (current directory when
# the variable is unset) and every record is mirrored to the console.
log_dir = os.getenv('LOG_DIRECTORY', './')
LOGGER_FILE_PATH = os.path.join(str(log_dir), 'utils.log')

# One formatter shared by both handlers so file and console output match.
formatter = logging.Formatter(
    fmt='[%(asctime)s] [%(levelname)s] [%(filename)s] [%(lineno)s:%(funcName)s()] %(message)s',
    datefmt='%Y-%b-%d %H:%M:%S',
)

LOGGER = logging.getLogger(__name__)

file_handler = logging.FileHandler(LOGGER_FILE_PATH, mode='a')
console_handler = logging.StreamHandler()

# Attach the file handler first, then the console handler.
for _handler in (file_handler, console_handler):
    _handler.setFormatter(formatter)
    LOGGER.addHandler(_handler)
del _handler

# Map the textual $LOG_LEVEL onto the logging constants; any value that is not
# one of these exact (upper-case) names falls back to INFO.
log_level_dict = {
    'DEBUG': logging.DEBUG,
    'INFO': logging.INFO,
    'WARNING': logging.WARNING,
    'ERROR': logging.ERROR,
    'CRITICAL': logging.CRITICAL,
}
log_level_env = os.getenv('LOG_LEVEL', 'INFO')
log_level = log_level_dict.get(log_level_env, log_level_dict['INFO'])
LOGGER.setLevel(log_level)
|
|
|
|
|
|
|
|
|
|
|
class Text2Video:
    """Turn text prompts into a narrated slideshow video.

    For every prompt an image is requested from Hercai, the prompt is read
    aloud with gTTS, and the resulting image/audio pairs are stitched into a
    single MP4 with moviepy. ``gradio_interface`` exposes the pipeline through
    a small web UI.
    """

    def __init__(self) -> None:
        """Create the Hercai client used for image generation."""
        # Prefer a key supplied through the environment so it does not have to
        # be edited into the source; the placeholder is kept as the fallback so
        # behaviour is unchanged when HERCAI_API_KEY is not set.
        self.hercai_api_key = os.getenv("HERCAI_API_KEY", "YOUR_HERCAI_API_KEY")
        self.hercai = Hercai(self.hercai_api_key)
        self.hercai_base_url = "https://hercai.onrender.com/v3/text2image"
        LOGGER.info("Text2Video class initialized.")

    def generate_text(self, prompt: str) -> str:
        """Placeholder for text generation.

        No text-generation backend is wired in: the prompt is logged and an
        empty string is returned.
        """
        LOGGER.info(f"Generating text for prompt: {prompt}")
        return ""

    def get_image(self, img_prompt: str) -> str:
        """Request an image for ``img_prompt`` from Hercai.

        Returns:
            str: URL of the generated image, or ``""`` when the request fails
            or the response carries no usable ``url`` entry.
        """
        LOGGER.info(f"Generating image for prompt: {img_prompt}")
        try:
            image_result = self.hercai.draw_image(
                model="simurg",
                prompt=img_prompt,
                negative_prompt="Dark and gloomy",
            )
            image_url = image_result['url']
        except (requests.exceptions.RequestException, KeyError, TypeError) as e:
            # KeyError/TypeError cover a response without a 'url' entry, so a
            # malformed reply is reported the same way as a transport failure.
            LOGGER.error(f"Error generating image: {str(e)}")
            return ""
        if not image_url:
            LOGGER.error("Error generating image: empty image URL in response")
            return ""
        LOGGER.info(f"Generated image URL: {image_url}")
        return image_url

    def download_img_from_url(self, image_url: str, image_path: str) -> str:
        """Download ``image_url`` to ``image_path`` and resize it to 640x480.

        Returns:
            str: ``image_path`` on success, ``""`` on any failure.
        """
        LOGGER.info(f"Downloading image from URL: {image_url} to path: {image_path}")
        try:
            urllib.request.urlretrieve(image_url, image_path)
            # Resize inside the context manager so the source file handle is
            # released before the same path is overwritten below.
            with Image.open(image_path) as img:
                resized = img.resize((640, 480))
            resized.save(image_path)
            LOGGER.info(f"Image downloaded to: {image_path}")
            return image_path
        except Exception as e:
            LOGGER.error(f"Error downloading image from URL: {e}")
            return ""

    def text_to_audio(self, text: str, audio_path: str) -> str:
        """Convert ``text`` to English speech with gTTS and save it as MP3.

        Returns:
            str: ``audio_path`` on success, ``""`` on any failure.
        """
        LOGGER.info(f"Converting text to audio for text: {text}")
        try:
            tts = gTTS(text=text, lang='en')
            # gTTS.save() accepts only the target file name; passing extra
            # keywords such as ``bitrate`` raises TypeError.
            tts.save(audio_path)
            LOGGER.info(f"Audio saved to: {audio_path}")
            return audio_path
        except Exception as e:
            LOGGER.error(f"Error generating speech: {str(e)}")
            return ""

    @staticmethod
    def _safe_prefix(prompt: str, length: int = 9) -> str:
        """Return the first ``length`` characters of ``prompt`` made safe for a file name."""
        return "".join(ch if ch.isalnum() else "_" for ch in prompt[:length])

    def get_images_and_audio(self, list_prompts: list) -> tuple:
        """Generate an image file and an audio file for every prompt.

        A prompt contributes to the result only when BOTH its image and its
        audio were produced, so the two returned lists always have the same
        length and line up index by index.

        Returns:
            tuple: ``(image_paths, audio_paths)``.
        """
        LOGGER.info(f"Generating images and audio for prompts: {list_prompts}")
        img_list = []
        audio_paths = []
        for img_prompt in list_prompts:
            try:
                # Shared stem keeps the image and audio of one prompt together.
                stem = f"{self._safe_prefix(img_prompt)}_{uuid.uuid4().hex}"

                img_url = self.get_image(img_prompt)
                if not img_url:
                    LOGGER.error(f"Skipping prompt, no image generated: {img_prompt}")
                    continue

                image = self.download_img_from_url(img_url, f"{stem}.png")
                if not image:
                    LOGGER.error(f"Skipping prompt, image download failed: {img_prompt}")
                    continue

                audio = self.text_to_audio(img_prompt, f"{stem}.mp3")
                if not audio:
                    LOGGER.error(f"Skipping prompt, audio generation failed: {img_prompt}")
                    continue

                img_list.append(image)
                audio_paths.append(audio)
                LOGGER.info(f"Processed prompt: {img_prompt}, Image: {image}, Audio: {audio}")
            except Exception as e:
                # Best effort: one failing prompt must not abort the others.
                LOGGER.error(f"Error processing prompt: {img_prompt}, {e}")

        return img_list, audio_paths

    def _add_bubble(self, video_clip, image_file: str, duration):
        """Overlay a 150x150 copy of the image near the top-right corner.

        Returns the composited clip, or ``video_clip`` unchanged when the
        overlay cannot be built (the overlay is decorative, so failure is
        logged and tolerated).
        """
        try:
            with Image.open(image_file) as img:
                width, _ = img.size
            # NOTE(review): at 90% of the width a 150px bubble extends past the
            # right edge of a 640px frame - position kept as in the original.
            bubble_clip = (
                mp.ImageClip(image_file)
                .resize((150, 150))
                # The overlay needs an explicit duration: otherwise the
                # composite has no duration and cannot be concatenated.
                .set_duration(duration)
                .set_position((int(width * 0.90), 0))
            )
            return mp.CompositeVideoClip([video_clip, bubble_clip])
        except Exception as ex:
            LOGGER.error(f"Error adding comic bubble: {str(ex)}")
            return video_clip

    def create_video_from_images_and_audio(self, image_files: list, audio_files: list, output_path: str) -> None:
        """Create a video from images and corresponding audio files with pop-up bubbles.

        Each image is shown for the duration of its audio file; the resulting
        clips are concatenated and written to ``output_path`` (libx264, 24 fps).
        Errors are logged, not raised; on failure no file is written.
        """
        LOGGER.info(f"Creating video from images: {image_files}, audio files: {audio_files}")
        open_clips = []  # clips holding external resources, closed in ``finally``
        try:
            if len(image_files) != len(audio_files):
                LOGGER.error("Error: Number of images and audio files don't match.")
                return

            video_clips = []
            for image_file, audio_file in zip(image_files, audio_files):
                if not (os.path.exists(audio_file) and os.path.exists(image_file)):
                    LOGGER.error(f"Audio or image file missing for {image_file} or {audio_file}")
                    continue

                audio_clip = mp.AudioFileClip(audio_file)
                open_clips.append(audio_clip)

                video_clip = mp.ImageClip(image_file).set_duration(audio_clip.duration)
                video_clip = video_clip.set_audio(audio_clip)
                video_clip = self._add_bubble(video_clip, image_file, audio_clip.duration)

                video_clips.append(video_clip)
                LOGGER.info(f"Created video clip for image: {image_file}, audio: {audio_file}")

            if not video_clips:
                LOGGER.error("No video clips could be created; no video written.")
                return

            final_clip = mp.concatenate_videoclips(video_clips)
            open_clips.append(final_clip)
            final_clip.write_videofile(output_path, codec='libx264', fps=24)
            LOGGER.info(f"Video created successfully at: {output_path}")
        except Exception as e:
            LOGGER.error(f"Error creating video: {str(e)}")
        finally:
            # Release audio readers / file handles even when writing failed.
            for clip in open_clips:
                try:
                    clip.close()
                except Exception as e:
                    LOGGER.warning(f"Could not close clip: {e}")

    def generate_video(self, text: str) -> str:
        """
        Generate a video from a list of text prompts.

        Args:
            text (str): Text prompts separated by double commas.

        Returns:
            str: Path to the generated video file, or an empty string when no
            prompt was given or the video could not be created.
        """
        LOGGER.info(f"Generating video for text: {text}")
        try:
            list_prompts = [sentence.strip() for sentence in text.split(",,") if sentence.strip()]
            LOGGER.info(f"List of prompts: {list_prompts}")
            if not list_prompts:
                LOGGER.error("Error generating video: no prompts found in text")
                return ""

            output_path = f"output_video_{uuid.uuid4().hex[:8]}.mp4"
            LOGGER.info(f"Output path for video: {output_path}")

            img_list, audio_paths = self.get_images_and_audio(list_prompts)
            self.create_video_from_images_and_audio(img_list, audio_paths, output_path)

            # create_video_from_images_and_audio logs failures instead of
            # raising, so confirm the file exists before handing out its path.
            if not os.path.exists(output_path):
                LOGGER.error(f"Error generating video: no file produced at {output_path}")
                return ""
            return output_path
        except Exception as e:
            LOGGER.error(f"Error generating video: {str(e)}")
            return ""

    def _generate_video_for_ui(self, text: str):
        """Gradio callback: return the video path, or ``None`` on failure.

        ``None`` leaves the video component empty instead of pointing it at the
        empty-string path ``generate_video`` uses to signal failure.
        """
        return self.generate_video(text) or None

    def gradio_interface(self):
        """Create and launch the Gradio interface."""
        LOGGER.info("Launching Gradio interface.")
        with gr.Blocks(css="style.css", theme='abidlabs/dracula_revamped') as demo:
            # Example story; prompts are separated by double commas.
            example_txt = (
                "once upon a time there was a village. It was a nice place to live, except for one thing. people did not like to share.,, One day a visitor came to town.\n"
                "'Hello. Does anybody have food to share?' He asked. 'No', said everyone.,,\n"
                "That's okay', said the visitor. 'I will make stone soup for everyone'.Then he took a stone and dropped it into a giant pot,,"
            )

            gr.HTML("""
            <center><h1 style="color:#fff">Comics Video Generator</h1></center>""")

            with gr.Row(elem_id="col-container"):
                input_text = gr.Textbox(label="Comics Text", placeholder="Enter the comics by double comma separated")

            with gr.Row(elem_id="col-container"):
                button = gr.Button("Generate Video")

            with gr.Row(elem_id="col-container"):
                output = gr.Video()

            with gr.Row(elem_id="col-container"):
                gr.Examples([example_txt], input_text)

            button.click(self._generate_video_for_ui, [input_text], output)

        demo.launch(debug=True)
        LOGGER.info("Gradio interface launched.")
|
|
|
|
|
# Script entry point: build the pipeline and serve the Gradio UI.
if __name__ == "__main__":
    LOGGER.info("Starting Text2Video application.")
    text2video = Text2Video()
    text2video.gradio_interface()