import json
import logging
import os
import re
import time
import urllib.request
import uuid

import cv2
import gradio as gr
import moviepy.editor as mp
import requests
from gtts import gTTS
from hercai import Hercai
from PIL import Image


# Directory that receives the log file; override with LOG_DIRECTORY.
log_dir = os.getenv('LOG_DIRECTORY', './')
if log_dir:
    # basicConfig opens the log file immediately, so a missing directory
    # would otherwise abort the import with FileNotFoundError.
    os.makedirs(log_dir, exist_ok=True)
LOGGER_FILE_PATH = os.path.join(str(log_dir), 'utils.log')

logging.basicConfig(
    filename=LOGGER_FILE_PATH,
    filemode='a',
    format='[%(asctime)s] [%(levelname)s] [%(filename)s] [%(lineno)s:%(funcName)s()] %(message)s',
    datefmt='%Y-%b-%d %H:%M:%S',
)
LOGGER = logging.getLogger(__name__)

# LOG_LEVEL is matched case-insensitively ("debug" == "DEBUG"); anything
# that is not a known level name falls back to INFO.
log_level_env = os.getenv('LOG_LEVEL', 'INFO').strip().upper()
log_level_dict = {
    'DEBUG': logging.DEBUG,
    'INFO': logging.INFO,
    'WARNING': logging.WARNING,
    'ERROR': logging.ERROR,
    'CRITICAL': logging.CRITICAL,
}
log_level = log_level_dict.get(log_level_env, logging.INFO)
LOGGER.setLevel(log_level)


class Text2Video:
    """
    Generate narrated, comic-book-style videos from text prompts.

    Each prompt becomes one scene: an AI-generated image that is displayed
    for as long as its gTTS narration lasts. The scenes are concatenated with
    moviepy and the whole pipeline is exposed through a Gradio interface.

    The public methods log their progress and report failure through their
    return value (an empty string or ``False``) instead of raising.
    """

    # Scenes are separated by a double comma so that ordinary commas can be
    # used inside a scene.
    SCENE_SEPARATOR = ",,"
    # Negative prompt sent to every image generator.
    NEGATIVE_PROMPT = "Dark and gloomy"
    PRODIA_API_URL = "https://api.prodia.com/v1/generate"
    # Seconds to wait for the Prodia HTTP API before giving up.
    REQUEST_TIMEOUT = 60
    OUTPUT_VIDEO_PATH = "output_video.mp4"
    # Anything that is not a word character or a hyphen is replaced when a
    # prompt is turned into a file name.
    _UNSAFE_FILENAME_CHARS = re.compile(r"[^\w-]+")

    def __init__(self) -> None:
        """
        Initialize the Text2Video class.

        The Hercai client is created with the key found in the optional
        ``HERCAI_API_KEY`` environment variable (empty string when unset).
        """
        LOGGER.info("Initializing Text2Video class")
        self.herc = Hercai(os.getenv("HERCAI_API_KEY", ""))
        LOGGER.info("Hercai initialized successfully")

    @staticmethod
    def _split_prompts(text: str) -> list:
        """Split ``text`` on the scene separator into stripped, non-empty prompts."""
        if not text:
            return []
        pieces = (piece.strip() for piece in text.split(Text2Video.SCENE_SEPARATOR))
        return [piece for piece in pieces if piece]

    @staticmethod
    def _safe_stem(prompt: str) -> str:
        """Build a file-name-safe stem from the first nine characters of ``prompt``."""
        stem = Text2Video._UNSAFE_FILENAME_CHARS.sub("_", prompt[:9]).strip("_")
        # A prompt made only of punctuation/whitespace would leave nothing.
        return stem or "scene"

    def _draw_with_hercai(self, prompt: str, image_model: str) -> str:
        """Request an image from Hercai and return the URL found in its result."""
        LOGGER.info(f"Using Hercai model: {image_model}")
        image_result = self.herc.draw_image(
            model=image_model,
            prompt=prompt,
            negative_prompt=self.NEGATIVE_PROMPT,
        )
        return image_result["url"]

    def _draw_with_prodia(self, prompt: str, image_model: str) -> str:
        """Request an image from Prodia; return its URL or an empty string on failure."""
        LOGGER.info(f"Using Prodia model: {image_model}")
        api_key = os.getenv("PRODIA_API_KEY", "")
        if not api_key:
            LOGGER.error("Error generating image using Prodia: PRODIA_API_KEY is not set")
            return ""

        # NOTE(review): endpoint and Bearer scheme are kept as originally
        # written - confirm both against the current Prodia API reference.
        response = requests.post(
            self.PRODIA_API_URL,
            json={
                "model": image_model,
                "prompt": prompt,
                "negative_prompt": self.NEGATIVE_PROMPT,
            },
            headers={"Authorization": f"Bearer {api_key}"},
            timeout=self.REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            LOGGER.error(f"Error generating image using Prodia: {response.text}")
            return ""

        image_url = response.json()["url"]
        LOGGER.info(f"Image generated successfully using Prodia: {image_url}")
        return image_url

    def get_image(self, img_prompt: str, image_generator: str, image_model: str) -> str:
        """
        Generate an image from a text prompt using the selected AI model, with comic book styling.

        Args:
            img_prompt (str): The text prompt to generate the image from.
            image_generator (str): The name of the AI image generation service
                ("Hercai" or "Prodia"; "Pollinations" is accepted but not implemented).
            image_model (str): The specific model to use within the selected service.

        Returns:
            str: The URL of the generated image. Returns an empty string if an
            error occurred or the generator is unsupported.
        """
        LOGGER.info(f"Generating image for prompt: {img_prompt}")
        try:
            modified_prompt = (
                f"Generate a comic book style image with speech bubbles containing the following text: '{img_prompt}'. "
                f"Include elements like vibrant colors, onomatopoeia, and exaggerated expressions to enhance the comic book aesthetic."
            )
            LOGGER.info(f"Modified prompt for {image_generator}: {modified_prompt}")

            if image_generator == "Hercai":
                image_url = self._draw_with_hercai(modified_prompt, image_model)
            elif image_generator == "Prodia":
                image_url = self._draw_with_prodia(modified_prompt, image_model)
            elif image_generator == "Pollinations":
                LOGGER.info(f"Using Pollinations model: {image_model}")
                LOGGER.error("Pollinations image generation is not implemented")
                image_url = ""
            else:
                LOGGER.error(f"Unknown image generator: {image_generator}")
                image_url = ""

            # Only claim success when a URL was actually produced.
            if image_url:
                LOGGER.info(f"Image generated successfully: {image_url}")
            else:
                LOGGER.error(f"No image was generated for prompt '{img_prompt}' using {image_generator}")
            return image_url

        except Exception as e:
            LOGGER.error(f"Error generating image for prompt '{img_prompt}' using {image_generator}: {e}")
            return ""

    def download_img_from_url(self, image_url: str, image_path: str) -> str:
        """
        Download an image from an HTTP(S) URL to a local file path.

        Args:
            image_url (str): The URL of the image to download.
            image_path (str): The local file path to save the downloaded image.

        Returns:
            str: The local file path of the downloaded image. Returns an empty
            string if the URL is empty, is not HTTP(S), or the download failed.
        """
        LOGGER.info(f"Downloading image from URL: {image_url}")
        # urlretrieve also honours file:// and other schemes; the URL comes
        # from a remote service, so only web URLs are accepted.
        if not image_url or not image_url.lower().startswith(("http://", "https://")):
            LOGGER.error(f"Error downloading image from URL '{image_url}': not an HTTP(S) URL")
            return ""

        try:
            urllib.request.urlretrieve(image_url, image_path)
        except Exception as e:
            LOGGER.error(f"Error downloading image from URL '{image_url}': {e}")
            return ""

        LOGGER.info(f"Image downloaded and saved to: {image_path}")
        return image_path

    def text_to_audio(self, img_prompt: str, audio_path: str) -> str:
        """
        Convert text to English speech using gTTS and save it as an audio file.

        Args:
            img_prompt (str): The text to convert to speech.
            audio_path (str): The local file path to save the generated audio file.

        Returns:
            str: The local file path of the saved audio file. Returns an empty
            string if an error occurred.
        """
        LOGGER.info(f"Converting text to audio: {img_prompt}")
        try:
            speech = gTTS(text=img_prompt, lang='en', slow=False)
            speech.save(audio_path)
        except Exception as e:
            LOGGER.error(f"Error converting text '{img_prompt}' to audio: {e}")
            return ""

        LOGGER.info(f"Audio saved to: {audio_path}")
        return audio_path

    def get_images_and_audio(self, list_prompts: list, image_generator: str, image_model: str) -> tuple:
        """
        Generate an image and a narration file for every prompt.

        A prompt whose image or audio could not be produced is skipped, so the
        two returned lists always have the same length and pair up by index.

        Args:
            list_prompts (list): A list of text prompts.
            image_generator (str): The name of the AI image generation service.
            image_model (str): The specific model to use within the selected service.

        Returns:
            tuple: A tuple containing two lists: image paths and audio paths.
        """
        LOGGER.info("Generating images and audio for prompts")
        img_list = []
        audio_paths = []

        for img_prompt in list_prompts:
            LOGGER.info(f"Processing prompt: {img_prompt}")
            try:
                # The random suffix keeps scenes with the same opening words apart.
                stem = f"{self._safe_stem(img_prompt)}_{uuid.uuid4().hex}"

                img_url = self.get_image(img_prompt, image_generator, image_model)
                image = self.download_img_from_url(img_url, f"{stem}.png")
                if not image:
                    LOGGER.error(f"Error processing prompt '{img_prompt}': no image available, scene skipped")
                    continue

                audio = self.text_to_audio(img_prompt, f"{stem}.mp3")
                if not audio:
                    LOGGER.error(f"Error processing prompt '{img_prompt}': no audio available, scene skipped")
                    continue

                # Append both only once both exist, keeping the lists in step.
                img_list.append(image)
                audio_paths.append(audio)

            except Exception as e:
                LOGGER.error(f"Error processing prompt '{img_prompt}': {e}")

        LOGGER.info(f"Images and audio generated for {len(img_list)} of {len(list_prompts)} prompts")
        return img_list, audio_paths

    def create_video_from_images_and_audio(self, image_files: list, audio_files: list, output_path: str) -> bool:
        """
        Generate a video from a list of image files and corresponding audio files.

        Each image is shown for the duration of the audio file at the same
        index; the resulting clips are concatenated and encoded with libx264
        at 24 fps.

        Args:
            image_files (list): A list of local file paths to image files.
            audio_files (list): A list of local file paths to audio files.
            output_path (str): The local file path where the generated video will be saved.

        Returns:
            bool: True if the video was written, False otherwise.
        """
        LOGGER.info("Creating video from images and audio")

        if len(image_files) != len(audio_files):
            LOGGER.error("Error: Number of images doesn't match the number of audio files.")
            return False
        if not image_files:
            LOGGER.error("Error creating video: no image/audio pairs to assemble")
            return False

        # Everything opened here is closed in ``finally`` so the underlying
        # readers are released even when encoding fails.
        opened_clips = []
        try:
            video_clips = []
            for image_file, audio_file in zip(image_files, audio_files):
                LOGGER.info(f"Processing image: {image_file}, audio: {audio_file}")

                audio_clip = mp.AudioFileClip(audio_file)
                opened_clips.append(audio_clip)

                video_clip = mp.ImageClip(image_file).set_duration(audio_clip.duration)
                video_clips.append(video_clip.set_audio(audio_clip))

            final_clip = mp.concatenate_videoclips(video_clips)
            opened_clips.append(final_clip)
            final_clip.write_videofile(output_path, codec='libx264', fps=24)

            LOGGER.info(f"Video created successfully at: {output_path}")
            return True

        except Exception as e:
            LOGGER.error(f"Error creating video: {e}")
            return False

        finally:
            for clip in opened_clips:
                try:
                    clip.close()
                except Exception as e:
                    LOGGER.warning(f"Could not close clip: {e}")

    def generate_video(self, text: str, image_generator: str, image_model: str) -> str:
        """
        Generate a video from a string of text prompts using the selected AI model.

        Args:
            text (str): Text prompts separated by double commas (",,"), where
                each prompt represents a scene in the video.
            image_generator (str): The name of the AI image generation service.
            image_model (str): The specific model to use within the selected service.

        Returns:
            str: The file path of the generated video file. Returns an empty
            string if an error occurred or no scene could be produced.
        """
        LOGGER.info("Generating video from text")
        try:
            list_prompts = self._split_prompts(text)
            LOGGER.info(f"Prompts extracted from text: {list_prompts}")
            if not list_prompts:
                LOGGER.error(f"Error generating video from text '{text}': no prompts found")
                return ""

            output_path = self.OUTPUT_VIDEO_PATH
            img_list, audio_paths = self.get_images_and_audio(list_prompts, image_generator, image_model)

            # Do not hand back a path unless the video was really written;
            # otherwise a stale file from an earlier run could be returned.
            if not self.create_video_from_images_and_audio(img_list, audio_paths, output_path):
                LOGGER.error(f"Error generating video from text '{text}': video could not be created")
                return ""

            LOGGER.info(f"Video generated successfully: {output_path}")
            return output_path

        except Exception as e:
            LOGGER.error(f"Error generating video from text '{text}': {e}")
            return ""

    def gradio_interface(self) -> None:
        """
        Build the Gradio interface for the video generation application and launch it.

        The interface offers a text box for the scenes, dropdowns for the
        image generator and model, a button wired to ``generate_video`` and a
        video player for the result.
        """
        LOGGER.info("Launching Gradio interface")
        with gr.Blocks(css="style.css", theme='abidlabs/dracula_revamped') as demo:
            gr.HTML('<center><h1 style="color:#fff">Comics Video Generator</h1></center>')

            with gr.Row(elem_id="col-container"):
                input_text = gr.Textbox(
                    label="Comics Text",
                    placeholder="Enter the comics text, separating scenes with double commas (,,)",
                )

            with gr.Row(elem_id="col-container"):
                image_generator = gr.Dropdown(
                    label="Image Generator",
                    choices=["Hercai", "Prodia", "Pollinations"],
                    value="Hercai",
                    interactive=True,
                )

            with gr.Row(elem_id="col-container"):
                image_model = gr.Dropdown(
                    label="Image Model",
                    choices=["v1", "v2", "v3", "simurg", "animefy", "raava", "shonin"],
                    value="v3",
                    interactive=True,
                )

            with gr.Row(elem_id="col-container"):
                button = gr.Button("Generate Video")

            with gr.Row(elem_id="col-container"):
                output = gr.PlayableVideo()

            with gr.Row(elem_id="col-container"):
                example_txt = (
                    "Once upon a time there was a village. It was a nice place to live, "
                    "except for one thing. People did not like to share.,,\n"
                    "One day a visitor came to town. 'Hello. Does anybody have food to share?' "
                    "He asked. 'No', said everyone.,,\n"
                    "'That's okay', said the visitor. 'I will make stone soup for everyone'. "
                    "Then he took a stone and dropped it into a giant pot,,"
                )
                gr.Examples([example_txt], input_text)

            button.click(self.generate_video, [input_text, image_generator, image_model], output)

        LOGGER.info("Gradio interface launched successfully")
        demo.launch(debug=True)


# Script entry point: build the generator and serve the Gradio interface.
if __name__ == "__main__":
    LOGGER.info("Starting application")
    text2video = Text2Video()
    text2video.gradio_interface()