Spaces:

AZLABS
/

Comic-2

Running

App Files Files Community

Comic-2 / app.py

AZLABS

Update app.py

248f920 verified 3 months ago

raw

history blame

17 kB

	import os
	import json
	import urllib.request
	from PIL import Image
	from gtts import gTTS
	import cv2
	import moviepy.editor as mp
	import logging
	from hercai import Hercai
	import uuid
	import time
	import gradio as gr
	import requests

	# Configure detailed logging.
	# The log file lives in LOG_DIRECTORY; an unset or empty value falls back to the current directory.
	log_dir = os.getenv('LOG_DIRECTORY') or './'
	# Create the directory up front: basicConfig(filename=...) raises if the parent directory is missing.
	os.makedirs(log_dir, exist_ok=True)
	LOGGER_FILE_PATH = os.path.join(log_dir, 'utils.log')  # Full path to the log file

	logging.basicConfig(
	    filename=LOGGER_FILE_PATH,
	    filemode='a',  # Append to the log file
	    format='[%(asctime)s] [%(levelname)s] [%(filename)s] [%(lineno)s:%(funcName)s()] %(message)s',  # Log format
	    datefmt='%Y-%b-%d %H:%M:%S',  # Date and time format
	)
	LOGGER = logging.getLogger(__name__)  # Module-level logger used throughout this file

	# Log level name from the environment, normalised so that e.g. "debug" or " Info " are accepted.
	log_level_env = os.getenv('LOG_LEVEL', 'INFO').strip().upper()
	log_level_dict = {  # Maps log level names to their numeric values
	    'DEBUG': logging.DEBUG,
	    'INFO': logging.INFO,
	    'WARNING': logging.WARNING,
	    'ERROR': logging.ERROR,
	    'CRITICAL': logging.CRITICAL,
	}
	# Unknown level names fall back to INFO.
	log_level = log_level_dict.get(log_level_env, logging.INFO)
	LOGGER.setLevel(log_level)  # Apply the level to this module's logger


	class Text2Video:
	    """
	    Generate narrated comic-style videos from text prompts.

	    Each prompt (scene) is turned into an image via the selected image
	    generation service and into a narration via gTTS; the image/audio pairs
	    are then stitched into a single video with MoviePy. ``gradio_interface``
	    exposes the whole pipeline through a Gradio UI.
	    """

	    def __init__(self) -> None:
	        """
	        Initialize the Text2Video class.

	        The Hercai API key is read from the ``HERCAI_API_KEY`` environment
	        variable; when unset, an empty key is used.
	        """
	        LOGGER.info("Initializing Text2Video class")
	        self.herc = Hercai(os.getenv("HERCAI_API_KEY", ""))
	        LOGGER.info("Hercai initialized successfully")

	    @staticmethod
	    def _split_prompts(text: str) -> list:
	        """Split raw input on double commas (,,) and drop blank scenes."""
	        return [scene.strip() for scene in (text or "").split(",,") if scene.strip()]

	    @staticmethod
	    def _safe_stem(img_prompt: str, max_len: int = 9) -> str:
	        """Return a filesystem-safe file name stem built from the start of the prompt."""
	        stem = "".join(
	            ch if ch.isalnum() or ch in "-_" else "_"
	            for ch in img_prompt[:max_len]
	        )
	        # An empty prompt would otherwise yield a file name starting with "_".
	        return stem or "scene"

	    def get_image(self, img_prompt: str, image_generator: str, image_model: str) -> str:
	        """
	        Generate an image from a text prompt using the selected AI model, with comic book styling.

	        Args:
	            img_prompt (str): The text prompt to generate the image from.
	            image_generator (str): The name of the AI image generation service (Hercai, Prodia, or Pollinations).
	            image_model (str): The specific model to use within the selected AI image generation service.

	        Returns:
	            str: The URL of the generated image. Returns an empty string if no image
	            could be generated (service error, unsupported generator, or exception).
	        """
	        LOGGER.info("Generating image for prompt: %s", img_prompt)
	        try:
	            # Wrap the user's text in a comic book style instruction
	            modified_prompt = (
	                f"Generate a comic book style image with speech bubbles containing the following text: '{img_prompt}'. "
	                f"Include elements like vibrant colors, onomatopoeia, and exaggerated expressions to enhance the comic book aesthetic."
	            )
	            LOGGER.info("Modified prompt for %s: %s", image_generator, modified_prompt)
	            image_url = ""

	            if image_generator == "Hercai":
	                LOGGER.info("Using Hercai model: %s", image_model)
	                image_result = self.herc.draw_image(
	                    model=image_model,
	                    prompt=modified_prompt,
	                    negative_prompt="Dark and gloomy",
	                )
	                image_url = image_result["url"]

	            elif image_generator == "Prodia":
	                LOGGER.info("Using Prodia model: %s", image_model)
	                api_url = "https://api.prodia.com/v1/generate"
	                payload = {
	                    "model": image_model,
	                    "prompt": modified_prompt,
	                    "negative_prompt": "Dark and gloomy"
	                }
	                # Key comes from the PRODIA_API_KEY environment variable; the
	                # placeholder is kept as the default so behaviour is unchanged when unset.
	                prodia_key = os.getenv("PRODIA_API_KEY", "YOUR_PRODIA_API_KEY")
	                headers = {
	                    "Authorization": f"Bearer {prodia_key}"
	                }
	                # A timeout keeps a stalled request from blocking the UI forever.
	                response = requests.post(api_url, json=payload, headers=headers, timeout=60)
	                if response.status_code == 200:
	                    image_url = response.json()["url"]
	                else:
	                    LOGGER.error("Error generating image using Prodia: %s", response.text)

	            elif image_generator == "Pollinations":
	                LOGGER.info("Using Pollinations model: %s", image_model)
	                # Placeholder: no Pollinations API call is implemented, so no URL is produced.
	                LOGGER.warning("Pollinations image generation is not implemented")

	            else:
	                LOGGER.error("Unknown image generator: %s", image_generator)

	            # Only report success when a URL was actually obtained.
	            if image_url:
	                LOGGER.info("Image generated successfully: %s", image_url)
	            else:
	                LOGGER.error("No image URL produced for prompt '%s' using %s", img_prompt, image_generator)
	            return image_url

	        except Exception as e:
	            # Best-effort: callers treat an empty string as "no image"
	            LOGGER.error("Error generating image for prompt '%s' using %s: %s", img_prompt, image_generator, e)
	            return ""

	    def download_img_from_url(self, image_url: str, image_path: str) -> str:
	        """
	        Download an image from a URL to a local file path.

	        Args:
	            image_url (str): The URL of the image to download.
	            image_path (str): The local file path to save the downloaded image.

	        Returns:
	            str: The local file path of the downloaded image. Returns an empty string
	            if the URL is empty or an error occurred.
	        """
	        LOGGER.info("Downloading image from URL: %s", image_url)
	        if not image_url:
	            # get_image signals failure with an empty URL; nothing to download.
	            LOGGER.error("Error downloading image: empty URL")
	            return ""
	        try:
	            urllib.request.urlretrieve(image_url, image_path)
	            LOGGER.info("Image downloaded and saved to: %s", image_path)
	            return image_path

	        except Exception as e:
	            LOGGER.error("Error downloading image from URL '%s': %s", image_url, e)
	            return ""

	    def text_to_audio(self, img_prompt: str, audio_path: str) -> str:
	        """
	        Convert text to speech using gTTS and save it as an audio file.

	        Args:
	            img_prompt (str): The text to convert to speech.
	            audio_path (str): The local file path to save the generated audio file.

	        Returns:
	            str: The local file path of the saved audio file. Returns an empty string if an error occurred.
	        """
	        LOGGER.info("Converting text to audio: %s", img_prompt)
	        try:
	            # English speech at normal speed
	            speech = gTTS(text=img_prompt, lang='en', slow=False)
	            speech.save(audio_path)

	            LOGGER.info("Audio saved to: %s", audio_path)
	            return audio_path
	        except Exception as e:
	            LOGGER.error("Error converting text '%s' to audio: %s", img_prompt, e)
	            return ""

	    def get_images_and_audio(self, list_prompts: list, image_generator: str, image_model: str) -> tuple:
	        """
	        Generate images and corresponding audio files for a list of text prompts using the selected AI model.

	        A prompt whose image or audio cannot be produced is skipped entirely, so the
	        two returned lists always have the same length and contain only usable paths.

	        Args:
	            list_prompts (list): A list of text prompts.
	            image_generator (str): The name of the AI image generation service (Hercai, Prodia, or Pollinations).
	            image_model (str): The specific model to use within the selected AI image generation service.

	        Returns:
	            tuple: A tuple containing two lists: image paths and audio paths.
	        """
	        LOGGER.info("Generating images and audio for prompts")
	        img_list = []  # Image paths, index-aligned with audio_paths
	        audio_paths = []  # Audio paths, index-aligned with img_list
	        for img_prompt in list_prompts:
	            LOGGER.info("Processing prompt: %s", img_prompt)
	            try:
	                # Shared stem so the image and audio of one scene are easy to pair up;
	                # the prompt prefix is sanitised because prompts may contain path separators.
	                stem = f"{self._safe_stem(img_prompt)}_{uuid.uuid4().hex}"

	                img_url = self.get_image(img_prompt, image_generator, image_model)
	                image = self.download_img_from_url(img_url, f"{stem}.png") if img_url else ""
	                if not image:
	                    LOGGER.error("Skipping prompt '%s': no image available", img_prompt)
	                    continue

	                audio = self.text_to_audio(img_prompt, f"{stem}.mp3")
	                if not audio:
	                    LOGGER.error("Skipping prompt '%s': no audio available", img_prompt)
	                    continue

	                # Append both together so the lists never get out of step.
	                img_list.append(image)
	                audio_paths.append(audio)

	            except Exception as e:
	                LOGGER.error("Error processing prompt '%s': %s", img_prompt, e)

	        LOGGER.info("Generated %d image/audio pairs for %d prompts", len(img_list), len(list_prompts))
	        return img_list, audio_paths

	    def create_video_from_images_and_audio(self, image_files: list, audio_files: list, output_path: str) -> None:
	        """
	        Generate a video from a list of image files and corresponding audio files.

	        Each image is shown for the duration of its audio clip. Errors are logged
	        rather than raised; on failure any partially written output file is removed
	        so callers can test for the file's existence.

	        Args:
	            image_files (list): A list of local file paths to image files.
	            audio_files (list): A list of local file paths to audio files.
	            output_path (str): The local file path where the generated video will be saved.
	        """
	        LOGGER.info("Creating video from images and audio")
	        if len(image_files) != len(audio_files):
	            LOGGER.error("Error: Number of images doesn't match the number of audio files.")
	            return
	        if not image_files:
	            # concatenate_videoclips cannot work on an empty list
	            LOGGER.error("Error: No image/audio pairs available to create a video.")
	            return

	        audio_clips = []  # Tracked so their readers can be released afterwards
	        final_clip = None
	        try:
	            video_clips = []
	            for image_file, audio_file in zip(image_files, audio_files):
	                LOGGER.info("Processing image: %s, audio: %s", image_file, audio_file)

	                audio_clip = mp.AudioFileClip(audio_file)
	                audio_clips.append(audio_clip)

	                # Still image lasting exactly as long as its narration
	                video_clip = mp.ImageClip(image_file).set_duration(audio_clip.duration)
	                video_clips.append(video_clip.set_audio(audio_clip))

	            final_clip = mp.concatenate_videoclips(video_clips)
	            final_clip.write_videofile(output_path, codec='libx264', fps=24)

	            LOGGER.info("Video created successfully at: %s", output_path)

	        except Exception as e:
	            LOGGER.error("Error creating video: %s", e)
	            # Do not leave a truncated file behind that could be mistaken for a result.
	            try:
	                if os.path.exists(output_path):
	                    os.remove(output_path)
	            except OSError as cleanup_error:
	                LOGGER.error("Error removing incomplete video '%s': %s", output_path, cleanup_error)

	        finally:
	            # Best-effort release of the resources held by the clips.
	            for clip in [final_clip, *audio_clips]:
	                if clip is None:
	                    continue
	                try:
	                    clip.close()
	                except Exception as close_error:
	                    LOGGER.warning("Error closing clip: %s", close_error)

	    def generate_video(self, text: str, image_generator: str, image_model: str) -> str:
	        """
	        Generate a video from a string of text prompts using the selected AI model.

	        Args:
	            text (str): Text prompts separated by double commas (,,), where each prompt represents a scene in the video.
	            image_generator (str): The name of the AI image generation service (Hercai, Prodia, or Pollinations).
	            image_model (str): The specific model to use within the selected AI image generation service.

	        Returns:
	            str: The file path of the generated video file. Returns an empty string if
	            there were no prompts, no video file was produced, or an error occurred.
	        """
	        LOGGER.info("Generating video from text")
	        try:
	            list_prompts = self._split_prompts(text)
	            LOGGER.info("Prompts extracted from text: %s", list_prompts)
	            if not list_prompts:
	                LOGGER.error("No prompts found in input text")
	                return ""

	            output_path = "output_video.mp4"
	            # Remove any video left over from a previous run so a failed run
	            # cannot return a stale file.
	            if os.path.exists(output_path):
	                os.remove(output_path)

	            img_list, audio_paths = self.get_images_and_audio(list_prompts, image_generator, image_model)
	            self.create_video_from_images_and_audio(img_list, audio_paths, output_path)

	            # Video creation logs its own errors instead of raising, so check the result.
	            if not os.path.isfile(output_path):
	                LOGGER.error("Video generation failed: no output file was produced")
	                return ""

	            LOGGER.info("Video generated successfully: %s", output_path)
	            return output_path

	        except Exception as e:
	            LOGGER.error("Error generating video from text '%s': %s", text, e)
	            return ""

	    def gradio_interface(self):
	        """
	        Build and launch the Gradio interface for the video generation application.

	        The UI consists of a text box for the scenes, dropdowns for the image
	        generator and model, a button wired to ``generate_video``, and a video player
	        for the result.
	        """
	        LOGGER.info("Launching Gradio interface")
	        with gr.Blocks(css="style.css", theme='abidlabs/dracula_revamped') as demo:
	            # Title of the application
	            gr.HTML("""
	<center><h1 style="color:#fff">Comics Video Generator</h1></center>""")

	            # Text box where the user enters the comic book text
	            with gr.Row(elem_id="col-container"):
	                input_text = gr.Textbox(label="Comics Text",
	                                        placeholder="Enter the comics text, separating scenes with double commas (,,)")

	            # Dropdown for selecting the AI image generation service
	            with gr.Row(elem_id="col-container"):
	                image_generator = gr.Dropdown(label="Image Generator",
	                                              choices=["Hercai", "Prodia", "Pollinations"],
	                                              value="Hercai",
	                                              interactive=True)

	            # Dropdown for selecting the specific model within the chosen service
	            with gr.Row(elem_id="col-container"):
	                image_model = gr.Dropdown(label="Image Model",
	                                          choices=["v1", "v2", "v3", "simurg", "animefy", "raava", "shonin"],
	                                          value="v3",
	                                          interactive=True)

	            # Button that triggers the video generation process
	            with gr.Row(elem_id="col-container"):
	                button = gr.Button("Generate Video")

	            # Component that displays the generated video
	            with gr.Row(elem_id="col-container"):
	                output = gr.PlayableVideo()

	            # Example input showing the expected scene format
	            with gr.Row(elem_id="col-container"):
	                example_txt = """Once upon a time there was a village. It was a nice place to live, except for one thing. People did not like to share.,,
	One day a visitor came to town. 'Hello. Does anybody have food to share?' He asked. 'No', said everyone.,,
	'That's okay', said the visitor. 'I will make stone soup for everyone'. Then he took a stone and dropped it into a giant pot,,"""
	                gr.Examples([example_txt], input_text)

	            # Clicking the button runs generate_video with the text and model selection
	            button.click(self.generate_video, [input_text, image_generator, image_model], output)

	        LOGGER.info("Gradio interface launched successfully")
	        # Launch the Gradio interface
	        demo.launch(debug=True)


	# Script entry point: only runs when the file is executed directly, not on import.
	if __name__ == "__main__":
	    LOGGER.info("Starting application")
	    # Build the generator and hand control to its Gradio UI.
	    Text2Video().gradio_interface()