Spaces:

AZLABS
/

Comic-2

Running

App Files Files Community

AZLABS committed on Nov 4, 2024

Commit

dffab80

verified ·

1 Parent(s): 5b09ba9

Update app.py

Browse files

Files changed (1) hide show

app.py +149 -62

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import os
 import json
 import urllib.request
@@ -11,101 +12,117 @@ import uuid
 import time
 import gradio as gr
-# Configure logging
-log_dir = os.getenv('LOG_DIRECTORY', './')
-LOGGER_FILE_PATH = os.path.join(str(log_dir), 'utils.log')
 logging.basicConfig(
     filename=LOGGER_FILE_PATH,
-    filemode='a',
-    format='[%(asctime)s] [%(levelname)s] [%(filename)s] [%(lineno)s:%(funcName)s()] %(message)s',
-    datefmt='%Y-%b-%d %H:%M:%S'
 )
-LOGGER = logging.getLogger(__name__)
-log_level_env = os.getenv('LOG_LEVEL', 'INFO')
-log_level_dict = {
     'DEBUG': logging.DEBUG,
     'INFO': logging.INFO,
     'WARNING': logging.WARNING,
     'ERROR': logging.ERROR,
     'CRITICAL': logging.CRITICAL
 }
 if log_level_env in log_level_dict:
     log_level = log_level_dict[log_level_env]
 else:
     log_level = log_level_dict['INFO']
-LOGGER.setLevel(log_level)
 class Text2Video:
-    """A class to generate videos from text prompts."""
     def __init__(self) -> None:
         """
         Initialize the Text2Video class.
         """
-        self.herc = Hercai("")  # Replace with your Hercai API key if you have one
     def get_image(self, img_prompt: str) -> str:
         """
-        Generate an image based on the provided text prompt, ensuring the text is in speech bubbles.
         Args:
-            img_prompt (str): Text prompt for generating the image.
         Returns:
-            str: URL of the generated image.
         """
         try:
-            LOGGER.info(f"Generating image with prompt: {img_prompt}")
-            # Generate image using Hercai
-            # Modified prompt to include comic-style elements
-            modified_prompt = f"A comic book style image with speech bubbles containing the following text: '{img_prompt}'. Include vibrant colors and onomatopoeia where appropriate."
             image_result = self.herc.draw_image(model="simurg", prompt=modified_prompt, negative_prompt="Dark and gloomy")
             image_url = image_result["url"]
             LOGGER.info(f"Image generated successfully: {image_url}")
             return image_url
         except Exception as e:
             # Log any errors encountered during image generation
-            LOGGER.error(f"Error generating image: {e}")
             return ""
     def download_img_from_url(self, image_url: str, image_path: str) -> str:
         """
-        Download an image from a URL.
         Args:
-            image_url (str): URL of the image to download.
-            image_path (str): Path to save the downloaded image.
         Returns:
-            str: Path of the downloaded image.
         """
         try:
-            LOGGER.info(f"Downloading image from: {image_url}")
-            # Download the image from the provided URL and save it to the specified path
             urllib.request.urlretrieve(image_url, image_path)
             LOGGER.info(f"Image downloaded and saved to: {image_path}")
             return image_path
         except Exception as e:
             # Log any errors encountered during image download
-            LOGGER.error(f"Error downloading image from URL: {e}")
             return ""
     def text_to_audio(self, img_prompt: str, audio_path: str) -> str:
         """
-        Convert text to speech and save it as an audio file.
         Args:
-            img_prompt (str): Text to convert to speech.
-            audio_path (str): Path to save the audio file.
         Returns:
-            str: Path of the saved audio file.
         """
         try:
-            LOGGER.info(f"Converting text to audio: {img_prompt}")
             language = 'en'
             # Create a gTTS object to convert text to speech
@@ -113,117 +130,187 @@ class Text2Video:
             # Save the audio file at the specified path
             myobj.save(audio_path)
             LOGGER.info(f"Audio saved to: {audio_path}")
             return audio_path
         except Exception as e:
             # Log any errors encountered during text-to-audio conversion
-            LOGGER.error(f"Error converting text to audio: {e}")
             return ""
     def get_images_and_audio(self, list_prompts: list) -> tuple:
         """
-        Generate images and corresponding audio files from a list of prompts.
         Args:
-            list_prompts (list): List of text prompts.
         Returns:
-            tuple: A tuple containing lists of image paths and audio paths.
         """
-        img_list = []
-        audio_paths = []
         for img_prompt in list_prompts:
             try:
-                LOGGER.info(f"Processing prompt: {img_prompt}")
                 unique_id = uuid.uuid4().hex
                 image_path = f"{img_prompt[:9]}_{unique_id}.png"
                 img_url = self.get_image(img_prompt)
                 image = self.download_img_from_url(img_url, image_path)
                 img_list.append(image)
                 audio_path = f"{img_prompt[:9]}_{unique_id}.mp3"
                 audio = self.text_to_audio(img_prompt, audio_path)
                 audio_paths.append(audio)
             except Exception as e:
-                LOGGER.error(f"Error processing prompt: {img_prompt}, {e}")
         return img_list, audio_paths
     def create_video_from_images_and_audio(self, image_files: list, audio_files: list, output_path: str) -> None:
         """
-        Create a video from images and corresponding audio files.
         Args:
-            image_files (list): List of image files.
-            audio_files (list): List of audio files.
-            output_path (str): Path to save the output video file.
         """
         try:
-            LOGGER.info(f"Creating video from images and audio, output path: {output_path}")
             if len(image_files) != len(audio_files):
                 LOGGER.error("Error: Number of images doesn't match the number of audio files.")
                 return
             video_clips = []
             for image_file, audio_file in zip(image_files, audio_files):
-                LOGGER.info(f"Processing image: {image_file} and audio: {audio_file}")
                 frame = cv2.imread(image_file)
                 audio_clip = mp.AudioFileClip(audio_file)
                 video_clip = mp.ImageClip(image_file).set_duration(audio_clip.duration)
                 video_clip = video_clip.set_audio(audio_clip)
                 video_clips.append(video_clip)
             final_clip = mp.concatenate_videoclips(video_clips)
             final_clip.write_videofile(output_path, codec='libx264', fps=24)
-            LOGGER.info("Video created successfully.")
         except Exception as e:
             # Log any errors encountered during video creation
             LOGGER.error(f"Error creating video: {e}")
     def generate_video(self, text: str) -> str:
         """
-        Generate a video from a list of text prompts.
         Args:
-            text (str): Text prompts separated by double commas.
         Returns:
-            str: Path to the generated video.
         """
         try:
-            LOGGER.info(f"Generating video from text: {text}")
             list_prompts = [sentence.strip() for sentence in text.split(",,") if sentence.strip()]
             output_path = "output_video.mp4"
             img_list, audio_paths = self.get_images_and_audio(list_prompts)
             self.create_video_from_images_and_audio(img_list, audio_paths, output_path)
             LOGGER.info(f"Video generated successfully: {output_path}")
             return output_path
         except Exception as e:
-            LOGGER.error(f"Error generating video: {e}")
             return ""
     def gradio_interface(self):
         with gr.Blocks(css="style.css", theme='abidlabs/dracula_revamped') as demo:
-            example_txt = """once upon a time there was a village. It was a nice place to live, except for one thing. people did not like to share.,, One day a visitor came to town.
-            'Hello. Does anybody have food to share?' He asked. 'No', said everyone.,,
-            That's okay', said the visitor. 'I will make stone soup for everyone'.Then he took a stone and dropped it into a giant pot,,"""
             gr.HTML("""
-                <center><h1 style="color:#fff">Comics Video Generator</h1></center>""")
             with gr.Row(elem_id="col-container"):
-                input_text = gr.Textbox(label="Comics Text", placeholder="Enter the comics by double comma separated")
             with gr.Row(elem_id="col-container"):
                 button = gr.Button("Generate Video")
             with gr.Row(elem_id="col-container"):
                 output = gr.PlayableVideo()
             with gr.Row(elem_id="col-container"):
                 example = gr.Examples([example_txt], input_text)
             button.click(self.generate_video, [input_text], output)
             demo.launch(debug=True)
 if __name__ == "__main__":
-    text2video = Text2Video()
-    text2video.gradio_interface()

 import os
 import json
 import urllib.request
 import time
 import gradio as gr
# Configure file logging for the application.
# LOG_DIRECTORY (default: current directory) selects where utils.log is written and
# LOG_LEVEL (default: INFO) selects the verbosity of this module's logger.
log_dir = os.getenv('LOG_DIRECTORY', './')
if log_dir:
    # basicConfig opens the log file immediately, so the directory has to exist first
    os.makedirs(log_dir, exist_ok=True)
LOGGER_FILE_PATH = os.path.join(str(log_dir), 'utils.log')
logging.basicConfig(
    filename=LOGGER_FILE_PATH,
    filemode='a',  # Append so earlier runs are kept
    format='[%(asctime)s] [%(levelname)s] [%(filename)s] [%(lineno)s:%(funcName)s()] %(message)s',
    datefmt='%Y-%b-%d %H:%M:%S',
)
LOGGER = logging.getLogger(__name__)
log_level_env = os.getenv('LOG_LEVEL', 'INFO')
# Supported level names mapped to their logging constants
log_level_dict = {
    'DEBUG': logging.DEBUG,
    'INFO': logging.INFO,
    'WARNING': logging.WARNING,
    'ERROR': logging.ERROR,
    'CRITICAL': logging.CRITICAL,
}
# Accept any casing and surrounding whitespace; unknown names fall back to INFO
log_level = log_level_dict.get(log_level_env.strip().upper(), logging.INFO)
LOGGER.setLevel(log_level)
 class Text2Video:
+    """
+    A class to generate videos from text prompts, with detailed logging and a user-friendly interface.
+    """
     def __init__(self, api_key: str = "") -> None:
         """
         Initialize the Text2Video class and its Hercai client.

         Args:
             api_key (str): Hercai API key. When empty, the HERCAI_API_KEY environment
                 variable is used; if that is unset too, an empty key is passed, which
                 matches the previous hard-coded behavior.
         """
         LOGGER.info("Initializing Text2Video class")
         # Explicit argument first, then the environment, so a key never has to be committed to source
         self.herc = Hercai(api_key or os.getenv("HERCAI_API_KEY", ""))
         LOGGER.info("Hercai initialized successfully")
     def get_image(self, img_prompt: str) -> str:
         """
         Generate a comic-book style image for a text prompt using Hercai.

         Args:
             img_prompt (str): The text prompt to generate the image from.
         Returns:
             str: The URL of the generated image. Returns an empty string if the request
                 failed or the response did not carry a URL.
         """
         LOGGER.info(f"Generating image for prompt: {img_prompt}")
         # Wrap the caller's text in comic-book styling instructions
         modified_prompt = (
             f"Generate a comic book style image with speech bubbles containing the following text: '{img_prompt}'. "
             f"Include elements like vibrant colors, onomatopoeia, and exaggerated expressions to enhance the comic book aesthetic."
         )
         LOGGER.info(f"Modified prompt for Hercai: {modified_prompt}")
         try:
             image_result = self.herc.draw_image(model="simurg", prompt=modified_prompt, negative_prompt="Dark and gloomy")
             image_url = image_result["url"]
         except Exception:
             # LOGGER.exception keeps the traceback, which the plain error message used to drop
             LOGGER.exception(f"Error generating image for prompt '{img_prompt}'")
             return ""
         if not image_url:
             # A response without a usable URL is a failure, not a success to be logged and returned
             LOGGER.error(f"Error generating image for prompt '{img_prompt}': response contained no URL")
             return ""
         LOGGER.info(f"Image generated successfully: {image_url}")
         return image_url
     def download_img_from_url(self, image_url: str, image_path: str) -> str:
         """
         Download an image from an HTTP(S) URL to a local file path.

         Args:
             image_url (str): The URL of the image to download.
             image_path (str): The local file path to save the downloaded image.
         Returns:
             str: The local file path of the downloaded image. Returns an empty string if
                 the URL is empty, is not http/https, or the download failed.
         """
         if not image_url:
             # get_image() reports failure with an empty string; there is nothing to fetch
             LOGGER.error("Error downloading image: no URL was provided")
             return ""
         if not image_url.lower().startswith(("http://", "https://")):
             # urlretrieve also understands file:// and other schemes; only web URLs are expected here
             LOGGER.error(f"Error downloading image from URL '{image_url}': unsupported URL scheme")
             return ""
         LOGGER.info(f"Downloading image from URL: {image_url}")
         try:
             urllib.request.urlretrieve(image_url, image_path)
         except Exception:
             LOGGER.exception(f"Error downloading image from URL '{image_url}'")
             # Do not leave a truncated file behind for later stages to trip over
             try:
                 if os.path.exists(image_path):
                     os.remove(image_path)
             except OSError:
                 LOGGER.warning(f"Could not remove partial download: {image_path}")
             return ""
         LOGGER.info(f"Image downloaded and saved to: {image_path}")
         return image_path
     def text_to_audio(self, img_prompt: str, audio_path: str) -> str:
         """
+        Convert text to speech using gTTS and save it as an audio file.
         Args:
+            img_prompt (str): The text to convert to speech.
+            audio_path (str): The local file path to save the generated audio file.
         Returns:
+            str: The local file path of the saved audio file. Returns an empty string if an error occurred.
         """
+        LOGGER.info(f"Converting text to audio: {img_prompt}")
         try:
+            # Set the language for speech synthesis (English in this case)
             language = 'en'
             # Create a gTTS object to convert text to speech
             # NOTE(review): the statement that binds `myobj` is not visible here; this view
             # collapses unchanged lines between diff hunks. Presumably it builds the gTTS
             # object from `img_prompt` and `language` - confirm against app.py before editing.
             # Save the audio file at the specified path
             myobj.save(audio_path)
             LOGGER.info(f"Audio saved to: {audio_path}")
             return audio_path
         except Exception as e:
             # Log any errors encountered during text-to-audio conversion
             # Any failure in the try body is reported to the caller as an empty string
+            LOGGER.error(f"Error converting text '{img_prompt}' to audio: {e}")
             return ""
     def get_images_and_audio(self, list_prompts: list) -> tuple:
         """
         Generate an image file and a narration audio file for each text prompt.

         A prompt contributes to the result only when both its image and its audio were
         produced, so the two returned lists line up index-for-index and contain no
         empty paths. Prompts that fail are logged and skipped.

         Args:
             list_prompts (list): A list of text prompts.
         Returns:
             tuple: A tuple containing two lists of equal length: image paths and audio paths.
         """
         LOGGER.info("Generating images and audio for prompts")
         img_list = []  # Paths of downloaded scene images
         audio_paths = []  # Paths of narration files, parallel to img_list
         for img_prompt in list_prompts:
             LOGGER.info(f"Processing prompt: {img_prompt}")
             try:
                 # The prompt is user text: keep only alphanumerics in the filename stem so
                 # path separators or ".." cannot steer where the files are written
                 stem = "".join(ch if ch.isalnum() else "_" for ch in img_prompt[:9])
                 base_name = f"{stem}_{uuid.uuid4().hex}"

                 img_url = self.get_image(img_prompt)
                 image = self.download_img_from_url(img_url, f"{base_name}.png") if img_url else ""
                 if not image:
                     LOGGER.error(f"Skipping prompt '{img_prompt}': no image could be produced")
                     continue

                 audio = self.text_to_audio(img_prompt, f"{base_name}.mp3")
                 if not audio:
                     LOGGER.error(f"Skipping prompt '{img_prompt}': no audio could be produced")
                     # The image has no matching narration, so it would never be used
                     if os.path.exists(image):
                         os.remove(image)
                     continue

                 img_list.append(image)
                 audio_paths.append(audio)
             except Exception:
                 # One bad prompt must not abort the remaining scenes
                 LOGGER.exception(f"Error processing prompt '{img_prompt}'")
         LOGGER.info(f"Images and audio generated for {len(img_list)} of {len(list_prompts)} prompts")
         return img_list, audio_paths
     def create_video_from_images_and_audio(self, image_files: list, audio_files: list, output_path: str) -> None:
         """
         Generate a video from a list of image files and corresponding audio files.

         Each image is shown for the duration of its audio file, and the resulting
         scenes are concatenated in order. Errors are logged, not raised; on failure
         no exception reaches the caller.

         Args:
             image_files (list): A list of local file paths to image files.
             audio_files (list): A list of local file paths to audio files.
             output_path (str): The local file path where the generated video will be saved.
         """
         LOGGER.info("Creating video from images and audio")
         opened_clips = []  # Clips to close once rendering has finished or failed
         try:
             if len(image_files) != len(audio_files):
                 LOGGER.error("Error: Number of images doesn't match the number of audio files.")
                 return
             if not image_files:
                 # Concatenating zero clips cannot produce a video
                 LOGGER.error("Error: No images or audio files were provided.")
                 return

             video_clips = []
             for image_file, audio_file in zip(image_files, audio_files):
                 LOGGER.info(f"Processing image: {image_file}, audio: {audio_file}")
                 audio_clip = mp.AudioFileClip(audio_file)
                 opened_clips.append(audio_clip)
                 # A still image has no duration of its own; show it for as long as its narration lasts
                 video_clip = mp.ImageClip(image_file).set_duration(audio_clip.duration)
                 video_clips.append(video_clip.set_audio(audio_clip))

             final_clip = mp.concatenate_videoclips(video_clips)
             opened_clips.append(final_clip)
             final_clip.write_videofile(output_path, codec='libx264', fps=24)
             LOGGER.info(f"Video created successfully at: {output_path}")
         except Exception:
             LOGGER.exception("Error creating video")
         finally:
             # Audio clips keep an external reader open until they are closed explicitly
             for clip in opened_clips:
                 try:
                     clip.close()
                 except Exception:
                     LOGGER.warning("Error closing clip", exc_info=True)
     def generate_video(self, text: str) -> str:
         """
         Generate a video from text whose scenes are separated by double commas (",,").

         Args:
             text (str): Scene prompts separated by ",,". Each prompt becomes one scene
                 (one image plus its narration) in the video. Blank scenes are ignored.
         Returns:
             str: The file path of the generated video file. Returns an empty string if the
                 text contains no prompts or the video could not be produced.
         """
         LOGGER.info("Generating video from text")
         output_path = "output_video.mp4"
         img_list, audio_paths = [], []
         try:
             list_prompts = [sentence.strip() for sentence in text.split(",,") if sentence.strip()]
             LOGGER.info(f"Prompts extracted from text: {list_prompts}")
             if not list_prompts:
                 LOGGER.error("Error generating video: no prompts found in the input text")
                 return ""

             # Remove a video left by an earlier request so a failed render is not mistaken for success
             if os.path.exists(output_path):
                 os.remove(output_path)

             img_list, audio_paths = self.get_images_and_audio(list_prompts)
             self.create_video_from_images_and_audio(img_list, audio_paths, output_path)

             # The video step logs its own errors and returns nothing, so the file on disk
             # is the only available success signal
             if not os.path.exists(output_path):
                 LOGGER.error(f"Error generating video from text '{text}': no video file was produced")
                 return ""
             LOGGER.info(f"Video generated successfully: {output_path}")
             return output_path
         except Exception:
             LOGGER.exception(f"Error generating video from text '{text}'")
             return ""
         finally:
             # Per-scene images and audio are only intermediates; do not let them pile up
             for temp_file in (*img_list, *audio_paths):
                 try:
                     if temp_file and os.path.exists(temp_file):
                         os.remove(temp_file)
                 except OSError:
                     LOGGER.warning(f"Could not remove temporary file: {temp_file}")
     def gradio_interface(self):
         """
         Build the Gradio UI for the video generator and start serving it.

         The page holds a title, a text box for the comic text, a "Generate Video" button
         wired to generate_video, a video player for the result, and one clickable example.
         """
         LOGGER.info("Launching Gradio interface")
         # Example input, written as adjacent literals so no source indentation ends up in the text box
         example_txt = (
             "Once upon a time there was a village. It was a nice place to live, except for one thing. People did not like to share.,, "
             "One day a visitor came to town. 'Hello. Does anybody have food to share?' He asked. 'No', said everyone.,, "
             "'That's okay', said the visitor. 'I will make stone soup for everyone'. Then he took a stone and dropped it into a giant pot,,"
         )
         with gr.Blocks(css="style.css", theme='abidlabs/dracula_revamped') as demo:
             # Page title
             gr.HTML('<center><h1 style="color:#fff">Comics Video Generator</h1></center>')
             # Text box for the comic text
             with gr.Row(elem_id="col-container"):
                 input_text = gr.Textbox(label="Comics Text",
                                         placeholder="Enter the comics text, separating scenes with double commas (,,)")
             # Button that triggers video generation
             with gr.Row(elem_id="col-container"):
                 button = gr.Button("Generate Video")
             # Player for the generated video
             with gr.Row(elem_id="col-container"):
                 output = gr.PlayableVideo()
             # Example showing the expected input format
             with gr.Row(elem_id="col-container"):
                 gr.Examples([example_txt], input_text)
             # Run generate_video on the text box content and show the result in the player
             button.click(self.generate_video, [input_text], output)
             # launch(debug=True) keeps the main thread busy while serving, so log before calling it
             LOGGER.info("Gradio interface built; starting server")
             demo.launch(debug=True)
 if __name__ == "__main__":
     # Script entry point: build the generator, then serve its Gradio UI
     LOGGER.info("Starting application")
     generator = Text2Video()
     generator.gradio_interface()