import os
import json
import urllib.request
import logging
import uuid
import time

import requests
import cv2
import gradio as gr
import moviepy.editor as mp
from moviepy.editor import *  # provides CompositeVideoClip used for the bubble overlay
from moviepy.video.tools.subtitles import SubtitlesClip
from PIL import Image
from gtts import gTTS  # text-to-speech for the narration track
from hercai import Hercai  # Hercai client for text-to-image

# --- Logging: one file handler + one console handler, shared format ---
log_dir = os.getenv('LOG_DIRECTORY', './')
LOGGER_FILE_PATH = os.path.join(str(log_dir), 'utils.log')

formatter = logging.Formatter(
    '[%(asctime)s] [%(levelname)s] [%(filename)s] [%(lineno)s:%(funcName)s()] %(message)s',
    datefmt='%Y-%b-%d %H:%M:%S'
)

LOGGER = logging.getLogger(__name__)

# Guard against duplicate handlers: without this, re-importing the module
# added a second pair of handlers and every log line was emitted twice.
if not LOGGER.handlers:
    file_handler = logging.FileHandler(LOGGER_FILE_PATH, mode='a')
    file_handler.setFormatter(formatter)
    LOGGER.addHandler(file_handler)

    console_handler = logging.StreamHandler()
    console_handler.setFormatter(formatter)
    LOGGER.addHandler(console_handler)

# Resolve the log level from the environment, falling back to INFO for
# unset or unrecognized values.
log_level_dict = {
    'DEBUG': logging.DEBUG,
    'INFO': logging.INFO,
    'WARNING': logging.WARNING,
    'ERROR': logging.ERROR,
    'CRITICAL': logging.CRITICAL,
}
LOGGER.setLevel(log_level_dict.get(os.getenv('LOG_LEVEL', 'INFO'), logging.INFO))


class Text2Video:
    """Generate a narrated slideshow video from ",,"-separated text prompts.

    Pipeline: Hercai text-to-image -> download + resize -> gTTS narration ->
    moviepy assembly (one clip per prompt, concatenated) -> Gradio front-end.
    """

    def __init__(self) -> None:
        """Initialize the Hercai client used for image generation."""
        self.hercai_api_key = "YOUR_HERCAI_API_KEY"  # TODO: replace with a real Hercai API key
        self.hercai = Hercai(self.hercai_api_key)
        self.hercai_base_url = "https://hercai.onrender.com/v3/text2image"
        LOGGER.info("🚀 Text2Video class initialized.")

    def generate_text(self, prompt: str) -> str:
        """Placeholder kept for interface compatibility; text generation is
        not needed with Hercai, so this always returns an empty string."""
        LOGGER.info(f"📝 Generating text for prompt: {prompt}")
        # ... (This part is not needed for Hercai) ...
        return ""

    def get_image(self, img_prompt: str) -> str:
        """Generate an image for `img_prompt` via Hercai.

        Returns:
            str: URL of the generated image, or "" on failure.
        """
        LOGGER.info(f"🖼️ Generating image for prompt: {img_prompt}")
        try:
            image_result = self.hercai.draw_image(
                model="simurg",  # Hercai image model
                prompt=img_prompt,
                negative_prompt="Dark and gloomy"
            )
            image_url = image_result['url']
            LOGGER.info(f"✅ Generated image URL: {image_url}")
            return image_url
        except Exception as e:
            # Broadened from requests.exceptions.RequestException: the hercai
            # client raises its own exception types, which previously escaped
            # this handler and crashed the batch.
            LOGGER.error(f"❌ Error generating image: {str(e)}")
            return ""

    def download_img_from_url(self, image_url: str, image_path: str) -> str:
        """Download an image from `image_url` to `image_path` and shrink it.

        Returns:
            str: the local path on success, or "" on failure.
        """
        LOGGER.info(f"⬇️ Downloading image from URL: {image_url} to path: {image_path}")
        try:
            urllib.request.urlretrieve(image_url, image_path)
            # Resize after downloading to keep the final video small.
            with Image.open(image_path) as img:
                img.resize((640, 480)).save(image_path)
            LOGGER.info(f"✅ Image downloaded to: {image_path}")
            return image_path
        except Exception as e:
            LOGGER.error(f"❌ Error downloading image from URL: {e}")
            return ""

    def text_to_audio(self, text: str, audio_path: str) -> str:
        """Convert `text` to an MP3 narration file using gTTS.

        Returns:
            str: the audio path on success, or "" on failure.
        """
        LOGGER.info(f"🔊 Converting text to audio for text: {text}")
        try:
            tts = gTTS(text=text, lang='en')  # 'en' = English narration
            # BUGFIX: gTTS.save() accepts only the path; the previous
            # `bitrate="128k"` keyword raised TypeError on every call, so no
            # audio was ever produced.
            tts.save(audio_path)
            LOGGER.info(f"✅ Audio saved to: {audio_path}")
            return audio_path
        except Exception as e:
            LOGGER.error(f"❌ Error generating speech: {str(e)}")
            return ""

    def get_images_and_audio(self, list_prompts: list) -> tuple:
        """Generate an (image, audio) pair for each prompt.

        Only prompts for which BOTH the image and the audio succeed are kept,
        so the returned lists are always the same length and index-aligned
        (the previous version could append an image without its audio, or an
        empty-string path, desynchronizing the lists downstream).

        Returns:
            tuple[list, list]: (image_paths, audio_paths), index-aligned.
        """
        LOGGER.info(f"🖼️🔊 Generating images and audio for prompts: {list_prompts}")
        img_list = []
        audio_paths = []
        for img_prompt in list_prompts:
            try:
                unique_id = uuid.uuid4().hex
                image_path = f"{img_prompt[:9]}_{unique_id}.png"
                img_url = self.get_image(img_prompt)
                if not img_url:
                    continue  # image generation failed; skip this prompt
                image = self.download_img_from_url(img_url, image_path)
                if not image:
                    continue  # download failed; skip this prompt
                audio_path = f"{img_prompt[:9]}_{unique_id}.mp3"
                audio = self.text_to_audio(img_prompt, audio_path)
                if not audio:
                    continue  # narration failed; skip to keep lists aligned
                img_list.append(image)
                audio_paths.append(audio)
                LOGGER.info(f"✅ Processed prompt: {img_prompt}, Image: {image}, Audio: {audio}")
            except Exception as e:
                LOGGER.error(f"❌ Error processing prompt: {img_prompt}, {e}")
        return img_list, audio_paths

    def create_video_from_images_and_audio(self, image_files: list,
                                           audio_files: list,
                                           output_path: str) -> None:
        """Assemble the final video: one clip per (image, audio) pair, each
        with a small "bubble" overlay of the image in the top-right corner,
        concatenated and written as H.264 at 24 fps."""
        LOGGER.info(f"🎥 Creating video from images: {image_files}, audio files: {audio_files}")
        try:
            if len(image_files) != len(audio_files):
                LOGGER.error("❌ Error: Number of images and audio files don't match.")
                return
            video_clips = []
            for image_file, audio_file in zip(image_files, audio_files):
                if not os.path.exists(audio_file) or not os.path.exists(image_file):
                    LOGGER.error(f"❌ Audio or image file missing for {image_file} or {audio_file}")
                    continue
                # Base clip: still image shown for the narration's duration.
                audio_clip = mp.AudioFileClip(audio_file)
                video_clip = mp.ImageClip(image_file).set_duration(audio_clip.duration)
                video_clip = video_clip.set_audio(audio_clip)
                # Overlay a small copy of the image ("bubble") in the
                # top-right corner. Failure here is non-fatal: the plain
                # clip is used instead.
                try:
                    with Image.open(image_file) as img:
                        width, height = img.size
                    bubble_clip = mp.ImageClip(image_file).resize((150, 150))
                    bubble_clip = bubble_clip.set_position((int(width * 0.90), 0))
                    video_clip = CompositeVideoClip([video_clip, bubble_clip])
                except Exception as ex:
                    LOGGER.error(f"❌ Error adding comic bubble: {str(ex)}")
                video_clips.append(video_clip)
                LOGGER.info(f"✅ Created video clip for image: {image_file}, audio: {audio_file}")
            if video_clips:  # only write a file if at least one clip exists
                final_clip = mp.concatenate_videoclips(video_clips)
                final_clip.write_videofile(output_path, codec='libx264', fps=24)
                LOGGER.info(f"✅ Video created successfully at: {output_path}")
        except Exception as e:
            LOGGER.error(f"❌ Error creating video: {str(e)}")

    def generate_video(self, text: str) -> str:
        """
        Generate a video from a list of text prompts.

        Args:
            text (str): Text prompts separated by double commas.

        Returns:
            str: Path to the generated video file, or "" on failure.
        """
        LOGGER.info(f"🎬 Generating video for text: {text}")
        try:
            list_prompts = [sentence.strip() for sentence in text.split(",,") if sentence.strip()]
            LOGGER.info(f"📝 List of prompts: {list_prompts}")
            # Unique output name so concurrent/repeated runs don't collide.
            output_path = f"output_video_{uuid.uuid4().hex[:8]}.mp4"
            LOGGER.info(f"📁 Output path for video: {output_path}")
            img_list, audio_paths = self.get_images_and_audio(list_prompts)
            self.create_video_from_images_and_audio(img_list, audio_paths, output_path)
            return output_path
        except Exception as e:
            LOGGER.error(f"❌ Error generating video: {str(e)}")
            return ""

    def gradio_interface(self):
        """Create and launch the Gradio interface (blocks until closed)."""
        LOGGER.info("🌐 Launching Gradio interface.")
        with gr.Blocks(css="style.css", theme='abidlabs/dracula_revamped') as demo:
            example_txt = """once upon a time there was a village. It was a nice place to live, except for one thing. people did not like to share.,, One day a visitor came to town. 'Hello. Does anybody have food to share?' He asked. 'No', said everyone.,, That's okay', said the visitor. 'I will make stone soup for everyone'.Then he took a stone and dropped it into a giant pot,,"""
            # NOTE(review): the original HTML markup appears to have been lost
            # in extraction; only the title text survives. Restore real markup
            # from the project's repo if available.
            gr.HTML("""
Comics Video Generator
""")
            with gr.Row(elem_id="col-container"):
                input_text = gr.Textbox(label="Comics Text",
                                        placeholder="Enter the comics by double comma separated")
            with gr.Row(elem_id="col-container"):
                button = gr.Button("Generate Video")
            with gr.Row(elem_id="col-container"):
                output = gr.Video()
            with gr.Row(elem_id="col-container"):
                example = gr.Examples([example_txt], input_text)
            button.click(self.generate_video, [input_text], output)
            demo.launch(debug=True)
        LOGGER.info("✅ Gradio interface launched.")


if __name__ == "__main__":
    LOGGER.info("🚀 Starting Text2Video application.")
    text2video = Text2Video()
    text2video.gradio_interface()