prithivMLmods committed · Commit 786abd0 · verified · 1 Parent(s): eb72d75

Update app.py

Files changed (1):
  1. app.py +259 -454
app.py CHANGED
@@ -2,15 +2,71 @@ import os
  import random
  import uuid
  import json
  import gradio as gr
- import numpy as np
- from PIL import Image
  import spaces
  import torch
- from diffusers import DiffusionPipeline, StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
- from typing import Tuple

- # Load restricted words
  bad_words = json.loads(os.getenv('BAD_WORDS', "[]"))
  bad_words_negative = json.loads(os.getenv('BAD_WORDS_NEGATIVE', "[]"))
  default_negative = os.getenv("default_negative", "")
@@ -24,218 +80,11 @@ def check_text(prompt, negative=""):
              return True
      return False

- # Quality/Style--------------------------------------------------------------------
- style_list = [
-     {
-         "name": "3840 x 2160",
-         "prompt": "hyper-realistic 8K image of {prompt}. ultra-detailed, lifelike, high-resolution, sharp, vibrant colors, photorealistic",
-         "negative_prompt": "cartoonish, low resolution, blurry, simplistic, abstract, deformed, ugly",
-     },
-     {
-         "name": "2560 x 1440",
-         "prompt": "hyper-realistic 4K image of {prompt}. ultra-detailed, lifelike, high-resolution, sharp, vibrant colors, photorealistic",
-         "negative_prompt": "cartoonish, low resolution, blurry, simplistic, abstract, deformed, ugly",
-     },
-     {
-         "name": "HD+",
-         "prompt": "hyper-realistic 2K image of {prompt}. ultra-detailed, lifelike, high-resolution, sharp, vibrant colors, photorealistic",
-         "negative_prompt": "cartoonish, low resolution, blurry, simplistic, abstract, deformed, ugly",
-     },
-     {
-         "name": "Style Zero",
-         "prompt": "{prompt}",
-         "negative_prompt": "",
-     },
- ]
-
- # Collage styles--------------------------------------------------------------------
- collage_style_list = [
-     {
-         "name": "Hi-Res",
-         "prompt": "hyper-realistic 8K image of {prompt}. ultra-detailed, lifelike, high-resolution, sharp, vibrant colors, photorealistic",
-         "negative_prompt": "cartoonish, low resolution, blurry, simplistic, abstract, deformed, ugly",
-     },
-     {
-         "name": "B & W",
-         "prompt": "black and white collage of {prompt}. monochromatic, timeless, classic, dramatic contrast",
-         "negative_prompt": "colorful, vibrant, bright, flashy",
-     },
-     {
-         "name": "Polaroid",
-         "prompt": "collage of polaroid photos featuring {prompt}. vintage style, high contrast, nostalgic, instant film aesthetic",
-         "negative_prompt": "digital, modern, low quality, blurry",
-     },
-     {
-         "name": "Watercolor",
-         "prompt": "watercolor collage of {prompt}. soft edges, translucent colors, painterly effects",
-         "negative_prompt": "digital, sharp lines, solid colors",
-     },
-     {
-         "name": "Cinematic",
-         "prompt": "cinematic collage of {prompt}. film stills, movie posters, dramatic lighting",
-         "negative_prompt": "static, lifeless, mundane",
-     },
-     {
-         "name": "Nostalgic",
-         "prompt": "nostalgic collage of {prompt}. retro imagery, vintage objects, sentimental journey",
-         "negative_prompt": "contemporary, futuristic, forward-looking",
-     },
-     {
-         "name": "Vintage",
-         "prompt": "vintage collage of {prompt}. aged paper, sepia tones, retro imagery, antique vibes",
-         "negative_prompt": "modern, contemporary, futuristic, high-tech",
-     },
-     {
-         "name": "Scrapbook",
-         "prompt": "scrapbook style collage of {prompt}. mixed media, hand-cut elements, textures, paper, stickers, doodles",
-         "negative_prompt": "clean, digital, modern, low quality",
-     },
-     {
-         "name": "NeoNGlow",
-         "prompt": "neon glow collage of {prompt}. vibrant colors, glowing effects, futuristic vibes",
-         "negative_prompt": "dull, muted colors, vintage, retro",
-     },
-     {
-         "name": "Geometric",
-         "prompt": "geometric collage of {prompt}. abstract shapes, colorful, sharp edges, modern design, high quality",
-         "negative_prompt": "blurry, low quality, traditional, dull",
-     },
-     {
-         "name": "Thematic",
-         "prompt": "thematic collage of {prompt}. cohesive theme, well-organized, matching colors, creative layout",
-         "negative_prompt": "random, messy, unorganized, clashing colors",
-     },
-     {
-         "name": "Cherry",
-         "prompt": "Duotone style Cherry tone applied to {prompt}",
-         "negative_prompt": "",
-     },
-     {
-         "name": "Fuchsia",
-         "prompt": "Duotone style Fuchsia tone applied to {prompt}",
-         "negative_prompt": "",
-     },
-     {
-         "name": "Pop",
-         "prompt": "Duotone style Pop tone applied to {prompt}",
-         "negative_prompt": "",
-     },
-     {
-         "name": "Violet",
-         "prompt": "Duotone style Violet applied to {prompt}",
-         "negative_prompt": "",
-     },
-     {
-         "name": "Sea Blue",
-         "prompt": "Duotone style Sea Blue applied to {prompt}",
-         "negative_prompt": "",
-     },
-     {
-         "name": "Sea Green",
-         "prompt": "Duotone style Sea Green applied to {prompt}",
-         "negative_prompt": "",
-     },
-     {
-         "name": "Mustard",
-         "prompt": "Duotone style Mustard applied to {prompt}",
-         "negative_prompt": "",
-     },
-     {
-         "name": "Amber",
-         "prompt": "Duotone style Amber applied to {prompt}",
-         "negative_prompt": "",
-     },
-     {
-         "name": "Pomelo",
-         "prompt": "Duotone style Pomelo applied to {prompt}",
-         "negative_prompt": "",
-     },
-     {
-         "name": "Peppermint",
-         "prompt": "Duotone style Peppermint applied to {prompt}",
-         "negative_prompt": "",
-     },
-     {
-         "name": "Mystic",
-         "prompt": "Duotone style Mystic tone applied to {prompt}",
-         "negative_prompt": "",
-     },
-     {
-         "name": "Pastel",
-         "prompt": "Duotone style Pastel applied to {prompt}",
-         "negative_prompt": "",
-     },
-     {
-         "name": "Coral",
-         "prompt": "Duotone style Coral applied to {prompt}",
-         "negative_prompt": "",
-     },
-     {
-         "name": "No Style",
-         "prompt": "{prompt}",
-         "negative_prompt": "",
-     },
- ]
-
- # Filters--------------------------------------------------------------------
- filters = {
-     "Vivid": {
-         "prompt": "extra vivid {prompt}",
-         "negative_prompt": "washed out, dull"
-     },
-     "Playa": {
-         "prompt": "{prompt} set in a vast playa",
-         "negative_prompt": "forest, mountains"
-     },
-     "Desert": {
-         "prompt": "{prompt} set in a desert landscape",
-         "negative_prompt": "ocean, city"
-     },
-     "West": {
-         "prompt": "{prompt} with a western theme",
-         "negative_prompt": "eastern, modern"
-     },
-     "Blush": {
-         "prompt": "{prompt} with a soft blush color palette",
-         "negative_prompt": "harsh colors, neon"
-     },
-     "Minimalist": {
-         "prompt": "{prompt} with a minimalist design",
-         "negative_prompt": "cluttered, ornate"
-     },
-     "Zero filter": {
-         "prompt": "{prompt}",
-         "negative_prompt": ""
-     },
- }
-
- styles = {k["name"]: (k["prompt"], k["negative_prompt"]) for k in style_list}
- collage_styles = {k["name"]: (k["prompt"], k["negative_prompt"]) for k in collage_style_list}
- filter_styles = {k: (v["prompt"], v["negative_prompt"]) for k, v in filters.items()}
-
- STYLE_NAMES = list(styles.keys())
- COLLAGE_STYLE_NAMES = list(collage_styles.keys())
- FILTER_NAMES = list(filters.keys())
- DEFAULT_STYLE_NAME = "3840 x 2160"
- DEFAULT_COLLAGE_STYLE_NAME = "Hi-Res"
- DEFAULT_FILTER_NAME = "Zero filter"
-
- def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str, str]:
-     if style_name in styles:
-         p, n = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
-     elif style_name in collage_styles:
-         p, n = collage_styles.get(style_name, collage_styles[DEFAULT_COLLAGE_STYLE_NAME])
-     elif style_name in filter_styles:
-         p, n = filter_styles.get(style_name, filter_styles[DEFAULT_FILTER_NAME])
-     else:
-         p, n = styles[DEFAULT_STYLE_NAME]
-
-     if not negative:
-         negative = ""
-     return p.replace("{prompt}", positive), n + negative
-
- if not torch.cuda.is_available():
-     DESCRIPTION = "\n<p>⚠️Running on CPU, This may not work on CPU.</p>"

  MAX_SEED = np.iinfo(np.int32).max
  CACHE_EXAMPLES = torch.cuda.is_available() and os.getenv("CACHE_EXAMPLES", "0") == "1"
@@ -243,53 +92,46 @@ MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "2048"))
  USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE", "0") == "1"
  ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", "0") == "1"

- device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
- # Set dtype based on device: half for CUDA, float32 for CPU
  dtype = torch.float16 if device.type == "cuda" else torch.float32

- # Load primary model (RealVisXL_V5.0_Lightning)
  if torch.cuda.is_available():
      pipe = StableDiffusionXLPipeline.from_pretrained(
-         #"SG161222/RealVisXL_V5.0",
          "SG161222/RealVisXL_V5.0_Lightning",
          torch_dtype=dtype,
          use_safetensors=True,
          add_watermarker=False
      ).to(device)
-     # Ensure text encoder uses half precision on GPU
      pipe.text_encoder = pipe.text_encoder.half()
-
      if ENABLE_CPU_OFFLOAD:
          pipe.enable_model_cpu_offload()
      else:
          pipe.to(device)
          print("Loaded RealVisXL_V5.0_Lightning on Device!")
-
      if USE_TORCH_COMPILE:
          pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
          print("Model RealVisXL_V5.0_Lightning Compiled!")

-     # Load second model (RealVisXL_V4.0)
      pipe2 = StableDiffusionXLPipeline.from_pretrained(
-         #"SG161222/RealVisXL_V4.0",
          "SG161222/RealVisXL_V4.0_Lightning",
          torch_dtype=dtype,
          use_safetensors=True,
          add_watermarker=False,
      ).to(device)
      pipe2.text_encoder = pipe2.text_encoder.half()
-
      if ENABLE_CPU_OFFLOAD:
          pipe2.enable_model_cpu_offload()
      else:
          pipe2.to(device)
          print("Loaded RealVisXL_V4.0 on Device!")
-
      if USE_TORCH_COMPILE:
          pipe2.unet = torch.compile(pipe2.unet, mode="reduce-overhead", fullgraph=True)
          print("Model RealVisXL_V4.0 Compiled!")

-     # Load third model
      pipe3 = StableDiffusionXLPipeline.from_pretrained(
          "SG161222/RealVisXL_V3.0_Turbo",
          torch_dtype=dtype,
@@ -297,18 +139,15 @@ if torch.cuda.is_available():
          add_watermarker=False,
      ).to(device)
      pipe3.text_encoder = pipe3.text_encoder.half()
-
      if ENABLE_CPU_OFFLOAD:
          pipe3.enable_model_cpu_offload()
      else:
          pipe3.to(device)
          print("Loaded Animagine XL 4.0 on Device!")
-
      if USE_TORCH_COMPILE:
          pipe3.unet = torch.compile(pipe3.unet, mode="reduce-overhead", fullgraph=True)
          print("Model Animagine XL 4.0 Compiled!")
  else:
-     # On CPU, load all models in float32
      pipe = StableDiffusionXLPipeline.from_pretrained(
          "SG161222/RealVisXL_V5.0_Lightning",
          torch_dtype=dtype,
@@ -329,7 +168,7 @@ else:
      ).to(device)
      print("Running on CPU; models loaded in float32.")

- # A dictionary to easily choose the model based on selection.
  DEFAULT_MODEL = "Lightning 5"
  MODEL_CHOICES = [DEFAULT_MODEL, "Lightning 4", "Turbo v3"]
  models = {
@@ -338,266 +177,232 @@ models = {
      "Turbo v3": pipe3
  }

- def save_image(img, path):
-     img.save(path)
-
- def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
-     if randomize_seed:
-         seed = random.randint(0, MAX_SEED)
-     return seed
-
- @spaces.GPU
- def generate(
-     prompt: str,
-     negative_prompt: str = "",
-     use_negative_prompt: bool = False,
-     style: str = DEFAULT_STYLE_NAME,
-     collage_style: str = DEFAULT_COLLAGE_STYLE_NAME,
-     filter_name: str = DEFAULT_FILTER_NAME,
-     grid_size: str = "2x2",
-     seed: int = 0,
-     width: int = 1024,
-     height: int = 1024,
-     guidance_scale: float = 3,
-     randomize_seed: bool = False,
-     model_choice: str = DEFAULT_MODEL,
-     use_resolution_binning: bool = True,
-     progress=gr.Progress(track_tqdm=True),
- ):
-     if check_text(prompt, negative_prompt):
          raise ValueError("Prompt contains restricted words.")

-     if collage_style != "No Style":
-         prompt, negative_prompt = apply_style(collage_style, prompt, negative_prompt)
-     elif filter_name != "No Filter":
-         prompt, negative_prompt = apply_style(filter_name, prompt, negative_prompt)
-     else:
-         prompt, negative_prompt = apply_style(style, prompt, negative_prompt)
-
      seed = int(randomize_seed_fn(seed, randomize_seed))
      generator = torch.Generator(device=device).manual_seed(seed)

-     if not use_negative_prompt:
-         negative_prompt = ""
-     negative_prompt += default_negative
-
      grid_sizes = {
          "2x1": (2, 1),
          "1x2": (1, 2),
          "2x2": (2, 2),
-         "2x3": (2, 3),
-         "3x2": (3, 2),
          "1x1": (1, 1)
      }
-
-     grid_size_x, grid_size_y = grid_sizes.get(grid_size, (2, 2))
-     num_images = grid_size_x * grid_size_y

      options = {
          "prompt": prompt,
-         "negative_prompt": negative_prompt,
          "width": width,
          "height": height,
          "guidance_scale": guidance_scale,
          "num_inference_steps": 30,
          "generator": generator,
          "num_images_per_prompt": num_images,
-         "use_resolution_binning": use_resolution_binning,
          "output_type": "pil",
      }

      if device.type == "cuda":
          torch.cuda.empty_cache()

-     # Choose pipeline based on user selection
      selected_pipe = models.get(model_choice, pipe)
      images = selected_pipe(**options).images

-     grid_img = Image.new('RGB', (width * grid_size_x, height * grid_size_y))
      for i, img in enumerate(images[:num_images]):
-         grid_img.paste(img, (i % grid_size_x * width, i // grid_size_x * height))

      unique_name = str(uuid.uuid4()) + ".png"
-     save_image(grid_img, unique_name)
      return [unique_name], seed

- examples = [
-     "Chocolate dripping from a donut against a yellow background, in the style of brocore, hyper-realistic oil --ar 2:3 --q 2 --s 750 --v 5",
-     "3d image, cute girl, in the style of Pixar --ar 1:2 --stylize 750, 4K resolution highlights, Sharp focus, octane render, ray tracing, Ultra-High-Definition, 8k, UHD, HDR, (Masterpiece:1.5), (best quality:1.5)",
-     "Cold coffee in a cup bokeh --ar 85:128 --v 6.0 --style raw5, 4k hdr, retro",
-     "Super Realism, High-resolution photograph, woman, UHD, photorealistic, shot on a Sony A7III --chaos 20 --ar 1:2 --style raw --stylize 250 --realism --soft"
- ]

  css = '''
- .gradio-container {
-     max-width: 888px !important;
-     margin: 0 auto !important;
-     display: flex;
-     flex-direction: column;
-     align-items: center;
- }
  h1 {
-     text-align: center;
  }
- '''

- title = """<h1 align="center">IMAGINEO 4K : SDXL🔥</h1>
- <p><center>
- <a href="https://huggingface.co/SG161222/RealVisXL_V4.0_Lightning" target="_blank">[Lightning 4]</a>
- <a href="https://huggingface.co/SG161222/RealVisXL_V5.0_Lightning" target="_blank">[Lightning 5]</a>
- <a href="https://huggingface.co/SG161222/RealVisXL_V3.0_Turbo" target="_blank">[Turbo v3]</a>
- </center></p>
- """

- with gr.Blocks(theme="YTheme/Minecraft", css=css) as demo:
-     gr.HTML(title)
-     with gr.Row():
-         with gr.Column(scale=1):
-             prompt = gr.Text(
-                 label="Prompt",
-                 show_label=False,
-                 max_lines=1,
-                 placeholder="Enter your prompt",
-                 container=False,
-             )
-             run_button = gr.Button("Generate Image ( 1024 x 1024 ) 🧤", scale=0)
-
-             with gr.Row(visible=True):
-                 model_selection = gr.Dropdown(
-                     choices=MODEL_CHOICES,
-                     value=DEFAULT_MODEL,
-                     label="Model Selection",
-                 )
-             with gr.Row(visible=True):
-                 grid_size_selection = gr.Dropdown(
-                     choices=["2x1", "1x2", "2x2", "2x3", "3x2", "1x1"],
-                     value="1x1",
-                     label="Grid Size"
-                 )
-             with gr.Row(visible=True):
-                 filter_selection = gr.Dropdown(
-                     show_label=True,
-                     container=True,
-                     interactive=True,
-                     choices=FILTER_NAMES,
-                     value=DEFAULT_FILTER_NAME,
-                     label="Filter Type",
-                 )
-             with gr.Row(visible=True):
-                 collage_style_selection = gr.Dropdown(
-                     show_label=True,
-                     container=True,
-                     interactive=True,
-                     choices=COLLAGE_STYLE_NAMES,
-                     value=DEFAULT_COLLAGE_STYLE_NAME,
-                     label="Collage Template + Duotone Canvas",
-                 )
-             with gr.Row(visible=True):
-                 style_selection = gr.Dropdown(
-                     show_label=True,
-                     container=True,
-                     interactive=True,
-                     choices=STYLE_NAMES,
-                     value=DEFAULT_STYLE_NAME,
-                     label="Quality Style",
-                 )
-             with gr.Accordion("Advanced options", open=False):
-                 use_negative_prompt = gr.Checkbox(label="Use negative prompt", value=True, visible=True)
-                 negative_prompt = gr.Text(
-                     label="Negative prompt",
-                     max_lines=1,
-                     placeholder="Enter a negative prompt",
-                     value="(deformed, distorted, disfigured:1.3), poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, (mutated hands and fingers:1.4), disconnected limbs, mutation, mutated, ugly, disgusting, blurry, amputation",
-                     visible=True,
-                 )
-                 with gr.Row():
-                     num_inference_steps = gr.Slider(
-                         label="Steps",
-                         minimum=10,
-                         maximum=60,
-                         step=1,
-                         value=30,
-                     )
-                 with gr.Row():
-                     num_images_per_prompt = gr.Slider(
-                         label="Images",
-                         minimum=1,
-                         maximum=5,
-                         step=1,
-                         value=2,
-                     )
-                 seed = gr.Slider(
-                     label="Seed",
-                     minimum=0,
-                     maximum=MAX_SEED,
-                     step=1,
-                     value=0,
-                     visible=True
-                 )
-                 randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-                 with gr.Row(visible=True):
-                     width = gr.Slider(
-                         label="Width",
-                         minimum=512,
-                         maximum=2048,
-                         step=64,
-                         value=1024,
-                     )
-                     height = gr.Slider(
-                         label="Height",
-                         minimum=512,
-                         maximum=2048,
-                         step=64,
-                         value=1024,
-                     )
-                 with gr.Row():
-                     guidance_scale = gr.Slider(
-                         label="Guidance Scale",
-                         minimum=0.1,
-                         maximum=20.0,
-                         step=0.1,
-                         value=6,
-                     )
-         with gr.Column(scale=2):
-             result = gr.Gallery(label="Result", columns=1, show_label=False)
-             gr.Examples(
-                 examples=examples,
-                 inputs=prompt,
-                 outputs=[result, seed],
-                 fn=generate,
-                 cache_examples=CACHE_EXAMPLES,
-             )
-     use_negative_prompt.change(
-         fn=lambda x: gr.update(visible=x),
-         inputs=use_negative_prompt,
-         outputs=negative_prompt,
-         api_name=False,
-     )
-     gr.on(
-         triggers=[
-             prompt.submit,
-             negative_prompt.submit,
-             run_button.click,
-         ],
-         fn=generate,
-         inputs=[
-             prompt,
-             negative_prompt,
-             use_negative_prompt,
-             style_selection,
-             collage_style_selection,
-             filter_selection,
-             grid_size_selection,
-             seed,
-             width,
-             height,
-             guidance_scale,
-             randomize_seed,
-             model_selection,
-         ],
-         outputs=[result, seed],
-         api_name="run",
-     )

  if __name__ == "__main__":
-     demo.queue(max_size=40).launch()
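For reference, the removed style system above works by plain template substitution: each style's prompt template carries a {prompt} placeholder that apply_style fills in, and the style's negative prompt is concatenated onto the user's. A minimal standalone sketch of that mechanism (illustrative only; the dictionary is trimmed to one entry):

    # Sketch of the removed template-substitution idea, not the full app.
    styles = {"HD+": ("hyper-realistic 2K image of {prompt}. ultra-detailed", "blurry")}

    def apply_style(name, positive, negative=""):
        # Fall back to the prompt unchanged if the style name is unknown.
        template, neg = styles.get(name, ("{prompt}", ""))
        return template.replace("{prompt}", positive), neg + negative

    print(apply_style("HD+", "a red fox"))
    # ('hyper-realistic 2K image of a red fox. ultra-detailed', 'blurry')

The new version of app.py replaces all of this with flag parsing inside a single chat handler, shown below.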
 
  import random
  import uuid
  import json
+ import time
+ import asyncio
+ import re
+ from threading import Thread
+
  import gradio as gr
  import spaces
  import torch
+ import numpy as np
+ from PIL import Image
+ import edge_tts
+
+ from transformers import (
+     AutoModelForCausalLM,
+     AutoTokenizer,
+     TextIteratorStreamer,
+     Qwen2VLForConditionalGeneration,
+     AutoProcessor,
+ )
+ from transformers.image_utils import load_image
+ from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
+
+ MAX_MAX_NEW_TOKENS = 2048
+ DEFAULT_MAX_NEW_TOKENS = 1024
+ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
+
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+ # Load text-only model and tokenizer for chat generation
+ model_id = "prithivMLmods/FastThink-0.5B-Tiny"
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_id,
+     device_map="auto",
+     torch_dtype=torch.bfloat16,
+ )
+ model.eval()

+ # TTS Voices and processor for multimodal chat
+ TTS_VOICES = [
+     "en-US-JennyNeural",  # @tts1
+     "en-US-GuyNeural",    # @tts2
+ ]
+ MODEL_ID_VL = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
+ processor = AutoProcessor.from_pretrained(MODEL_ID_VL, trust_remote_code=True)
+ model_m = Qwen2VLForConditionalGeneration.from_pretrained(
+     MODEL_ID_VL,
+     trust_remote_code=True,
+     torch_dtype=torch.float16
+ ).to("cuda").eval()
+
+ # A helper function to convert text to speech via Edge TTS
+ async def text_to_speech(text: str, voice: str, output_file="output.mp3"):
+     communicate = edge_tts.Communicate(text, voice)
+     await communicate.save(output_file)
+     return output_file
+
+ def clean_chat_history(chat_history):
+     cleaned = []
+     for msg in chat_history:
+         if isinstance(msg, dict) and isinstance(msg.get("content"), str):
+             cleaned.append(msg)
+     return cleaned
+
+ # Restricted words check (if any)
  bad_words = json.loads(os.getenv('BAD_WORDS', "[]"))
  bad_words_negative = json.loads(os.getenv('BAD_WORDS_NEGATIVE', "[]"))
  default_negative = os.getenv("default_negative", "")

              return True
      return False

+ # Use the same random seed function for both text and image generation
+ def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
+     if randomize_seed:
+         seed = random.randint(0, MAX_SEED)
+     return seed

  MAX_SEED = np.iinfo(np.int32).max
  CACHE_EXAMPLES = torch.cuda.is_available() and os.getenv("CACHE_EXAMPLES", "0") == "1"
  USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE", "0") == "1"
  ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", "0") == "1"

+ # Set dtype based on device: use half for CUDA, float32 otherwise.
  dtype = torch.float16 if device.type == "cuda" else torch.float32

+ # Load image generation pipelines for the three model choices.
  if torch.cuda.is_available():
+     # Lightning 5 model
      pipe = StableDiffusionXLPipeline.from_pretrained(
          "SG161222/RealVisXL_V5.0_Lightning",
          torch_dtype=dtype,
          use_safetensors=True,
          add_watermarker=False
      ).to(device)
      pipe.text_encoder = pipe.text_encoder.half()
      if ENABLE_CPU_OFFLOAD:
          pipe.enable_model_cpu_offload()
      else:
          pipe.to(device)
          print("Loaded RealVisXL_V5.0_Lightning on Device!")
      if USE_TORCH_COMPILE:
          pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
          print("Model RealVisXL_V5.0_Lightning Compiled!")

+     # Lightning 4 model
      pipe2 = StableDiffusionXLPipeline.from_pretrained(
          "SG161222/RealVisXL_V4.0_Lightning",
          torch_dtype=dtype,
          use_safetensors=True,
          add_watermarker=False,
      ).to(device)
      pipe2.text_encoder = pipe2.text_encoder.half()
      if ENABLE_CPU_OFFLOAD:
          pipe2.enable_model_cpu_offload()
      else:
          pipe2.to(device)
          print("Loaded RealVisXL_V4.0 on Device!")
      if USE_TORCH_COMPILE:
          pipe2.unet = torch.compile(pipe2.unet, mode="reduce-overhead", fullgraph=True)
          print("Model RealVisXL_V4.0 Compiled!")

+     # Turbo v3 model
      pipe3 = StableDiffusionXLPipeline.from_pretrained(
          "SG161222/RealVisXL_V3.0_Turbo",
          torch_dtype=dtype,
          add_watermarker=False,
      ).to(device)
      pipe3.text_encoder = pipe3.text_encoder.half()
      if ENABLE_CPU_OFFLOAD:
          pipe3.enable_model_cpu_offload()
      else:
          pipe3.to(device)
          print("Loaded Animagine XL 4.0 on Device!")
      if USE_TORCH_COMPILE:
          pipe3.unet = torch.compile(pipe3.unet, mode="reduce-overhead", fullgraph=True)
          print("Model Animagine XL 4.0 Compiled!")
  else:
      pipe = StableDiffusionXLPipeline.from_pretrained(
          "SG161222/RealVisXL_V5.0_Lightning",
          torch_dtype=dtype,

      ).to(device)
      print("Running on CPU; models loaded in float32.")

+ # Define available model choices and their mapping.
  DEFAULT_MODEL = "Lightning 5"
  MODEL_CHOICES = [DEFAULT_MODEL, "Lightning 4", "Turbo v3"]
  models = {

      "Turbo v3": pipe3
  }

+ def generate_image_grid(prompt: str, seed: int, grid_size: str, width: int, height: int,
+                         guidance_scale: float, randomize_seed: bool, model_choice: str):
+     if check_text(prompt, ""):
          raise ValueError("Prompt contains restricted words.")

      seed = int(randomize_seed_fn(seed, randomize_seed))
      generator = torch.Generator(device=device).manual_seed(seed)

+     # Define supported grid sizes.
      grid_sizes = {
          "2x1": (2, 1),
          "1x2": (1, 2),
          "2x2": (2, 2),
          "1x1": (1, 1)
      }
+     grid_size_tuple = grid_sizes.get(grid_size, (1, 1))
+     num_images = grid_size_tuple[0] * grid_size_tuple[1]

      options = {
          "prompt": prompt,
+         "negative_prompt": default_negative,
          "width": width,
          "height": height,
          "guidance_scale": guidance_scale,
          "num_inference_steps": 30,
          "generator": generator,
          "num_images_per_prompt": num_images,
+         "use_resolution_binning": True,
          "output_type": "pil",
      }

      if device.type == "cuda":
          torch.cuda.empty_cache()

      selected_pipe = models.get(model_choice, pipe)
      images = selected_pipe(**options).images

+     # Create a grid image.
+     grid_img = Image.new('RGB', (width * grid_size_tuple[0], height * grid_size_tuple[1]))
      for i, img in enumerate(images[:num_images]):
+         grid_img.paste(img, ((i % grid_size_tuple[0]) * width, (i // grid_size_tuple[0]) * height))

      unique_name = str(uuid.uuid4()) + ".png"
+     grid_img.save(unique_name)
      return [unique_name], seed

+ # -----------------------------
227
+ # Main generate() Function
228
+ # -----------------------------
229
+ @spaces.GPU
230
+ def generate(
231
+ input_dict: dict,
232
+ chat_history: list[dict],
233
+ max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
234
+ temperature: float = 0.6,
235
+ top_p: float = 0.9,
236
+ top_k: int = 50,
237
+ repetition_penalty: float = 1.2,
238
+ ):
239
+ text = input_dict["text"]
240
+ files = input_dict.get("files", [])
241
+
242
+ lower_text = text.lower().strip()
243
+ # Check if the prompt is an image generation command using model flags.
244
+ if (lower_text.startswith("@lightningv5") or
245
+ lower_text.startswith("@lightningv4") or
246
+ lower_text.startswith("@turbov3")):
247
+
248
+ # Determine model choice based on flag.
249
+ model_choice = None
250
+ if "@lightningv5" in lower_text:
251
+ model_choice = "Lightning 5"
252
+ elif "@lightningv4" in lower_text:
253
+ model_choice = "Lightning 4"
254
+ elif "@turbov3" in lower_text:
255
+ model_choice = "Turbo v3"
256
+
257
+ # Parse grid size flag e.g. "@2x2"
258
+ grid_match = re.search(r"@(\d+x\d+)", lower_text)
259
+ grid_size = grid_match.group(1) if grid_match else "1x1"
260
+
261
+ # Remove the model and grid flags from the prompt.
262
+ prompt_clean = re.sub(r"@lightningv5", "", text, flags=re.IGNORECASE)
263
+ prompt_clean = re.sub(r"@lightningv4", "", prompt_clean, flags=re.IGNORECASE)
264
+ prompt_clean = re.sub(r"@turbov3", "", prompt_clean, flags=re.IGNORECASE)
265
+ prompt_clean = re.sub(r"@\d+x\d+", "", prompt_clean, flags=re.IGNORECASE)
266
+ prompt_clean = prompt_clean.strip().strip('"')
267
+
268
+ # Default parameters for image generation.
269
+ width = 1024
270
+ height = 1024
271
+ guidance_scale = 6.0
272
+ seed_val = 0
273
+ randomize_seed = True
274
+ use_resolution_binning = True
275
+
276
+ yield "Generating image grid..."
277
+ image_paths, used_seed = generate_image_grid(
278
+ prompt_clean,
279
+ seed_val,
280
+ grid_size,
281
+ width,
282
+ height,
283
+ guidance_scale,
284
+ randomize_seed,
285
+ model_choice,
286
+ )
287
+ yield gr.Image(image_paths[0])
288
+ return
289
+
290
+ # Otherwise, handle text/chat (and TTS) generation.
291
+ tts_prefix = "@tts"
292
+ is_tts = any(text.strip().lower().startswith(f"{tts_prefix}{i}") for i in range(1, 3))
293
+ voice_index = next((i for i in range(1, 3) if text.strip().lower().startswith(f"{tts_prefix}{i}")), None)
294
+
295
+ if is_tts and voice_index:
296
+ voice = TTS_VOICES[voice_index - 1]
297
+ text = text.replace(f"{tts_prefix}{voice_index}", "").strip()
298
+ conversation = [{"role": "user", "content": text}]
299
+ else:
300
+ voice = None
301
+ text = text.replace(tts_prefix, "").strip()
302
+ conversation = clean_chat_history(chat_history)
303
+ conversation.append({"role": "user", "content": text})
304
+
305
+ if files:
306
+ images = [load_image(image) for image in files] if len(files) > 1 else [load_image(files[0])]
307
+ messages = [{
308
+ "role": "user",
309
+ "content": [
310
+ *[{"type": "image", "image": image} for image in images],
311
+ {"type": "text", "text": text},
312
+ ]
313
+ }]
314
+ prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
315
+ inputs = processor(text=[prompt], images=images, return_tensors="pt", padding=True).to("cuda")
316
+ streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
317
+ generation_kwargs = {**inputs, "streamer": streamer, "max_new_tokens": max_new_tokens}
318
+ thread = Thread(target=model_m.generate, kwargs=generation_kwargs)
319
+ thread.start()
320
+
321
+ buffer = ""
322
+ yield "Thinking..."
323
+ for new_text in streamer:
324
+ buffer += new_text
325
+ buffer = buffer.replace("<|im_end|>", "")
326
+ time.sleep(0.01)
327
+ yield buffer
328
+ else:
329
+ input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
330
+ if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
331
+ input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
332
+ gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
333
+ input_ids = input_ids.to(model.device)
334
+ streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
335
+ generation_kwargs = {
336
+ "input_ids": input_ids,
337
+ "streamer": streamer,
338
+ "max_new_tokens": max_new_tokens,
339
+ "do_sample": True,
340
+ "top_p": top_p,
341
+ "top_k": top_k,
342
+ "temperature": temperature,
343
+ "num_beams": 1,
344
+ "repetition_penalty": repetition_penalty,
345
+ }
346
+ t = Thread(target=model.generate, kwargs=generation_kwargs)
347
+ t.start()
348
+
349
+ outputs = []
350
+ for new_text in streamer:
351
+ outputs.append(new_text)
352
+ yield "".join(outputs)
353
+
354
+ final_response = "".join(outputs)
355
+ yield final_response
356
+
357
+ if is_tts and voice:
358
+ output_file = asyncio.run(text_to_speech(final_response, voice))
359
+ yield gr.Audio(output_file, autoplay=True)
360
+
361
+
362
+ DESCRIPTION = """
363
+ # IMAGINEO 4K ⚡
364
+ """
365
 
366
  css = '''
 
 
 
 
 
 
 
367
  h1 {
368
+ text-align: center;
369
+ display: block;
370
  }
 
371
 
372
+ #duplicate-button {
373
+ margin: auto;
374
+ color: #fff;
375
+ background: #1565c0;
376
+ border-radius: 100vh;
377
+ }
378
+ '''
379
 
380
+ demo = gr.ChatInterface(
381
+ fn=generate,
382
+ additional_inputs=[
383
+ gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS),
384
+ gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.6),
385
+ gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9),
386
+ gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50),
387
+ gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2),
388
+ ],
389
+ examples=[
390
+ ["@tts1 Who is Nikola Tesla, and why did he die?"],
391
+ ['@lightningv5 @2x2 "Chocolate dripping from a donut against a yellow background, in the style of brocore, hyper-realistic"'],
392
+ ['@lightningv4 @1x1 "A serene landscape with mountains"'],
393
+ ['@turbov3 @2x1 "Abstract art, colorful and vibrant"'],
394
+ ["Write a Python function to check if a number is prime."],
395
+ ["@tts2 What causes rainbows to form?"],
396
+ ],
397
+ cache_examples=False,
398
+ type="messages",
399
+ description=DESCRIPTION,
400
+ css=css,
401
+ fill_height=True,
402
+ textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple"),
403
+ stop_btn="Stop Generation",
404
+ multimodal=True,
405
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
406
 
407
  if __name__ == "__main__":
408
+ demo.queue(max_size=20).launch(share=True)
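The rewritten app routes everything through a single chat box: a message starting with @lightningv5, @lightningv4, or @turbov3 becomes an image-grid request (optionally with an @NxM grid flag), @tts1/@tts2 request a spoken reply, and anything else goes to the chat model. A minimal, self-contained sketch of that flag routing, mirroring the regexes in the new generate() (the route() function and its return tuples are illustrative, not part of the app):

    import re

    def route(text: str):
        # Mirror of the commit's flag parsing, for illustration only.
        lower = text.lower().strip()
        models = {"@lightningv5": "Lightning 5", "@lightningv4": "Lightning 4", "@turbov3": "Turbo v3"}
        for flag, model in models.items():
            if lower.startswith(flag):
                grid = re.search(r"@(\d+x\d+)", lower)
                prompt = re.sub(r"@lightningv5|@lightningv4|@turbov3|@\d+x\d+", "", text, flags=re.IGNORECASE)
                return "image", model, (grid.group(1) if grid else "1x1"), prompt.strip().strip('"')
        if lower.startswith("@tts1") or lower.startswith("@tts2"):
            return "tts", None, None, text[5:].strip()
        return "chat", None, None, text

    print(route('@lightningv5 @2x2 "a serene landscape"'))
    # ('image', 'Lightning 5', '2x2', 'a serene landscape')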