Spaces:

prithivMLmods
/

Imagineo-Chat

Running on Zero

App Files Community

prithivMLmods committed 5 days ago

Commit

905e633

verified ·

1 Parent(s): 804f76a

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -80

app.py CHANGED Viewed

@@ -30,7 +30,6 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-# Load text-only model and tokenizer for chat generation
 model_id = "prithivMLmods/FastThink-0.5B-Tiny"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
@@ -40,7 +39,6 @@ model = AutoModelForCausalLM.from_pretrained(
 )
 model.eval()
-# TTS Voices and processor for multimodal chat
 TTS_VOICES = [
     "en-US-JennyNeural",  # @tts1
     "en-US-GuyNeural",    # @tts2
@@ -53,7 +51,6 @@ model_m = Qwen2VLForConditionalGeneration.from_pretrained(
     torch_dtype=torch.float16
 ).to("cuda").eval()
-# A helper function to convert text to speech via Edge TTS
 async def text_to_speech(text: str, voice: str, output_file="output.mp3"):
     communicate = edge_tts.Communicate(text, voice)
     await communicate.save(output_file)
@@ -66,7 +63,6 @@ def clean_chat_history(chat_history):
             cleaned.append(msg)
     return cleaned
-# Restricted words check (if any)
 bad_words = json.loads(os.getenv('BAD_WORDS', "[]"))
 bad_words_negative = json.loads(os.getenv('BAD_WORDS_NEGATIVE', "[]"))
 default_negative = os.getenv("default_negative", "")
@@ -80,7 +76,6 @@ def check_text(prompt, negative=""):
             return True
     return False
-# Use the same random seed function for both text and image generation
 def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
@@ -92,10 +87,8 @@ MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "2048"))
 USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE", "0") == "1"
 ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", "0") == "1"
-# Set dtype based on device: use half for CUDA, float32 otherwise.
 dtype = torch.float16 if device.type == "cuda" else torch.float32
-# Load image generation pipelines for the three model choices.
 if torch.cuda.is_available():
     # Lightning 5 model
     pipe = StableDiffusionXLPipeline.from_pretrained(
@@ -168,7 +161,6 @@ else:
     ).to(device)
     print("Running on CPU; models loaded in float32.")
-# Define available model choices and their mapping.
 DEFAULT_MODEL = "Lightning 5"
 MODEL_CHOICES = [DEFAULT_MODEL, "Lightning 4", "Turbo v3"]
 models = {
@@ -177,55 +169,11 @@ models = {
     "Turbo v3": pipe3
 }
-def generate_image_grid(prompt: str, seed: int, grid_size: str, width: int, height: int,
-                        guidance_scale: float, randomize_seed: bool, model_choice: str):
-    if check_text(prompt, ""):
-        raise ValueError("Prompt contains restricted words.")
-    seed = int(randomize_seed_fn(seed, randomize_seed))
-    generator = torch.Generator(device=device).manual_seed(seed)
-    # Define supported grid sizes.
-    grid_sizes = {
-        "2x1": (2, 1),
-        "1x2": (1, 2),
-        "2x2": (2, 2),
-        "1x1": (1, 1)
-    }
-    grid_size_tuple = grid_sizes.get(grid_size, (1, 1))
-    num_images = grid_size_tuple[0] * grid_size_tuple[1]
-    options = {
-        "prompt": prompt,
-        "negative_prompt": default_negative,
-        "width": width,
-        "height": height,
-        "guidance_scale": guidance_scale,
-        "num_inference_steps": 30,
-        "generator": generator,
-        "num_images_per_prompt": num_images,
-        "use_resolution_binning": True,
-        "output_type": "pil",
-    }
-    if device.type == "cuda":
-        torch.cuda.empty_cache()
-    selected_pipe = models.get(model_choice, pipe)
-    images = selected_pipe(**options).images
-    # Create a grid image.
-    grid_img = Image.new('RGB', (width * grid_size_tuple[0], height * grid_size_tuple[1]))
-    for i, img in enumerate(images[:num_images]):
-        grid_img.paste(img, ((i % grid_size_tuple[0]) * width, (i // grid_size_tuple[0]) * height))
     unique_name = str(uuid.uuid4()) + ".png"
-    grid_img.save(unique_name)
-    return [unique_name], seed
-# -----------------------------
-# Main generate() Function
-# -----------------------------
 @spaces.GPU
 def generate(
     input_dict: dict,
@@ -254,37 +202,42 @@ def generate(
         elif "@turbov3" in lower_text:
             model_choice = "Turbo v3"
-        # Parse grid size flag e.g. "@2x2"
-        grid_match = re.search(r"@(\d+x\d+)", lower_text)
-        grid_size = grid_match.group(1) if grid_match else "1x1"
-        # Remove the model and grid flags from the prompt.
         prompt_clean = re.sub(r"@lightningv5", "", text, flags=re.IGNORECASE)
         prompt_clean = re.sub(r"@lightningv4", "", prompt_clean, flags=re.IGNORECASE)
         prompt_clean = re.sub(r"@turbov3", "", prompt_clean, flags=re.IGNORECASE)
-        prompt_clean = re.sub(r"@\d+x\d+", "", prompt_clean, flags=re.IGNORECASE)
         prompt_clean = prompt_clean.strip().strip('"')
-        # Default parameters for image generation.
         width = 1024
         height = 1024
         guidance_scale = 6.0
         seed_val = 0
-        randomize_seed = True
-        use_resolution_binning = True
-        yield "Generating image grid..."
-        image_paths, used_seed = generate_image_grid(
-            prompt_clean,
-            seed_val,
-            grid_size,
-            width,
-            height,
-            guidance_scale,
-            randomize_seed,
-            model_choice,
-        )
-        yield gr.Image(image_paths[0])
         return
     # Otherwise, handle text/chat (and TTS) generation.
@@ -358,7 +311,6 @@ def generate(
             output_file = asyncio.run(text_to_speech(final_response, voice))
             yield gr.Audio(output_file, autoplay=True)
 DESCRIPTION = """
 # IMAGINEO 4K ⚡
 """
@@ -388,9 +340,9 @@ demo = gr.ChatInterface(
     ],
     examples=[
         ["@tts1 Who is Nikola Tesla, and why did he die?"],
-        ['@lightningv5 @2x2 "Chocolate dripping from a donut against a yellow background, in the style of brocore, hyper-realistic"'],
-        ['@lightningv4 @1x1 "A serene landscape with mountains"'],
-        ['@turbov3 @2x1 "Abstract art, colorful and vibrant"'],
         ["Write a Python function to check if a number is prime."],
         ["@tts2 What causes rainbows to form?"],
     ],

 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 model_id = "prithivMLmods/FastThink-0.5B-Tiny"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
 )
 model.eval()
 TTS_VOICES = [
     "en-US-JennyNeural",  # @tts1
     "en-US-GuyNeural",    # @tts2
     torch_dtype=torch.float16
 ).to("cuda").eval()
 async def text_to_speech(text: str, voice: str, output_file="output.mp3"):
     communicate = edge_tts.Communicate(text, voice)
     await communicate.save(output_file)
             cleaned.append(msg)
     return cleaned
 bad_words = json.loads(os.getenv('BAD_WORDS', "[]"))
 bad_words_negative = json.loads(os.getenv('BAD_WORDS_NEGATIVE', "[]"))
 default_negative = os.getenv("default_negative", "")
             return True
     return False
 def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
 USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE", "0") == "1"
 ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", "0") == "1"
 dtype = torch.float16 if device.type == "cuda" else torch.float32
 if torch.cuda.is_available():
     # Lightning 5 model
     pipe = StableDiffusionXLPipeline.from_pretrained(
     ).to(device)
     print("Running on CPU; models loaded in float32.")
 DEFAULT_MODEL = "Lightning 5"
 MODEL_CHOICES = [DEFAULT_MODEL, "Lightning 4", "Turbo v3"]
 models = {
     "Turbo v3": pipe3
 }
+def save_image(img: Image.Image) -> str:
     unique_name = str(uuid.uuid4()) + ".png"
+    img.save(unique_name)
+    return unique_name
 @spaces.GPU
 def generate(
     input_dict: dict,
         elif "@turbov3" in lower_text:
             model_choice = "Turbo v3"
+        # Remove the model flag from the prompt.
         prompt_clean = re.sub(r"@lightningv5", "", text, flags=re.IGNORECASE)
         prompt_clean = re.sub(r"@lightningv4", "", prompt_clean, flags=re.IGNORECASE)
         prompt_clean = re.sub(r"@turbov3", "", prompt_clean, flags=re.IGNORECASE)
         prompt_clean = prompt_clean.strip().strip('"')
+        # Default parameters for single image generation.
         width = 1024
         height = 1024
         guidance_scale = 6.0
         seed_val = 0
+        randomize_seed_flag = True
+        seed_val = int(randomize_seed_fn(seed_val, randomize_seed_flag))
+        generator = torch.Generator(device=device).manual_seed(seed_val)
+        options = {
+            "prompt": prompt_clean,
+            "negative_prompt": default_negative,
+            "width": width,
+            "height": height,
+            "guidance_scale": guidance_scale,
+            "num_inference_steps": 30,
+            "generator": generator,
+            "num_images_per_prompt": 1,
+            "use_resolution_binning": True,
+            "output_type": "pil",
+        }
+        if device.type == "cuda":
+            torch.cuda.empty_cache()
+        selected_pipe = models.get(model_choice, pipe)
+        images = selected_pipe(**options).images
+        image_path = save_image(images[0])
+        yield "Generating image..."
+        yield gr.Image(image_path)
         return
     # Otherwise, handle text/chat (and TTS) generation.
             output_file = asyncio.run(text_to_speech(final_response, voice))
             yield gr.Audio(output_file, autoplay=True)
 DESCRIPTION = """
 # IMAGINEO 4K ⚡
 """
     ],
     examples=[
         ["@tts1 Who is Nikola Tesla, and why did he die?"],
+        ['@lightningv5 "Chocolate dripping from a donut against a yellow background, in the style of brocore, hyper-realistic"'],
+        ['@lightningv4 "A serene landscape with mountains"'],
+        ['@turbov3 "Abstract art, colorful and vibrant"'],
         ["Write a Python function to check if a number is prime."],
         ["@tts2 What causes rainbows to form?"],
     ],