import os import random import uuid import json import gradio as gr import numpy as np from PIL import Image import spaces import torch from diffusers import DiffusionPipeline, StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler from typing import Tuple # Load restricted words bad_words = json.loads(os.getenv('BAD_WORDS', "[]")) bad_words_negative = json.loads(os.getenv('BAD_WORDS_NEGATIVE', "[]")) default_negative = os.getenv("default_negative", "") def check_text(prompt, negative=""): for i in bad_words: if i in prompt: return True for i in bad_words_negative: if i in negative: return True return False # Quality/Style-------------------------------------------------------------------- style_list = [ { "name": "3840 x 2160", "prompt": "hyper-realistic 8K image of {prompt}. ultra-detailed, lifelike, high-resolution, sharp, vibrant colors, photorealistic", "negative_prompt": "cartoonish, low resolution, blurry, simplistic, abstract, deformed, ugly", }, { "name": "2560 x 1440", "prompt": "hyper-realistic 4K image of {prompt}. ultra-detailed, lifelike, high-resolution, sharp, vibrant colors, photorealistic", "negative_prompt": "cartoonish, low resolution, blurry, simplistic, abstract, deformed, ugly", }, { "name": "HD+", "prompt": "hyper-realistic 2K image of {prompt}. ultra-detailed, lifelike, high-resolution, sharp, vibrant colors, photorealistic", "negative_prompt": "cartoonish, low resolution, blurry, simplistic, abstract, deformed, ugly", }, { "name": "Style Zero", "prompt": "{prompt}", "negative_prompt": "", }, ] # Collage styles-------------------------------------------------------------------- collage_style_list = [ { "name": "Hi-Res", "prompt": "hyper-realistic 8K image of {prompt}. ultra-detailed, lifelike, high-resolution, sharp, vibrant colors, photorealistic", "negative_prompt": "cartoonish, low resolution, blurry, simplistic, abstract, deformed, ugly", }, { "name": "B & W", "prompt": "black and white collage of {prompt}. monochromatic, timeless, classic, dramatic contrast", "negative_prompt": "colorful, vibrant, bright, flashy", }, { "name": "Polaroid", "prompt": "collage of polaroid photos featuring {prompt}. vintage style, high contrast, nostalgic, instant film aesthetic", "negative_prompt": "digital, modern, low quality, blurry", }, { "name": "Watercolor", "prompt": "watercolor collage of {prompt}. soft edges, translucent colors, painterly effects", "negative_prompt": "digital, sharp lines, solid colors", }, { "name": "Cinematic", "prompt": "cinematic collage of {prompt}. film stills, movie posters, dramatic lighting", "negative_prompt": "static, lifeless, mundane", }, { "name": "Nostalgic", "prompt": "nostalgic collage of {prompt}. retro imagery, vintage objects, sentimental journey", "negative_prompt": "contemporary, futuristic, forward-looking", }, { "name": "Vintage", "prompt": "vintage collage of {prompt}. aged paper, sepia tones, retro imagery, antique vibes", "negative_prompt": "modern, contemporary, futuristic, high-tech", }, { "name": "Scrapbook", "prompt": "scrapbook style collage of {prompt}. mixed media, hand-cut elements, textures, paper, stickers, doodles", "negative_prompt": "clean, digital, modern, low quality", }, { "name": "NeoNGlow", "prompt": "neon glow collage of {prompt}. vibrant colors, glowing effects, futuristic vibes", "negative_prompt": "dull, muted colors, vintage, retro", }, { "name": "Geometric", "prompt": "geometric collage of {prompt}. abstract shapes, colorful, sharp edges, modern design, high quality", "negative_prompt": "blurry, low quality, traditional, dull", }, { "name": "Thematic", "prompt": "thematic collage of {prompt}. cohesive theme, well-organized, matching colors, creative layout", "negative_prompt": "random, messy, unorganized, clashing colors", }, { "name": "Cherry", "prompt": "Duotone style Cherry tone applied to {prompt}", "negative_prompt": "", }, { "name": "Fuchsia", "prompt": "Duotone style Fuchsia tone applied to {prompt}", "negative_prompt": "", }, { "name": "Pop", "prompt": "Duotone style Pop tone applied to {prompt}", "negative_prompt": "", }, { "name": "Violet", "prompt": "Duotone style Violet applied to {prompt}", "negative_prompt": "", }, { "name": "Sea Blue", "prompt": "Duotone style Sea Blue applied to {prompt}", "negative_prompt": "", }, { "name": "Sea Green", "prompt": "Duotone style Sea Green applied to {prompt}", "negative_prompt": "", }, { "name": "Mustard", "prompt": "Duotone style Mustard applied to {prompt}", "negative_prompt": "", }, { "name": "Amber", "prompt": "Duotone style Amber applied to {prompt}", "negative_prompt": "", }, { "name": "Pomelo", "prompt": "Duotone style Pomelo applied to {prompt}", "negative_prompt": "", }, { "name": "Peppermint", "prompt": "Duotone style Peppermint applied to {prompt}", "negative_prompt": "", }, { "name": "Mystic", "prompt": "Duotone style Mystic tone applied to {prompt}", "negative_prompt": "", }, { "name": "Pastel", "prompt": "Duotone style Pastel applied to {prompt}", "negative_prompt": "", }, { "name": "Coral", "prompt": "Duotone style Coral applied to {prompt}", "negative_prompt": "", }, { "name": "No Style", "prompt": "{prompt}", "negative_prompt": "", }, ] # Filters-------------------------------------------------------------------- filters = { "Vivid": { "prompt": "extra vivid {prompt}", "negative_prompt": "washed out, dull" }, "Playa": { "prompt": "{prompt} set in a vast playa", "negative_prompt": "forest, mountains" }, "Desert": { "prompt": "{prompt} set in a desert landscape", "negative_prompt": "ocean, city" }, "West": { "prompt": "{prompt} with a western theme", "negative_prompt": "eastern, modern" }, "Blush": { "prompt": "{prompt} with a soft blush color palette", "negative_prompt": "harsh colors, neon" }, "Minimalist": { "prompt": "{prompt} with a minimalist design", "negative_prompt": "cluttered, ornate" }, "Zero filter": { "prompt": "{prompt}", "negative_prompt": "" }, } styles = {k["name"]: (k["prompt"], k["negative_prompt"]) for k in style_list} collage_styles = {k["name"]: (k["prompt"], k["negative_prompt"]) for k in collage_style_list} filter_styles = {k: (v["prompt"], v["negative_prompt"]) for k, v in filters.items()} STYLE_NAMES = list(styles.keys()) COLLAGE_STYLE_NAMES = list(collage_styles.keys()) FILTER_NAMES = list(filters.keys()) DEFAULT_STYLE_NAME = "3840 x 2160" DEFAULT_COLLAGE_STYLE_NAME = "Hi-Res" DEFAULT_FILTER_NAME = "Zero filter" def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str, str]: if style_name in styles: p, n = styles.get(style_name, styles[DEFAULT_STYLE_NAME]) elif style_name in collage_styles: p, n = collage_styles.get(style_name, collage_styles[DEFAULT_COLLAGE_STYLE_NAME]) elif style_name in filter_styles: p, n = filter_styles.get(style_name, filter_styles[DEFAULT_FILTER_NAME]) else: p, n = styles[DEFAULT_STYLE_NAME] if not negative: negative = "" return p.replace("{prompt}", positive), n + negative if not torch.cuda.is_available(): DESCRIPTION = "\n
⚠️Running on CPU, This may not work on CPU.
" MAX_SEED = np.iinfo(np.int32).max CACHE_EXAMPLES = torch.cuda.is_available() and os.getenv("CACHE_EXAMPLES", "0") == "1" MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "2048")) USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE", "0") == "1" ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", "0") == "1" device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # Set dtype based on device: half for CUDA, float32 for CPU dtype = torch.float16 if device.type == "cuda" else torch.float32 # Load primary model (RealVisXL_V5.0_Lightning) if torch.cuda.is_available(): pipe = StableDiffusionXLPipeline.from_pretrained( "SG161222/RealVisXL_V5.0_Lightning", torch_dtype=dtype, use_safetensors=True, add_watermarker=False ).to(device) # Ensure text encoder uses half precision on GPU pipe.text_encoder = pipe.text_encoder.half() if ENABLE_CPU_OFFLOAD: pipe.enable_model_cpu_offload() else: pipe.to(device) print("Loaded RealVisXL_V5.0_Lightning on Device!") if USE_TORCH_COMPILE: pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True) print("Model RealVisXL_V5.0_Lightning Compiled!") # Load second model (RealVisXL_V4.0) pipe2 = StableDiffusionXLPipeline.from_pretrained( "SG161222/RealVisXL_V4.0", torch_dtype=dtype, use_safetensors=True, add_watermarker=False, ).to(device) pipe2.text_encoder = pipe2.text_encoder.half() if ENABLE_CPU_OFFLOAD: pipe2.enable_model_cpu_offload() else: pipe2.to(device) print("Loaded RealVisXL_V4.0 on Device!") if USE_TORCH_COMPILE: pipe2.unet = torch.compile(pipe2.unet, mode="reduce-overhead", fullgraph=True) print("Model RealVisXL_V4.0 Compiled!") # Load third model pipe3 = StableDiffusionXLPipeline.from_pretrained( "SG161222/RealVisXL_V3.0_Turbo", torch_dtype=dtype, use_safetensors=True, add_watermarker=False, ).to(device) pipe3.text_encoder = pipe3.text_encoder.half() if ENABLE_CPU_OFFLOAD: pipe3.enable_model_cpu_offload() else: pipe3.to(device) print("Loaded Animagine XL 4.0 on Device!") if USE_TORCH_COMPILE: pipe3.unet = torch.compile(pipe3.unet, mode="reduce-overhead", fullgraph=True) print("Model Animagine XL 4.0 Compiled!") else: # On CPU, load all models in float32 pipe = StableDiffusionXLPipeline.from_pretrained( "SG161222/RealVisXL_V5.0_Lightning", torch_dtype=dtype, use_safetensors=True, add_watermarker=False ).to(device) pipe2 = StableDiffusionXLPipeline.from_pretrained( "SG161222/RealVisXL_V4.0_Lightning", torch_dtype=dtype, use_safetensors=True, add_watermarker=False, ).to(device) pipe3 = StableDiffusionXLPipeline.from_pretrained( "SG161222/RealVisXL_V3.0_Turbo", torch_dtype=dtype, use_safetensors=True, add_watermarker=False, ).to(device) print("Running on CPU; models loaded in float32.") # A dictionary to easily choose the model based on selection. DEFAULT_MODEL = "Lightning 5" MODEL_CHOICES = [DEFAULT_MODEL, "Lightning 4", "Turbo v3"] models = { "Lightning 5": pipe, "Lightning 4": pipe2, "Turbo v3": pipe3 } def save_image(img, path): img.save(path) def randomize_seed_fn(seed: int, randomize_seed: bool) -> int: if randomize_seed: seed = random.randint(0, MAX_SEED) return seed @spaces.GPU(enable_queue=True) def generate( prompt: str, negative_prompt: str = "", use_negative_prompt: bool = False, style: str = DEFAULT_STYLE_NAME, collage_style: str = DEFAULT_COLLAGE_STYLE_NAME, filter_name: str = DEFAULT_FILTER_NAME, grid_size: str = "2x2", seed: int = 0, width: int = 1024, height: int = 1024, guidance_scale: float = 3, randomize_seed: bool = False, model_choice: str = DEFAULT_MODEL, use_resolution_binning: bool = True, progress=gr.Progress(track_tqdm=True), ): if check_text(prompt, negative_prompt): raise ValueError("Prompt contains restricted words.") if collage_style != "No Style": prompt, negative_prompt = apply_style(collage_style, prompt, negative_prompt) elif filter_name != "No Filter": prompt, negative_prompt = apply_style(filter_name, prompt, negative_prompt) else: prompt, negative_prompt = apply_style(style, prompt, negative_prompt) seed = int(randomize_seed_fn(seed, randomize_seed)) generator = torch.Generator(device=device).manual_seed(seed) if not use_negative_prompt: negative_prompt = "" negative_prompt += default_negative grid_sizes = { "2x1": (2, 1), "1x2": (1, 2), "2x2": (2, 2), "2x3": (2, 3), "3x2": (3, 2), "1x1": (1, 1) } grid_size_x, grid_size_y = grid_sizes.get(grid_size, (2, 2)) num_images = grid_size_x * grid_size_y options = { "prompt": prompt, "negative_prompt": negative_prompt, "width": width, "height": height, "guidance_scale": guidance_scale, "num_inference_steps": 30, "generator": generator, "num_images_per_prompt": num_images, "use_resolution_binning": use_resolution_binning, "output_type": "pil", } if device.type == "cuda": torch.cuda.empty_cache() # Choose pipeline based on user selection selected_pipe = models.get(model_choice, pipe) images = selected_pipe(**options).images grid_img = Image.new('RGB', (width * grid_size_x, height * grid_size_y)) for i, img in enumerate(images[:num_images]): grid_img.paste(img, (i % grid_size_x * width, i // grid_size_x * height)) unique_name = str(uuid.uuid4()) + ".png" save_image(grid_img, unique_name) return [unique_name], seed examples = [ "Chocolate dripping from a donut against a yellow background, in the style of brocore, hyper-realistic oil --ar 2:3 --q 2 --s 750 --v 5", "3d image, cute girl, in the style of Pixar --ar 1:2 --stylize 750, 4K resolution highlights, Sharp focus, octane render, ray tracing, Ultra-High-Definition, 8k, UHD, HDR, (Masterpiece:1.5), (best quality:1.5)", "Cold coffee in a cup bokeh --ar 85:128 --v 6.0 --style raw5, 4k hdr, --raw", "Food photography of a milk shake with flying strawberrys against a pink background, professionally studio shot with cinematic lighting. The image is in the style of a professional studio shot --ar 85:128 --v 6.0 --style raw", "1boy, male focus, yuuki makoto (persona 3), persona 3, black jacket, white shirt, long sleeves, closed mouth, glowing eyes, gun, hair over one eye, holding gun, handgun, looking at viewer, solo, upper body" ] css = ''' .gradio-container { max-width: 888px !important; margin: 0 auto !important; display: flex; flex-direction: column; align-items: center; } h1 { text-align: center; } .submit-btn { background: linear-gradient(90deg, #4B79A1 0%, #283E51 100%) !important; border: none !important; color: white !important; } .submit-btn:hover { background-color: #ff581e !important; } ''' title = """