FluxCapacitor

Running

File size: 7,812 Bytes


import gradio as gr
import requests
import io
import random
import os
from PIL import Image
from deep_translator import GoogleTranslator
from gradio_client import Client  # Import the gradio client for prompt enhancement

# Hugging Face Inference API endpoints for the two FLUX.1 variants
# ("schnell" is the default; "dev" is selected with the use_dev flag).
API_URL = "https://api-inference.huggingface.co/models/black-forest-labs/FLUX.1-schnell"
API_URL_DEV = "https://api-inference.huggingface.co/models/black-forest-labs/FLUX.1-dev"

# Passed as the `timeout` argument of the image-generation HTTP request.
timeout = 100

# Fetch the title icon on first start; later runs reuse the local copy.
if not os.path.exists("icon.jpg"):
    os.system("wget -O icon.jpg https://i.pinimg.com/564x/64/49/88/644988c59447eb00286834c2e70fdd6b.jpg")

# Configure the system prompt on the Qwen Space
def set_system_prompt():
    """Send the prompt-enhancer system message to the Qwen Space.

    Opens a client for ``Qwen/Qwen2.5-72B-Instruct``, calls its
    ``/modify_system_session`` endpoint, logs the reply, and returns that
    reply unchanged.
    """
    qwen = Client("Qwen/Qwen2.5-72B-Instruct")
    response = qwen.predict(
        system="You are Qwen, an image generation prompt enhancer",
        api_name="/modify_system_session",
    )
    print(f"System session modified: {response}")
    return response

# Prompt enhancement through the Qwen chat endpoint
def enhance_prompt_with_qwen(prompt):
    """Ask the Qwen Space to rewrite ``prompt`` for image generation.

    Calls the ``/model_chat`` endpoint of ``Qwen/Qwen2.5-72B-Instruct`` with
    an empty chat history and the prompt-enhancer system message, logs the
    first element of the reply, and returns it.
    """
    chat_request = {
        "query": prompt,
        "history": [],
        "system": "You are Qwen, an image generation prompt enhancer",
        "api_name": "/model_chat",
    }
    reply = Client("Qwen/Qwen2.5-72B-Instruct").predict(**chat_request)

    # The endpoint answers with a sequence; element 0 is treated as the
    # enhanced prompt.
    # NOTE(review): confirm against the Space's API page that index 0 is the
    # model's answer and not an echoed input field.
    enhanced_prompt = reply[0]

    print(f"Enhanced prompt: {enhanced_prompt}")
    return enhanced_prompt

# Build the Authorization header for the Inference API request
def _build_auth_headers(huggingface_api_key):
    """Return the bearer-token header for the Hugging Face Inference API.

    A non-empty ``huggingface_api_key`` supplied by the caller takes
    priority; otherwise the ``HF_READ_TOKEN`` environment variable is used.

    Raises:
        gr.Error: If neither source provides a token.
    """
    token = (huggingface_api_key or "").strip() or os.getenv("HF_READ_TOKEN")
    if not token:
        raise gr.Error("API key is required for API calls.")
    return {"Authorization": f"Bearer {token}"}


# Image generation query function
def query(prompt, is_negative=False, steps=30, cfg_scale=7, sampler="DPM++ 2M Karras", seed=-1, strength=0.7, huggingface_api_key=None, use_dev=False):
    """Enhance ``prompt`` with Qwen, render it with FLUX.1 and save the image.

    Args:
        prompt: Text describing the desired image. Empty or whitespace-only
            prompts short-circuit without any network call.
        is_negative: Forwarded unchanged as the ``is_negative`` payload field.
        steps: Forwarded as the ``steps`` payload field.
        cfg_scale: Forwarded as the ``cfg_scale`` payload field.
        sampler: Accepted for interface compatibility; not sent to the API.
        seed: Seed to send; ``-1`` picks a random one in [1, 1000000000].
        strength: Forwarded as the ``strength`` payload field.
        huggingface_api_key: Optional token. When empty or ``None`` the
            ``HF_READ_TOKEN`` environment variable is used instead.
        use_dev: Select ``API_URL_DEV`` instead of ``API_URL``.

    Returns:
        ``(output_path, seed)`` on success, ``(None, seed)`` when the response
        body cannot be decoded or saved as an image, and ``(None, None)`` for
        an empty prompt or an unexpected failure.

    Raises:
        gr.Error: When no API token is available or the Inference API answers
            with a non-200 status, so that Gradio can show the message.
    """
    # Nothing to generate from: skip the remote enhancement round trip.
    if not prompt or not prompt.strip():
        return None, None

    try:
        # Resolve credentials first so a missing token fails fast.
        headers = _build_auth_headers(huggingface_api_key)

        # Set system prompt first
        set_system_prompt()

        # Enhance the prompt before translation
        enhanced_prompt = enhance_prompt_with_qwen(prompt)
        if not enhanced_prompt:
            return None, None

        # Determine which API URL to use
        api_url = API_URL_DEV if use_dev else API_URL

        # Tag for log lines and the output file name. Values can repeat, so a
        # later generation may overwrite an earlier file with the same tag.
        key = random.randint(0, 999)

        # Translate the enhanced prompt
        enhanced_prompt = GoogleTranslator(source='ru', target='en').translate(enhanced_prompt)
        print(f'\033[1mGeneration {key} translation:\033[0m {enhanced_prompt}')

        enhanced_prompt = f"{enhanced_prompt} | ultra detail, ultra elaboration, ultra quality, perfect."
        print(f'\033[1mGeneration {key}:\033[0m {enhanced_prompt}')

        # If seed is -1, generate a random seed and use it
        if seed == -1:
            seed = random.randint(1, 1000000000)

        payload = {
            "inputs": enhanced_prompt,
            "is_negative": is_negative,
            "steps": steps,
            "cfg_scale": cfg_scale,
            "seed": seed,
            "strength": strength,
        }

        response = requests.post(api_url, headers=headers, json=payload, timeout=timeout)
        if response.status_code != 200:
            print(f"Error: Failed to get image. Response status: {response.status_code}")
            print(f"Response content: {response.text}")
            if response.status_code == 503:
                raise gr.Error(f"{response.status_code} : The model is being loaded")
            raise gr.Error(f"{response.status_code}")

        try:
            # Attempt to open the image
            image = Image.open(io.BytesIO(response.content))
            print(f'\033[1mGeneration {key} completed!\033[0m ({enhanced_prompt})')

            # Save the image to a file and return the file path and seed
            output_path = f"./output_{key}.png"
            image.save(output_path)

            return output_path, seed
        except Exception as e:
            print(f"Error when trying to open the image: {e}")
            return None, seed  # If the image fails, return None for image, seed is still returned

    except gr.Error:
        # Deliberate user-facing errors must reach Gradio instead of being
        # swallowed by the catch-all below.
        raise
    except Exception as ex:
        print(f"Error in query execution: {ex}")
        return None, None  # If the entire process fails, return None for both

# Custom stylesheet passed to gr.Blocks below: centers the app column with a
# 600px maximum width and lays out the title icon next to the heading.
css = """
#app-container {
    max-width: 600px;
    margin-left: auto;
    margin-right: auto;
}
#title-container {
    display: flex;
    align-items: center;
    justify-content: center;
}
#title-icon {
    width: 32px; /* Adjust the width of the icon as needed */
    height: auto;
    margin-right: 10px; /* Space between icon and title */
}
#title-text {
    font-size: 24px; /* Adjust font size as needed */
    font-weight: bold;
}
"""

# Gradio interface: title banner, prompt inputs, advanced settings and outputs
with gr.Blocks(theme='Nymbo/Nymbo_Theme', css=css) as app:
    # Title banner (icon + heading), styled by the #title-* rules in `css`
    gr.HTML("""
        <center>
            <div id="title-container">
                <img id="title-icon" src="icon.jpg" alt="Icon">
                <h1 id="title-text">FLUX Capacitor</h1>
            </div>
        </center>
    """)

    with gr.Column(elem_id="app-container"):
        with gr.Row():
            with gr.Column(elem_id="prompt-container"):
                with gr.Row():
                    text_prompt = gr.Textbox(
                        label="Prompt",
                        placeholder="Enter a prompt here",
                        lines=2,
                        elem_id="prompt-text-input",
                    )
                with gr.Row():
                    with gr.Accordion("Advanced Settings", open=False):
                        negative_prompt = gr.Textbox(
                            label="Negative Prompt",
                            placeholder="What should not be in the image",
                            value="(deformed, distorted, disfigured), poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, (mutated hands and fingers), disconnected limbs, mutation, mutated, ugly, disgusting, blurry, amputation, misspellings, typos",
                            lines=3,
                            elem_id="negative-prompt-text-input",
                        )
                        steps = gr.Slider(
                            label="Sampling steps",
                            value=35,
                            minimum=1,
                            maximum=100,
                            step=1,
                        )
                        cfg = gr.Slider(
                            label="CFG Scale",
                            value=7,
                            minimum=1,
                            maximum=20,
                            step=1,
                        )
                        method = gr.Radio(
                            label="Sampling method",
                            value="DPM++ 2M Karras",
                            choices=["DPM++ 2M Karras", "DPM++ SDE Karras", "Euler", "Euler a", "Heun", "DDIM"],
                        )
                        strength = gr.Slider(
                            label="Strength",
                            value=0.7,
                            minimum=0,
                            maximum=1,
                            step=0.001,
                        )
                        seed = gr.Slider(
                            label="Seed",
                            value=-1,
                            minimum=-1,
                            maximum=1000000000,
                            step=1,
                        )
                        huggingface_api_key = gr.Textbox(
                            label="Hugging Face API Key (required for API calls)",
                            placeholder="Enter your Hugging Face API Key here",
                            type="password",
                            elem_id="api-key",
                        )
                        use_dev = gr.Checkbox(
                            label="Use Dev API",
                            value=False,
                            elem_id="use-dev-checkbox",
                        )

        with gr.Row():
            text_button = gr.Button("Run", variant='primary', elem_id="gen-button")
        with gr.Row():
            image_output = gr.Image(type="pil", label="Image Output", elem_id="gallery")
            seed_output = gr.Textbox(label="Seed Used", elem_id="seed-output")

        # Inputs are listed in the positional order of query()'s parameters:
        # prompt, is_negative, steps, cfg_scale, sampler, seed, strength,
        # huggingface_api_key, use_dev.
        text_button.click(
            query,
            inputs=[
                text_prompt,
                negative_prompt,
                steps,
                cfg,
                method,
                seed,
                strength,
                huggingface_api_key,
                use_dev,
            ],
            outputs=[image_output, seed_output],
        )

# Start the server with the API page enabled and no public share link.
app.launch(share=False, show_api=True)