Spaces:

ovi054
/

image-to-prompt

Running

File size: 3,217 Bytes

40a0c27
fe109d0
4ca9103
87e9ce2
 
 
fa8e3c4
 
 
411ddb3
643466f
 
 
87e9ce2
 
 
 
 
fa8e3c4
411ddb3
b2f83fb
 
 
 
 
 
 
 
 
 
 
 
 
 
446e43e
87e9ce2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
411ddb3
226538c
fa8e3c4
 
 
 
411ddb3
fa8e3c4
 
 
 
 
 
 
 
 
 
 
7c4bc2a
fa8e3c4
 
cb31fbf
87e9ce2
d218a70
fa8e3c4
 
5052c5c
e7bb2a3
029098a
87e9ce2

import os
import subprocess

import gradio as gr
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM

# import os
# import random
# from gradio_client import Client


# Best-effort runtime install of flash-attn (presumably needed by the
# remote model code loaded below -- confirm). The return code is not checked,
# so a failed install does not stop the app from starting.
#
# The flag is merged into the current environment rather than passed on its
# own: `env=` REPLACES the child's environment, so supplying only the flag
# would strip PATH, HOME, virtualenv and proxy settings from the pip process.
subprocess.run(
    'pip install flash-attn --no-build-isolation',
    env={**os.environ, 'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
    shell=True,
)

# Load the Florence-2 model and its processor once, at import time.
_FLORENCE_CHECKPOINT = 'microsoft/Florence-2-base'

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

florence_model = AutoModelForCausalLM.from_pretrained(_FLORENCE_CHECKPOINT, trust_remote_code=True)
florence_model = florence_model.to(device).eval()  # inference only
florence_processor = AutoProcessor.from_pretrained(_FLORENCE_CHECKPOINT, trust_remote_code=True)

# api_key = os.getenv("HF_READ_TOKEN")

# HTML footer snippet: a centered call-to-action with a "Buy Me a Coffee"
# image that links to a checkout page. It is shaped for the `article` slot of
# the Gradio interface but is not currently passed to it.
article_text = """
<div style="text-align: center;">
    <p>Enjoying the tool? Buy me a coffee and get exclusive prompt guides!</p>
    <p><i>Instantly unlock helpful tips for creating better prompts!</i></p>
    <div style="display: flex; justify-content: center;">
        <a href="https://piczify.lemonsqueezy.com/buy/0f5206fa-68e8-42f6-9ca8-4f80c587c83e">
            <img src="https://www.buymeacoffee.com/assets/img/custom_images/yellow_img.png" 
                 alt="Buy Me a Coffee" 
                 style="height: 40px; width: auto; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2); border-radius: 10px;">
        </a>
    </div>
</div>
"""

def generate_caption(image):
    """Describe an image with Florence-2's ``<MORE_DETAILED_CAPTION>`` task.

    Args:
        image: A ``PIL.Image.Image``, an array accepted by
            ``Image.fromarray``, or ``None`` when no image was supplied.

    Returns:
        The generated caption text, or an empty string when ``image`` is
        ``None``.
    """
    # `image` can be None (e.g. the form was submitted without an upload).
    # Return an empty prompt instead of crashing inside Image.fromarray.
    if image is None:
        return ""

    task = "<MORE_DETAILED_CAPTION>"

    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)
    # Defensive normalisation: grayscale/RGBA/palette inputs become 3-channel
    # RGB before they reach the processor. No-op for images already in RGB.
    if image.mode != "RGB":
        image = image.convert("RGB")

    inputs = florence_processor(text=task, images=image, return_tensors="pt").to(device)
    # Deterministic beam search (no sampling) with a generous token budget.
    generated_ids = florence_model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        early_stopping=False,
        do_sample=False,
        num_beams=3,
    )
    # Special tokens are kept in the decoded text and handed to
    # post_process_generation, which returns a dict keyed by the task token.
    generated_text = florence_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = florence_processor.post_process_generation(
        generated_text,
        task=task,
        image_size=(image.width, image.height)
    )
    prompt = parsed_answer[task]
    print("\n\nGeneration completed!:" + prompt)
    return prompt
    # yield prompt, None
    # image_path = generate_image(prompt,random.randint(0, 4294967296))
    # yield prompt, image_path 

# def generate_image(prompt, seed=42, width=1024, height=1024):
#     try:
#         result = Client("KingNish/Realtime-FLUX", hf_token=api_key).predict(
#             prompt=prompt,
#             seed=seed,
#             width=width,
#             height=height,
#             api_name="/generate_image"
#         )
#         # Extract the image path from the result tuple
#         image_path = result[0]
#         return image_path 
#     except Exception as e:
#         raise Exception(f"Error generating image: {str(e)}")
 
# Web UI: one image in, one caption textbox (with a copy button) out.
# A second image output and the `article` footer are currently disabled.
image_input = gr.Image(label="Input Image")
prompt_output = gr.Textbox(label="Output Prompt", lines=2, show_copy_button=True)

io = gr.Interface(
    fn=generate_caption,
    inputs=[image_input],
    outputs=[prompt_output],
)

# Start the server; debug=True is passed straight through to launch().
io.launch(debug=True)