# Spaces: Running on Zero
import gradio as gr | |
from gradio_client import Client | |
# Remote client for the hosted Adept Fuyu-8B demo Space; get_caption() sends
# images to it. A FuseCap Space was tried previously and is kept for reference:
# fusecap_client = Client("https://noamrot-fusecap-image-captioning.hf.space/")
fuyu_client = Client("https://adept-fuyu-8b-demo.hf.space/")
def get_caption(image_in):
    """Return the caption produced by the remote Fuyu Space for ``image_in``.

    The request goes through ``fuyu_client.predict`` against the endpoint at
    ``fn_index=2`` with detailed captioning switched off. The caption is also
    printed to stdout for debugging.
    """
    caption = fuyu_client.predict(
        image_in,  # input for the 'raw_image' Image component
        False,  # 'Enable detailed captioning' checkbox: off
        fn_index=2,
    )
    print(f"IMAGE CAPTION: {caption}")
    return caption
import re | |
import torch | |
from transformers import pipeline | |
# Text-generation pipeline (Zephyr 7B beta) that writes the chatbot suggestion.
# Loaded once at import time, in bfloat16, with automatic device placement.
pipe = pipeline(
    "text-generation",
    model="HuggingFaceH4/zephyr-7b-beta",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
# System prompt for the "agent maker" model: it is told to answer with a
# friendly sentence, then a title, a system prompt and an example input, and two
# worked examples fix the exact output format.
# Plain (non-f) string on purpose: there is nothing to interpolate, and an
# f-string would treat any brace added to the prompt later as a placeholder.
agent_maker_sys = """
You are an AI whose job is to help users create their own chatbot whose personality will reflect the character or scene from an image described by users.
In particular, you need to respond succintly in a friendly tone, write a system prompt for an LLM, a catchy title for the chatbot, and a very short example user input. Make sure each part is included.
You'll use the image description to create a chatbot whose personality MUST reflect the character or scene informations provided by users.
For example, if a user says, "a picture of a man in a black suit and tie riding a black dragon", first do a friendly response, then add the title, system prompt, and example user input. Immediately STOP after the example input. It should be EXACTLY in this format:
Sure, I'd be happy to help you build a bot! I'm generating a title, system prompt, and an example input. How do they sound? Feel free to give me feedback!
Title: Dragon Trainer
System prompt: As an LLM, your job is to provide guidance and tips on mastering dragons. Use a friendly and informative tone.
Example input: How can I train a dragon to breathe fire?
Here's another example. If a user types, "a picture of a young girl with long brown hair and black glasses sits on a blanket in a park, reading an open book", respond:
Sure, I'd be happy to help you build a bot! I'm generating a title, system prompt, and an example input. How do they sound? Feel free to give me feedback!
Title: Book Buddy
System prompt: Your job as an LLM is to provide book recommendations based on the preferences of the user. You are a friendly and knowledgeable librarian who loves to read. Be helpful and encouraging, but also make sure your suggestions are age-appropriate for the user in the image.
Example input: What books would you recommend for a 9-year-old girl who loves animals and adventure?
"""
# Prompt prefix: the system turn (agent_maker_sys, closed with </s>) followed by
# the opening tag of the user turn. infer() appends the image caption to it.
instruction = (
    "\n<|system|>\n"
    + agent_maker_sys
    + "</s>\n"
    + "<|user|>\n"
)
def infer(image_in):
    """Suggest a chatbot (title, system prompt, example input) for a picture.

    The picture is captioned with ``get_caption``; the caption becomes the user
    turn of a Zephyr-style chat prompt whose system turn comes from
    ``instruction``; ``pipe`` then generates the assistant reply.

    Args:
        image_in: Image reference handed over by the UI (the image component is
            configured with ``type="filepath"``). ``None`` is rejected.

    Returns:
        The generated reply only, with surrounding whitespace removed.

    Raises:
        gr.Error: If no image was provided.
    """
    # Presumably the image component passes None when nothing was uploaded;
    # fail with a readable message instead of an opaque remote-call error.
    if image_in is None:
        raise gr.Error("Please upload an image first.")

    gr.Info("Getting image caption with Fuyu...")
    user_prompt = get_caption(image_in)

    # Close the user turn AND open the assistant turn. Without the trailing
    # <|assistant|> tag the model is free to keep writing the user's message,
    # and nothing marks where the actual answer starts.
    prompt = f"{instruction.strip()}\n{user_prompt}</s>\n<|assistant|>\n"
    #print(f"PROMPT: {prompt}")

    gr.Info("Building a system according to the image caption ...")
    outputs = pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        # Return just the completion. The previous regex cleanup only removed
        # the prompt when the model happened to emit "<|assistant|>" itself;
        # otherwise the whole system prompt leaked into the result.
        return_full_text=False,
    )
    cleaned_text = outputs[0]["generated_text"].strip()
    print(f"SUGGESTED LLM: {cleaned_text}")
    return cleaned_text
# Page metadata. `title` must be a plain string: the stray trailing comma that
# used to follow it turned the value into a 1-tuple.
title = "LLM Agent from a Picture"
# HTML snippet shown under the page heading; links to the GPT-Baker Space.
description = "Get a LLM system prompt from a picture so you can use it in <a href='https://huggingface.co/spaces/abidlabs/GPT-Baker'>GPT-Baker</a>."
# Stylesheet handed to gr.Blocks: centers the main column, caps its width at
# 640px and left-aligns its text.
css = (
    "\n"
    "#col-container{\n"
    "margin: 0 auto;\n"
    "max-width: 640px;\n"
    "text-align: left;\n"
    "}\n"
)
# UI: a centered column holding the page header and a two-column row with the
# image input + submit button on one side and the suggestion textbox on the
# other.
# NOTE(review): the source had lost its indentation; the nesting below is the
# conventional reading of the statement order - confirm against the live app.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(
            "\n"
            '<h2 style="text-align: center;">LLM Agent from a Picture</h2>\n'
            f'<p style="text-align: center;">{description}</p>\n'
        )
        with gr.Row():
            with gr.Column():
                image_in = gr.Image(label="Image reference", type="filepath")
                submit_btn = gr.Button("Make LLM system from my pic !")
            with gr.Column():
                result = gr.Textbox(label="Suggested System")

    # Clicking the button runs infer() on the image and shows its return value
    # in the textbox.
    submit_btn.click(fn=infer, inputs=[image_in], outputs=[result])

# Enable request queuing, then start the app.
demo.queue().launch()