image-to-music-v2

Running on Zero

App Files Files Community

image-to-music-v2 / app.py

fffiloni

tweak original sys prompt

98d08a5 about 1 year ago

raw

history blame

3.97 kB

	import gradio as gr
	from gradio_client import Client

	fusecap_client = Client("https://noamrot-fusecap-image-captioning.hf.space/")

	def get_caption(image_in):

	fusecap_result = fusecap_client.predict(
	image_in, # str representing input in 'raw_image' Image component
	api_name="/predict"
	)
	print(f"IMAGE CAPTION: {fusecap_result}")
	return fusecap_result

	import re
	import torch
	from transformers import pipeline

	pipe = pipeline("text-generation", model="HuggingFaceH4/zephyr-7b-beta", torch_dtype=torch.bfloat16, device_map="auto")

	agent_maker_sys = f"""
	You are an AI whose job it is to help users create their own chatbots, based on the image description the user provide. In particular, you need to respond succintly in a friendly tone, write a system prompt for an LLM, a catchy title for the chatbot, and a very short example user input. Make sure each part is included.
	First, user will provide an image description, from which you'll take inspiration for your job. WITHOUT mentioning any image, use user provided description to imagine a llm with a personality reflecting informations given.
	For example, if a user says, "make a bot that gives advice on how to grow your startup", first do a friendly response, then add the title, system prompt, and example user input. Immediately STOP after the example input. It should be EXACTLY in this format:
	Sure, I'd be happy to help you build a bot! I'm generating a title, system prompt, and an example input. How do they sound? Feel free to give me feedback!
	Title: Startup Coach
	System prompt: Your job as an LLM is to provide good startup advice. Do not provide extraneous comments on other topics. Be succinct but useful.
	Example input: Risks of setting up a non-profit board
	Here's another example. If a user types, "Make a chatbot that roasts tech ceos", respond:
	Sure, I'd be happy to help you build a bot! I'm generating a title, system prompt, and an example input. How do they sound? Feel free to give me feedback!
	Title: Tech Roaster
	System prompt: As an LLM, your primary function is to deliver hilarious and biting critiques of technology CEOs. Keep it witty and entertaining, but also make sure your jokes aren't too mean-spirited or factually incorrect.
	Example input: Elon Musk
	"""

	instruction = f"""
	<\|system\|>
	{agent_maker_sys}</s>
	<\|user\|>
	"""

	def infer(image_in):
	gr.Info("Getting image caption from Fuse Cap...")
	user_prompt = get_caption(image_in)
	prompt = f"{instruction.strip()}\n{user_prompt}</s>"
	print(f"PROMPT: {prompt}")
	gr.Info("Building a system according to the image caption ...")
	outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
	print(outputs)

	pattern = r'\<\\|system\\|\>(.*?)\<\\|assistant\\|\>'
	cleaned_text = re.sub(pattern, '', outputs[0]["generated_text"], flags=re.DOTALL)


	return cleaned_text

	title = f"LLM Agent from a Picture",
	description = f"Get a LLM system prompt from a picture so you can use it in <a href='https://huggingface.co/spaces/abidlabs/GPT-Baker'>GPT-Baker</a>."

	css = """
	#col-container{
	margin: 0 auto;
	max-width: 840px;
	text-align: left;
	}
	"""

	with gr.Blocks(css=css) as demo:
	with gr.Column(elem_id="col-container"):
	gr.HTML(f"""
	<h2 style="text-align: center;">LLM Agent from a Picture</h2>
	<p style="text-align: center;">{description}</p>
	""")
	with gr.Row():
	with gr.Column():
	image_in = gr.Image(
	label = "Image reference",
	type = "filepath"
	)
	submit_btn = gr.Button("Make LLM system from my pic !")
	with gr.Column():
	result = gr.Textbox(
	label ="Suggested System"
	)

	submit_btn.click(
	fn = infer,
	inputs = [
	image_in
	],
	outputs =[
	result
	]
	)

	demo.queue().launch()