Spaces:

Eladlev
/

simple_visual_agent

Sleeping

App Files Files Community

simple_visual_agent / app.py

Eladlev

Upload app.py

39ac8db verified 4 months ago

raw

history blame contribute delete

5.63 kB

	from langchain.agents import create_tool_calling_agent
	from langchain.agents import AgentExecutor
	import os
	from langchain_openai import ChatOpenAI
	from langchain.agents import Tool
	from langchain_community.utilities import GoogleSerperAPIWrapper
	from langchain_core.prompts import ChatPromptTemplate
	from langchain_core.messages import HumanMessage, AIMessage
	import base64
	from PIL import Image
	import io


	def encode_image(image_path):
	    """Return the contents of the file at ``image_path`` as a base64 string.

	    The file is read in binary mode, base64-encoded, and the resulting
	    bytes are decoded as UTF-8 text.
	    """
	    with open(image_path, "rb") as image_file:
	        raw_bytes = image_file.read()
	    encoded_bytes = base64.b64encode(raw_bytes)
	    return encoded_bytes.decode('utf-8')

	# Placeholder API keys so the clients below can be constructed at import
	# time. `setdefault` only fills in a value when the variable is missing, so
	# real keys already present in the environment are no longer overwritten
	# by these dummy values. The UI can still replace them later at runtime.
	os.environ.setdefault("SERPER_API_KEY", '23')
	os.environ.setdefault('OPENAI_API_KEY', "skc")

	# Chat model used by the agent: gpt-4o at temperature 0, authenticated with
	# whatever OPENAI_API_KEY currently holds in the environment.
	llm = ChatOpenAI(
	    temperature=0,
	    model_name='gpt-4o',
	    openai_api_key=os.environ['OPENAI_API_KEY'],
	)

	# Search wrapper whose `run` method backs the agent's only tool.
	# NOTE(review): presumably picks up SERPER_API_KEY from the environment
	# when constructed -- confirm against the wrapper's documentation.
	search = GoogleSerperAPIWrapper()

	web_search_tool = Tool(
	    name="web_search",
	    func=search.run,
	    description="useful for when you need to extract updated information from the web",
	)
	tools = [web_search_tool]

	# prompt = ChatPromptTemplate.from_messages([
	# self.system_prompt,
	# self.source_prompt,
	# self.generate_eval_message(url)])

	# Initial prompt layout, in order: system instruction, a human message with
	# the literal text "placeholder" (the slot where `change_image` later puts
	# the uploaded image), the prior conversation, the current user question,
	# and the agent's scratchpad.
	_initial_prompt_messages = [
	    (
	        "system",
	        "You are a helpful assistant. You are provided with an image an image and a question about the image. You should answer the question. You should use the Web search tool to find the most updated information.",
	    ),
	    ("human", "placeholder"),
	    ("placeholder", "{chat_history}"),
	    ("human", "{input}"),
	    ("placeholder", "{agent_scratchpad}"),
	]
	agent_prompt = ChatPromptTemplate.from_messages(_initial_prompt_messages)

	# Bind the model, the tool list and the prompt into a tool-calling agent,
	# then wrap it in an executor; `verbose=True` is passed through unchanged.
	agent = create_tool_calling_agent(llm=llm, tools=tools, prompt=agent_prompt)
	agent_executor = AgentExecutor(
	    agent=agent,
	    tools=tools,
	    verbose=True,
	)

	import gradio as gr
	import os
	from openai import OpenAI

	# Gradio UI: an image next to a chat window, a prompt box, and two fields
	# for supplying API keys at runtime. The callbacks below rebuild the
	# module-level `llm`, `tools`, `agent_prompt` and `agent_executor` globals.
	with gr.Blocks() as demo:
	    with gr.Row():
	        image = gr.Image(label="image", height=600)
	        chatbot = gr.Chatbot()

	    prompt = gr.Textbox(label="prompt")
	    # API keys are secrets: mask them on screen while they are typed.
	    serper_api = gr.Textbox(label="Serper API key", type="password")
	    openai_key = gr.Textbox(label="OpenAI API key", type="password")
	    gr.Examples(
	        # One value per component in `inputs`. The row previously carried a
	        # third value (the image URL repeated) with no matching component.
	        examples=[
	            [
	                "https://huggingface.co/Adapter/t2iadapter/resolve/main/figs_SDXLV1.0/org_sketch.png",
	                "Describe what is in the image",
	            ]
	        ],
	        inputs=[image, prompt],
	    )

	    def _rebuild_agent_executor():
	        """Recreate the global executor from the current llm, tools and prompt."""
	        global agent_executor
	        agent = create_tool_calling_agent(llm, tools, agent_prompt)
	        agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

	    def respond(message, chat_history, image):
	        """Answer ``message`` with the agent and append the turn to the chat.

	        Args:
	            message: Text the user submitted in the prompt box.
	            chat_history: Chatbot history, indexed as (user, assistant) pairs.
	            image: Current value of the image component. Unused here: the
	                image reaches the model through the prompt that
	                ``change_image`` rebuilds.

	        Returns:
	            A tuple ``("", chat_history)``: an empty string to clear the
	            prompt box, and the history extended with the new turn.
	        """
	        if chat_history is None:
	            chat_history = []

	        # Convert the UI history into alternating LangChain messages.
	        agent_input_history = []
	        for turn in chat_history:
	            agent_input_history.append(HumanMessage(content=turn[0]))
	            agent_input_history.append(AIMessage(content=turn[1]))

	        out = agent_executor.invoke(
	            {
	                "input": message,
	                "chat_history": agent_input_history,
	            }
	        )

	        chat_history.append((message, out['output']))
	        return "", chat_history

	    def update_serper_api(serper_api):
	        """Store a new Serper API key and rebuild the search tool and executor.

	        The key is deliberately not printed, so secrets stay out of the logs.
	        """
	        global tools
	        os.environ["SERPER_API_KEY"] = serper_api
	        # Build a fresh wrapper after the environment variable is updated.
	        search = GoogleSerperAPIWrapper()
	        tools = [
	            Tool(
	                name="web_search",
	                func=search.run,
	                description="useful for when you need to extract updated information from the web"
	            )
	        ]
	        _rebuild_agent_executor()

	    def update_agent(openai_key):
	        """Store a new OpenAI API key and rebuild the chat model and executor.

	        The key is deliberately not printed, so secrets stay out of the logs.
	        """
	        global llm
	        os.environ['OPENAI_API_KEY'] = openai_key
	        llm = ChatOpenAI(temperature=0, model_name='gpt-4o', openai_api_key=os.environ['OPENAI_API_KEY'])
	        _rebuild_agent_executor()

	    def change_image(image):
	        """Embed the current image in the agent prompt and rebuild the executor.

	        Args:
	            image: Array accepted by ``Image.fromarray``, or ``None`` when the
	                image component has been cleared.
	        """
	        global agent_prompt
	        if image is None:
	            # Nothing to encode; keep the current prompt instead of raising.
	            return

	        image_pil = Image.fromarray(image)

	        # Serialize the image to PNG in memory, then base64-encode the bytes.
	        buffer = io.BytesIO()
	        image_pil.save(buffer, format="PNG")
	        image_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')

	        # The payload is PNG, so the data URL declares image/png
	        # (it previously claimed image/jpeg).
	        message_content = [
	            {
	                "type": "image_url",
	                "image_url": {"url": f"data:image/png;base64,{image_base64}"},
	            }
	        ]
	        image_message = HumanMessage(content=message_content)

	        # Same layout as the initial prompt, with the image message as the
	        # second entry.
	        agent_prompt = ChatPromptTemplate.from_messages(
	            [
	                (
	                    "system",
	                    "You are a helpful assistant. You are provided with an image an image and a question about the image. You should answer the question. You should use the Web search tool to find the most updated information.",
	                ),
	                image_message,
	                ("placeholder", "{chat_history}"),
	                ("human", "{input}"),
	                ("placeholder", "{agent_scratchpad}"),
	            ]
	        )
	        _rebuild_agent_executor()

	    # Event wiring: submitting the prompt runs the agent; submitting a key
	    # field or changing the image rebuilds the agent without updating any
	    # component.
	    prompt.submit(respond, [prompt, chatbot, image], [prompt, chatbot])
	    openai_key.submit(update_agent, [openai_key], [])
	    serper_api.submit(update_serper_api, [serper_api], [])
	    image.change(change_image, [image], [])

	demo.queue().launch(share=True)