import base64
import io
import os

import gradio as gr
from PIL import Image

from langchain.agents import AgentExecutor, Tool, create_tool_calling_agent
from langchain_community.utilities import GoogleSerperAPIWrapper
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI


def encode_image(image_path):
    """Read an image file and return its base64-encoded contents."""
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")


# Placeholder keys; both are overwritten from the Gradio UI below.
os.environ["SERPER_API_KEY"] = "YOUR_SERPER_API_KEY"
os.environ["OPENAI_API_KEY"] = "YOUR_OPENAI_API_KEY"

llm = ChatOpenAI(
    temperature=0,
    model_name="gpt-4o",
    openai_api_key=os.environ["OPENAI_API_KEY"],
)

search = GoogleSerperAPIWrapper()
tools = [
    Tool(
        name="web_search",
        func=search.run,
        description="useful for when you need to extract **updated** information from the web",
    )
]

SYSTEM_MESSAGE = (
    "You are a helpful assistant. You are provided with an image and a question "
    "about the image. You should answer the question. You should use the web "
    "search tool to find the most up-to-date information."
)

agent_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", SYSTEM_MESSAGE),
        # Stub human turn; replaced with the uploaded image in change_image().
        ("human", "placeholder"),
        ("placeholder", "{chat_history}"),
        ("human", "{input}"),
        ("placeholder", "{agent_scratchpad}"),
    ]
)

agent = create_tool_calling_agent(llm, tools, agent_prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)


with gr.Blocks() as demo:
    with gr.Row():
        image = gr.Image(label="image", height=600)
        chatbot = gr.Chatbot()
    prompt = gr.Textbox(label="prompt")
    serper_api = gr.Textbox(label="Serper API key")
    openai_key = gr.Textbox(label="OpenAI API key")

    gr.Examples(
        examples=[
            [
                "https://huggingface.co/Adapter/t2iadapter/resolve/main/figs_SDXLV1.0/org_sketch.png",
                "Describe what is in the image",
            ]
        ],
        inputs=[image, prompt],
    )

    def respond(message, chat_history, image):
        # Rebuild the agent's chat history from the Gradio (user, assistant) tuples.
        agent_input_history = []
        for c in chat_history:
            agent_input_history.extend(
                [HumanMessage(content=c[0]), AIMessage(content=c[1])]
            )
        out = agent_executor.invoke(
            {
                "input": message,
                "chat_history": agent_input_history,
            }
        )
        chat_history.append((message, out["output"]))
        return "", chat_history

    def update_serper_api(serper_api):
        # Rebuild the search tool and the agent with the new Serper key.
        global tools, agent_executor
        os.environ["SERPER_API_KEY"] = serper_api
        search = GoogleSerperAPIWrapper()
        tools = [
            Tool(
                name="web_search",
                func=search.run,
                description="useful for when you need to extract **updated** information from the web",
            )
        ]
        agent = create_tool_calling_agent(llm, tools, agent_prompt)
        agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

    def update_agent(openai_key):
        # Recreate the LLM and the agent with the new OpenAI key.
        global llm, agent_executor
        os.environ["OPENAI_API_KEY"] = openai_key
        llm = ChatOpenAI(
            temperature=0,
            model_name="gpt-4o",
            openai_api_key=os.environ["OPENAI_API_KEY"],
        )
        agent = create_tool_calling_agent(llm, tools, agent_prompt)
        agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

    def change_image(image):
        # Convert the NumPy array from the Gradio component to a PIL image,
        # encode it as base64 PNG, and inject it into the agent prompt.
        global agent_prompt, agent_executor
        image_pil = Image.fromarray(image)
        buffer = io.BytesIO()
        image_pil.save(buffer, format="PNG")  # "JPEG" also works if preferred
        image_base64 = base64.b64encode(buffer.getvalue()).decode("utf-8")
        image_message = HumanMessage(
            content=[
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/png;base64,{image_base64}"},
                }
            ]
        )
        agent_prompt = ChatPromptTemplate.from_messages(
            [
                ("system", SYSTEM_MESSAGE),
                image_message,
                ("placeholder", "{chat_history}"),
                ("human", "{input}"),
                ("placeholder", "{agent_scratchpad}"),
            ]
        )
        agent = create_tool_calling_agent(llm, tools, agent_prompt)
        agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

    prompt.submit(respond, [prompt, chatbot, image], [prompt, chatbot])
    openai_key.submit(update_agent, [openai_key], [])
    serper_api.submit(update_serper_api, [serper_api], [])
    image.change(change_image, [image], [])

demo.queue().launch(share=True)