Spaces:

Eladlev
/

simple_visual_agent

Sleeping

File size: 5,633 Bytes

from langchain.agents import create_tool_calling_agent
from langchain.agents import AgentExecutor
import os
from langchain_openai import ChatOpenAI
from langchain.agents import  Tool
from langchain_community.utilities import GoogleSerperAPIWrapper
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage, AIMessage
import base64
from PIL import Image
import io


def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is read in binary mode and the base64 bytes are decoded to a
    ``str`` so the result can be embedded directly in text payloads.
    """
    with open(image_path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

# Dummy credentials so the clients below can be constructed at import time;
# the UI lets the user submit real keys later. ``setdefault`` is used so that
# keys already present in the environment are NOT overwritten by the dummies.
os.environ.setdefault("SERPER_API_KEY", '23')
os.environ.setdefault('OPENAI_API_KEY', "skc")

# Chat model shared by every agent built in this module. The key is read from
# the process environment at construction time.
llm = ChatOpenAI(
    model_name='gpt-4o',
    temperature=0,
    openai_api_key=os.environ['OPENAI_API_KEY'],
)

# Search wrapper built without explicit arguments (presumably it picks up
# SERPER_API_KEY from the environment -- confirm against the wrapper's docs).
search = GoogleSerperAPIWrapper()

# The single tool offered to the agent: a web search backed by ``search.run``.
tools = [
    Tool(
        func=search.run,
        name="web_search",
        description="useful for when you need to extract **updated** information from the web",
    ),
]

# Prompt layout, in order: system instructions, a stand-in human turn (the
# image handler rebuilds this prompt with a real image message in that slot),
# the running chat history, the current question, and the agent scratchpad.
agent_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant. You are provided with an image an image and a question about the image. You should answer the question. You should use the Web search tool to find the most updated information."),
    ("human", "placeholder"),
    ("placeholder", "{chat_history}"),
    ("human", "{input}"),
    ("placeholder", "{agent_scratchpad}"),
])

# Combine model, tools and prompt into an agent, then wrap it in the executor
# that the UI callbacks invoke (and replace when keys or the image change).
agent = create_tool_calling_agent(llm=llm, tools=tools, prompt=agent_prompt)

agent_executor = AgentExecutor(tools=tools, agent=agent, verbose=True)

import gradio as gr
import os
from openai import OpenAI

with gr.Blocks() as demo:
    # Top row: the image under discussion next to the running conversation.
    with gr.Row():
        image = gr.Image(label="image", height=600)
        chatbot = gr.Chatbot()

    prompt = gr.Textbox(label="prompt")
    # API keys are secrets, so the fields are masked instead of echoing the
    # typed key in clear text on screen.
    serper_api = gr.Textbox(label="Serper API key", type="password")
    openai_key = gr.Textbox(label="OpenAI API key", type="password")
    # Each example row supplies exactly one value per component in ``inputs``
    # (image URL, prompt text); the row previously carried a third, unused
    # value that had no matching input component.
    gr.Examples(
        examples=[
            [
                "https://huggingface.co/Adapter/t2iadapter/resolve/main/figs_SDXLV1.0/org_sketch.png",
                "Describe what is in the image",
            ],
        ],
        inputs=[image, prompt],
    )


    def respond(message, chat_history, image):
        """Run the agent on ``message`` and append the exchange to the chat.

        Args:
            message: Text the user submitted in the prompt box.
            chat_history: Sequence of ``(user_text, assistant_text)`` pairs
                shown in the chatbot so far; ``None`` is treated as empty.
            image: Current value of the image component. It is not read here:
                the image reaches the model through the prompt that
                ``change_image`` rebuilds, not through this argument.

        Returns:
            A ``("", chat_history)`` tuple: the empty string clears the prompt
            box and the history includes the new exchange (if one happened).
        """
        if chat_history is None:
            chat_history = []

        # A blank submission has nothing to answer; skip the agent call
        # entirely instead of sending an empty question to the model.
        if not message or not message.strip():
            return "", chat_history

        # Replay earlier turns as LangChain messages, skipping missing halves
        # so a ``None`` entry cannot break message construction.
        agent_input_history = []
        for user_text, assistant_text in chat_history:
            if user_text is not None:
                agent_input_history.append(HumanMessage(content=user_text))
            if assistant_text is not None:
                agent_input_history.append(AIMessage(content=assistant_text))

        out = agent_executor.invoke(
            {
                "input": message,
                "chat_history": agent_input_history,
            }
        )

        chat_history.append((message, out['output']))
        return "", chat_history


    def update_serper_api(serper_api):
        """Install a new Serper API key and rebuild the search tool and agent.

        The key is written to ``SERPER_API_KEY`` in the process environment,
        a fresh search wrapper and ``web_search`` tool are created, and the
        module-level ``tools`` and ``agent_executor`` are replaced.

        Args:
            serper_api: The key text submitted in the UI.

        Raises:
            ValueError: If the key is empty or whitespace only. Nothing is
                modified in that case, so the previous configuration stays
                usable.
        """
        global tools, agent_executor

        api_key = (serper_api or "").strip()
        if not api_key:
            raise ValueError("Serper API key must not be empty")

        # Deliberately no printing here: API keys are secrets and must not
        # end up in the server log.
        os.environ["SERPER_API_KEY"] = api_key
        search = GoogleSerperAPIWrapper()
        tools = [
            Tool(
                name="web_search",
                func=search.run,
                description="useful for when you need to extract **updated** information from the web"
            )
        ]
        agent = create_tool_calling_agent(llm, tools, agent_prompt)
        agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)


    def update_agent(openai_key):
        """Install a new OpenAI API key and rebuild the model and agent.

        The key is written to ``OPENAI_API_KEY`` in the process environment,
        a new chat model is created with it, and the module-level ``llm`` and
        ``agent_executor`` are replaced. The current ``tools`` and
        ``agent_prompt`` are reused.

        Args:
            openai_key: The key text submitted in the UI.

        Raises:
            ValueError: If the key is empty or whitespace only. Nothing is
                modified in that case, so the previous configuration stays
                usable.
        """
        global llm, agent_executor

        api_key = (openai_key or "").strip()
        if not api_key:
            raise ValueError("OpenAI API key must not be empty")

        # Deliberately no printing here: API keys are secrets and must not
        # end up in the server log.
        os.environ['OPENAI_API_KEY'] = api_key
        llm = ChatOpenAI(temperature=0, model_name='gpt-4o', openai_api_key=api_key)
        agent = create_tool_calling_agent(llm, tools, agent_prompt)
        agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

    def change_image(image):
        """Rebuild the agent so its prompt carries the currently shown image.

        The image is serialized to PNG, base64-encoded, and embedded as an
        ``image_url`` data URL in a human message placed right after the
        system message. The module-level ``agent_prompt`` and
        ``agent_executor`` are then replaced.

        Args:
            image: The image component's value, passed to ``Image.fromarray``
                (presumably a NumPy array -- confirm against the component's
                configured type), or ``None`` when the image was cleared.
        """
        global agent_prompt, agent_executor

        if image is None:
            # Image cleared: restore the same stand-in human turn the
            # module-level prompt starts with, so a stale image is no longer
            # sent and ``Image.fromarray(None)`` is never attempted.
            image_message = ("human", "placeholder")
        else:
            image_pil = Image.fromarray(image)

            # Serialize to PNG in memory, then base64-encode the bytes.
            buffer = io.BytesIO()
            image_pil.save(buffer, format="PNG")
            image_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')

            # The MIME type must match the bytes actually written above (PNG).
            message_content = [
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/png;base64,{image_base64}"},
                }
            ]
            image_message = HumanMessage(content=message_content)

        agent_prompt = ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    "You are a helpful assistant. You are provided with an image an image and a question about the image. You should answer the question. You should use the Web search tool to find the most updated information.",
                ),
                image_message,
                ("placeholder", "{chat_history}"),
                ("human", "{input}"),
                ("placeholder", "{agent_scratchpad}"),
            ]
        )

        agent = create_tool_calling_agent(llm, tools, agent_prompt)
        agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)


    prompt.submit(respond, [prompt, chatbot, image], [prompt, chatbot])
    openai_key.submit(update_agent, [openai_key], [])
    serper_api.submit(update_serper_api, [serper_api], [])
    image.change(change_image,[image],[])
demo.queue().launch(share=True)