File size: 5,633 Bytes
2e4600f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39ac8db
 
2e4600f
 
 
 
 
339560f
2e4600f
 
 
 
 
 
 
 
 
 
 
39ac8db
 
2e4600f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
from langchain.agents import create_tool_calling_agent
from langchain.agents import AgentExecutor
import os
from langchain_openai import ChatOpenAI
from langchain.agents import  Tool
from langchain_community.utilities import GoogleSerperAPIWrapper
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage, AIMessage
import base64
from PIL import Image
import io


def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is read in binary mode, base64-encoded, and the encoded bytes
    are decoded as UTF-8 so the result is a ``str``.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')

# Seed both API keys with dummy values so the objects below can be built at
# import time; the UI handlers further down replace them with real keys.
os.environ.update({"SERPER_API_KEY": '23', 'OPENAI_API_KEY': "skc"})

# Chat model shared by every agent built in this file.
llm = ChatOpenAI(
    model_name='gpt-4o',
    temperature=0,
    openai_api_key=os.environ['OPENAI_API_KEY'],
)

# Web-search wrapper; its `run` method backs the single tool given to the agent.
search = GoogleSerperAPIWrapper()
tools = [
    Tool(
        name="web_search",
        description="useful for when you need to extract **updated** information from the web",
        func=search.run,
    ),
]

# prompt = ChatPromptTemplate.from_messages([
#     self.system_prompt,
#     self.source_prompt,
#     self.generate_eval_message(url)])

# System turn sent at the start of every request.
_SYSTEM_MESSAGE = (
    "system",
    "You are a helpful assistant. You are provided with an image an image and a question about the image. You should answer the question. You should use the Web search tool to find the most updated information.",
)

# Prompt layout: system turn, a stand-in human turn, the prior conversation,
# the current question, then the agent's tool-call scratchpad.
# NOTE(review): ("human", "placeholder") is a human turn whose text is the
# literal word "placeholder" -- presumably a stand-in for the image message
# that change_image() puts in this position when it rebuilds the prompt.
agent_prompt = ChatPromptTemplate.from_messages(
    [
        _SYSTEM_MESSAGE,
        ("human", "placeholder"),
        ("placeholder", "{chat_history}"),
        ("human", "{input}"),
        ("placeholder", "{agent_scratchpad}"),
    ]
)

agent = create_tool_calling_agent(llm, tools, agent_prompt)

# Executor used by the chat handler; the UI callbacks rebind this global
# whenever a key or the image changes.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,
)

import gradio as gr
import os
from openai import OpenAI

with gr.Blocks() as demo:
    # Image on the left, conversation on the right.
    with gr.Row():
        image = gr.Image(label="image", height=600)
        chatbot = gr.Chatbot()

    prompt = gr.Textbox(label="prompt")
    # API keys are secrets: mask them on screen instead of echoing plain text.
    serper_api = gr.Textbox(label="Serper API key", type="password")
    openai_key = gr.Textbox(label="OpenAI API key", type="password")
    # Each example row carries exactly one value per component in `inputs`
    # (image, prompt); the original row had a surplus third value.
    gr.Examples(
        examples=[
            [
                "https://huggingface.co/Adapter/t2iadapter/resolve/main/figs_SDXLV1.0/org_sketch.png",
                "Describe what is in the image",
            ],
        ],
        inputs=[image, prompt],
    )


    def respond(message, chat_history, image):
        """Send ``message`` to the agent and record the exchange in the chat.

        ``chat_history`` is read as a list of (user text, assistant text)
        entries and replayed to the agent as alternating human/AI messages.
        ``image`` is accepted because the event passes it in, but it is not
        read here.

        Returns an empty string and the updated ``chat_history`` (the same
        list object, with the new exchange appended).
        """
        prior_turns = [
            turn
            for exchange in chat_history
            for turn in (
                HumanMessage(content=exchange[0]),
                AIMessage(content=exchange[1]),
            )
        ]

        request = {"input": message, "chat_history": prior_turns}
        result = agent_executor.invoke(request)

        chat_history.append((message, result['output']))
        return "", chat_history


    def update_serper_api(serper_api):
        """Rebuild the web-search tool and the agent around a new Serper key.

        The key is stored in the ``SERPER_API_KEY`` environment variable, a
        fresh ``GoogleSerperAPIWrapper`` is created, and the module-level
        ``tools`` and ``agent_executor`` are rebound to use it.

        Surrounding whitespace is stripped from the key, and a blank
        submission is ignored so an existing key is not overwritten with an
        empty value. Neither key is printed: the original logged both the
        OpenAI and the Serper key to stdout.
        """
        global tools, agent_executor

        serper_api = (serper_api or "").strip()
        if not serper_api:
            return

        os.environ["SERPER_API_KEY"] = serper_api
        search = GoogleSerperAPIWrapper()
        tools = [
            Tool(
                name="web_search",
                func=search.run,
                description="useful for when you need to extract **updated** information from the web"
            )
        ]
        agent = create_tool_calling_agent(llm, tools, agent_prompt)
        agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)


    def update_agent(openai_key):
        """Rebuild the chat model and the agent around a new OpenAI key.

        The key is stored in the ``OPENAI_API_KEY`` environment variable and
        the module-level ``llm`` and ``agent_executor`` are rebound to objects
        that use it.

        Surrounding whitespace is stripped from the key, and a blank
        submission is ignored so an existing key is not overwritten with an
        empty value. The key is not printed: the original logged it to stdout.
        """
        global llm, agent_executor

        openai_key = (openai_key or "").strip()
        if not openai_key:
            return

        os.environ['OPENAI_API_KEY'] = openai_key
        llm = ChatOpenAI(temperature=0, model_name='gpt-4o', openai_api_key=os.environ['OPENAI_API_KEY'])
        agent = create_tool_calling_agent(llm, tools, agent_prompt)
        agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

    def change_image(image):
        """Rebuild the prompt and the agent so the current image is in context.

        ``image`` is an array accepted by ``PIL.Image.fromarray``, or ``None``
        (e.g. when the image component has been cleared). An array is encoded
        as PNG, embedded as a base64 data URL in a human message, and placed
        right after the system message. With ``None`` the prompt is rebuilt
        without any image message instead of raising inside
        ``Image.fromarray``.

        Rebinds the module-level ``agent_prompt`` and ``agent_executor``.
        """
        global agent_prompt, agent_executor

        image_messages = []
        if image is not None:
            # Serialize to an in-memory PNG rather than a temporary file.
            buffer = io.BytesIO()
            Image.fromarray(image).save(buffer, format="PNG")
            image_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
            # The MIME type must match the bytes written above (PNG); the
            # original labelled them as image/jpeg.
            image_url = f"data:image/png;base64,{image_base64}"
            message_content = [{"type": "image_url", "image_url": {"url": image_url}}]
            image_messages.append(HumanMessage(content=message_content))

        agent_prompt = ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    "You are a helpful assistant. You are provided with an image an image and a question about the image. You should answer the question. You should use the Web search tool to find the most updated information.",
                ),
                *image_messages,
                ("placeholder", "{chat_history}"),
                ("human", "{input}"),
                ("placeholder", "{agent_scratchpad}"),
            ]
        )

        agent = create_tool_calling_agent(llm, tools, agent_prompt)
        agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)


    prompt.submit(respond, [prompt, chatbot, image], [prompt, chatbot])
    openai_key.submit(update_agent, [openai_key], [])
    serper_api.submit(update_serper_api, [serper_api], [])
    image.change(change_image,[image],[])
demo.queue().launch(share=True)