File size: 6,142 Bytes
e245801
4311882
 
 
 
e245801
 
4311882
 
 
 
 
 
 
 
 
9b60d66
 
4311882
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e245801
4311882
 
 
 
 
 
e245801
4311882
 
 
e245801
4311882
 
 
e245801
4311882
 
 
 
e245801
4311882
e245801
 
 
4311882
 
 
 
 
 
 
 
 
 
 
 
 
 
e245801
 
4311882
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
import gradio as gr
import io
import os
from PIL import Image, ImageDraw
from anthropic import Anthropic
from anthropic.types import TextBlock
from anthropic.types.beta import BetaMessage, BetaTextBlock, BetaToolUseBlock
# Upper bound on generated tokens; passed as `max_tokens` to the messages call.
max_tokens = 4096
import base64
# Anthropic model identifier passed as `model` to the messages call.
model = 'claude-3-5-sonnet-20241022'
# System prompt passed as `system` to the messages call.
# NOTE(review): the date inside the prompt is a hard-coded literal, not the
# actual current date.
system = """<SYSTEM_CAPABILITY>
* You are utilizing a Windows system with internet access.
* The current date is Monday, November 18, 2024.
</SYSTEM_CAPABILITY>"""

def save_image_or_get_url(image, filename="processed_image.png"):
    """Save ``image`` into the local ``static`` directory and return its path.

    Parameters:
        image: Any object exposing ``save(path)`` (e.g. a ``PIL.Image``).
        filename (str): File name to use inside ``static``. An existing file
            with the same name is overwritten.

    Returns:
        str: The relative path ``static/<filename>`` the image was written to.

    Raises:
        FileExistsError: If ``static`` exists but is not a directory.
    """
    # exist_ok=True avoids the check-then-create race of
    # `if not isdir: mkdir`, which can raise FileExistsError when two
    # requests hit this function at the same time.
    os.makedirs("static", exist_ok=True)
    filepath = os.path.join("static", filename)
    image.save(filepath)
    return filepath

def draw_circle_on_image(image, center, radius=30):
    """
    Outline a red circle on ``image`` around the point ``center``.

    The circle is drawn directly onto ``image`` (no copy is made) and that
    same object is handed back.

    Parameters:
        image (PIL.Image): The image to draw on; modified in place.
        center (tuple): The circle's center as an ``(x, y)`` tuple.
        radius (int | float): The circle's radius; must be greater than zero.

    Returns:
        PIL.Image: ``image`` itself, now carrying the circle.

    Raises:
        ValueError: If ``center`` is not a 2-tuple, or ``radius`` is not a
            positive number.
    """
    center_ok = isinstance(center, tuple) and len(center) == 2
    if not center_ok:
        raise ValueError("Center must be a tuple of two values (x, y).")

    radius_is_number = isinstance(radius, (int, float))
    if not radius_is_number or radius <= 0:
        raise ValueError("Radius must be a positive number.")

    # Bounding box of the circle: top-left corner, then bottom-right corner.
    cx, cy = center
    left, top = cx - radius, cy - radius
    right, bottom = cx + radius, cy + radius

    # Fixed styling: red outline, 15 px stroke.
    pen = ImageDraw.Draw(image)
    pen.ellipse([left, top, right, bottom], outline="red", width=15)
    return image


def pil_image_to_base64(pil_image):
    """Serialize ``pil_image`` as PNG and return the bytes as Base64 text.

    Parameters:
        pil_image: Object exposing ``save(fp, format=...)`` (a ``PIL.Image``).

    Returns:
        str: Base64 encoding of the PNG bytes, decoded as UTF-8 text.
    """
    # Render into memory rather than touching the filesystem.
    with io.BytesIO() as png_buffer:
        pil_image.save(png_buffer, format="PNG")
        # getvalue() returns the whole buffer regardless of stream position.
        png_bytes = png_buffer.getvalue()

    encoded = base64.b64encode(png_bytes)
    return encoded.decode("utf-8")







# Gradio click handler: sends the uploaded screenshot to Claude and reports the reply
def chatbot_response(input_text, image, key, chat_history):
    """Ask Claude about a screenshot and append its answer to the chat.

    The request is built as a scripted conversation: the user asks a question,
    the assistant "requests" a screenshot through a ``computer`` tool call, and
    ``image`` is supplied as that tool's result. Every text block in the reply
    becomes a chat entry; every reply block whose ``input`` carries a
    ``coordinate`` produces a copy of the screenshot annotation (a red circle
    at that point) that is saved to disk and appended as an image entry.

    Parameters:
        input_text (str): The user's question.
        image (PIL.Image | None): The uploaded screenshot.
        key (str): Anthropic API key.
        chat_history (list | None): Existing ``[user, bot]`` entries. It is
            never mutated; ``None`` is treated as an empty history.

    Returns:
        list: A new history list with the entries for this exchange appended.
        When ``key`` is empty or ``image`` is missing, a single entry with an
        explanatory message is appended instead of calling the API.
    """
    if chat_history is None:
        chat_history = []
    if not key:
        return chat_history + [[input_text, "Please enter a valid key."]]
    if image is None:
        return chat_history + [[input_text, "Please upload an image."]]

    # Local import keeps this block self-contained.
    import uuid

    # Display size declared to the model; also the space the returned
    # coordinates are scaled from.
    # NOTE(review): assumes the model reports coordinates in this declared
    # display space rather than in the uploaded image's own pixel space --
    # confirm against the computer-use documentation.
    display_width_px = 1512
    display_height_px = 982
    tool_use_id = 'toolu_01PSTVtavFgmx6ctaiSvacCB'

    client = Anthropic(api_key=key)

    messages = [
        {'role': 'user',
         'content': [TextBlock(text=f'Look at my screenshot, {input_text}', type='text')]},
        {'role': 'assistant',
         'content': [
             BetaTextBlock(
                 text="I'll help you check your screen, but first I need to take a screenshot to see what you're looking at.",
                 type='text'),
             BetaToolUseBlock(id=tool_use_id,
                              input={'action': 'screenshot'}, name='computer',
                              type='tool_use'),
         ]},
        # The uploaded image answers the scripted screenshot request above.
        {'role': 'user',
         'content': [{'type': 'tool_result', 'tool_use_id': tool_use_id,
                      'is_error': False,
                      'content': [{'type': 'image',
                                   'source': {'type': 'base64', 'media_type': 'image/png',
                                              'data': pil_image_to_base64(image)}}]}]},
    ]
    tools = [
        {'name': 'computer', 'type': 'computer_20241022',
         'display_width_px': display_width_px, 'display_height_px': display_height_px,
         'display_number': None},
        {'type': 'bash_20241022', 'name': 'bash'},
        {'name': 'str_replace_editor', 'type': 'text_editor_20241022'},
    ]

    raw_response = client.beta.messages.with_raw_response.create(
        max_tokens=max_tokens,
        messages=messages,
        model=model,
        system=system,
        tools=tools,
        betas=["computer-use-2024-10-22"],
        temperature=0.0,
    )
    response = raw_response.parse()

    # True division: floor division collapsed the factor to 0 for images
    # smaller than the declared display and dropped the fractional part for
    # every non-integer ratio, misplacing the circle.
    scale_x = image.width / display_width_px
    scale_y = image.height / display_height_px

    for block in response.content:
        if hasattr(block, 'text'):
            chat_history = chat_history + [[input_text, block.text]]

        if hasattr(block, 'input') and 'coordinate' in block.input:
            coordinate = block.input['coordinate']
            center = (round(coordinate[0] * scale_x), round(coordinate[1] * scale_y))
            annotated = draw_circle_on_image(image, center)

            # Unique file name per result so concurrent requests (or several
            # coordinates in one reply) do not overwrite each other's image.
            image_path = save_image_or_get_url(
                annotated, filename=f"processed_image_{uuid.uuid4().hex}.png")

            # A 1-tuple holding a file path is rendered by the chatbot as media.
            chat_history = chat_history + [[None, (image_path,)]]
    return chat_history


# Create the Gradio interface: screenshot upload on the left, chat on the
# right, question and API-key inputs underneath.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(label="Upload Image", type="pil", interactive=True)
        with gr.Column():
            chatbot = gr.Chatbot(label="Chatbot Interaction", height=400)

    with gr.Row():
        user_input = gr.Textbox(label="Type your message here", placeholder="Enter your message...")
        key_input = gr.Textbox(label="API Key", placeholder="Enter your key...", type="password")

    # Button to submit
    submit_button = gr.Button("Submit")

    # Per-session chat history
    chat_history = gr.State(value=[])

    def _respond_and_remember(input_text, image, key, history):
        """Run ``chatbot_response`` and return its result for both outputs."""
        updated_history = chatbot_response(input_text, image, key, history)
        return updated_history, updated_history

    # The state must be listed as an output as well as an input; otherwise it
    # stays at its initial empty list and every submit wipes the conversation.
    submit_button.click(
        fn=_respond_and_remember,
        inputs=[user_input, image_input, key_input, chat_history],
        outputs=[chatbot, chat_history],
    )

# Launch the app
demo.launch()