Spaces:

Eladlev
/

test_computer_use

Sleeping

App Files Files Community

Eladlev committed on Nov 25, 2024

Commit

1aa017f

verified ·

1 Parent(s): 9b60d66

Upload app.py

Browse files

Files changed (1) hide show

app.py +247 -153

app.py CHANGED Viewed

@@ -1,166 +1,260 @@
-import gradio as gr
-import io
 import os
-from PIL import Image, ImageDraw
-from anthropic import Anthropic
 from anthropic.types import TextBlock
 from anthropic.types.beta import BetaMessage, BetaTextBlock, BetaToolUseBlock
-max_tokens = 4096
-import base64
-model = 'claude-3-5-sonnet-20241022'
-system = """<SYSTEM_CAPABILITY>
-* You are utilizing a Windows system with internet access.
-* The current date is Monday, November 18, 2024.
-</SYSTEM_CAPABILITY>"""
-def save_image_or_get_url(image, filename="processed_image.png"):
-    if not os.path.isdir("static"):
-        os.mkdir("static")
-    filepath = os.path.join("static", filename)
-    image.save(filepath)
-    return filepath
-def draw_circle_on_image(image, center, radius=30):
-    """
-    Draws a circle on the given image using a center point and radius.
-    Parameters:
-        image (PIL.Image): The image to draw on.
-        center (tuple): A tuple (x, y) representing the center of the circle.
-        radius (int): The radius of the circle.
-    Returns:
-        PIL.Image: The image with the circle drawn.
-    """
-    if not isinstance(center, tuple) or len(center) != 2:
-        raise ValueError("Center must be a tuple of two values (x, y).")
-    if not isinstance(radius, (int, float)) or radius <= 0:
-        raise ValueError("Radius must be a positive number.")
-    # Calculate the bounding box for the circle
-    bbox = [
-        center[0] - radius, center[1] - radius,  # Top-left corner
-        center[0] + radius, center[1] + radius  # Bottom-right corner
-    ]
-    # Create a drawing context
-    draw = ImageDraw.Draw(image)
-    # Draw the circle
-    draw.ellipse(bbox, outline="red", width=15)  # Change outline color and width as needed
-    return image
-def pil_image_to_base64(pil_image):
-    # Save the PIL image to an in-memory buffer as a file-like object
-    buffered = io.BytesIO()
-    pil_image.save(buffered, format="PNG")  # Specify format (e.g., PNG, JPEG)
-    buffered.seek(0)  # Rewind the buffer to the beginning
-    # Encode the bytes from the buffer to Base64
-    image_data = base64.b64encode(buffered.getvalue()).decode("utf-8")
-    return image_data
-# Function to simulate chatbot responses
-def chatbot_response(input_text, image, key, chat_history):
-    if not key:
-        return chat_history + [[input_text, "Please enter a valid key."]]
-    if image is None:
-        return chat_history + [[input_text, "Please upload an image."]]
-    api_key =key
-    client = Anthropic(api_key=api_key)
-    messages = [{'role': 'user', 'content': [TextBlock(text=f'Look at my screenshot, {input_text}', type='text')]},
-                {'role': 'assistant', 'content': [BetaTextBlock(
-                    text="I'll help you check your screen, but first I need to take a screenshot to see what you're looking at.",
-                    type='text'), BetaToolUseBlock(id='toolu_01PSTVtavFgmx6ctaiSvacCB',
-                                                   input={'action': 'screenshot'}, name='computer',
-                                                   type='tool_use')]}]
-    image_data = pil_image_to_base64(image)
-    tool_res = {'role': 'user', 'content': [{'type': 'tool_result', 'tool_use_id': 'toolu_01PSTVtavFgmx6ctaiSvacCB',
-                                             'is_error': False,
-                                             'content': [{'type': 'image',
-                                                          'source': {'type': 'base64', 'media_type': 'image/png',
-                                                                     'data': image_data}}]}]}
-    messages.append(tool_res)
-    params = [{'name': 'computer', 'type': 'computer_20241022', 'display_width_px': 1512, 'display_height_px': 982,
-               'display_number': None}, {'type': 'bash_20241022', 'name': 'bash'},
-              {'name': 'str_replace_editor', 'type': 'text_editor_20241022'}]
-    raw_response = client.beta.messages.with_raw_response.create(
-        max_tokens=max_tokens,
-        messages=messages,
-        model=model,
-        system=system,
-        tools=params,
-        betas=["computer-use-2024-10-22"],
-        temperature=0.0,
     )
-    response = raw_response.parse()
-    scale_x = image.width // 1512
-    scale_y = image.height // 982
-    for r in response.content:
-        if hasattr(r, 'text'):
-            chat_history = chat_history + [[input_text, r.text]]
-        if hasattr(r, 'input') and 'coordinate' in r.input:
-            coordinate = r.input['coordinate']
-            new_image = draw_circle_on_image(image, (coordinate[0] * scale_x, coordinate[1] * scale_y))
-            # Save the image or encode it as a base64 string if needed
-            image_url = save_image_or_get_url(
-                new_image)  # Define this function to save or generate the URL for the image
-            # Include the image as part of the chat history
-            image_html = f'<img src="{image_url}" alt="Processed Image" style="max-width: 100%; max-height: 200px;">'
-            chat_history = chat_history + [[None, (image_url,)]]
-    return chat_history
-    # Read the image and encode it in base64
-    # Simulated response
-    response = f"Received input: {input_text}\nKey: {key}\nImage uploaded successfully!"
-    return chat_history + [[input_text, response]]
-# Create the Gradio interface
 with gr.Blocks() as demo:
-    with gr.Row():
-        with gr.Column():
-            image_input = gr.Image(label="Upload Image", type="pil", interactive=True)
-        with gr.Column():
-            chatbot = gr.Chatbot(label="Chatbot Interaction", height=400)
-    with gr.Row():
-        user_input = gr.Textbox(label="Type your message here", placeholder="Enter your message...")
-        key_input = gr.Textbox(label="API Key", placeholder="Enter your key...", type="password")
-    # Button to submit
-    submit_button = gr.Button("Submit")
-    # Initialize chat history
-    chat_history = gr.State(value=[])
-    # Set interactions
-    submit_button.click(
-        fn=chatbot_response,
-        inputs=[user_input, image_input, key_input, chat_history],
-        outputs=[chatbot],
-    )
-# Launch the app
-demo.launch()

+"""
+Entrypoint for Gradio, see https://gradio.app/
+"""
+import asyncio
+import base64
 import os
+from datetime import datetime
+from enum import StrEnum
+from functools import partial
+from pathlib import Path
+from typing import cast, Dict
+import gradio as gr
+from anthropic import APIResponse
 from anthropic.types import TextBlock
 from anthropic.types.beta import BetaMessage, BetaTextBlock, BetaToolUseBlock
+from anthropic.types.tool_use_block import ToolUseBlock
+from computer_use_demo.loop import (
+    PROVIDER_TO_DEFAULT_MODEL_NAME,
+    APIProvider,
+    sampling_loop,
+    sampling_loop_sync,
+)
+from computer_use_demo.tools import ToolResult
+# Directory for locally persisted settings; load_from_storage and save_to_storage resolve file names against it.
+CONFIG_DIR = Path("~/.anthropic").expanduser()
+# Full path of the persisted API key file inside CONFIG_DIR.
+API_KEY_FILE = CONFIG_DIR / "api_key"
+# Banner text rendered at the top of the UI unless the HIDE_WARNING environment variable has a non-empty value.
+WARNING_TEXT = "⚠️ Security Alert: Never provide access to sensitive accounts or data, as malicious web content can hijack Claude's behavior"
+class Sender(StrEnum):
+    """Role labels for chat participants; as a StrEnum, each member is also a plain string."""
+    # Used as the "role" of the user turns appended in process_input.
+    USER = "user"
+    # Bound as the sender when _render_message is handed to the sampling loop.
+    BOT = "assistant"
+    # Not referenced anywhere else in the visible code.
+    TOOL = "tool"
+def setup_state(state):
+    if "messages" not in state:
+        state["messages"] = []
+    if "api_key" not in state:
+        # Try to load API key from file first, then environment
+        state["api_key"] = load_from_storage("api_key") or os.getenv("ANTHROPIC_API_KEY", "")
+        if not state["api_key"]:
+            print("API key not found. Please set it in the environment or storage.")
+    if "provider" not in state:
+        state["provider"] = os.getenv("API_PROVIDER", "anthropic") or APIProvider.ANTHROPIC
+    if "provider_radio" not in state:
+        state["provider_radio"] = state["provider"]
+    if "model" not in state:
+        _reset_model(state)
+    if "auth_validated" not in state:
+        state["auth_validated"] = False
+    if "responses" not in state:
+        state["responses"] = {}
+    if "tools" not in state:
+        state["tools"] = {}
+    if "only_n_most_recent_images" not in state:
+        state["only_n_most_recent_images"] = 10
+    if "custom_system_prompt" not in state:
+        state["custom_system_prompt"] = load_from_storage("system_prompt") or ""
+        # remove if want to use default system prompt
+        state["custom_system_prompt"] += "\n\nNote that you are operating on a Windows machine, so you should use double click to open a desktop application"
+    if "hide_images" not in state:
+        state["hide_images"] = False
+def _reset_model(state):
+    state["model"] = PROVIDER_TO_DEFAULT_MODEL_NAME[cast(APIProvider, state["provider"])]
+async def main(state):
+    """Render loop for Gradio"""
+    setup_state(state)
+    return "Setup completed"
+def validate_auth(provider: APIProvider, api_key: str | None):
+    if provider == APIProvider.ANTHROPIC:
+        if not api_key:
+            return "Enter your Anthropic API key to continue."
+    if provider == APIProvider.BEDROCK:
+        import boto3
+        if not boto3.Session().get_credentials():
+            return "You must have AWS credentials set up to use the Bedrock API."
+    if provider == APIProvider.VERTEX:
+        import google.auth
+        from google.auth.exceptions import DefaultCredentialsError
+        if not os.environ.get("CLOUD_ML_REGION"):
+            return "Set the CLOUD_ML_REGION environment variable to use the Vertex API."
+        try:
+            google.auth.default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
+        except DefaultCredentialsError:
+            return "Your google cloud credentials are not set up correctly."
+def load_from_storage(filename: str) -> str | None:
+    """Load data from a file in the storage directory."""
+    try:
+        file_path = CONFIG_DIR / filename
+        if file_path.exists():
+            data = file_path.read_text().strip()
+            if data:
+                return data
+    except Exception as e:
+        print(f"Debug: Error loading {filename}: {e}")
+    return None
+def save_to_storage(filename: str, data: str) -> None:
+    """Save data to a file in the storage directory."""
+    try:
+        CONFIG_DIR.mkdir(parents=True, exist_ok=True)
+        file_path = CONFIG_DIR / filename
+        file_path.write_text(data)
+        # Ensure only user can read/write the file
+        file_path.chmod(0o600)
+    except Exception as e:
+        print(f"Debug: Error saving {filename}: {e}")
+def _api_response_callback(response: APIResponse[BetaMessage], response_state: dict):
+    response_id = datetime.now().isoformat()
+    response_state[response_id] = response
+def _tool_output_callback(tool_output: ToolResult, tool_id: str, tool_state: dict):
+    """Store ``tool_output`` in ``tool_state`` under ``tool_id``, replacing any earlier entry."""
+    tool_state[tool_id] = tool_output
+def _render_message(sender: Sender, message: str | BetaTextBlock | BetaToolUseBlock | ToolResult, state):
+    is_tool_result = not isinstance(message, str) and (
+        isinstance(message, ToolResult)
+        or message.__class__.__name__ == "ToolResult"
+        or message.__class__.__name__ == "CLIResult"
+    )
+    if not message or (
+        is_tool_result
+        and state["hide_images"]
+        and not hasattr(message, "error")
+        and not hasattr(message, "output")
+    ):
+        return
+    if is_tool_result:
+        message = cast(ToolResult, message)
+        if message.output:
+            return message.output
+        if message.error:
+            return f"Error: {message.error}"
+        if message.base64_image and not state["hide_images"]:
+            return base64.b64decode(message.base64_image)
+    elif isinstance(message, BetaTextBlock) or isinstance(message, TextBlock):
+        return message.text
+    elif isinstance(message, BetaToolUseBlock) or isinstance(message, ToolUseBlock):
+        return f"Tool Use: {message.name}\nInput: {message.input}"
+    else:
+        return message
+# open new tab, open google sheets inside, then create a new blank spreadsheet
+def process_input(user_input, state):
+    # Ensure the state is properly initialized
+    setup_state(state)
+    # Append the user input to the messages in the state
+    state["messages"].append(
+        {
+            "role": Sender.USER,
+            "content": [TextBlock(type="text", text=user_input)],
+        }
     )
+    # Run the sampling loop synchronously and yield messages
+    for message in sampling_loop(state):
+        yield message
+def accumulate_messages(*args, **kwargs):
+    """
+    Wrapper function to accumulate messages from sampling_loop_sync.
+    """
+    accumulated_messages = []
+    for message in sampling_loop_sync(*args, **kwargs):
+        # Check if the message is already in the accumulated messages
+        if message not in accumulated_messages:
+            accumulated_messages.append(message)
+            # Yield the accumulated messages as a list
+            yield accumulated_messages
+def sampling_loop(state):
+    # Ensure the API key is present
+    if not state.get("api_key"):
+        raise ValueError("API key is missing. Please set it in the environment or storage.")
+    # Call the sampling loop and yield messages
+    for message in accumulate_messages(
+        system_prompt_suffix=state["custom_system_prompt"],
+        model=state["model"],
+        provider=state["provider"],
+        messages=state["messages"],
+        output_callback=partial(_render_message, Sender.BOT, state=state),
+        tool_output_callback=partial(_tool_output_callback, tool_state=state["tools"]),
+        api_response_callback=partial(_api_response_callback, response_state=state["responses"]),
+        api_key=state["api_key"],
+        only_n_most_recent_images=state["only_n_most_recent_images"],
+    ):
+        yield message
 with gr.Blocks() as demo:
+    state = gr.State({})  # Use Gradio's state management
+    gr.Markdown("# Claude Computer Use Demo")
+    if not os.getenv("HIDE_WARNING", False):
+        gr.Markdown(WARNING_TEXT)
+    with gr.Row():
+        provider = gr.Dropdown(
+            label="API Provider",
+            choices=[option.value for option in APIProvider],
+            value="anthropic",
+            interactive=True,
+        )
+        model = gr.Textbox(label="Model", value="claude-3-5-sonnet-20241022")
+        api_key = gr.Textbox(
+            label="Anthropic API Key",
+            type="password",
+            value="",
+            interactive=True,
+        )
+        only_n_images = gr.Slider(
+            label="Only send N most recent images",
+            minimum=0,
+            value=10,
+            interactive=True,
+        )
+        custom_prompt = gr.Textbox(
+            label="Custom System Prompt Suffix",
+            value="",
+            interactive=True,
+        )
+        hide_images = gr.Checkbox(label="Hide screenshots", value=False)
+    api_key.change(fn=lambda key: save_to_storage(API_KEY_FILE, key), inputs=api_key)
+    chat_input = gr.Textbox(label="Type a message to send to Claude...")
+    # chat_output = gr.Textbox(label="Chat Output", interactive=False)
+    chatbot = gr.Chatbot(label="Chatbot History")
+    # Pass state as an input to the function
+    chat_input.submit(process_input, [chat_input, state], chatbot)
+demo.launch(share=True)