File size: 7,035 Bytes
c87c295 |
|
import openai
import base64
import os
import io
import time
from PIL import Image
from abc import ABCMeta, abstractmethod
def create_vision_chat_completion(vision_model, base64_image, prompt):
    """Ask a vision-capable chat model a question about a single image.

    Sends one user message containing the text ``prompt`` and the image,
    embedded as a ``data:image/jpeg;base64,...`` URL, through
    ``openai.ChatCompletion.create`` with ``max_tokens=1000``.

    Args:
        vision_model: Model name forwarded as ``model`` to the API call.
        base64_image: Base64 text of the image placed in the data URL.
        prompt: Question or instruction about the image.

    Returns:
        The ``content`` of the first choice's message, or ``None`` if the
        request or the handling of its response raised an exception.
    """
    # NOTE(review): the data URL always declares image/jpeg, whatever the
    # real format of the encoded bytes is -- confirm the API tolerates this.
    image_part = {
        "type": "image_url",
        "image_url": {
            "url": f"data:image/jpeg;base64,{base64_image}",
        },
    }
    try:
        response = openai.ChatCompletion.create(
            model=vision_model,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        image_part,
                    ],
                }
            ],
            max_tokens=1000,
        )
        return response.choices[0].message.content
    except Exception:
        # Best-effort contract: failures are reported as None. Catching
        # Exception instead of using a bare except lets KeyboardInterrupt
        # and SystemExit propagate.
        return None
def create_image(prompt):
    """Generate an image with the ``dall-e-3`` model.

    Calls ``openai.Image.create`` with ``response_format="b64_json"``.

    Args:
        prompt: Text description of the image to generate.

    Returns:
        The ``b64_json`` value of the first item in the response data, or
        ``None`` if the request or the handling of its response raised an
        exception.
    """
    try:
        response = openai.Image.create(
            model="dall-e-3",
            prompt=prompt,
            response_format="b64_json",
        )
        return response.data[0]['b64_json']
    except Exception:
        # Best-effort contract: failures are reported as None. Catching
        # Exception instead of using a bare except lets KeyboardInterrupt
        # and SystemExit propagate.
        return None
def image_to_base64(path):
    """Read an image file and return its content as base64 text.

    Files whose extension is ``.jpg``, ``.jpeg``, ``.png`` or ``.webp``
    (compared case-insensitively) are encoded byte-for-byte. Any other file
    is opened with Pillow, converted to RGB and re-encoded as PNG before
    being base64 encoded.

    Args:
        path: Path of the image file to read.

    Returns:
        The base64 encoding as a ``str``, or ``None`` if reading or
        converting the file raised an exception.
    """
    passthrough_suffixes = {'.jpg', '.jpeg', '.png', '.webp'}
    try:
        _, suffix = os.path.splitext(path)
        # Lower-case the suffix so files such as "photo.JPG" are not pushed
        # through a needless decode/re-encode round trip.
        if suffix.lower() in passthrough_suffixes:
            with open(path, "rb") as image_file:
                raw_bytes = image_file.read()
        else:
            # The context manager closes the underlying file handle, which
            # a bare Image.open() call would leave open.
            with Image.open(path) as img:
                byte_buffer = io.BytesIO()
                img.convert('RGB').save(byte_buffer, 'PNG')
            raw_bytes = byte_buffer.getvalue()
        return base64.b64encode(raw_bytes).decode('utf-8')
    except Exception:
        # Best-effort contract: failures are reported as None. Catching
        # Exception instead of using a bare except lets KeyboardInterrupt
        # and SystemExit propagate.
        return None
def base64_to_image_bytes(image_base64):
    """Decode base64 data into raw bytes.

    Args:
        image_base64: Base64 data as an ASCII ``str`` or bytes-like object.

    Returns:
        The decoded ``bytes``, or ``None`` when the input cannot be decoded
        (malformed base64, non-ASCII text, or an unsupported argument type).
    """
    try:
        return base64.b64decode(image_base64)
    except (ValueError, TypeError):
        # binascii.Error (bad padding/length) is a ValueError subclass;
        # TypeError covers arguments that are neither str nor bytes-like.
        return None
def inquire_image(work_dir, vision_model, path, prompt):
    """Ask the vision model a question about an image in the work directory.

    Args:
        work_dir: Directory that ``path`` is taken relative to.
        vision_model: Model name forwarded to the vision chat completion.
        path: Image file path, joined to ``work_dir`` with a ``/``.
        prompt: Question to ask about the image.

    Returns:
        A ``(text, hypertext)`` tuple. ``text`` is the model's answer, or a
        message starting with ``"Error: "`` when the image could not be
        encoded or the model call failed. ``hypertext`` is always ``None``
        for this tool.
    """
    image_base64 = image_to_base64(f'{work_dir}/{path}')
    if image_base64 is None:
        return "Error: Image transform error", None

    response = create_vision_chat_completion(vision_model, image_base64, prompt)
    if response is None:
        # Carries the same "Error: " prefix as the other failure returns in
        # this file, so every failure is reported the same way.
        return "Error: Model response error", None

    return response, None
def dalle(unique_id, prompt):
    """Generate an image from ``prompt`` and save it under the cache folder.

    The image is written as a PNG file to ``cache/temp_<unique_id>/``, with
    a file name derived from the current time.

    Args:
        unique_id: Identifier used to build the per-session cache directory.
        prompt: Text description of the image to generate.

    Returns:
        A ``(text, hypertext)`` tuple. On success ``text`` is a confirmation
        message and ``hypertext`` is an ``<img>`` tag pointing at the saved
        file. On failure ``text`` is an ``"Error: ..."`` message and
        ``hypertext`` is ``None``.
    """
    img_base64 = create_image(prompt)
    if img_base64 is None:
        return "Error: Model response error", None

    img_bytes = base64_to_image_bytes(img_base64)
    if img_bytes is None:
        return "Error: Image transform error", None

    temp_path = f'cache/temp_{unique_id}'
    # makedirs(exist_ok=True) also creates a missing parent "cache" folder
    # and avoids the check-then-create race of exists() followed by mkdir().
    os.makedirs(temp_path, exist_ok=True)

    path = f'{temp_path}/{hash(time.time())}.png'
    with open(path, 'wb') as f:
        f.write(img_bytes)

    text_to_gpt = "Image has been successfully generated and displayed to user."
    hypertext_to_display = f'<img src=\"file={path}\" width="50%" style=\'max-width:none; max-height:none\'>'
    return text_to_gpt, hypertext_to_display
class Tool(metaclass=ABCMeta):
    """Abstract base for tools; concrete tools implement both methods below."""

    def __init__(self, config):
        """Keep ``config`` on the instance for subclasses to read."""
        self.config = config

    @abstractmethod
    def support(self):
        """Tell whether this tool is supported; implemented by subclasses."""

    @abstractmethod
    def get_tool_data(self):
        """Return the data describing this tool; implemented by subclasses."""
class ImageInquireTool(Tool):
    """Tool definition for the ``inquire_image`` function."""

    def support(self):
        """Return the ``available`` setting of the ``GPT-4V`` model config."""
        return self.config['model']['GPT-4V']['available']

    def get_tool_data(self):
        """Build the registration data for the ``inquire_image`` tool.

        Returns:
            A dict holding the tool name, the callable, the system prompt
            text, the function-call schema (``path`` and ``prompt`` are
            required string parameters) and the additional parameters.
            ``work_dir`` is a callable that reads ``jupyter_work_dir`` from
            the bot backend; ``vision_model`` is the configured model name.
        """
        return {
            "tool_name": "inquire_image",
            "tool": inquire_image,
            # Adjacent string literals are concatenated verbatim, so each
            # fragment must carry its own trailing space; two fragments
            # previously ran words together ("everytasks", 'like"based').
            "system_prompt": "If necessary, utilize the 'inquire_image' tool to query an AI model regarding the "
                             "content of images uploaded by users. Avoid phrases like \"based on the analysis\"; "
                             "instead, respond as if you viewed the image by yourself. Keep in mind that not every "
                             "tasks related to images require knowledge of the image content, such as converting "
                             "an image format or extracting image file attributes, which should use `execute_code` "
                             "tool instead. Use the tool only when understanding the image content is necessary.",
            "tool_description": {
                "name": "inquire_image",
                "description": "This function enables you to inquire with an AI model about the contents of an image "
                               "and receive the model's response.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "path": {
                            "type": "string",
                            "description": "File path of the image"
                        },
                        "prompt": {
                            "type": "string",
                            "description": "The question you want to pose to the AI model about the image"
                        }
                    },
                    "required": ["path", "prompt"]
                }
            },
            "additional_parameters": {
                "work_dir": lambda bot_backend: bot_backend.jupyter_work_dir,
                "vision_model": self.config['model']['GPT-4V']['model_name']
            }
        }
class DALLETool(Tool):
    """Tool definition for the ``dalle`` image-generation function."""

    def support(self):
        """Report this tool as supported unconditionally."""
        return True

    def get_tool_data(self):
        """Assemble the registration data for the ``dalle`` tool.

        The result holds the tool name, the callable, the system prompt
        text, the function-call schema (one required string parameter,
        ``prompt``) and an ``unique_id`` callable that reads ``unique_id``
        from the bot backend.
        """
        system_prompt = (
            "If user ask you to generate an art image, you can translate user's requirements into a "
            "prompt and sending it to the `dalle` tool. Please note that this tool is specifically "
            "designed for creating art images. For scientific figures, such as plots, please use the "
            "Python code execution tool `execute_code` instead."
        )
        prompt_schema = {
            "type": "string",
            "description": "A detailed description of the image you want to generate, should be in "
                           "English only. "
        }
        tool_description = {
            "name": "dalle",
            "description": "This function allows you to access OpenAI's DALL·E-3 model for image generation.",
            "parameters": {
                "type": "object",
                "properties": {"prompt": prompt_schema},
                "required": ["prompt"]
            }
        }
        additional_parameters = {
            "unique_id": lambda bot_backend: bot_backend.unique_id,
        }
        return {
            "tool_name": "dalle",
            "tool": dalle,
            "system_prompt": system_prompt,
            "tool_description": tool_description,
            "additional_parameters": additional_parameters,
        }
def get_available_tools(config):
    """Collect the tool data of every candidate tool that reports support.

    Each candidate class is instantiated with ``config``; the instance's
    ``get_tool_data()`` result is kept only when ``support()`` is truthy.

    Args:
        config: Configuration object handed to each tool constructor.

    Returns:
        A list of tool-data dicts, in candidate order.
    """
    # NOTE(review): DALLETool is defined in this file but is not listed
    # here -- confirm whether leaving it out is intentional.
    candidates = [ImageInquireTool]
    # The inner generator is lazy, so each tool is constructed, checked and
    # queried before the next one is constructed.
    instances = (tool_cls(config) for tool_cls in candidates)
    return [inst.get_tool_data() for inst in instances if inst.support()]
|