File size: 7,035 Bytes
c87c295 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 |
import openai
import base64
import os
import io
import time
from PIL import Image
from abc import ABCMeta, abstractmethod
def create_vision_chat_completion(vision_model, base64_image, prompt):
    """Ask a vision-capable chat model a question about a single image.

    Args:
        vision_model: Model name passed as ``model`` to the chat completion
            request.
        base64_image: Base64-encoded image data. It is embedded in a
            ``data:image/jpeg;base64,...`` URL regardless of the real format.
        prompt: Text sent to the model alongside the image.

    Returns:
        The message content of the first returned choice, or ``None`` when
        the request (or reading its response) raises an exception.
    """
    try:
        response = openai.ChatCompletion.create(
            model=vision_model,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{base64_image}",
                            },
                        },
                    ],
                }
            ],
            max_tokens=1000,
        )
        return response.choices[0].message.content
    except Exception:
        # Best-effort contract: any failure is reported as None. Catching
        # Exception instead of using a bare ``except`` keeps
        # KeyboardInterrupt and SystemExit propagating.
        return None
def create_image(prompt):
    """Generate an image with the ``dall-e-3`` model.

    Args:
        prompt: Text description of the image to generate.

    Returns:
        The base64-encoded image (the ``b64_json`` field of the first result),
        or ``None`` when the request (or reading its response) raises an
        exception.
    """
    try:
        response = openai.Image.create(
            model="dall-e-3",
            prompt=prompt,
            response_format="b64_json"
        )
        return response.data[0]['b64_json']
    except Exception:
        # Best-effort contract: any failure is reported as None. Catching
        # Exception instead of using a bare ``except`` keeps
        # KeyboardInterrupt and SystemExit propagating.
        return None
def image_to_base64(path):
    """Read an image file and return its content as a base64 string.

    Files whose extension is ``.jpg``, ``.jpeg``, ``.png`` or ``.webp``
    (compared case-insensitively) are encoded byte-for-byte. Any other file
    is opened with PIL, converted to RGB and re-encoded as PNG before being
    base64-encoded.

    Args:
        path: Path of the image file.

    Returns:
        The base64 text as a ``str``, or ``None`` when reading or converting
        the file raises an exception.
    """
    try:
        _, suffix = os.path.splitext(path)
        # Lower-case the extension so e.g. "photo.JPG" is passed through
        # unchanged instead of being needlessly re-encoded.
        if suffix.lower() in {'.jpg', '.jpeg', '.png', '.webp'}:
            with open(path, "rb") as image_file:
                raw_bytes = image_file.read()
        else:
            byte_buffer = io.BytesIO()
            # The context manager closes the underlying file handle once the
            # converted copy has been written to the in-memory buffer.
            with Image.open(path) as img:
                img.convert('RGB').save(byte_buffer, 'PNG')
            raw_bytes = byte_buffer.getvalue()
        return base64.b64encode(raw_bytes).decode('utf-8')
    except Exception:
        # Best-effort contract: any failure is reported as None. Catching
        # Exception instead of using a bare ``except`` keeps
        # KeyboardInterrupt and SystemExit propagating.
        return None
def base64_to_image_bytes(image_base64):
    """Decode base64 image data into raw bytes.

    Args:
        image_base64: Base64-encoded data (ASCII ``str`` or bytes-like).

    Returns:
        The decoded ``bytes``, or ``None`` when the input cannot be decoded.
    """
    try:
        return base64.b64decode(image_base64)
    except (TypeError, ValueError):
        # b64decode raises binascii.Error (a ValueError subclass) for
        # malformed data and TypeError for unsupported input types.
        return None
def inquire_image(work_dir, vision_model, path, prompt):
    """Ask the vision model a question about an image file.

    Args:
        work_dir: Directory that ``path`` is relative to.
        vision_model: Model name forwarded to
            ``create_vision_chat_completion``.
        path: Image file path; it is appended to ``work_dir`` with ``/``.
        prompt: Question to ask about the image.

    Returns:
        A ``(text, hypertext)`` tuple. ``text`` is the model's answer, or an
        ``"Error: ..."`` message when the image cannot be encoded or the
        model call fails. ``hypertext`` is always ``None`` for this tool.
    """
    image_base64 = image_to_base64(f'{work_dir}/{path}')
    if image_base64 is None:
        return "Error: Image transform error", None

    response = create_vision_chat_completion(vision_model, image_base64, prompt)
    if response is None:
        # Carries the same "Error: " prefix as the other failure messages so
        # the text is recognisable as an error.
        return "Error: Model response error", None

    return response, None
def dalle(unique_id, prompt):
    """Generate an image from ``prompt`` and save it under the cache folder.

    The image is written as a PNG file inside ``cache/temp_<unique_id>``.

    Args:
        unique_id: Identifier used to name the per-session cache directory.
        prompt: Text description forwarded to ``create_image``.

    Returns:
        A ``(text, hypertext)`` tuple. On success ``text`` is a confirmation
        message and ``hypertext`` is an ``<img>`` tag pointing at the saved
        file. On failure ``text`` is an ``"Error: ..."`` message and
        ``hypertext`` is ``None``.

    Raises:
        OSError: If the cache directory or the image file cannot be written.
    """
    img_base64 = create_image(prompt)
    if img_base64 is None:
        return "Error: Model response error", None

    img_bytes = base64_to_image_bytes(img_base64)
    if img_bytes is None:
        return "Error: Image transform error", None

    temp_path = f'cache/temp_{unique_id}'
    # makedirs also creates a missing parent "cache" directory, and
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(temp_path, exist_ok=True)

    path = f'{temp_path}/{hash(time.time())}.png'
    with open(path, 'wb') as f:
        f.write(img_bytes)

    hypertext_to_display = f'<img src=\"file={path}\" width="50%" style=\'max-width:none; max-height:none\'>'
    return "Image has been successfully generated and displayed to user.", hypertext_to_display
class Tool(metaclass=ABCMeta):
    """Abstract base for tool descriptors.

    Subclasses must implement ``support`` and ``get_tool_data``; the class
    itself cannot be instantiated.
    """

    def __init__(self, config):
        """Store ``config`` on the instance for use by subclasses."""
        self.config = config

    @abstractmethod
    def support(self):
        """Tell whether the tool is available; implemented by subclasses."""

    @abstractmethod
    def get_tool_data(self):
        """Return the tool's registration data; implemented by subclasses."""
class ImageInquireTool(Tool):
    """Tool descriptor exposing ``inquire_image`` (image question answering)."""

    def support(self):
        """Return the ``available`` flag configured for the GPT-4V model.

        Raises:
            KeyError: If ``config['model']['GPT-4V']['available']`` is missing.
        """
        return self.config['model']['GPT-4V']['available']

    def get_tool_data(self):
        """Build the registration record for the ``inquire_image`` tool.

        Returns:
            A dict with the tool name, the callable, the system prompt text,
            the function-call schema (``path`` and ``prompt`` string
            parameters, both required) and ``additional_parameters``.
            ``additional_parameters['work_dir']`` is a callable taking the bot
            backend and returning its ``jupyter_work_dir``;
            ``additional_parameters['vision_model']`` is the configured
            GPT-4V model name.
        """
        return {
            "tool_name": "inquire_image",
            "tool": inquire_image,
            # Adjacent literals are concatenated verbatim, so every fragment
            # that is followed by another word must end with a space.
            "system_prompt": "If necessary, utilize the 'inquire_image' tool to query an AI model regarding the "
                             "content of images uploaded by users. Avoid phrases like \"based on the analysis\"; "
                             "instead, respond as if you viewed the image by yourself. Keep in mind that not every "
                             "tasks related to images require knowledge of the image content, such as converting "
                             "an image format or extracting image file attributes, which should use `execute_code` "
                             "tool instead. Use the tool only when understanding the image content is necessary.",
            "tool_description": {
                "name": "inquire_image",
                "description": "This function enables you to inquire with an AI model about the contents of an image "
                               "and receive the model's response.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "path": {
                            "type": "string",
                            "description": "File path of the image"
                        },
                        "prompt": {
                            "type": "string",
                            "description": "The question you want to pose to the AI model about the image"
                        }
                    },
                    "required": ["path", "prompt"]
                }
            },
            "additional_parameters": {
                "work_dir": lambda bot_backend: bot_backend.jupyter_work_dir,
                "vision_model": self.config['model']['GPT-4V']['model_name']
            }
        }
class DALLETool(Tool):
    """Tool descriptor exposing ``dalle`` (DALL·E-3 image generation)."""

    def support(self):
        """Report the tool as available unconditionally."""
        return True

    def get_tool_data(self):
        """Build the registration record for the ``dalle`` tool.

        Returns:
            A dict with the tool name, the callable, the system prompt text,
            the function-call schema (one required ``prompt`` string) and
            ``additional_parameters``, whose ``unique_id`` entry is a callable
            taking the bot backend and returning its ``unique_id``.
        """
        guidance = (
            "If user ask you to generate an art image, you can translate user's requirements into a "
            "prompt and sending it to the `dalle` tool. Please note that this tool is specifically "
            "designed for creating art images. For scientific figures, such as plots, please use the "
            "Python code execution tool `execute_code` instead."
        )

        prompt_schema = {
            "type": "string",
            "description": "A detailed description of the image you want to generate, should be in "
                           "English only. "
        }

        function_spec = {
            "name": "dalle",
            "description": "This function allows you to access OpenAI's DALL·E-3 model for image generation.",
            "parameters": {
                "type": "object",
                "properties": {"prompt": prompt_schema},
                "required": ["prompt"]
            }
        }

        extra_arguments = {
            "unique_id": lambda bot_backend: bot_backend.unique_id,
        }

        return {
            "tool_name": "dalle",
            "tool": dalle,
            "system_prompt": guidance,
            "tool_description": function_spec,
            "additional_parameters": extra_arguments
        }
def get_available_tools(config):
    """Collect the tool data of every registered tool that reports support.

    Each registered tool class is instantiated with ``config``; when its
    ``support()`` is truthy, its ``get_tool_data()`` result is included.

    NOTE(review): only ``ImageInquireTool`` is registered here; ``DALLETool``
    is defined in this module but not listed - confirm this is intentional.

    Args:
        config: Configuration object handed to each tool's constructor.

    Returns:
        A list of tool-data dicts, in registration order.
    """
    registered = [ImageInquireTool]
    # The generator is lazy, so each tool is built, checked and queried
    # before the next one is instantiated.
    candidates = (tool_cls(config) for tool_cls in registered)
    return [candidate.get_tool_data() for candidate in candidates if candidate.support()]
|