Use a local image and quantize the model for low GPU usage (with solution)
import os
from PIL import Image
import base64
from io import BytesIO
from huggingface_hub import login
from vllm import LLM
from vllm.sampling_params import SamplingParams
import http.server
import socketserver
import threading
import time
# Start a simple HTTP server to serve images
def start_http_server(port=8000, directory="."):
    handler = http.server.SimpleHTTPRequestHandler
    os.chdir(directory)
    httpd = socketserver.TCPServer(("", port), handler)
    thread = threading.Thread(target=httpd.serve_forever, daemon=True)
    thread.start()
    print(f"Serving images at http://localhost:{port}")
    return httpd
def get_image_url(local_image_path, port=8000):
    # Build the URL under which the HTTP server exposes the file
    image_name = os.path.basename(local_image_path)
    image_url = f"http://localhost:{port}/{image_name}"
    return image_url
# Main function to handle the image processing and sending to the model
def process_image_with_llm(local_image_path):
    login(token="ENTER YOUR HF TOKEN")
    # Define the model and sampling parameters
    model_name = "mistralai/Pixtral-12B-2409"
    sampling_params = SamplingParams(max_tokens=8192)
    # Initialize the LLM with adjusted memory settings
    llm = LLM(
        model=model_name,
        tokenizer_mode="mistral",
        gpu_memory_utilization=0.75,  # Adjust GPU memory utilization
        dtype="float16",              # Use 16-bit precision
        max_model_len=4096            # Adjust max model length if necessary
    )
    # Define the prompt for extracting details (prompt text elided in the original post)
    prompt = """..."""
    # Start the HTTP server from the image's directory (fall back to "." for bare filenames)
    port = 8000
    httpd = start_http_server(port=port, directory=os.path.dirname(local_image_path) or ".")
    # Get the URL of the image
    image_url = get_image_url(local_image_path, port=port)
    # Prepare the input messages for the model
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": image_url}}
            ]
        },
    ]
    # Get the model's response
    outputs = llm.chat(messages=messages, sampling_params=sampling_params)
    # Output the results
    print(outputs[0].outputs[0].text)
    # Gracefully stop the server after completion
    httpd.shutdown()
# Example usage
local_image_path = "path/to/your/image.png"
process_image_with_llm(local_image_path)
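Note that the script above only reduces memory pressure (fp16 weights, a lower gpu_memory_utilization); it does not actually quantize the model. If your vLLM build supports it, weight quantization can be requested through the quantization argument. A minimal sketch, assuming a recent vLLM version with dynamic FP8 support and an FP8-capable GPU (check your version's docs for the supported modes):

# Hedged sketch: request FP8 weight quantization from vLLM.
# Assumes a vLLM build that accepts quantization="fp8"; other modes
# (e.g. pre-quantized AWQ/GPTQ checkpoints) may suit older versions.
from vllm import LLM

llm = LLM(
    model="mistralai/Pixtral-12B-2409",
    tokenizer_mode="mistral",
    quantization="fp8",           # dynamic FP8 weight quantization
    gpu_memory_utilization=0.75,
    max_model_len=4096,
)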
Is this suited for macOS? Kindly help me run this model on macOS.
It can work like this:
import base64
from io import BytesIO
from PIL import Image
from vllm import LLM
from vllm.sampling_params import SamplingParams

def image_to_data_url(image_path):
    # Encode a local image as a base64 data URL, avoiding any HTTP server
    with Image.open(image_path) as img:
        buffered = BytesIO()
        img.save(buffered, format="PNG")
        image_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
        return f"data:image/png;base64,{image_base64}"
def main():
    llm = LLM(
        model="mistralai/Pixtral-12B-2409",
        tokenizer_mode="mistral",
        max_model_len=4000
    )
    prompt = "Describe this image in one sentence."
    image_url = image_to_data_url("./local_image.png")
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": image_url}}
            ]
        },
    ]
    # Run the chat request and print the generated text
    outputs = llm.chat(messages=messages, sampling_params=SamplingParams(max_tokens=512))
    print(outputs[0].outputs[0].text)

if __name__ == "__main__":
    main()
It's not working properly.
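One possible cause (an assumption here, since no error message was posted): Pixtral image inputs consume a large number of tokens, and with max_model_len=4000 a high-resolution image can overflow the context window. Downscaling the image before encoding it is a cheap thing to try; the helper name and max_side value below are illustrative guesses, not tuned values.

# Hedged sketch: shrink the image before base64-encoding it so its token
# footprint fits inside a small max_model_len. max_side=512 is an
# assumption, not a tuned value.
import base64
from io import BytesIO
from PIL import Image

def image_to_data_url_resized(image_path, max_side=512):
    with Image.open(image_path) as img:
        img.thumbnail((max_side, max_side))      # resizes in place, keeps aspect ratio
        buffered = BytesIO()
        img.convert("RGB").save(buffered, format="PNG")
        encoded = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return f"data:image/png;base64,{encoded}"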