vllm

Use a local image and quantise the model for low GPU usage (with solution)

#11
by faizan4458 - opened

import os
from PIL import Image
import base64
from io import BytesIO
from huggingface_hub import login
from vllm import LLM
from vllm.sampling_params import SamplingParams
import http.server
import socketserver
import threading
import time

# Start a simple HTTP server to serve images

def start_http_server(port=8000, directory="."):
    handler = http.server.SimpleHTTPRequestHandler
    os.chdir(directory)

    httpd = socketserver.TCPServer(("", port), handler)

    thread = threading.Thread(target=httpd.serve_forever, daemon=True)
    thread.start()

    print(f"Serving images at http://localhost:{port}")
    return httpd

def get_image_url(local_image_path, port=8000):
    # Get the file name
    image_name = os.path.basename(local_image_path)

    image_url = f"http://localhost:{port}/{image_name}"
    return image_url

# Main function to handle the image processing and sending to the model

def process_image_with_llm(local_image_path):
    login(token="ENTER YOUR HF TOKEN")

    # Define the model and sampling parameters
    model_name = "mistralai/Pixtral-12B-2409"
    sampling_params = SamplingParams(max_tokens=8192)

    # Initialize the LLM with adjusted memory settings
    llm = LLM(
        model=model_name,
        tokenizer_mode="mistral",
        gpu_memory_utilization=0.75,  # Adjust GPU memory utilization
        dtype='float16',  # Use 16-bit precision
        max_model_len=4096  # Adjust max model length if necessary
    )

    # Define the prompt for extracting details
    prompt = """<your prompt for extracting details from the image>"""

    # Start the HTTP server to serve the image
    port = 8000
    httpd = start_http_server(port=port, directory=os.path.dirname(local_image_path))

    # Get the URL of the image
    image_url = get_image_url(local_image_path, port=port)

    # Prepare the input messages for the model
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": image_url}}
            ]
        },
    ]

    # Get the model's response
    outputs = llm.chat(messages=messages, sampling_params=sampling_params)

    # Output the results
    print(outputs[0].outputs[0].text)

    # Gracefully stop the server after completion
    httpd.shutdown()

# Example usage

local_image_path = "image local path"
process_image_with_llm(local_image_path)
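A note on the "quantise" part: the settings above (gpu_memory_utilization, float16, a shorter max_model_len) only cap memory use, they do not actually quantise the weights. vLLM's LLM constructor also takes a quantization argument. The snippet below is only a sketch: it assumes your GPU and vLLM build support FP8, and the AWQ repo name in the commented-out variant is a hypothetical placeholder, not a real checkpoint.

from vllm import LLM

# Sketch: on-the-fly FP8 weight quantisation (requires hardware/vLLM support)
llm_fp8 = LLM(
    model="mistralai/Pixtral-12B-2409",
    tokenizer_mode="mistral",
    quantization="fp8",           # quantise weights to FP8 at load time
    gpu_memory_utilization=0.75,
    max_model_len=4096,
)

# Sketch: load a pre-quantised checkpoint instead (placeholder repo name)
# llm_awq = LLM(
#     model="your-org/pixtral-12b-awq",  # hypothetical AWQ-quantised repo
#     quantization="awq",
#     max_model_len=4096,
# )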

Is it suited for macOS?
Kindly help me execute this model on macOS.

It can work like this.

def image_to_data_url(image_path):
    with Image.open(image_path) as img:
        buffered = BytesIO()
        img.save(buffered, format="PNG")
        image_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
        return f"data:image/png;base64,{image_base64}"


def main():

    llm = LLM(
        model="mistralai/Pixtral-12B-2409",
        tokenizer_mode="mistral",
        max_model_len=4000
    )

    prompt = "Describe this image in one sentence."
    image_url = image_to_data_url("./local_image.png")

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": image_url}}
            ]
        },
    ]

    # Run the chat request and print the model's reply
    outputs = llm.chat(messages=messages, sampling_params=SamplingParams(max_tokens=256))
    print(outputs[0].outputs[0].text)


if __name__ == "__main__":
    main()

It's not working properly.
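If the data-URL route keeps misbehaving, one more thing to try (just a sketch, assuming a recent vLLM with multi-modal generate support; the [INST] ... [IMG] ... [/INST] prompt template below is my assumption of the Pixtral format, not something confirmed in this thread) is to pass the PIL image straight to llm.generate via multi_modal_data, avoiding URLs entirely:

from PIL import Image
from vllm import LLM
from vllm.sampling_params import SamplingParams

llm = LLM(
    model="mistralai/Pixtral-12B-2409",
    tokenizer_mode="mistral",
    max_model_len=4000
)

image = Image.open("./local_image.png")
# Assumed Mistral-style prompt template with an [IMG] placeholder for the image
prompt = "<s>[INST]Describe this image in one sentence.\n[IMG][/INST]"

# Hand the image object to vLLM directly instead of serving it over a URL
outputs = llm.generate(
    {"prompt": prompt, "multi_modal_data": {"image": image}},
    sampling_params=SamplingParams(max_tokens=256),
)
print(outputs[0].outputs[0].text)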
