# Llama_Vision/handler.py
import torch
from transformers import MllamaForConditionalGeneration, AutoProcessor
from PIL import Image
import base64
import io
# Load model and processor once at import time so they are reused across requests.
model_id = "kiddobellamy/Llama_Vision"

# Assumes this checkpoint is a Llama 3.2 Vision model: vision checkpoints load with
# MllamaForConditionalGeneration, while LlamaForCausalLM is text-only and cannot
# consume the pixel values produced by the processor below.
model = MllamaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
processor = AutoProcessor.from_pretrained(model_id)
def handler(event, context):
    try:
        # Parse the request payload
        payload = event.get('inputs', {})
        image_base64 = payload.get('image')
        prompt = payload.get('prompt', '')

        if not image_base64 or not prompt:
            return {'error': 'Both "image" and "prompt" are required in inputs.'}

        # Decode the base64-encoded image into a PIL image
        image_bytes = base64.b64decode(image_base64)
        image = Image.open(io.BytesIO(image_bytes)).convert('RGB')

        # Build a chat message that pairs the image placeholder with the text prompt
        messages = [
            {"role": "user", "content": [
                {"type": "image"},
                {"type": "text", "text": prompt},
            ]}
        ]
        input_text = processor.apply_chat_template(messages, add_generation_prompt=True)

        # Tokenize the text and preprocess the image in one call; add_special_tokens=False
        # because apply_chat_template has already inserted the special tokens
        model_inputs = processor(
            image, input_text, add_special_tokens=False, return_tensors="pt"
        ).to(model.device)

        # Generate, then decode only the newly generated tokens so the
        # response does not echo the prompt back to the caller
        output_ids = model.generate(**model_inputs, max_new_tokens=50)
        prompt_length = model_inputs["input_ids"].shape[-1]
        generated_text = processor.decode(
            output_ids[0][prompt_length:], skip_special_tokens=True
        )

        return {'generated_text': generated_text}
    except Exception as e:
        return {'error': str(e)}
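
# --- Local smoke test (illustrative sketch, not part of the deployed handler) ---
# Shows how a caller builds the event payload: the image travels as a
# base64-encoded string inside "inputs". The file path below is hypothetical.
if __name__ == "__main__":
    with open("example.jpg", "rb") as f:  # hypothetical sample image
        encoded = base64.b64encode(f.read()).decode("utf-8")
    event = {"inputs": {"image": encoded, "prompt": "Describe this image."}}
    print(handler(event, context=None))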