File size: 2,070 Bytes
8e761cc 6cf0252 ebfa455 67e8921 1900674 ebfa455 6cf0252 1900674 67e8921 6cf0252 67e8921 6cf0252 67e8921 ebfa455 67e8921 ebfa455 67e8921 ebfa455 67e8921 ebfa455 6cf0252 ebfa455 6cf0252 ebfa455 67e8921 6cf0252 ebfa455 67e8921 ebfa455 67e8921 6cf0252 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
import torch
from transformers import LlamaForCausalLM, AutoTokenizer, AutoProcessor
from PIL import Image
import base64
import io
# Load model and processor globally.
# NOTE: this runs at import time — the (potentially multi-GB) download/load
# happens once per worker process, not per request; a failure here crashes
# the worker at startup rather than returning an error payload.
model_id = "kiddobellamy/Llama_Vision"
# Load the model
# NOTE(review): LlamaForCausalLM is the text-only class; a Llama *vision*
# checkpoint typically requires MllamaForConditionalGeneration to accept
# pixel_values — confirm this checkpoint's architecture before relying on
# image conditioning (see the unused image_inputs in handler below).
model = LlamaForCausalLM.from_pretrained(
model_id,
torch_dtype=torch.float16, # Use torch.float16 if bfloat16 is not supported
device_map="auto",  # let accelerate place/shard layers across available devices
)
# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Load the processor if needed (for image processing)
processor = AutoProcessor.from_pretrained(model_id)
def handler(event, context):
    """Serverless entry point: generate text from a prompt plus a base64 image.

    Args:
        event: dict with an 'inputs' key holding:
            - 'image':  base64-encoded image bytes (required)
            - 'prompt': text prompt (required)
            - 'max_new_tokens': optional int cap on generated tokens (default 50)
        context: platform runtime context (unused).

    Returns:
        {'generated_text': str} on success, or {'error': str} on any failure.
        The broad except is deliberate: this is the top-level boundary and the
        platform expects a JSON-serializable payload, not a traceback.
    """
    try:
        # Parse inputs. Renamed from `inputs` — the original reused that name
        # for the model kwargs dict below, shadowing the request payload.
        request = event.get('inputs', {})
        image_base64 = request.get('image')
        prompt = request.get('prompt', '')
        if not image_base64 or not prompt:
            return {'error': 'Both "image" and "prompt" are required in inputs.'}
        # Per-request generation budget; default preserves the original behavior.
        max_new_tokens = int(request.get('max_new_tokens', 50))

        # Decode the base64 image
        image_bytes = base64.b64decode(image_base64)
        image = Image.open(io.BytesIO(image_bytes)).convert('RGB')

        # Preprocess the image.
        # NOTE(review): these pixel values are computed but NOT passed to
        # generate() — with LlamaForCausalLM the model cannot consume them, so
        # generation is effectively text-only and the image is ignored. If the
        # checkpoint is Mllama-based, switch the model class and forward
        # image_inputs['pixel_values'] in model_inputs below.
        image_inputs = processor(images=image, return_tensors="pt").to(model.device)

        # Tokenize the prompt
        text_inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        model_inputs = {
            'input_ids': text_inputs['input_ids'],
            'attention_mask': text_inputs['attention_mask'],
            # 'pixel_values': image_inputs['pixel_values'],  # see NOTE above
        }

        # Generate, then decode only the newly generated tokens: decoding
        # output_ids[0] in full (as the original did) echoes the prompt back
        # into 'generated_text'.
        output_ids = model.generate(**model_inputs, max_new_tokens=max_new_tokens)
        prompt_len = text_inputs['input_ids'].shape[1]
        generated_text = tokenizer.decode(
            output_ids[0][prompt_len:], skip_special_tokens=True
        )
        return {'generated_text': generated_text}
    except Exception as e:
        # Boundary handler: report the failure as data rather than raising.
        return {'error': str(e)}
#111 |