kiddobellamy committed on
Commit
8e761cc
1 Parent(s): 42c2dda

Update handler.py

Files changed (1)
  1. handler.py +53 -39
handler.py CHANGED
@@ -1,39 +1,53 @@
- import torch
- from transformers import AutoModelForVision2Seq, AutoTokenizer
-
- class ModelHandler:
-     def __init__(self):
-         self.model = None
-         self.tokenizer = None
-
-     def initialize(self, context):
-         """ Load the model and tokenizer """
-         model_dir = context.system_properties.get("model_dir")
-         self.tokenizer = AutoTokenizer.from_pretrained(model_dir)
-         self.model = AutoModelForVision2Seq.from_pretrained(model_dir)
-         self.model.eval()
-
-     def preprocess(self, data):
-         """ Preprocess the input data before passing it to the model """
-         inputs = self.tokenizer(data, return_tensors="pt")
-         return inputs
-
-     def inference(self, inputs):
-         """ Run the forward pass of the model """
-         with torch.no_grad():
-             outputs = self.model(**inputs)
-         return outputs
-
-     def postprocess(self, outputs):
-         """ Post-process the output data from the model """
-         return outputs
-
- # This is required for the Hugging Face inference endpoints
- _handler = ModelHandler()
-
- def handle(data, context):
-     if not _handler.model:
-         _handler.initialize(context)
-     inputs = _handler.preprocess(data)
-     outputs = _handler.inference(inputs)
-     return _handler.postprocess(outputs)
+ import requests
+ import torch
+ from PIL import Image
+ from transformers import MllamaForConditionalGeneration, AutoProcessor
+
+ # Define the model ID and load the model and processor
+ model_id = "meta-llama/Llama-3.2-90B-Vision-Instruct"
+
+ def load_model():
+     """Loads the Llama 3.2-90B Vision-Instruct model and processor."""
+     model = MllamaForConditionalGeneration.from_pretrained(
+         model_id,
+         torch_dtype=torch.bfloat16,
+         device_map="auto",
+     )
+     processor = AutoProcessor.from_pretrained(model_id)
+     return model, processor
+
+ def process_image(url):
+     """Processes the image from the given URL."""
+     image = Image.open(requests.get(url, stream=True).raw)
+     return image
+
+ def generate_response(model, processor, image, prompt):
+     """Generates a text response based on the image and the prompt."""
+     messages = [
+         {"role": "user", "content": [
+             {"type": "image"},
+             {"type": "text", "text": prompt}
+         ]}
+     ]
+     input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
+     inputs = processor(image, input_text, return_tensors="pt").to(model.device)
+     output = model.generate(**inputs, max_new_tokens=30)
+     return processor.decode(output[0])
+
+ def main():
+     # Load model and processor
+     model, processor = load_model()
+
+     # Sample image URL
+     url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg"
+     image = process_image(url)
+
+     # Define a sample prompt
+     prompt = "If I had to write a haiku for this one, it would be:"
+
+     # Generate response
+     response = generate_response(model, processor, image, prompt)
+     print(response)
+
+ if __name__ == "__main__":
+     main()
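
Note on serving: the removed version of handler.py exposed the handle(data, context) entry point that its own comment says is required for Hugging Face inference endpoints, while the updated file is a standalone script with no serving entry point. If the model still needs to be served through a custom handler, one possible way to reuse the new Mllama loading and generation code is sketched below; the EndpointHandler wrapper, the payload shape ({"inputs": {"image_url": ..., "prompt": ...}}) and the generated_text key are assumptions for illustration, not part of this commit.

# A minimal sketch, not part of this commit: wraps the new Mllama code
# in an EndpointHandler class as used by Inference Endpoints custom handlers.
from typing import Any, Dict

import requests
import torch
from PIL import Image
from transformers import AutoProcessor, MllamaForConditionalGeneration

MODEL_ID = "meta-llama/Llama-3.2-90B-Vision-Instruct"

class EndpointHandler:
    def __init__(self, path: str = ""):
        # Load the model and processor once at startup, mirroring load_model() above.
        self.model = MllamaForConditionalGeneration.from_pretrained(
            path or MODEL_ID,
            torch_dtype=torch.bfloat16,
            device_map="auto",
        )
        self.processor = AutoProcessor.from_pretrained(path or MODEL_ID)

    def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
        # Assumed payload shape: {"inputs": {"image_url": "...", "prompt": "..."}}.
        payload = data.get("inputs", {})
        image = Image.open(requests.get(payload["image_url"], stream=True).raw)
        messages = [
            {"role": "user", "content": [
                {"type": "image"},
                {"type": "text", "text": payload["prompt"]},
            ]}
        ]
        text = self.processor.apply_chat_template(messages, add_generation_prompt=True)
        inputs = self.processor(image, text, return_tensors="pt").to(self.model.device)
        output = self.model.generate(**inputs, max_new_tokens=30)
        return {"generated_text": self.processor.decode(output[0])}

As in the updated script, the expensive model load happens once (here in __init__), so each request only runs image fetching, preprocessing, and generation.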