hgdgng committed
Commit: 903f1a6
Parent: 1e50a4a

Update app.py

Files changed (1): app.py (+23 -8)
app.py CHANGED
@@ -1,28 +1,43 @@
 import requests
 import torch
 from PIL import Image
-from transformers import MllamaForConditionalGeneration, AutoProcessor
+from transformers import LlamaForConditionalGeneration, AutoProcessor
 
+# Define the model ID, replace with the correct ID if needed
 model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"
 
-model = MllamaForConditionalGeneration.from_pretrained(
+# Load the model in bfloat16 or float16 if needed
+model = LlamaForConditionalGeneration.from_pretrained(
     model_id,
-    torch_dtype=torch.bfloat16,
-    device_map="auto",
+    torch_dtype=torch.bfloat16,  # Change to torch.float16 if hardware doesn't support bfloat16
+    device_map="auto",  # Automatically selects the appropriate device
 )
+
+# Load the processor
 processor = AutoProcessor.from_pretrained(model_id)
 
+# Define an image URL
 url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg"
+
+# Fetch the image using requests
 image = Image.open(requests.get(url, stream=True).raw)
 
+# Define the messages in a format the model understands (adjust as needed)
 messages = [
     {"role": "user", "content": [
-        {"type": "image"},
-        {"type": "text", "text": "If I had to write a haiku for this one, it would be: "}
+        {"type": "image"},  # This indicates that the input contains an image
+        {"type": "text", "text": "Can you please describe this image in one sentence?"}
     ]}
 ]
+
+# Generate input text with the processor
 input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
+
+# Process the image and input text, prepare them for the model
 inputs = processor(image, input_text, return_tensors="pt").to(model.device)
 
-output = model.generate(**inputs, max_new_tokens=30)
-print(processor.decode(output[0]))
+# Run the model to generate a response
+output = model.generate(**inputs, max_new_tokens=70)
+
+# Decode and print the output
+print(processor.decode(output[0][inputs["input_ids"].shape[-1]:]))
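For reference, a minimal runnable sketch of the updated script follows. The class that transformers actually ships for the Llama 3.2 vision checkpoints is MllamaForConditionalGeneration (as in the previous version of this file); the LlamaForConditionalGeneration name introduced in this commit would fail to import. The bf16/fp16 fallback check and skip_special_tokens=True are illustrative additions not present in the commit, and loading the gated meta-llama repo assumes the license has been accepted and an access token is configured.

import requests
import torch
from PIL import Image
from transformers import MllamaForConditionalGeneration, AutoProcessor

# Gated repo: requires an accepted license and a Hugging Face access token
model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"

# Prefer bfloat16, fall back to float16 on GPUs without bf16 support (illustrative addition)
dtype = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else torch.float16

model = MllamaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=dtype,
    device_map="auto",  # place the weights on the available device(s)
)
processor = AutoProcessor.from_pretrained(model_id)

# Fetch an example image
url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg"
image = Image.open(requests.get(url, stream=True).raw)

# Chat-style prompt: an image placeholder followed by the text instruction
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": "Can you please describe this image in one sentence?"},
    ]}
]
input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(image, input_text, return_tensors="pt").to(model.device)

# Generate, then decode only the tokens produced after the prompt
output = model.generate(**inputs, max_new_tokens=70)
print(processor.decode(output[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True))

Slicing the output at inputs["input_ids"].shape[-1] keeps only the newly generated tokens, so the prompt is not echoed back in the printed response.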