vykanand committed on
Commit 73d58c2 · verified · Parent: f580378

Update app.py

Files changed (1)
  1. app.py +12 -7
app.py CHANGED
@@ -16,23 +16,28 @@ image_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo
 response = requests.get(image_url)
 img = Image.open(BytesIO(response.content))
 
-# Ensure the image is resized and processed correctly
-img_resized = img.resize((224, 224))  # Resize as needed (adjust based on model requirements)
-
-# Create a prompt or text input
+# Automatically preprocess the image and text input using the processor
 text_input = "Describe this image."
 
-# Process the image and the text input
+# The processor automatically handles resizing, normalization, and tokenization
 inputs = processor(
-    images=img_resized,
+    images=img,
     text=text_input,
     return_tensors="pt",
-).to(device)
+    padding=True,  # Automatically pad to match model input size
+)
 
 # Check the number of tokens generated by the processor and the shape of inputs
 print("Input tokens:", inputs.input_ids.shape)
 print("Image features shape:", inputs.pixel_values.shape)
 
+# Ensure image and text are properly tokenized and features align
+assert inputs.input_ids.shape[1] > 0, "No tokens generated for text input!"
+assert inputs.pixel_values.shape[0] > 0, "No features generated for the image!"
+
+# Move inputs to the device (either GPU or CPU)
+inputs = {key: value.to(device) for key, value in inputs.items()}
+
 # Inference
 generated_ids = model.generate(**inputs, max_new_tokens=128)
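For context, here is a minimal runnable sketch of the flow after this commit. The checkpoint name, the AutoProcessor / AutoModelForVision2Seq loading lines, and the final decode step are not part of the diff and are assumptions for illustration; substitute whatever app.py actually loads. The demo image URL is truncated in the hunk header above and is left truncated here.

from io import BytesIO

import requests
import torch
from PIL import Image
from transformers import AutoModelForVision2Seq, AutoProcessor

device = "cuda" if torch.cuda.is_available() else "cpu"

# Hypothetical stand-in checkpoint so the sketch runs end to end;
# replace with the model app.py actually uses.
model_id = "Salesforce/blip-image-captioning-base"
processor = AutoProcessor.from_pretrained(model_id)
model = AutoModelForVision2Seq.from_pretrained(model_id).to(device)

# URL is truncated in the diff; use the full URL from app.py.
image_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo"
img = Image.open(BytesIO(requests.get(image_url).content))
text_input = "Describe this image."

# One processor call handles resizing, normalization, and tokenization.
inputs = processor(images=img, text=text_input, return_tensors="pt", padding=True)

# Move every tensor to the target device, as in the updated app.py.
inputs = {key: value.to(device) for key, value in inputs.items()}

generated_ids = model.generate(**inputs, max_new_tokens=128)
print(processor.batch_decode(generated_ids, skip_special_tokens=True)[0])

Note that model.generate(**inputs, ...) accepts the plain dict produced by the comprehension just as well as the processor's original batched output.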