vykanand committed on
Commit 73d58c2 · verified · Parent: f580378

Update app.py

Files changed (1)
  1. app.py +12 -7
app.py CHANGED
@@ -16,23 +16,28 @@ image_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo
 response = requests.get(image_url)
 img = Image.open(BytesIO(response.content))
 
-# Ensure the image is resized and processed correctly
-img_resized = img.resize((224, 224))  # Resize as needed (adjust based on model requirements)
-
-# Create a prompt or text input
+# Automatically preprocess the image and text input using the processor
 text_input = "Describe this image."
 
-# Process the image and the text input
+# The processor automatically handles resizing, normalization, and tokenization
 inputs = processor(
-    images=img_resized,
+    images=img,
     text=text_input,
     return_tensors="pt",
-).to(device)
+    padding=True,  # Automatically pad to match model input size
+)
 
 # Check the number of tokens generated by the processor and the shape of inputs
 print("Input tokens:", inputs.input_ids.shape)
 print("Image features shape:", inputs.pixel_values.shape)
 
+# Ensure image and text are properly tokenized and features align
+assert inputs.input_ids.shape[1] > 0, "No tokens generated for text input!"
+assert inputs.pixel_values.shape[0] > 0, "No features generated for the image!"
+
+# Move inputs to the device (either GPU or CPU)
+inputs = {key: value.to(device) for key, value in inputs.items()}
+
 # Inference
 generated_ids = model.generate(**inputs, max_new_tokens=128)
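For context, here is a minimal runnable sketch of the flow after this commit. The checkpoint name, the AutoProcessor / AutoModelForVision2Seq loading lines, and the final decode step are not part of the diff and are assumptions for illustration; substitute whatever app.py actually loads. The demo image URL is truncated in the hunk header above and is left truncated here.

from io import BytesIO

import requests
import torch
from PIL import Image
from transformers import AutoModelForVision2Seq, AutoProcessor

device = "cuda" if torch.cuda.is_available() else "cpu"

# Hypothetical stand-in checkpoint so the sketch runs end to end;
# replace with the model app.py actually uses.
model_id = "Salesforce/blip-image-captioning-base"
processor = AutoProcessor.from_pretrained(model_id)
model = AutoModelForVision2Seq.from_pretrained(model_id).to(device)

# URL is truncated in the diff; use the full URL from app.py.
image_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo"
img = Image.open(BytesIO(requests.get(image_url).content))
text_input = "Describe this image."

# One processor call handles resizing, normalization, and tokenization.
inputs = processor(images=img, text=text_input, return_tensors="pt", padding=True)

# Move every tensor to the target device, as in the updated app.py.
inputs = {key: value.to(device) for key, value in inputs.items()}

generated_ids = model.generate(**inputs, max_new_tokens=128)
print(processor.batch_decode(generated_ids, skip_special_tokens=True)[0])

Note that model.generate(**inputs, ...) accepts the plain dict produced by the comprehension just as well as the processor's original batched output.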