rag_ColPali_Qwen2VL

Running on Zero

App Files Files Community

AdrienB134 committed on Sep 6

Commit

86019ea

•

1 Parent(s): 8575432

Update app.py

Browse files

Files changed (1) hide show

app.py +3 -11

app.py CHANGED Viewed

@@ -20,7 +20,7 @@ import time
 from PIL import Image
 import torch
 import subprocess
-subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
@@ -32,11 +32,7 @@ def model_inference(
     images, text,
 ):
-    # print(type(images))
-    # print(images[0])
-    # images = Image.open(images[0][0])
-    # print(images)
-    # print(type(images))
     images = [{"type": "image", "image": Image.open(image[0])} for image in images]
     images.append({"type": "text", "text": text})
@@ -47,7 +43,7 @@ def model_inference(
     #We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
     model = Qwen2VLForConditionalGeneration.from_pretrained(
         "Qwen/Qwen2-VL-2B-Instruct",
-        attn_implementation="flash_attention_2", #doesn't work on zerogpu WTF?!
         trust_remote_code=True,
         torch_dtype=torch.bfloat16).to("cuda:0")
@@ -55,10 +51,6 @@ def model_inference(
     min_pixels = 256*28*28
     max_pixels = 1280*28*28
     processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", min_pixels=min_pixels, max_pixels=max_pixels)
-    # The default range for the number of visual tokens per image in the model is 4-16384. You can set min_pixels and max_pixels according to your needs, such as a token count range of 256-1280, to balance speed and memory usage.
-    # min_pixels = 256*28*28
-    # max_pixels = 1280*28*28
     # processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", min_pixels=min_pixels, max_pixels=max_pixels)
     messages = [

 from PIL import Image
 import torch
 import subprocess
+#subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
     images, text,
 ):
     images = [{"type": "image", "image": Image.open(image[0])} for image in images]
     images.append({"type": "text", "text": text})
     #We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
     model = Qwen2VLForConditionalGeneration.from_pretrained(
         "Qwen/Qwen2-VL-2B-Instruct",
+        #attn_implementation="flash_attention_2", #doesn't work on zerogpu WTF?!
         trust_remote_code=True,
         torch_dtype=torch.bfloat16).to("cuda:0")
     min_pixels = 256*28*28
     max_pixels = 1280*28*28
     processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", min_pixels=min_pixels, max_pixels=max_pixels)
     # processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", min_pixels=min_pixels, max_pixels=max_pixels)
     messages = [