winstoneli committed
Commit d293559 · 1 Parent(s): 1ce6f8e

update [qwen2.5]

app/src/brushedit_app.py CHANGED
@@ -15,7 +15,7 @@ from PIL import Image
 from huggingface_hub import hf_hub_download, snapshot_download
 from scipy.ndimage import binary_dilation, binary_erosion
 from transformers import (LlavaNextProcessor, LlavaNextForConditionalGeneration,
-                          Qwen2VLForConditionalGeneration, Qwen2VLProcessor)
+                          Qwen2_5_VLForConditionalGeneration, AutoProcessor)
 
 from segment_anything import SamPredictor, build_sam, SamAutomaticMaskGenerator
 from diffusers import StableDiffusionBrushNetPipeline, BrushNetModel, UniPCMultistepScheduler
@@ -293,7 +293,7 @@ OUTPUT_IMAGE_PATH = {
 # os.makedirs('gradio_temp_dir', exist_ok=True)
 
 VLM_MODEL_NAMES = list(vlms_template.keys())
-DEFAULT_VLM_MODEL_NAME = "Qwen2-VL-7B-Instruct (Default)"
+DEFAULT_VLM_MODEL_NAME = "Qwen2.5-VL-7B-Instruct (Default)"
 BASE_MODELS = list(base_models_template.keys())
 DEFAULT_BASE_MODEL = "realisticVision (Default)"
 
@@ -553,18 +553,12 @@ def update_vlm_model(vlm_name):
             return vlm_model_dropdown
         else:
             if os.path.exists(vlm_local_path):
-                vlm_processor = Qwen2VLProcessor.from_pretrained(vlm_local_path)
-                vlm_model = Qwen2VLForConditionalGeneration.from_pretrained(vlm_local_path, torch_dtype=torch_dtype, device_map=device)
+                vlm_processor = AutoProcessor.from_pretrained(vlm_local_path)
+                vlm_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(vlm_local_path, torch_dtype=torch_dtype, device_map=device)
             else:
-                if vlm_name == "qwen2-vl-2b-instruct (Preload)":
-                    vlm_processor = Qwen2VLProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
-                    vlm_model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", torch_dtype=torch_dtype, device_map=device)
-                elif vlm_name == "qwen2-vl-7b-instruct (Preload)":
-                    vlm_processor = Qwen2VLProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
-                    vlm_model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device)
-                elif vlm_name == "qwen2-vl-72b-instruct (Preload)":
-                    vlm_processor = Qwen2VLProcessor.from_pretrained("Qwen/Qwen2-VL-72B-Instruct")
-                    vlm_model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-72B-Instruct", torch_dtype=torch_dtype, device_map=device)
+                if vlm_name == "Qwen2.5-VL-7B-Instruct (Default)":
+                    vlm_processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
+                    vlm_model = Qwen2_5_VLForConditionalGeneration.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device)
     elif vlm_type == "openai":
         pass
     return "success"
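
For reference, the updated qwen2-vl branch follows the standard Qwen2.5-VL loading recipe: AutoProcessor plus Qwen2_5_VLForConditionalGeneration. A minimal sketch of that pattern, assuming a transformers release that already ships the Qwen2.5-VL classes (roughly 4.49 and later) and the public hub checkpoint:

import torch
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration

# Minimal load sketch mirroring the branch above; device and torch_dtype
# are assumptions for this example, not values taken from the repo.
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16

vlm_processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
vlm_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2.5-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device
)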
app/src/vlm_pipeline.py CHANGED
@@ -8,7 +8,7 @@ import numpy as np
 import gradio as gr
 
 from openai import OpenAI
-from transformers import (LlavaNextForConditionalGeneration, Qwen2VLForConditionalGeneration)
+from transformers import (LlavaNextForConditionalGeneration, Qwen2_5_VLForConditionalGeneration)
 from qwen_vl_utils import process_vision_info
 
 from app.gpt4_o.instructions import (
@@ -94,7 +94,7 @@ def vlm_response_editing_type(vlm_processor,
     elif isinstance(vlm_model, LlavaNextForConditionalGeneration):
         messages = create_editing_category_messages_llava(editing_prompt)
         response_str = run_llava_next_inference(vlm_processor, vlm_model, messages, image, device=device)
-    elif isinstance(vlm_model, Qwen2VLForConditionalGeneration):
+    elif isinstance(vlm_model, Qwen2_5_VLForConditionalGeneration):
         messages = create_editing_category_messages_qwen2(editing_prompt)
         response_str = run_qwen2_vl_inference(vlm_processor, vlm_model, messages, image, device=device)
 
@@ -123,7 +123,7 @@ def vlm_response_object_wait_for_edit(vlm_processor,
     elif isinstance(vlm_model, LlavaNextForConditionalGeneration):
         messages = create_ori_object_messages_llava(editing_prompt)
         response_str = run_llava_next_inference(vlm_processor, vlm_model, messages, image, device)
-    elif isinstance(vlm_model, Qwen2VLForConditionalGeneration):
+    elif isinstance(vlm_model, Qwen2_5_VLForConditionalGeneration):
         messages = create_ori_object_messages_qwen2(editing_prompt)
         response_str = run_qwen2_vl_inference(vlm_processor, vlm_model, messages, image, device)
     return response_str
@@ -155,7 +155,7 @@ def vlm_response_mask(vlm_processor,
     elif isinstance(vlm_model, LlavaNextForConditionalGeneration):
         messages = create_add_object_messages_llava(editing_prompt, height=height, width=width)
         response_str = run_llava_next_inference(vlm_processor, vlm_model, messages, image, device)
-    elif isinstance(vlm_model, Qwen2VLForConditionalGeneration):
+    elif isinstance(vlm_model, Qwen2_5_VLForConditionalGeneration):
         base64_image = encode_image(image)
         messages = create_add_object_messages_qwen2(editing_prompt, base64_image, height=height, width=width)
         response_str = run_qwen2_vl_inference(vlm_processor, vlm_model, messages, image, device)
@@ -217,7 +217,7 @@ def vlm_response_prompt_after_apply_instruction(vlm_processor,
     elif isinstance(vlm_model, LlavaNextForConditionalGeneration):
         messages = create_apply_editing_messages_llava(editing_prompt)
         response_str = run_llava_next_inference(vlm_processor, vlm_model, messages, image, device)
-    elif isinstance(vlm_model, Qwen2VLForConditionalGeneration):
+    elif isinstance(vlm_model, Qwen2_5_VLForConditionalGeneration):
         base64_image = encode_image(image)
         messages = create_apply_editing_messages_qwen2(editing_prompt, base64_image)
         response_str = run_qwen2_vl_inference(vlm_processor, vlm_model, messages, image, device)
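
All four dispatch sites above now route Qwen2.5-VL models into run_qwen2_vl_inference. That helper is not shown in this diff; the sketch below only illustrates the usual Qwen2/Qwen2.5-VL chat flow with processor.apply_chat_template and qwen_vl_utils.process_vision_info (already imported in this file). The function name and defaults are placeholders for illustration, not the repo's implementation.

from qwen_vl_utils import process_vision_info

def qwen2_5_vl_generate(processor, model, messages, device="cuda", max_new_tokens=256):
    # Render the chat template, gather image/video inputs, then generate.
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(text=[text], images=image_inputs, videos=video_inputs,
                       padding=True, return_tensors="pt").to(device)
    output_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
    # Strip the prompt tokens before decoding the reply.
    trimmed = [out[len(inp):] for inp, out in zip(inputs.input_ids, output_ids)]
    return processor.batch_decode(trimmed, skip_special_tokens=True)[0]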
app/src/vlm_template.py CHANGED
@@ -4,7 +4,7 @@ import torch
 from openai import OpenAI
 from transformers import (
     LlavaNextProcessor, LlavaNextForConditionalGeneration,
-    Qwen2VLForConditionalGeneration, Qwen2VLProcessor
+    Qwen2_5_VLForConditionalGeneration, AutoProcessor
 )
 ## init device
 device = "cuda"
@@ -12,100 +12,20 @@ torch_dtype = torch.float16
 
 
 vlms_list = [
-    # {
-    #     "type": "llava-next",
-    #     "name": "llava-v1.6-mistral-7b-hf",
-    #     "local_path": "models/vlms/llava-v1.6-mistral-7b-hf",
-    #     "processor": LlavaNextProcessor.from_pretrained(
-    #         "models/vlms/llava-v1.6-mistral-7b-hf"
-    #     ) if os.path.exists("models/vlms/llava-v1.6-mistral-7b-hf") else LlavaNextProcessor.from_pretrained(
-    #         "llava-hf/llava-v1.6-mistral-7b-hf"
-    #     ),
-    #     "model": LlavaNextForConditionalGeneration.from_pretrained(
-    #         "models/vlms/llava-v1.6-mistral-7b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu") if os.path.exists("models/vlms/llava-v1.6-mistral-7b-hf") else
-    #     LlavaNextForConditionalGeneration.from_pretrained(
-    #         "llava-hf/llava-v1.6-mistral-7b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu"),
-    # },
-    # {
-    #     "type": "llava-next",
-    #     "name": "llama3-llava-next-8b-hf (Preload)",
-    #     "local_path": "models/vlms/llama3-llava-next-8b-hf",
-    #     "processor": LlavaNextProcessor.from_pretrained(
-    #         "models/vlms/llama3-llava-next-8b-hf"
-    #     ) if os.path.exists("models/vlms/llama3-llava-next-8b-hf") else LlavaNextProcessor.from_pretrained(
-    #         "llava-hf/llama3-llava-next-8b-hf"
-    #     ),
-    #     "model": LlavaNextForConditionalGeneration.from_pretrained(
-    #         "models/vlms/llama3-llava-next-8b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu") if os.path.exists("models/vlms/llama3-llava-next-8b-hf") else
-    #     LlavaNextForConditionalGeneration.from_pretrained(
-    #         "llava-hf/llama3-llava-next-8b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu"),
-    # },
-    # {
-    #     "type": "llava-next",
-    #     "name": "llava-v1.6-vicuna-13b-hf",
-    #     "local_path": "models/vlms/llava-v1.6-vicuna-13b-hf",
-    #     "processor": LlavaNextProcessor.from_pretrained(
-    #         "models/vlms/llava-v1.6-vicuna-13b-hf"
-    #     ) if os.path.exists("models/vlms/llava-v1.6-vicuna-13b-hf") else LlavaNextProcessor.from_pretrained(
-    #         "llava-hf/llava-v1.6-vicuna-13b-hf"
-    #     ),
-    #     "model": LlavaNextForConditionalGeneration.from_pretrained(
-    #         "models/vlms/llava-v1.6-vicuna-13b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu") if os.path.exists("models/vlms/llava-v1.6-vicuna-13b-hf") else
-    #     LlavaNextForConditionalGeneration.from_pretrained(
-    #         "llava-hf/llava-v1.6-vicuna-13b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu"),
-    # },
-    # {
-    #     "type": "llava-next",
-    #     "name": "llava-v1.6-34b-hf",
-    #     "local_path": "models/vlms/llava-v1.6-34b-hf",
-    #     "processor": LlavaNextProcessor.from_pretrained(
-    #         "models/vlms/llava-v1.6-34b-hf"
-    #     ) if os.path.exists("models/vlms/llava-v1.6-34b-hf") else LlavaNextProcessor.from_pretrained(
-    #         "llava-hf/llava-v1.6-34b-hf"
-    #     ),
-    #     "model": LlavaNextForConditionalGeneration.from_pretrained(
-    #         "models/vlms/llava-v1.6-34b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu") if os.path.exists("models/vlms/llava-v1.6-34b-hf") else
-    #     LlavaNextForConditionalGeneration.from_pretrained(
-    #         "llava-hf/llava-v1.6-34b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu"),
-    # },
-    # {
-    #     "type": "qwen2-vl",
-    #     "name": "Qwen2-VL-2B-Instruct",
-    #     "local_path": "models/vlms/Qwen2-VL-2B-Instruct",
-    #     "processor": Qwen2VLProcessor.from_pretrained(
-    #         "models/vlms/Qwen2-VL-2B-Instruct"
-    #     ) if os.path.exists("models/vlms/Qwen2-VL-2B-Instruct") else Qwen2VLProcessor.from_pretrained(
-    #         "Qwen/Qwen2-VL-2B-Instruct"
-    #     ),
-    #     "model": Qwen2VLForConditionalGeneration.from_pretrained(
-    #         "models/vlms/Qwen2-VL-2B-Instruct", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu") if os.path.exists("models/vlms/Qwen2-VL-2B-Instruct") else
-    #     Qwen2VLForConditionalGeneration.from_pretrained(
-    #         "Qwen/Qwen2-VL-2B-Instruct", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu"),
-    # },
     {
         "type": "qwen2-vl",
-        "name": "Qwen2-VL-7B-Instruct (Default)",
-        "local_path": "models/vlms/Qwen2-VL-7B-Instruct",
-        "processor": Qwen2VLProcessor.from_pretrained(
-            "models/vlms/Qwen2-VL-7B-Instruct"
-        ) if os.path.exists("models/vlms/Qwen2-VL-7B-Instruct") else Qwen2VLProcessor.from_pretrained(
-            "Qwen/Qwen2-VL-7B-Instruct"
+        "name": "Qwen2.5-VL-7B-Instruct (Default)",
+        "local_path": "models/vlms/Qwen/Qwen2.5-VL-7B-Instruct",
+        "processor": AutoProcessor.from_pretrained(
+            "models/vlms/Qwen/Qwen2.5-VL-7B-Instruct"
+        ) if os.path.exists("models/vlms/Qwen/Qwen2.5-VL-7B-Instruct") else AutoProcessor.from_pretrained(
+            "Qwen/Qwen2.5-VL-7B-Instruct"
         ),
-        "model": Qwen2VLForConditionalGeneration.from_pretrained(
-            "models/vlms/Qwen2-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device
-        ).to(device) if os.path.exists("models/vlms/Qwen2-VL-7B-Instruct") else
-        Qwen2VLForConditionalGeneration.from_pretrained(
-            "Qwen/Qwen2-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device
+        "model": Qwen2_5_VLForConditionalGeneration.from_pretrained(
+            "models/vlms/Qwen/Qwen2.5-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device
+        ).to(device) if os.path.exists("models/vlms/Qwen/Qwen2.5-VL-7B-Instruct") else
+        Qwen2_5_VLForConditionalGeneration.from_pretrained(
+            "Qwen/Qwen2.5-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device
         ).to(device),
     },
     {
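
The new default entry probes models/vlms/Qwen/Qwen2.5-VL-7B-Instruct before falling back to the hub, so pre-fetching the checkpoint into that directory keeps the template on the local copy. A hedged sketch (not part of this commit) using snapshot_download, which brushedit_app.py already imports; the local_dir value simply mirrors the path checked above:

from huggingface_hub import snapshot_download

# Pre-download the Qwen2.5-VL weights into the directory that vlm_template.py
# checks with os.path.exists, so the entry above loads locally instead of
# downloading from the hub at import time.
snapshot_download(
    repo_id="Qwen/Qwen2.5-VL-7B-Instruct",
    local_dir="models/vlms/Qwen/Qwen2.5-VL-7B-Instruct",
)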