Commit d293559
winstoneli committed
Parent(s): 1ce6f8e

update [qwen2.5]

Files changed:
- app/src/brushedit_app.py (+7, -13)
- app/src/vlm_pipeline.py (+5, -5)
- app/src/vlm_template.py (+12, -92)
app/src/brushedit_app.py
CHANGED
@@ -15,7 +15,7 @@ from PIL import Image
 from huggingface_hub import hf_hub_download, snapshot_download
 from scipy.ndimage import binary_dilation, binary_erosion
 from transformers import (LlavaNextProcessor, LlavaNextForConditionalGeneration,
-                          Qwen2VLForConditionalGeneration, Qwen2VLProcessor)
+                          Qwen2_5_VLForConditionalGeneration, AutoProcessor)
 
 from segment_anything import SamPredictor, build_sam, SamAutomaticMaskGenerator
 from diffusers import StableDiffusionBrushNetPipeline, BrushNetModel, UniPCMultistepScheduler
@@ -293,7 +293,7 @@ OUTPUT_IMAGE_PATH = {
 # os.makedirs('gradio_temp_dir', exist_ok=True)
 
 VLM_MODEL_NAMES = list(vlms_template.keys())
-DEFAULT_VLM_MODEL_NAME = "Qwen2-VL-7B-Instruct (Default)"
+DEFAULT_VLM_MODEL_NAME = "Qwen2.5-VL-7B-Instruct (Default)"
 BASE_MODELS = list(base_models_template.keys())
 DEFAULT_BASE_MODEL = "realisticVision (Default)"
 
@@ -553,18 +553,12 @@ def update_vlm_model(vlm_name):
             return vlm_model_dropdown
         else:
             if os.path.exists(vlm_local_path):
-                vlm_processor = Qwen2VLProcessor.from_pretrained(vlm_local_path)
-                vlm_model = Qwen2VLForConditionalGeneration.from_pretrained(vlm_local_path, torch_dtype=torch_dtype, device_map=device)
+                vlm_processor = AutoProcessor.from_pretrained(vlm_local_path)
+                vlm_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(vlm_local_path, torch_dtype=torch_dtype, device_map=device)
             else:
-                if vlm_name == "
-                    vlm_processor =
-                    vlm_model =
-                elif vlm_name == "qwen2-vl-7b-instruct (Preload)":
-                    vlm_processor = Qwen2VLProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
-                    vlm_model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device)
-                elif vlm_name == "qwen2-vl-72b-instruct (Preload)":
-                    vlm_processor = Qwen2VLProcessor.from_pretrained("Qwen/Qwen2-VL-72B-Instruct")
-                    vlm_model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-72B-Instruct", torch_dtype=torch_dtype, device_map=device)
+                if vlm_name == "Qwen2.5-VL-7B-Instruct (Default)":
+                    vlm_processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
+                    vlm_model = Qwen2_5_VLForConditionalGeneration.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device)
     elif vlm_type == "openai":
         pass
     return "success"
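In app/src/brushedit_app.py the update_vlm_model path now loads Qwen2.5-VL through AutoProcessor and Qwen2_5_VLForConditionalGeneration. For reference, the same loading logic can be exercised on its own roughly as in the sketch below; load_qwen25_vl is a hypothetical helper, not code from the commit, and it assumes a transformers release with Qwen2.5-VL support plus a CUDA device:

# Minimal sketch of the new loading path; mirrors the os.path.exists() branching above.
import os
import torch
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration

def load_qwen25_vl(local_path="models/vlms/Qwen/Qwen2.5-VL-7B-Instruct",
                   hub_id="Qwen/Qwen2.5-VL-7B-Instruct",
                   device="cuda", torch_dtype=torch.float16):
    # Prefer a local snapshot when present, otherwise fall back to the Hub repo.
    source = local_path if os.path.exists(local_path) else hub_id
    processor = AutoProcessor.from_pretrained(source)
    model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        source, torch_dtype=torch_dtype, device_map=device
    )
    return processor, model

The app itself keeps device and torch_dtype as module-level globals; the keyword defaults here exist only so the sketch is self-contained.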
app/src/vlm_pipeline.py
CHANGED
@@ -8,7 +8,7 @@ import numpy as np
 import gradio as gr
 
 from openai import OpenAI
-from transformers import (LlavaNextForConditionalGeneration, Qwen2VLForConditionalGeneration)
+from transformers import (LlavaNextForConditionalGeneration, Qwen2_5_VLForConditionalGeneration)
 from qwen_vl_utils import process_vision_info
 
 from app.gpt4_o.instructions import (
@@ -94,7 +94,7 @@ def vlm_response_editing_type(vlm_processor,
     elif isinstance(vlm_model, LlavaNextForConditionalGeneration):
         messages = create_editing_category_messages_llava(editing_prompt)
         response_str = run_llava_next_inference(vlm_processor, vlm_model, messages, image, device=device)
-    elif isinstance(vlm_model, Qwen2VLForConditionalGeneration):
+    elif isinstance(vlm_model, Qwen2_5_VLForConditionalGeneration):
         messages = create_editing_category_messages_qwen2(editing_prompt)
         response_str = run_qwen2_vl_inference(vlm_processor, vlm_model, messages, image, device=device)
 
@@ -123,7 +123,7 @@ def vlm_response_object_wait_for_edit(vlm_processor,
     elif isinstance(vlm_model, LlavaNextForConditionalGeneration):
         messages = create_ori_object_messages_llava(editing_prompt)
         response_str = run_llava_next_inference(vlm_processor, vlm_model, messages, image , device)
-    elif isinstance(vlm_model, Qwen2VLForConditionalGeneration):
+    elif isinstance(vlm_model, Qwen2_5_VLForConditionalGeneration):
         messages = create_ori_object_messages_qwen2(editing_prompt)
         response_str = run_qwen2_vl_inference(vlm_processor, vlm_model, messages, image, device)
     return response_str
@@ -155,7 +155,7 @@ def vlm_response_mask(vlm_processor,
     elif isinstance(vlm_model, LlavaNextForConditionalGeneration):
         messages = create_add_object_messages_llava(editing_prompt, height=height, width=width)
         response_str = run_llava_next_inference(vlm_processor, vlm_model, messages, image, device)
-    elif isinstance(vlm_model, Qwen2VLForConditionalGeneration):
+    elif isinstance(vlm_model, Qwen2_5_VLForConditionalGeneration):
         base64_image = encode_image(image)
         messages = create_add_object_messages_qwen2(editing_prompt, base64_image, height=height, width=width)
         response_str = run_qwen2_vl_inference(vlm_processor, vlm_model, messages, image, device)
@@ -217,7 +217,7 @@ def vlm_response_prompt_after_apply_instruction(vlm_processor,
     elif isinstance(vlm_model, LlavaNextForConditionalGeneration):
         messages = create_apply_editing_messages_llava(editing_prompt)
         response_str = run_llava_next_inference(vlm_processor, vlm_model, messages, image, device)
-    elif isinstance(vlm_model, Qwen2VLForConditionalGeneration):
+    elif isinstance(vlm_model, Qwen2_5_VLForConditionalGeneration):
         base64_image = encode_image(image)
         messages = create_apply_editing_messages_qwen2(editing_prompt, base64_image)
         response_str = run_qwen2_vl_inference(vlm_processor, vlm_model, messages, image, device)
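The four dispatch sites above hand Qwen2.5-VL models to run_qwen2_vl_inference, which this commit leaves untouched and which is not shown in the diff. For orientation only, a typical Qwen2/Qwen2.5-VL generation call built from these same imports looks roughly like the following sketch; the repo's helper also receives the raw image argument and may differ in its details:

# Illustrative sketch of a Qwen2/2.5-VL chat-completion call (not the repo's helper).
from qwen_vl_utils import process_vision_info

def qwen_vl_generate(vlm_processor, vlm_model, messages, device="cuda", max_new_tokens=256):
    # Render the chat template and gather any images/videos referenced in `messages`.
    text = vlm_processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = vlm_processor(text=[text], images=image_inputs, videos=video_inputs,
                           padding=True, return_tensors="pt").to(device)
    generated_ids = vlm_model.generate(**inputs, max_new_tokens=max_new_tokens)
    # Drop the prompt tokens so only the newly generated answer is decoded.
    trimmed = [out[len(inp):] for inp, out in zip(inputs.input_ids, generated_ids)]
    return vlm_processor.batch_decode(trimmed, skip_special_tokens=True,
                                      clean_up_tokenization_spaces=False)[0]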
app/src/vlm_template.py
CHANGED
@@ -4,7 +4,7 @@ import torch
 from openai import OpenAI
 from transformers import (
     LlavaNextProcessor, LlavaNextForConditionalGeneration,
-    Qwen2VLProcessor, Qwen2VLForConditionalGeneration
+    Qwen2_5_VLForConditionalGeneration, AutoProcessor
 )
 ## init device
 device = "cuda"
@@ -12,100 +12,20 @@ torch_dtype = torch.float16
 
 
 vlms_list = [
-    # {
-    #     "type": "llava-next",
-    #     "name": "llava-v1.6-mistral-7b-hf",
-    #     "local_path": "models/vlms/llava-v1.6-mistral-7b-hf",
-    #     "processor": LlavaNextProcessor.from_pretrained(
-    #         "models/vlms/llava-v1.6-mistral-7b-hf"
-    #     ) if os.path.exists("models/vlms/llava-v1.6-mistral-7b-hf") else LlavaNextProcessor.from_pretrained(
-    #         "llava-hf/llava-v1.6-mistral-7b-hf"
-    #     ),
-    #     "model": LlavaNextForConditionalGeneration.from_pretrained(
-    #         "models/vlms/llava-v1.6-mistral-7b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu") if os.path.exists("models/vlms/llava-v1.6-mistral-7b-hf") else
-    #     LlavaNextForConditionalGeneration.from_pretrained(
-    #         "llava-hf/llava-v1.6-mistral-7b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu"),
-    # },
-    # {
-    #     "type": "llava-next",
-    #     "name": "llama3-llava-next-8b-hf (Preload)",
-    #     "local_path": "models/vlms/llama3-llava-next-8b-hf",
-    #     "processor": LlavaNextProcessor.from_pretrained(
-    #         "models/vlms/llama3-llava-next-8b-hf"
-    #     ) if os.path.exists("models/vlms/llama3-llava-next-8b-hf") else LlavaNextProcessor.from_pretrained(
-    #         "llava-hf/llama3-llava-next-8b-hf"
-    #     ),
-    #     "model": LlavaNextForConditionalGeneration.from_pretrained(
-    #         "models/vlms/llama3-llava-next-8b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu") if os.path.exists("models/vlms/llama3-llava-next-8b-hf") else
-    #     LlavaNextForConditionalGeneration.from_pretrained(
-    #         "llava-hf/llama3-llava-next-8b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu"),
-    # },
-    # {
-    #     "type": "llava-next",
-    #     "name": "llava-v1.6-vicuna-13b-hf",
-    #     "local_path": "models/vlms/llava-v1.6-vicuna-13b-hf",
-    #     "processor": LlavaNextProcessor.from_pretrained(
-    #         "models/vlms/llava-v1.6-vicuna-13b-hf"
-    #     ) if os.path.exists("models/vlms/llava-v1.6-vicuna-13b-hf") else LlavaNextProcessor.from_pretrained(
-    #         "llava-hf/llava-v1.6-vicuna-13b-hf"
-    #     ),
-    #     "model": LlavaNextForConditionalGeneration.from_pretrained(
-    #         "models/vlms/llava-v1.6-vicuna-13b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu") if os.path.exists("models/vlms/llava-v1.6-vicuna-13b-hf") else
-    #     LlavaNextForConditionalGeneration.from_pretrained(
-    #         "llava-hf/llava-v1.6-vicuna-13b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu"),
-    # },
-    # {
-    #     "type": "llava-next",
-    #     "name": "llava-v1.6-34b-hf",
-    #     "local_path": "models/vlms/llava-v1.6-34b-hf",
-    #     "processor": LlavaNextProcessor.from_pretrained(
-    #         "models/vlms/llava-v1.6-34b-hf"
-    #     ) if os.path.exists("models/vlms/llava-v1.6-34b-hf") else LlavaNextProcessor.from_pretrained(
-    #         "llava-hf/llava-v1.6-34b-hf"
-    #     ),
-    #     "model": LlavaNextForConditionalGeneration.from_pretrained(
-    #         "models/vlms/llava-v1.6-34b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu") if os.path.exists("models/vlms/llava-v1.6-34b-hf") else
-    #     LlavaNextForConditionalGeneration.from_pretrained(
-    #         "llava-hf/llava-v1.6-34b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu"),
-    # },
-    # {
-    #     "type": "qwen2-vl",
-    #     "name": "Qwen2-VL-2B-Instruct",
-    #     "local_path": "models/vlms/Qwen2-VL-2B-Instruct",
-    #     "processor": Qwen2VLProcessor.from_pretrained(
-    #         "models/vlms/Qwen2-VL-2B-Instruct"
-    #     ) if os.path.exists("models/vlms/Qwen2-VL-2B-Instruct") else Qwen2VLProcessor.from_pretrained(
-    #         "Qwen/Qwen2-VL-2B-Instruct"
-    #     ),
-    #     "model": Qwen2VLForConditionalGeneration.from_pretrained(
-    #         "models/vlms/Qwen2-VL-2B-Instruct", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu") if os.path.exists("models/vlms/Qwen2-VL-2B-Instruct") else
-    #     Qwen2VLForConditionalGeneration.from_pretrained(
-    #         "Qwen/Qwen2-VL-2B-Instruct", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu"),
-    # },
     {
         "type": "qwen2-vl",
-        "name": "Qwen2-VL-7B-Instruct (Default)",
-        "local_path": "models/vlms/Qwen2-VL-7B-Instruct",
-        "processor": Qwen2VLProcessor.from_pretrained(
-            "models/vlms/Qwen2-VL-7B-Instruct"
-        ) if os.path.exists("models/vlms/Qwen2-VL-7B-Instruct") else Qwen2VLProcessor.from_pretrained(
-            "Qwen/Qwen2-VL-7B-Instruct"
+        "name": "Qwen2.5-VL-7B-Instruct (Default)",
+        "local_path": "models/vlms/Qwen/Qwen2.5-VL-7B-Instruct",
+        "processor": AutoProcessor.from_pretrained(
+            "models/vlms/Qwen/Qwen2.5-VL-7B-Instruct"
+        ) if os.path.exists("models/vlms/Qwen/Qwen2.5-VL-7B-Instruct") else AutoProcessor.from_pretrained(
+            "Qwen/Qwen2.5-VL-7B-Instruct"
         ),
-        "model": Qwen2VLForConditionalGeneration.from_pretrained(
-            "models/vlms/Qwen2-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device
-        ).to(device) if os.path.exists("models/vlms/Qwen2-VL-7B-Instruct") else
-        Qwen2VLForConditionalGeneration.from_pretrained(
-            "Qwen/Qwen2-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device
+        "model": Qwen2_5_VLForConditionalGeneration.from_pretrained(
+            "models/vlms/Qwen/Qwen2.5-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device
+        ).to(device) if os.path.exists("models/vlms/Qwen/Qwen2.5-VL-7B-Instruct") else
+        Qwen2_5_VLForConditionalGeneration.from_pretrained(
+            "Qwen/Qwen2.5-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device
         ).to(device),
     },
     {