Spaces:
Runtime error
Runtime error
fix gradio_space
Browse files
MuseV/scripts/gradio/gradio_text2video.py
CHANGED
@@ -69,7 +69,7 @@ args_dict = {
|
|
69 |
"context_stride": 1,
|
70 |
"cross_attention_dim": 768,
|
71 |
"face_image_path": None,
|
72 |
-
"facein_model_cfg_path": "
|
73 |
"facein_model_name": None,
|
74 |
"facein_scale": 1.0,
|
75 |
"fix_condition_images": False,
|
@@ -82,14 +82,18 @@ args_dict = {
|
|
82 |
"img_length_ratio": 1.0,
|
83 |
"img_weight": 0.001,
|
84 |
"interpolation_factor": 1,
|
85 |
-
"ip_adapter_face_model_cfg_path":
|
|
|
|
|
86 |
"ip_adapter_face_model_name": None,
|
87 |
"ip_adapter_face_scale": 1.0,
|
88 |
-
"ip_adapter_model_cfg_path":
|
|
|
|
|
89 |
"ip_adapter_model_name": "musev_referencenet",
|
90 |
"ip_adapter_scale": 1.0,
|
91 |
"ipadapter_image_path": None,
|
92 |
-
"lcm_model_cfg_path": "
|
93 |
"lcm_model_name": None,
|
94 |
"log_level": "INFO",
|
95 |
"motion_speed": 8.0,
|
@@ -101,7 +105,7 @@ args_dict = {
|
|
101 |
"need_img_based_video_noise": True,
|
102 |
"need_redraw": False,
|
103 |
"negative_prompt": "V2",
|
104 |
-
"negprompt_cfg_path": "
|
105 |
"noise_type": "video_fusion",
|
106 |
"num_inference_steps": 30,
|
107 |
"output_dir": "./results/",
|
@@ -115,22 +119,26 @@ args_dict = {
|
|
115 |
"redraw_condition_image_with_ipdapter": True,
|
116 |
"redraw_condition_image_with_referencenet": True,
|
117 |
"referencenet_image_path": None,
|
118 |
-
"referencenet_model_cfg_path":
|
|
|
|
|
119 |
"referencenet_model_name": "musev_referencenet",
|
120 |
"save_filetype": "mp4",
|
121 |
"save_images": False,
|
122 |
-
"sd_model_cfg_path": "
|
123 |
"sd_model_name": "majicmixRealv6Fp16",
|
124 |
"seed": None,
|
125 |
"strength": 0.8,
|
126 |
"target_datas": "boy_dance2",
|
127 |
-
"test_data_path":
|
128 |
-
|
129 |
-
|
|
|
|
|
130 |
"unet_model_name": "musev_referencenet",
|
131 |
"use_condition_image": True,
|
132 |
"use_video_redraw": True,
|
133 |
-
"vae_model_path": "
|
134 |
"video_guidance_scale": 3.5,
|
135 |
"video_guidance_scale_end": None,
|
136 |
"video_guidance_scale_method": "linear",
|
@@ -138,7 +146,9 @@ args_dict = {
|
|
138 |
"video_num_inference_steps": 10,
|
139 |
"video_overlap": 1,
|
140 |
"vision_clip_extractor_class_name": "ImageClipVisionFeatureExtractor",
|
141 |
-
"vision_clip_model_path":
|
|
|
|
|
142 |
"w_ind_noise": 0.5,
|
143 |
"width": None,
|
144 |
"write_info": False,
|
@@ -248,17 +258,17 @@ n_repeat = args.n_repeat
|
|
248 |
|
249 |
b = 1
|
250 |
negative_embedding = [
|
251 |
-
["
|
252 |
[
|
253 |
-
"
|
254 |
"ng_deepnegative_v1_75t",
|
255 |
],
|
256 |
[
|
257 |
-
"
|
258 |
"EasyNegativeV2",
|
259 |
],
|
260 |
[
|
261 |
-
"
|
262 |
"bad_prompt_version2-neg",
|
263 |
],
|
264 |
]
|
@@ -466,7 +476,7 @@ if referencenet_model_name is not None and not use_v2v_predictor:
|
|
466 |
referencenet = load_referencenet_by_name(
|
467 |
model_name=referencenet_model_name,
|
468 |
# sd_model=sd_model_path,
|
469 |
-
# sd_model="
|
470 |
sd_referencenet_model=referencenet_model_path,
|
471 |
cross_attention_dim=cross_attention_dim,
|
472 |
)
|
@@ -517,7 +527,7 @@ for model_name, sd_model_params in sd_model_params_dict.items():
|
|
517 |
model_name=unet_model_name,
|
518 |
sd_unet_model=unet_model_path,
|
519 |
sd_model=sd_model_path,
|
520 |
-
# sd_model="
|
521 |
cross_attention_dim=cross_attention_dim,
|
522 |
need_t2i_facein=facein_model_name is not None,
|
523 |
# facein 目前没参与训练,但在unet中定义了,载入相关参数会报错,所以用strict控制
|
|
|
69 |
"context_stride": 1,
|
70 |
"cross_attention_dim": 768,
|
71 |
"face_image_path": None,
|
72 |
+
"facein_model_cfg_path": os.path.join(PROJECT_DIR, "configs/model/facein.py"),
|
73 |
"facein_model_name": None,
|
74 |
"facein_scale": 1.0,
|
75 |
"fix_condition_images": False,
|
|
|
82 |
"img_length_ratio": 1.0,
|
83 |
"img_weight": 0.001,
|
84 |
"interpolation_factor": 1,
|
85 |
+
"ip_adapter_face_model_cfg_path": os.path.join(
|
86 |
+
PROJECT_DIR, "configs/model/ip_adapter.py"
|
87 |
+
),
|
88 |
"ip_adapter_face_model_name": None,
|
89 |
"ip_adapter_face_scale": 1.0,
|
90 |
+
"ip_adapter_model_cfg_path": os.path.join(
|
91 |
+
PROJECT_DIR, "configs/model/ip_adapter.py"
|
92 |
+
),
|
93 |
"ip_adapter_model_name": "musev_referencenet",
|
94 |
"ip_adapter_scale": 1.0,
|
95 |
"ipadapter_image_path": None,
|
96 |
+
"lcm_model_cfg_path": os.path.join(PROJECT_DIR, "configs/model/lcm_model.py"),
|
97 |
"lcm_model_name": None,
|
98 |
"log_level": "INFO",
|
99 |
"motion_speed": 8.0,
|
|
|
105 |
"need_img_based_video_noise": True,
|
106 |
"need_redraw": False,
|
107 |
"negative_prompt": "V2",
|
108 |
+
"negprompt_cfg_path": os.path.join(PROJECT_DIR, "configs/model/negative_prompt.py"),
|
109 |
"noise_type": "video_fusion",
|
110 |
"num_inference_steps": 30,
|
111 |
"output_dir": "./results/",
|
|
|
119 |
"redraw_condition_image_with_ipdapter": True,
|
120 |
"redraw_condition_image_with_referencenet": True,
|
121 |
"referencenet_image_path": None,
|
122 |
+
"referencenet_model_cfg_path": os.path.join(
|
123 |
+
PROJECT_DIR, "configs/model/referencenet.py"
|
124 |
+
),
|
125 |
"referencenet_model_name": "musev_referencenet",
|
126 |
"save_filetype": "mp4",
|
127 |
"save_images": False,
|
128 |
+
"sd_model_cfg_path": os.path.join(PROJECT_DIR, "configs/model/T2I_all_model.py"),
|
129 |
"sd_model_name": "majicmixRealv6Fp16",
|
130 |
"seed": None,
|
131 |
"strength": 0.8,
|
132 |
"target_datas": "boy_dance2",
|
133 |
+
"test_data_path": os.path.join(
|
134 |
+
PROJECT_DIR, "configs/infer/testcase_video_famous.yaml"
|
135 |
+
),
|
136 |
+
"time_size": 12,
|
137 |
+
"unet_model_cfg_path": os.path.join(PROJECT_DIR, "configs/model/motion_model.py"),
|
138 |
"unet_model_name": "musev_referencenet",
|
139 |
"use_condition_image": True,
|
140 |
"use_video_redraw": True,
|
141 |
+
"vae_model_path": os.path.join(PROJECT_DIR, "checkpoints/vae/sd-vae-ft-mse"),
|
142 |
"video_guidance_scale": 3.5,
|
143 |
"video_guidance_scale_end": None,
|
144 |
"video_guidance_scale_method": "linear",
|
|
|
146 |
"video_num_inference_steps": 10,
|
147 |
"video_overlap": 1,
|
148 |
"vision_clip_extractor_class_name": "ImageClipVisionFeatureExtractor",
|
149 |
+
"vision_clip_model_path": os.path.join(
|
150 |
+
PROJECT_DIR, "checkpoints/IP-Adapter/models/image_encoder"
|
151 |
+
),
|
152 |
"w_ind_noise": 0.5,
|
153 |
"width": None,
|
154 |
"write_info": False,
|
|
|
258 |
|
259 |
b = 1
|
260 |
negative_embedding = [
|
261 |
+
[os.path.join(PROJECT_DIR, "checkpoints/embedding/badhandv4.pt"), "badhandv4"],
|
262 |
[
|
263 |
+
os.path.join(PROJECT_DIR, "checkpoints/embedding/ng_deepnegative_v1_75t.pt"),
|
264 |
"ng_deepnegative_v1_75t",
|
265 |
],
|
266 |
[
|
267 |
+
os.path.join(PROJECT_DIR, "checkpoints/embedding/EasyNegativeV2.safetensors"),
|
268 |
"EasyNegativeV2",
|
269 |
],
|
270 |
[
|
271 |
+
os.path.join(PROJECT_DIR, "checkpoints/embedding/bad_prompt_version2-neg.pt"),
|
272 |
"bad_prompt_version2-neg",
|
273 |
],
|
274 |
]
|
|
|
476 |
referencenet = load_referencenet_by_name(
|
477 |
model_name=referencenet_model_name,
|
478 |
# sd_model=sd_model_path,
|
479 |
+
# sd_model=os.path.join(PROJECT_DIR, "checkpoints//Moore-AnimateAnyone/AnimateAnyone/reference_unet.pth"),
|
480 |
sd_referencenet_model=referencenet_model_path,
|
481 |
cross_attention_dim=cross_attention_dim,
|
482 |
)
|
|
|
527 |
model_name=unet_model_name,
|
528 |
sd_unet_model=unet_model_path,
|
529 |
sd_model=sd_model_path,
|
530 |
+
# sd_model=os.path.join(PROJECT_DIR, "checkpoints//Moore-AnimateAnyone/AnimateAnyone/denoising_unet.pth"),
|
531 |
cross_attention_dim=cross_attention_dim,
|
532 |
need_t2i_facein=facein_model_name is not None,
|
533 |
# facein 目前没参与训练,但在unet中定义了,载入相关参数会报错,所以用strict控制
|
MuseV/scripts/gradio/gradio_video2video.py
CHANGED
@@ -66,7 +66,7 @@ args_dict = {
|
|
66 |
"enable_zero_snr": False,
|
67 |
"end_to_end": True,
|
68 |
"face_image_path": None,
|
69 |
-
"facein_model_cfg_path": "
|
70 |
"facein_model_name": None,
|
71 |
"facein_scale": 1.0,
|
72 |
"fix_condition_images": False,
|
@@ -79,14 +79,18 @@ args_dict = {
|
|
79 |
"img_length_ratio": 1.0,
|
80 |
"img_weight": 0.001,
|
81 |
"interpolation_factor": 1,
|
82 |
-
"ip_adapter_face_model_cfg_path":
|
|
|
|
|
83 |
"ip_adapter_face_model_name": None,
|
84 |
"ip_adapter_face_scale": 1.0,
|
85 |
-
"ip_adapter_model_cfg_path":
|
86 |
-
|
|
|
|
|
87 |
"ip_adapter_scale": 1.0,
|
88 |
"ipadapter_image_path": None,
|
89 |
-
"lcm_model_cfg_path": "
|
90 |
"lcm_model_name": None,
|
91 |
"log_level": "INFO",
|
92 |
"motion_speed": 8.0,
|
@@ -100,7 +104,7 @@ args_dict = {
|
|
100 |
"need_return_videos": False,
|
101 |
"need_video2video": False,
|
102 |
"negative_prompt": "V2",
|
103 |
-
"negprompt_cfg_path": "
|
104 |
"noise_type": "video_fusion",
|
105 |
"num_inference_steps": 30,
|
106 |
"output_dir": "./results/",
|
@@ -115,22 +119,26 @@ args_dict = {
|
|
115 |
"redraw_condition_image_with_ipdapter": True,
|
116 |
"redraw_condition_image_with_referencenet": True,
|
117 |
"referencenet_image_path": None,
|
118 |
-
"referencenet_model_cfg_path":
|
|
|
|
|
119 |
"referencenet_model_name": "musev_referencenet",
|
120 |
"sample_rate": 1,
|
121 |
"save_filetype": "mp4",
|
122 |
"save_images": False,
|
123 |
-
"sd_model_cfg_path": "
|
124 |
"sd_model_name": "majicmixRealv6Fp16",
|
125 |
"seed": None,
|
126 |
"strength": 0.8,
|
127 |
"target_datas": "boy_dance2",
|
128 |
-
"test_data_path":
|
|
|
|
|
129 |
"time_size": 12,
|
130 |
-
"unet_model_cfg_path": "
|
131 |
"unet_model_name": "musev_referencenet_pose",
|
132 |
"use_condition_image": True,
|
133 |
-
"vae_model_path": "
|
134 |
"video_guidance_scale": 3.5,
|
135 |
"video_guidance_scale_end": None,
|
136 |
"video_guidance_scale_method": "linear",
|
@@ -141,7 +149,9 @@ args_dict = {
|
|
141 |
"video_overlap": 1,
|
142 |
"video_strength": 1.0,
|
143 |
"vision_clip_extractor_class_name": "ImageClipVisionFeatureExtractor",
|
144 |
-
"vision_clip_model_path":
|
|
|
|
|
145 |
"w_ind_noise": 0.5,
|
146 |
"which2video": "video_middle",
|
147 |
"width": None,
|
@@ -279,17 +289,17 @@ else:
|
|
279 |
)
|
280 |
b = 1
|
281 |
negative_embedding = [
|
282 |
-
["
|
283 |
[
|
284 |
-
"
|
285 |
"ng_deepnegative_v1_75t",
|
286 |
],
|
287 |
[
|
288 |
-
"
|
289 |
"EasyNegativeV2",
|
290 |
],
|
291 |
[
|
292 |
-
"
|
293 |
"bad_prompt_version2-neg",
|
294 |
],
|
295 |
]
|
@@ -494,7 +504,7 @@ if referencenet_model_name is not None:
|
|
494 |
referencenet = load_referencenet_by_name(
|
495 |
model_name=referencenet_model_name,
|
496 |
# sd_model=sd_model_path,
|
497 |
-
# sd_model="
|
498 |
sd_referencenet_model=referencenet_model_path,
|
499 |
cross_attention_dim=cross_attention_dim,
|
500 |
)
|
@@ -554,7 +564,7 @@ for model_name, sd_model_params in sd_model_params_dict.items():
|
|
554 |
model_name=unet_model_name,
|
555 |
sd_unet_model=unet_model_path,
|
556 |
sd_model=sd_model_path,
|
557 |
-
# sd_model="
|
558 |
cross_attention_dim=cross_attention_dim,
|
559 |
need_t2i_facein=facein_model_name is not None,
|
560 |
# facein 目前没参与训练,但在unet中定义了,载入相关参数会报错,所以用strict控制
|
|
|
66 |
"enable_zero_snr": False,
|
67 |
"end_to_end": True,
|
68 |
"face_image_path": None,
|
69 |
+
"facein_model_cfg_path": os.path.join(PROJECT_DIR, "configs/model/facein.py"),
|
70 |
"facein_model_name": None,
|
71 |
"facein_scale": 1.0,
|
72 |
"fix_condition_images": False,
|
|
|
79 |
"img_length_ratio": 1.0,
|
80 |
"img_weight": 0.001,
|
81 |
"interpolation_factor": 1,
|
82 |
+
"ip_adapter_face_model_cfg_path": os.path.join(
|
83 |
+
PROJECT_DIR, "configs/model/ip_adapter.py"
|
84 |
+
),
|
85 |
"ip_adapter_face_model_name": None,
|
86 |
"ip_adapter_face_scale": 1.0,
|
87 |
+
"ip_adapter_model_cfg_path": os.path.join(
|
88 |
+
PROJECT_DIR, "configs/model/ip_adapter.py"
|
89 |
+
),
|
90 |
+
"ip_adapter_model_name": "musev_referencenet",
|
91 |
"ip_adapter_scale": 1.0,
|
92 |
"ipadapter_image_path": None,
|
93 |
+
"lcm_model_cfg_path": os.path.join(PROJECT_DIR, "configs/model/lcm_model.py"),
|
94 |
"lcm_model_name": None,
|
95 |
"log_level": "INFO",
|
96 |
"motion_speed": 8.0,
|
|
|
104 |
"need_return_videos": False,
|
105 |
"need_video2video": False,
|
106 |
"negative_prompt": "V2",
|
107 |
+
"negprompt_cfg_path": os.path.join(PROJECT_DIR, "configs/model/negative_prompt.py"),
|
108 |
"noise_type": "video_fusion",
|
109 |
"num_inference_steps": 30,
|
110 |
"output_dir": "./results/",
|
|
|
119 |
"redraw_condition_image_with_ipdapter": True,
|
120 |
"redraw_condition_image_with_referencenet": True,
|
121 |
"referencenet_image_path": None,
|
122 |
+
"referencenet_model_cfg_path": os.path.join(
|
123 |
+
PROJECT_DIR, "configs/model/referencenet.py"
|
124 |
+
),
|
125 |
"referencenet_model_name": "musev_referencenet",
|
126 |
"sample_rate": 1,
|
127 |
"save_filetype": "mp4",
|
128 |
"save_images": False,
|
129 |
+
"sd_model_cfg_path": os.path.join(PROJECT_DIR, "configs/model/T2I_all_model.py"),
|
130 |
"sd_model_name": "majicmixRealv6Fp16",
|
131 |
"seed": None,
|
132 |
"strength": 0.8,
|
133 |
"target_datas": "boy_dance2",
|
134 |
+
"test_data_path": os.path.join(
|
135 |
+
PROJECT_DIR, "configs/infer/testcase_video_famous.yaml"
|
136 |
+
),
|
137 |
"time_size": 12,
|
138 |
+
"unet_model_cfg_path": os.path.join(PROJECT_DIR, "configs/model/motion_model.py"),
|
139 |
"unet_model_name": "musev_referencenet_pose",
|
140 |
"use_condition_image": True,
|
141 |
+
"vae_model_path": os.path.join(PROJECT_DIR, "checkpoints/vae/sd-vae-ft-mse"),
|
142 |
"video_guidance_scale": 3.5,
|
143 |
"video_guidance_scale_end": None,
|
144 |
"video_guidance_scale_method": "linear",
|
|
|
149 |
"video_overlap": 1,
|
150 |
"video_strength": 1.0,
|
151 |
"vision_clip_extractor_class_name": "ImageClipVisionFeatureExtractor",
|
152 |
+
"vision_clip_model_path": os.path.join(
|
153 |
+
PROJECT_DIR, "checkpoints/IP-Adapter/models/image_encoder"
|
154 |
+
),
|
155 |
"w_ind_noise": 0.5,
|
156 |
"which2video": "video_middle",
|
157 |
"width": None,
|
|
|
289 |
)
|
290 |
b = 1
|
291 |
negative_embedding = [
|
292 |
+
[os.path.join(PROJECT_DIR, "checkpoints/embedding/badhandv4.pt"), "badhandv4"],
|
293 |
[
|
294 |
+
os.path.join(PROJECT_DIR, "checkpoints/embedding/ng_deepnegative_v1_75t.pt"),
|
295 |
"ng_deepnegative_v1_75t",
|
296 |
],
|
297 |
[
|
298 |
+
os.path.join(PROJECT_DIR, "checkpoints/embedding/EasyNegativeV2.safetensors"),
|
299 |
"EasyNegativeV2",
|
300 |
],
|
301 |
[
|
302 |
+
os.path.join(PROJECT_DIR, "checkpoints/embedding/bad_prompt_version2-neg.pt"),
|
303 |
"bad_prompt_version2-neg",
|
304 |
],
|
305 |
]
|
|
|
504 |
referencenet = load_referencenet_by_name(
|
505 |
model_name=referencenet_model_name,
|
506 |
# sd_model=sd_model_path,
|
507 |
+
# sd_model=os.path.join(PROJECT_DIR, "checkpoints//Moore-AnimateAnyone/AnimateAnyone/reference_unet.pth"),
|
508 |
sd_referencenet_model=referencenet_model_path,
|
509 |
cross_attention_dim=cross_attention_dim,
|
510 |
)
|
|
|
564 |
model_name=unet_model_name,
|
565 |
sd_unet_model=unet_model_path,
|
566 |
sd_model=sd_model_path,
|
567 |
+
# sd_model=os.path.join(PROJECT_DIR, "checkpoints//Moore-AnimateAnyone/AnimateAnyone/denoising_unet.pth"),
|
568 |
cross_attention_dim=cross_attention_dim,
|
569 |
need_t2i_facein=facein_model_name is not None,
|
570 |
# facein 目前没参与训练,但在unet中定义了,载入相关参数会报错,所以用strict控制
|
app_gradio_space.py
CHANGED
@@ -2,6 +2,7 @@ import os
|
|
2 |
import time
|
3 |
import pdb
|
4 |
import torch
|
|
|
5 |
torch.jit.script = lambda f: f
|
6 |
import timm
|
7 |
import cuid
|
@@ -20,6 +21,7 @@ CheckpointsDir = os.path.join(ProjectDir, "checkpoints")
|
|
20 |
ignore_video2video = False
|
21 |
max_image_edge = 960
|
22 |
|
|
|
23 |
sys.path.insert(0, f"{ProjectDir}/MMCM")
|
24 |
sys.path.insert(0, f"{ProjectDir}/scripts/gradio")
|
25 |
|
|
|
2 |
import time
|
3 |
import pdb
|
4 |
import torch
|
5 |
+
|
6 |
torch.jit.script = lambda f: f
|
7 |
import timm
|
8 |
import cuid
|
|
|
21 |
ignore_video2video = False
|
22 |
max_image_edge = 960
|
23 |
|
24 |
+
sys.path.insert(0, f"{ProjectDir}")
|
25 |
sys.path.insert(0, f"{ProjectDir}/MMCM")
|
26 |
sys.path.insert(0, f"{ProjectDir}/scripts/gradio")
|
27 |
|