anchorxia committed on
Commit
1f6fb31
·
1 Parent(s): 96d7ad8

fix gradio_space

Browse files
MuseV/scripts/gradio/gradio_text2video.py CHANGED
@@ -69,7 +69,7 @@ args_dict = {
69
  "context_stride": 1,
70
  "cross_attention_dim": 768,
71
  "face_image_path": None,
72
- "facein_model_cfg_path": "../../configs/model/facein.py",
73
  "facein_model_name": None,
74
  "facein_scale": 1.0,
75
  "fix_condition_images": False,
@@ -82,14 +82,18 @@ args_dict = {
82
  "img_length_ratio": 1.0,
83
  "img_weight": 0.001,
84
  "interpolation_factor": 1,
85
- "ip_adapter_face_model_cfg_path": "../../configs/model/ip_adapter.py",
 
 
86
  "ip_adapter_face_model_name": None,
87
  "ip_adapter_face_scale": 1.0,
88
- "ip_adapter_model_cfg_path": "../../configs/model/ip_adapter.py",
 
 
89
  "ip_adapter_model_name": "musev_referencenet",
90
  "ip_adapter_scale": 1.0,
91
  "ipadapter_image_path": None,
92
- "lcm_model_cfg_path": "../../configs/model/lcm_model.py",
93
  "lcm_model_name": None,
94
  "log_level": "INFO",
95
  "motion_speed": 8.0,
@@ -101,7 +105,7 @@ args_dict = {
101
  "need_img_based_video_noise": True,
102
  "need_redraw": False,
103
  "negative_prompt": "V2",
104
- "negprompt_cfg_path": "../../configs/model/negative_prompt.py",
105
  "noise_type": "video_fusion",
106
  "num_inference_steps": 30,
107
  "output_dir": "./results/",
@@ -115,22 +119,26 @@ args_dict = {
115
  "redraw_condition_image_with_ipdapter": True,
116
  "redraw_condition_image_with_referencenet": True,
117
  "referencenet_image_path": None,
118
- "referencenet_model_cfg_path": "../../configs/model/referencenet.py",
 
 
119
  "referencenet_model_name": "musev_referencenet",
120
  "save_filetype": "mp4",
121
  "save_images": False,
122
- "sd_model_cfg_path": "../../configs/model/T2I_all_model.py",
123
  "sd_model_name": "majicmixRealv6Fp16",
124
  "seed": None,
125
  "strength": 0.8,
126
  "target_datas": "boy_dance2",
127
- "test_data_path": "../../configs/infer/testcase_video_famous.yaml",
128
- "time_size": 24,
129
- "unet_model_cfg_path": "../../configs/model/motion_model.py",
 
 
130
  "unet_model_name": "musev_referencenet",
131
  "use_condition_image": True,
132
  "use_video_redraw": True,
133
- "vae_model_path": "../../checkpoints/vae/sd-vae-ft-mse",
134
  "video_guidance_scale": 3.5,
135
  "video_guidance_scale_end": None,
136
  "video_guidance_scale_method": "linear",
@@ -138,7 +146,9 @@ args_dict = {
138
  "video_num_inference_steps": 10,
139
  "video_overlap": 1,
140
  "vision_clip_extractor_class_name": "ImageClipVisionFeatureExtractor",
141
- "vision_clip_model_path": "../../checkpoints/IP-Adapter/models/image_encoder",
 
 
142
  "w_ind_noise": 0.5,
143
  "width": None,
144
  "write_info": False,
@@ -248,17 +258,17 @@ n_repeat = args.n_repeat
248
 
249
  b = 1
250
  negative_embedding = [
251
- ["../../checkpoints/embedding/badhandv4.pt", "badhandv4"],
252
  [
253
- "../../checkpoints/embedding/ng_deepnegative_v1_75t.pt",
254
  "ng_deepnegative_v1_75t",
255
  ],
256
  [
257
- "../../checkpoints/embedding/EasyNegativeV2.safetensors",
258
  "EasyNegativeV2",
259
  ],
260
  [
261
- "../../checkpoints/embedding/bad_prompt_version2-neg.pt",
262
  "bad_prompt_version2-neg",
263
  ],
264
  ]
@@ -466,7 +476,7 @@ if referencenet_model_name is not None and not use_v2v_predictor:
466
  referencenet = load_referencenet_by_name(
467
  model_name=referencenet_model_name,
468
  # sd_model=sd_model_path,
469
- # sd_model="../../checkpoints//Moore-AnimateAnyone/AnimateAnyone/reference_unet.pth",
470
  sd_referencenet_model=referencenet_model_path,
471
  cross_attention_dim=cross_attention_dim,
472
  )
@@ -517,7 +527,7 @@ for model_name, sd_model_params in sd_model_params_dict.items():
517
  model_name=unet_model_name,
518
  sd_unet_model=unet_model_path,
519
  sd_model=sd_model_path,
520
- # sd_model="../../checkpoints//Moore-AnimateAnyone/AnimateAnyone/denoising_unet.pth",
521
  cross_attention_dim=cross_attention_dim,
522
  need_t2i_facein=facein_model_name is not None,
523
  # facein 目前没参与训练,但在unet中定义了,载入相关参数会报错,所以用strict控制
 
69
  "context_stride": 1,
70
  "cross_attention_dim": 768,
71
  "face_image_path": None,
72
+ "facein_model_cfg_path": os.path.join(PROJECT_DIR, "configs/model/facein.py"),
73
  "facein_model_name": None,
74
  "facein_scale": 1.0,
75
  "fix_condition_images": False,
 
82
  "img_length_ratio": 1.0,
83
  "img_weight": 0.001,
84
  "interpolation_factor": 1,
85
+ "ip_adapter_face_model_cfg_path": os.path.join(
86
+ PROJECT_DIR, "configs/model/ip_adapter.py"
87
+ ),
88
  "ip_adapter_face_model_name": None,
89
  "ip_adapter_face_scale": 1.0,
90
+ "ip_adapter_model_cfg_path": os.path.join(
91
+ PROJECT_DIR, "configs/model/ip_adapter.py"
92
+ ),
93
  "ip_adapter_model_name": "musev_referencenet",
94
  "ip_adapter_scale": 1.0,
95
  "ipadapter_image_path": None,
96
+ "lcm_model_cfg_path": os.path.join(PROJECT_DIR, "configs/model/lcm_model.py"),
97
  "lcm_model_name": None,
98
  "log_level": "INFO",
99
  "motion_speed": 8.0,
 
105
  "need_img_based_video_noise": True,
106
  "need_redraw": False,
107
  "negative_prompt": "V2",
108
+ "negprompt_cfg_path": os.path.join(PROJECT_DIR, "configs/model/negative_prompt.py"),
109
  "noise_type": "video_fusion",
110
  "num_inference_steps": 30,
111
  "output_dir": "./results/",
 
119
  "redraw_condition_image_with_ipdapter": True,
120
  "redraw_condition_image_with_referencenet": True,
121
  "referencenet_image_path": None,
122
+ "referencenet_model_cfg_path": os.path.join(
123
+ PROJECT_DIR, "configs/model/referencenet.py"
124
+ ),
125
  "referencenet_model_name": "musev_referencenet",
126
  "save_filetype": "mp4",
127
  "save_images": False,
128
+ "sd_model_cfg_path": os.path.join(PROJECT_DIR, "configs/model/T2I_all_model.py"),
129
  "sd_model_name": "majicmixRealv6Fp16",
130
  "seed": None,
131
  "strength": 0.8,
132
  "target_datas": "boy_dance2",
133
+ "test_data_path": os.path.join(
134
+ PROJECT_DIR, "configs/infer/testcase_video_famous.yaml"
135
+ ),
136
+ "time_size": 12,
137
+ "unet_model_cfg_path": os.path.join(PROJECT_DIR, "configs/model/motion_model.py"),
138
  "unet_model_name": "musev_referencenet",
139
  "use_condition_image": True,
140
  "use_video_redraw": True,
141
+ "vae_model_path": os.path.join(PROJECT_DIR, "checkpoints/vae/sd-vae-ft-mse"),
142
  "video_guidance_scale": 3.5,
143
  "video_guidance_scale_end": None,
144
  "video_guidance_scale_method": "linear",
 
146
  "video_num_inference_steps": 10,
147
  "video_overlap": 1,
148
  "vision_clip_extractor_class_name": "ImageClipVisionFeatureExtractor",
149
+ "vision_clip_model_path": os.path.join(
150
+ PROJECT_DIR, "checkpoints/IP-Adapter/models/image_encoder"
151
+ ),
152
  "w_ind_noise": 0.5,
153
  "width": None,
154
  "write_info": False,
 
258
 
259
  b = 1
260
  negative_embedding = [
261
+ [os.path.join(PROJECT_DIR, "checkpoints/embedding/badhandv4.pt"), "badhandv4"],
262
  [
263
+ os.path.join(PROJECT_DIR, "checkpoints/embedding/ng_deepnegative_v1_75t.pt"),
264
  "ng_deepnegative_v1_75t",
265
  ],
266
  [
267
+ os.path.join(PROJECT_DIR, "checkpoints/embedding/EasyNegativeV2.safetensors"),
268
  "EasyNegativeV2",
269
  ],
270
  [
271
+ os.path.join(PROJECT_DIR, "checkpoints/embedding/bad_prompt_version2-neg.pt"),
272
  "bad_prompt_version2-neg",
273
  ],
274
  ]
 
476
  referencenet = load_referencenet_by_name(
477
  model_name=referencenet_model_name,
478
  # sd_model=sd_model_path,
479
+ # sd_model=os.path.join(PROJECT_DIR, "checkpoints//Moore-AnimateAnyone/AnimateAnyone/reference_unet.pth"),
480
  sd_referencenet_model=referencenet_model_path,
481
  cross_attention_dim=cross_attention_dim,
482
  )
 
527
  model_name=unet_model_name,
528
  sd_unet_model=unet_model_path,
529
  sd_model=sd_model_path,
530
+ # sd_model=os.path.join(PROJECT_DIR, "checkpoints//Moore-AnimateAnyone/AnimateAnyone/denoising_unet.pth"),
531
  cross_attention_dim=cross_attention_dim,
532
  need_t2i_facein=facein_model_name is not None,
533
  # facein 目前没参与训练,但在unet中定义了,载入相关参数会报错,所以用strict控制
MuseV/scripts/gradio/gradio_video2video.py CHANGED
@@ -66,7 +66,7 @@ args_dict = {
66
  "enable_zero_snr": False,
67
  "end_to_end": True,
68
  "face_image_path": None,
69
- "facein_model_cfg_path": "../../configs/model/facein.py",
70
  "facein_model_name": None,
71
  "facein_scale": 1.0,
72
  "fix_condition_images": False,
@@ -79,14 +79,18 @@ args_dict = {
79
  "img_length_ratio": 1.0,
80
  "img_weight": 0.001,
81
  "interpolation_factor": 1,
82
- "ip_adapter_face_model_cfg_path": "../../configs/model/ip_adapter.py",
 
 
83
  "ip_adapter_face_model_name": None,
84
  "ip_adapter_face_scale": 1.0,
85
- "ip_adapter_model_cfg_path": "../../configs/model/ip_adapter.py",
86
- "ip_adapter_model_name": "musev_referencenet_pose",
 
 
87
  "ip_adapter_scale": 1.0,
88
  "ipadapter_image_path": None,
89
- "lcm_model_cfg_path": "../../configs/model/lcm_model.py",
90
  "lcm_model_name": None,
91
  "log_level": "INFO",
92
  "motion_speed": 8.0,
@@ -100,7 +104,7 @@ args_dict = {
100
  "need_return_videos": False,
101
  "need_video2video": False,
102
  "negative_prompt": "V2",
103
- "negprompt_cfg_path": "../../configs/model/negative_prompt.py",
104
  "noise_type": "video_fusion",
105
  "num_inference_steps": 30,
106
  "output_dir": "./results/",
@@ -115,22 +119,26 @@ args_dict = {
115
  "redraw_condition_image_with_ipdapter": True,
116
  "redraw_condition_image_with_referencenet": True,
117
  "referencenet_image_path": None,
118
- "referencenet_model_cfg_path": "../../configs/model/referencenet.py",
 
 
119
  "referencenet_model_name": "musev_referencenet",
120
  "sample_rate": 1,
121
  "save_filetype": "mp4",
122
  "save_images": False,
123
- "sd_model_cfg_path": "../../configs/model/T2I_all_model.py",
124
  "sd_model_name": "majicmixRealv6Fp16",
125
  "seed": None,
126
  "strength": 0.8,
127
  "target_datas": "boy_dance2",
128
- "test_data_path": "./configs/infer/testcase_video_famous.yaml",
 
 
129
  "time_size": 12,
130
- "unet_model_cfg_path": "../../configs/model/motion_model.py",
131
  "unet_model_name": "musev_referencenet_pose",
132
  "use_condition_image": True,
133
- "vae_model_path": "../../checkpoints/vae/sd-vae-ft-mse",
134
  "video_guidance_scale": 3.5,
135
  "video_guidance_scale_end": None,
136
  "video_guidance_scale_method": "linear",
@@ -141,7 +149,9 @@ args_dict = {
141
  "video_overlap": 1,
142
  "video_strength": 1.0,
143
  "vision_clip_extractor_class_name": "ImageClipVisionFeatureExtractor",
144
- "vision_clip_model_path": "../../checkpoints/IP-Adapter/models/image_encoder",
 
 
145
  "w_ind_noise": 0.5,
146
  "which2video": "video_middle",
147
  "width": None,
@@ -279,17 +289,17 @@ else:
279
  )
280
  b = 1
281
  negative_embedding = [
282
- ["../../checkpoints/embedding/badhandv4.pt", "badhandv4"],
283
  [
284
- "../../checkpoints/embedding/ng_deepnegative_v1_75t.pt",
285
  "ng_deepnegative_v1_75t",
286
  ],
287
  [
288
- "../../checkpoints/embedding/EasyNegativeV2.safetensors",
289
  "EasyNegativeV2",
290
  ],
291
  [
292
- "../../checkpoints/embedding/bad_prompt_version2-neg.pt",
293
  "bad_prompt_version2-neg",
294
  ],
295
  ]
@@ -494,7 +504,7 @@ if referencenet_model_name is not None:
494
  referencenet = load_referencenet_by_name(
495
  model_name=referencenet_model_name,
496
  # sd_model=sd_model_path,
497
- # sd_model="../../checkpoints/Moore-AnimateAnyone/AnimateAnyone/reference_unet.pth",
498
  sd_referencenet_model=referencenet_model_path,
499
  cross_attention_dim=cross_attention_dim,
500
  )
@@ -554,7 +564,7 @@ for model_name, sd_model_params in sd_model_params_dict.items():
554
  model_name=unet_model_name,
555
  sd_unet_model=unet_model_path,
556
  sd_model=sd_model_path,
557
- # sd_model="../../checkpoints/Moore-AnimateAnyone/AnimateAnyone/denoising_unet.pth",
558
  cross_attention_dim=cross_attention_dim,
559
  need_t2i_facein=facein_model_name is not None,
560
  # facein 目前没参与训练,但在unet中定义了,载入相关参数会报错,所以用strict控制
 
66
  "enable_zero_snr": False,
67
  "end_to_end": True,
68
  "face_image_path": None,
69
+ "facein_model_cfg_path": os.path.join(PROJECT_DIR, "configs/model/facein.py"),
70
  "facein_model_name": None,
71
  "facein_scale": 1.0,
72
  "fix_condition_images": False,
 
79
  "img_length_ratio": 1.0,
80
  "img_weight": 0.001,
81
  "interpolation_factor": 1,
82
+ "ip_adapter_face_model_cfg_path": os.path.join(
83
+ PROJECT_DIR, "configs/model/ip_adapter.py"
84
+ ),
85
  "ip_adapter_face_model_name": None,
86
  "ip_adapter_face_scale": 1.0,
87
+ "ip_adapter_model_cfg_path": os.path.join(
88
+ PROJECT_DIR, "configs/model/ip_adapter.py"
89
+ ),
90
+ "ip_adapter_model_name": "musev_referencenet",
91
  "ip_adapter_scale": 1.0,
92
  "ipadapter_image_path": None,
93
+ "lcm_model_cfg_path": os.path.join(PROJECT_DIR, "configs/model/lcm_model.py"),
94
  "lcm_model_name": None,
95
  "log_level": "INFO",
96
  "motion_speed": 8.0,
 
104
  "need_return_videos": False,
105
  "need_video2video": False,
106
  "negative_prompt": "V2",
107
+ "negprompt_cfg_path": os.path.join(PROJECT_DIR, "configs/model/negative_prompt.py"),
108
  "noise_type": "video_fusion",
109
  "num_inference_steps": 30,
110
  "output_dir": "./results/",
 
119
  "redraw_condition_image_with_ipdapter": True,
120
  "redraw_condition_image_with_referencenet": True,
121
  "referencenet_image_path": None,
122
+ "referencenet_model_cfg_path": os.path.join(
123
+ PROJECT_DIR, "configs/model/referencenet.py"
124
+ ),
125
  "referencenet_model_name": "musev_referencenet",
126
  "sample_rate": 1,
127
  "save_filetype": "mp4",
128
  "save_images": False,
129
+ "sd_model_cfg_path": os.path.join(PROJECT_DIR, "configs/model/T2I_all_model.py"),
130
  "sd_model_name": "majicmixRealv6Fp16",
131
  "seed": None,
132
  "strength": 0.8,
133
  "target_datas": "boy_dance2",
134
+ "test_data_path": os.path.join(
135
+ PROJECT_DIR, "configs/infer/testcase_video_famous.yaml"
136
+ ),
137
  "time_size": 12,
138
+ "unet_model_cfg_path": os.path.join(PROJECT_DIR, "configs/model/motion_model.py"),
139
  "unet_model_name": "musev_referencenet_pose",
140
  "use_condition_image": True,
141
+ "vae_model_path": os.path.join(PROJECT_DIR, "checkpoints/vae/sd-vae-ft-mse"),
142
  "video_guidance_scale": 3.5,
143
  "video_guidance_scale_end": None,
144
  "video_guidance_scale_method": "linear",
 
149
  "video_overlap": 1,
150
  "video_strength": 1.0,
151
  "vision_clip_extractor_class_name": "ImageClipVisionFeatureExtractor",
152
+ "vision_clip_model_path": os.path.join(
153
+ PROJECT_DIR, "checkpoints/IP-Adapter/models/image_encoder"
154
+ ),
155
  "w_ind_noise": 0.5,
156
  "which2video": "video_middle",
157
  "width": None,
 
289
  )
290
  b = 1
291
  negative_embedding = [
292
+ [os.path.join(PROJECT_DIR, "checkpoints/embedding/badhandv4.pt"), "badhandv4"],
293
  [
294
+ os.path.join(PROJECT_DIR, "checkpoints/embedding/ng_deepnegative_v1_75t.pt"),
295
  "ng_deepnegative_v1_75t",
296
  ],
297
  [
298
+ os.path.join(PROJECT_DIR, "checkpoints/embedding/EasyNegativeV2.safetensors"),
299
  "EasyNegativeV2",
300
  ],
301
  [
302
+ os.path.join(PROJECT_DIR, "checkpoints/embedding/bad_prompt_version2-neg.pt"),
303
  "bad_prompt_version2-neg",
304
  ],
305
  ]
 
504
  referencenet = load_referencenet_by_name(
505
  model_name=referencenet_model_name,
506
  # sd_model=sd_model_path,
507
+ # sd_model=os.path.join(PROJECT_DIR, "checkpoints//Moore-AnimateAnyone/AnimateAnyone/reference_unet.pth"),
508
  sd_referencenet_model=referencenet_model_path,
509
  cross_attention_dim=cross_attention_dim,
510
  )
 
564
  model_name=unet_model_name,
565
  sd_unet_model=unet_model_path,
566
  sd_model=sd_model_path,
567
+ # sd_model=os.path.join(PROJECT_DIR, "checkpoints//Moore-AnimateAnyone/AnimateAnyone/denoising_unet.pth"),
568
  cross_attention_dim=cross_attention_dim,
569
  need_t2i_facein=facein_model_name is not None,
570
  # facein 目前没参与训练,但在unet中定义了,载入相关参数会报错,所以用strict控制
app_gradio_space.py CHANGED
@@ -2,6 +2,7 @@ import os
2
  import time
3
  import pdb
4
  import torch
 
5
  torch.jit.script = lambda f: f
6
  import timm
7
  import cuid
@@ -20,6 +21,7 @@ CheckpointsDir = os.path.join(ProjectDir, "checkpoints")
20
  ignore_video2video = False
21
  max_image_edge = 960
22
 
 
23
  sys.path.insert(0, f"{ProjectDir}/MMCM")
24
  sys.path.insert(0, f"{ProjectDir}/scripts/gradio")
25
 
 
2
  import time
3
  import pdb
4
  import torch
5
+
6
  torch.jit.script = lambda f: f
7
  import timm
8
  import cuid
 
21
  ignore_video2video = False
22
  max_image_edge = 960
23
 
24
+ sys.path.insert(0, f"{ProjectDir}")
25
  sys.path.insert(0, f"{ProjectDir}/MMCM")
26
  sys.path.insert(0, f"{ProjectDir}/scripts/gradio")
27