Spaces:
Paused
Paused
ekhatskevich
committed on
Commit
·
7802e94
1
Parent(s):
f28659f
add config
Browse files- app.py +1 -1
- config/ace_plus_fft.yaml +192 -0
- models/model_zoo.yaml +0 -34
app.py
CHANGED
@@ -12,7 +12,7 @@ from inference.ace_plus_inference import ACEInference
|
|
12 |
from scepter.modules.utils.config import Config
|
13 |
|
14 |
|
15 |
-
config_path = os.path.join("
|
16 |
cfg = Config(load=True, cfg_file=config_path)
|
17 |
|
18 |
# Instantiate the ACEInference object.
|
|
|
12 |
from scepter.modules.utils.config import Config
|
13 |
|
14 |
|
15 |
+
config_path = os.path.join("config", "ace_plus_fft.yaml")
|
16 |
cfg = Config(load=True, cfg_file=config_path)
|
17 |
|
18 |
# Instantiate the ACEInference object.
|
config/ace_plus_fft.yaml
ADDED
@@ -0,0 +1,192 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
NAME: ACEInference
|
2 |
+
DTYPE: bfloat16
|
3 |
+
VERSION: fft
|
4 |
+
IS_DEFAULT: True
|
5 |
+
MAX_SEQ_LEN: 4096
|
6 |
+
MODEL:
|
7 |
+
NAME: LatentDiffusionACEPlus
|
8 |
+
PARAMETERIZATION: rf
|
9 |
+
TIMESTEPS: 1000
|
10 |
+
GUIDE_SCALE: 1.0
|
11 |
+
PRETRAINED_MODEL:
|
12 |
+
IGNORE_KEYS: [ ]
|
13 |
+
USE_EMA: False
|
14 |
+
EVAL_EMA: False
|
15 |
+
SIZE_FACTOR: 8
|
16 |
+
DIFFUSION:
|
17 |
+
NAME: DiffusionFluxRF
|
18 |
+
PREDICTION_TYPE: raw
|
19 |
+
NOISE_NORM: True
|
20 |
+
# NOISE_SCHEDULER DESCRIPTION: TYPE: default: ''
|
21 |
+
NOISE_SCHEDULER:
|
22 |
+
NAME: FlowMatchFluxShiftScheduler
|
23 |
+
SHIFT: False
|
24 |
+
PRE_T_SAMPLE: True
|
25 |
+
PRE_T_SAMPLE_FOLD: 1
|
26 |
+
SIGMOID_SCALE: 1
|
27 |
+
BASE_SHIFT: 0.5
|
28 |
+
MAX_SHIFT: 1.15
|
29 |
+
SAMPLER_SCHEDULER:
|
30 |
+
NAME: FlowMatchFluxShiftScheduler
|
31 |
+
SHIFT: True
|
32 |
+
PRE_T_SAMPLE: False
|
33 |
+
SIGMOID_SCALE: 1
|
34 |
+
BASE_SHIFT: 0.5
|
35 |
+
MAX_SHIFT: 1.15
|
36 |
+
|
37 |
+
#
|
38 |
+
DIFFUSION_MODEL:
|
39 |
+
# NAME DESCRIPTION: TYPE: default: 'Flux'
|
40 |
+
NAME: FluxMRModiACEPlus
|
41 |
+
PRETRAINED_MODEL: ${ACE_PLUS_FFT_MODEL}
|
42 |
+
# IN_CHANNELS DESCRIPTION: model's input channels. TYPE: int default: 64
|
43 |
+
IN_CHANNELS: 448
|
44 |
+
# OUT_CHANNELS DESCRIPTION: model's output channels. TYPE: int default: 64
|
45 |
+
OUT_CHANNELS: 64
|
46 |
+
# HIDDEN_SIZE DESCRIPTION: model's hidden size. TYPE: int default: 1024
|
47 |
+
HIDDEN_SIZE: 3072
|
48 |
+
REDUX_DIM: 1152
|
49 |
+
# NUM_HEADS DESCRIPTION: number of heads in the transformer. TYPE: int default: 16
|
50 |
+
NUM_HEADS: 24
|
51 |
+
# AXES_DIM DESCRIPTION: dimensions of the axes of the positional encoding. TYPE: list default: [16, 56, 56]
|
52 |
+
AXES_DIM: [ 16, 56, 56 ]
|
53 |
+
# THETA DESCRIPTION: theta for positional encoding. TYPE: int default: 10000
|
54 |
+
THETA: 10000
|
55 |
+
# VEC_IN_DIM DESCRIPTION: dimension of the vector input. TYPE: int default: 768
|
56 |
+
VEC_IN_DIM: 768
|
57 |
+
# GUIDANCE_EMBED DESCRIPTION: whether to use guidance embedding. TYPE: bool default: False
|
58 |
+
GUIDANCE_EMBED: True
|
59 |
+
# CONTEXT_IN_DIM DESCRIPTION: dimension of the context input. TYPE: int default: 4096
|
60 |
+
CONTEXT_IN_DIM: 4096
|
61 |
+
# MLP_RATIO DESCRIPTION: ratio of mlp hidden size to hidden size. TYPE: float default: 4.0
|
62 |
+
MLP_RATIO: 4.0
|
63 |
+
# QKV_BIAS DESCRIPTION: whether to use bias in qkv projection. TYPE: bool default: True
|
64 |
+
QKV_BIAS: True
|
65 |
+
# DEPTH DESCRIPTION: number of transformer blocks. TYPE: int default: 19
|
66 |
+
DEPTH: 19
|
67 |
+
# DEPTH_SINGLE_BLOCKS DESCRIPTION: number of transformer blocks in the single stream block. TYPE: int default: 38
|
68 |
+
DEPTH_SINGLE_BLOCKS: 38
|
69 |
+
ATTN_BACKEND: flash_attn
|
70 |
+
|
71 |
+
#
|
72 |
+
FIRST_STAGE_MODEL:
|
73 |
+
NAME: AutoencoderKLFlux
|
74 |
+
EMBED_DIM: 16
|
75 |
+
PRETRAINED_MODEL: ${FLUX_FILL_PATH}@ae.safetensors
|
76 |
+
IGNORE_KEYS: [ ]
|
77 |
+
BATCH_SIZE: 8
|
78 |
+
USE_CONV: False
|
79 |
+
SCALE_FACTOR: 0.3611
|
80 |
+
SHIFT_FACTOR: 0.1159
|
81 |
+
#
|
82 |
+
ENCODER:
|
83 |
+
NAME: Encoder
|
84 |
+
CH: 128
|
85 |
+
OUT_CH: 3
|
86 |
+
NUM_RES_BLOCKS: 2
|
87 |
+
IN_CHANNELS: 3
|
88 |
+
ATTN_RESOLUTIONS: [ ]
|
89 |
+
CH_MULT: [ 1, 2, 4, 4 ]
|
90 |
+
Z_CHANNELS: 16
|
91 |
+
DOUBLE_Z: True
|
92 |
+
DROPOUT: 0.0
|
93 |
+
RESAMP_WITH_CONV: True
|
94 |
+
#
|
95 |
+
DECODER:
|
96 |
+
NAME: Decoder
|
97 |
+
CH: 128
|
98 |
+
OUT_CH: 3
|
99 |
+
NUM_RES_BLOCKS: 2
|
100 |
+
IN_CHANNELS: 3
|
101 |
+
ATTN_RESOLUTIONS: [ ]
|
102 |
+
CH_MULT: [ 1, 2, 4, 4 ]
|
103 |
+
Z_CHANNELS: 16
|
104 |
+
DROPOUT: 0.0
|
105 |
+
RESAMP_WITH_CONV: True
|
106 |
+
GIVE_PRE_END: False
|
107 |
+
TANH_OUT: False
|
108 |
+
#
|
109 |
+
COND_STAGE_MODEL:
|
110 |
+
# NAME DESCRIPTION: TYPE: default: 'T5PlusClipFluxEmbedder'
|
111 |
+
NAME: T5PlusClipFluxEmbedder
|
112 |
+
# T5_MODEL DESCRIPTION: TYPE: default: ''
|
113 |
+
T5_MODEL:
|
114 |
+
# NAME DESCRIPTION: TYPE: default: 'HFEmbedder'
|
115 |
+
NAME: HFEmbedder
|
116 |
+
# HF_MODEL_CLS DESCRIPTION: huggingface cls in transformers TYPE: NoneType default: None
|
117 |
+
HF_MODEL_CLS: T5EncoderModel
|
118 |
+
# MODEL_PATH DESCRIPTION: model folder path TYPE: NoneType default: None
|
119 |
+
MODEL_PATH: ${FLUX_FILL_PATH}@text_encoder_2/
|
120 |
+
# HF_TOKENIZER_CLS DESCRIPTION: huggingface cls in transformers TYPE: NoneType default: None
|
121 |
+
HF_TOKENIZER_CLS: T5Tokenizer
|
122 |
+
# TOKENIZER_PATH DESCRIPTION: tokenizer folder path TYPE: NoneType default: None
|
123 |
+
TOKENIZER_PATH: ${FLUX_FILL_PATH}@tokenizer_2/
|
124 |
+
ADDED_IDENTIFIER: [ '<img>','{image}', '{caption}', '{mask}', '{ref_image}', '{image1}', '{image2}', '{image3}', '{image4}', '{image5}', '{image6}', '{image7}', '{image8}', '{image9}' ]
|
125 |
+
# MAX_LENGTH DESCRIPTION: max length of input TYPE: int default: 77
|
126 |
+
MAX_LENGTH: 512
|
127 |
+
# OUTPUT_KEY DESCRIPTION: output key TYPE: str default: 'last_hidden_state'
|
128 |
+
OUTPUT_KEY: last_hidden_state
|
129 |
+
# D_TYPE DESCRIPTION: dtype TYPE: str default: 'bfloat16'
|
130 |
+
D_TYPE: bfloat16
|
131 |
+
# BATCH_INFER DESCRIPTION: batch infer TYPE: bool default: False
|
132 |
+
BATCH_INFER: False
|
133 |
+
CLEAN: whitespace
|
134 |
+
# CLIP_MODEL DESCRIPTION: TYPE: default: ''
|
135 |
+
CLIP_MODEL:
|
136 |
+
# NAME DESCRIPTION: TYPE: default: 'HFEmbedder'
|
137 |
+
NAME: HFEmbedder
|
138 |
+
# HF_MODEL_CLS DESCRIPTION: huggingface cls in transformers TYPE: NoneType default: None
|
139 |
+
HF_MODEL_CLS: CLIPTextModel
|
140 |
+
# MODEL_PATH DESCRIPTION: model folder path TYPE: NoneType default: None
|
141 |
+
MODEL_PATH: ${FLUX_FILL_PATH}@text_encoder/
|
142 |
+
# HF_TOKENIZER_CLS DESCRIPTION: huggingface cls in transformers TYPE: NoneType default: None
|
143 |
+
HF_TOKENIZER_CLS: CLIPTokenizer
|
144 |
+
# TOKENIZER_PATH DESCRIPTION: tokenizer folder path TYPE: NoneType default: None
|
145 |
+
TOKENIZER_PATH: ${FLUX_FILL_PATH}@tokenizer/
|
146 |
+
# MAX_LENGTH DESCRIPTION: max length of input TYPE: int default: 77
|
147 |
+
MAX_LENGTH: 77
|
148 |
+
# OUTPUT_KEY DESCRIPTION: output key TYPE: str default: 'last_hidden_state'
|
149 |
+
OUTPUT_KEY: pooler_output
|
150 |
+
# D_TYPE DESCRIPTION: dtype TYPE: str default: 'bfloat16'
|
151 |
+
D_TYPE: bfloat16
|
152 |
+
# BATCH_INFER DESCRIPTION: batch infer TYPE: bool default: False
|
153 |
+
BATCH_INFER: True
|
154 |
+
CLEAN: whitespace
|
155 |
+
|
156 |
+
PREPROCESSOR:
|
157 |
+
- TYPE: repainting
|
158 |
+
REPAINTING_SCALE: 1.0
|
159 |
+
ANNOTATOR:
|
160 |
+
- TYPE: no_preprocess
|
161 |
+
REPAINTING_SCALE: 0.0
|
162 |
+
ANNOTATOR:
|
163 |
+
- TYPE: mosaic_repainting
|
164 |
+
REPAINTING_SCALE: 0.0
|
165 |
+
ANNOTATOR:
|
166 |
+
NAME: ColorAnnotator
|
167 |
+
RATIO: 64
|
168 |
+
- TYPE: contour_repainting
|
169 |
+
REPAINTING_SCALE: 0.0
|
170 |
+
ANNOTATOR:
|
171 |
+
NAME: InfoDrawContourAnnotator
|
172 |
+
INPUT_NC: 3
|
173 |
+
OUTPUT_NC: 1
|
174 |
+
N_RESIDUAL_BLOCKS: 3
|
175 |
+
SIGMOID: True
|
176 |
+
PRETRAINED_MODEL: "ms://iic/scepter_annotator@annotator/ckpts/informative_drawing_contour_style.pth"
|
177 |
+
- TYPE: depth_repainting
|
178 |
+
REPAINTING_SCALE: 0.0
|
179 |
+
ANNOTATOR:
|
180 |
+
NAME: MidasDetector
|
181 |
+
PRETRAINED_MODEL: "ms://iic/scepter_annotator@annotator/ckpts/dpt_hybrid-midas-501f0c75.pt"
|
182 |
+
- TYPE: recolorizing
|
183 |
+
REPAINTING_SCALE: 0.0
|
184 |
+
ANNOTATOR:
|
185 |
+
NAME: GrayAnnotator
|
186 |
+
|
187 |
+
SAMPLE_ARGS:
|
188 |
+
SAMPLE_STEPS: 28
|
189 |
+
SAMPLER: flow_euler
|
190 |
+
SEED: 42
|
191 |
+
IMAGE_SIZE: [ 1024, 1024 ]
|
192 |
+
GUIDE_SCALE: 50
|
models/model_zoo.yaml
DELETED
@@ -1,34 +0,0 @@
|
|
1 |
-
NAME: FaceSwapConfig
|
2 |
-
MODEL:
|
3 |
-
PORTRAIT:
|
4 |
-
MODEL_PATH: ${PORTRAIT_MODEL_PATH}
|
5 |
-
SUBJECT:
|
6 |
-
MODEL_PATH: ${SUBJECT_MODEL_PATH}
|
7 |
-
LOCAL_EDITING:
|
8 |
-
MODEL_PATH: ${LOCAL_MODEL_PATH}
|
9 |
-
REPAINTING_SCALE: 0.5
|
10 |
-
PREPROCESSOR:
|
11 |
-
- NAME: CannyAnnotator
|
12 |
-
TYPE: canny_repaintingß
|
13 |
-
LOW_THRESHOLD: 100
|
14 |
-
HIGH_THRESHOLD: 200
|
15 |
-
- NAME: ColorAnnotator
|
16 |
-
TYPE: mosaic_repainting
|
17 |
-
RATIO: 64
|
18 |
-
- NAME: InfoDrawContourAnnotator
|
19 |
-
TYPE: contour_repainting
|
20 |
-
INPUT_NC: 3
|
21 |
-
OUTPUT_NC: 1
|
22 |
-
N_RESIDUAL_BLOCKS: 3
|
23 |
-
SIGMOID: True
|
24 |
-
PRETRAINED_MODEL: "ms://iic/scepter_annotator@annotator/ckpts/informative_drawing_contour_style.pth"
|
25 |
-
- NAME: MidasDetector
|
26 |
-
PRETRAINED_MODEL: "ms://iic/scepter_annotator@annotator/ckpts/dpt_hybrid-midas-501f0c75.pt"
|
27 |
-
TYPE: depth_repainting
|
28 |
-
- NAME: GrayAnnotator
|
29 |
-
TYPE: recolorizing
|
30 |
-
MAX_SEQ_LEN: 77,
|
31 |
-
SAMPLE_ARGS: {
|
32 |
-
prompt: "Face swap"
|
33 |
-
}
|
34 |
-
DTYPE: bfloat16
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|