ekhatskevich committed on
Commit
7802e94
·
1 Parent(s): f28659f

add config

Browse files
Files changed (3) hide show
  1. app.py +1 -1
  2. config/ace_plus_fft.yaml +192 -0
  3. models/model_zoo.yaml +0 -34
app.py CHANGED
@@ -12,7 +12,7 @@ from inference.ace_plus_inference import ACEInference
12
  from scepter.modules.utils.config import Config
13
 
14
 
15
- config_path = os.path.join("models", "model_zoo.yaml")
16
  cfg = Config(load=True, cfg_file=config_path)
17
 
18
  # Instantiate the ACEInference object.
 
12
  from scepter.modules.utils.config import Config
13
 
14
 
15
+ config_path = os.path.join("config", "ace_plus_fft.yaml")
16
  cfg = Config(load=True, cfg_file=config_path)
17
 
18
  # Instantiate the ACEInference object.
config/ace_plus_fft.yaml ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ NAME: ACEInference
2
+ DTYPE: bfloat16
3
+ VERSION: fft
4
+ IS_DEFAULT: True
5
+ MAX_SEQ_LEN: 4096
6
+ MODEL:
7
+ NAME: LatentDiffusionACEPlus
8
+ PARAMETERIZATION: rf
9
+ TIMESTEPS: 1000
10
+ GUIDE_SCALE: 1.0
11
+ PRETRAINED_MODEL:
12
+ IGNORE_KEYS: [ ]
13
+ USE_EMA: False
14
+ EVAL_EMA: False
15
+ SIZE_FACTOR: 8
16
+ DIFFUSION:
17
+ NAME: DiffusionFluxRF
18
+ PREDICTION_TYPE: raw
19
+ NOISE_NORM: True
20
+ # NOISE_SCHEDULER DESCRIPTION: TYPE: default: ''
21
+ NOISE_SCHEDULER:
22
+ NAME: FlowMatchFluxShiftScheduler
23
+ SHIFT: False
24
+ PRE_T_SAMPLE: True
25
+ PRE_T_SAMPLE_FOLD: 1
26
+ SIGMOID_SCALE: 1
27
+ BASE_SHIFT: 0.5
28
+ MAX_SHIFT: 1.15
29
+ SAMPLER_SCHEDULER:
30
+ NAME: FlowMatchFluxShiftScheduler
31
+ SHIFT: True
32
+ PRE_T_SAMPLE: False
33
+ SIGMOID_SCALE: 1
34
+ BASE_SHIFT: 0.5
35
+ MAX_SHIFT: 1.15
36
+
37
+ #
38
+ DIFFUSION_MODEL:
39
+ # NAME DESCRIPTION: TYPE: default: 'Flux'
40
+ NAME: FluxMRModiACEPlus
41
+ PRETRAINED_MODEL: ${ACE_PLUS_FFT_MODEL}
42
+ # IN_CHANNELS DESCRIPTION: model's input channels. TYPE: int default: 64
43
+ IN_CHANNELS: 448
44
+ # OUT_CHANNELS DESCRIPTION: model's output channels. TYPE: int default: 64
45
+ OUT_CHANNELS: 64
46
+ # HIDDEN_SIZE DESCRIPTION: model's hidden size. TYPE: int default: 1024
47
+ HIDDEN_SIZE: 3072
48
+ REDUX_DIM: 1152
49
+ # NUM_HEADS DESCRIPTION: number of heads in the transformer. TYPE: int default: 16
50
+ NUM_HEADS: 24
51
+ # AXES_DIM DESCRIPTION: dimensions of the axes of the positional encoding. TYPE: list default: [16, 56, 56]
52
+ AXES_DIM: [ 16, 56, 56 ]
53
+ # THETA DESCRIPTION: theta for positional encoding. TYPE: int default: 10000
54
+ THETA: 10000
55
+ # VEC_IN_DIM DESCRIPTION: dimension of the vector input. TYPE: int default: 768
56
+ VEC_IN_DIM: 768
57
+ # GUIDANCE_EMBED DESCRIPTION: whether to use guidance embedding. TYPE: bool default: False
58
+ GUIDANCE_EMBED: True
59
+ # CONTEXT_IN_DIM DESCRIPTION: dimension of the context input. TYPE: int default: 4096
60
+ CONTEXT_IN_DIM: 4096
61
+ # MLP_RATIO DESCRIPTION: ratio of mlp hidden size to hidden size. TYPE: float default: 4.0
62
+ MLP_RATIO: 4.0
63
+ # QKV_BIAS DESCRIPTION: whether to use bias in qkv projection. TYPE: bool default: True
64
+ QKV_BIAS: True
65
+ # DEPTH DESCRIPTION: number of transformer blocks. TYPE: int default: 19
66
+ DEPTH: 19
67
+ # DEPTH_SINGLE_BLOCKS DESCRIPTION: number of transformer blocks in the single stream block. TYPE: int default: 38
68
+ DEPTH_SINGLE_BLOCKS: 38
69
+ ATTN_BACKEND: flash_attn
70
+
71
+ #
72
+ FIRST_STAGE_MODEL:
73
+ NAME: AutoencoderKLFlux
74
+ EMBED_DIM: 16
75
+ PRETRAINED_MODEL: ${FLUX_FILL_PATH}@ae.safetensors
76
+ IGNORE_KEYS: [ ]
77
+ BATCH_SIZE: 8
78
+ USE_CONV: False
79
+ SCALE_FACTOR: 0.3611
80
+ SHIFT_FACTOR: 0.1159
81
+ #
82
+ ENCODER:
83
+ NAME: Encoder
84
+ CH: 128
85
+ OUT_CH: 3
86
+ NUM_RES_BLOCKS: 2
87
+ IN_CHANNELS: 3
88
+ ATTN_RESOLUTIONS: [ ]
89
+ CH_MULT: [ 1, 2, 4, 4 ]
90
+ Z_CHANNELS: 16
91
+ DOUBLE_Z: True
92
+ DROPOUT: 0.0
93
+ RESAMP_WITH_CONV: True
94
+ #
95
+ DECODER:
96
+ NAME: Decoder
97
+ CH: 128
98
+ OUT_CH: 3
99
+ NUM_RES_BLOCKS: 2
100
+ IN_CHANNELS: 3
101
+ ATTN_RESOLUTIONS: [ ]
102
+ CH_MULT: [ 1, 2, 4, 4 ]
103
+ Z_CHANNELS: 16
104
+ DROPOUT: 0.0
105
+ RESAMP_WITH_CONV: True
106
+ GIVE_PRE_END: False
107
+ TANH_OUT: False
108
+ #
109
+ COND_STAGE_MODEL:
110
+ # NAME DESCRIPTION: TYPE: default: 'T5PlusClipFluxEmbedder'
111
+ NAME: T5PlusClipFluxEmbedder
112
+ # T5_MODEL DESCRIPTION: TYPE: default: ''
113
+ T5_MODEL:
114
+ # NAME DESCRIPTION: TYPE: default: 'HFEmbedder'
115
+ NAME: HFEmbedder
116
+ # HF_MODEL_CLS DESCRIPTION: huggingface cls in transformers TYPE: NoneType default: None
117
+ HF_MODEL_CLS: T5EncoderModel
118
+ # MODEL_PATH DESCRIPTION: model folder path TYPE: NoneType default: None
119
+ MODEL_PATH: ${FLUX_FILL_PATH}@text_encoder_2/
120
+ # HF_TOKENIZER_CLS DESCRIPTION: huggingface cls in transformers TYPE: NoneType default: None
121
+ HF_TOKENIZER_CLS: T5Tokenizer
122
+ # TOKENIZER_PATH DESCRIPTION: tokenizer folder path TYPE: NoneType default: None
123
+ TOKENIZER_PATH: ${FLUX_FILL_PATH}@tokenizer_2/
124
+ ADDED_IDENTIFIER: [ '<img>','{image}', '{caption}', '{mask}', '{ref_image}', '{image1}', '{image2}', '{image3}', '{image4}', '{image5}', '{image6}', '{image7}', '{image8}', '{image9}' ]
125
+ # MAX_LENGTH DESCRIPTION: max length of input TYPE: int default: 77
126
+ MAX_LENGTH: 512
127
+ # OUTPUT_KEY DESCRIPTION: output key TYPE: str default: 'last_hidden_state'
128
+ OUTPUT_KEY: last_hidden_state
129
+ # D_TYPE DESCRIPTION: dtype TYPE: str default: 'bfloat16'
130
+ D_TYPE: bfloat16
131
+ # BATCH_INFER DESCRIPTION: batch infer TYPE: bool default: False
132
+ BATCH_INFER: False
133
+ CLEAN: whitespace
134
+ # CLIP_MODEL DESCRIPTION: TYPE: default: ''
135
+ CLIP_MODEL:
136
+ # NAME DESCRIPTION: TYPE: default: 'HFEmbedder'
137
+ NAME: HFEmbedder
138
+ # HF_MODEL_CLS DESCRIPTION: huggingface cls in transformers TYPE: NoneType default: None
139
+ HF_MODEL_CLS: CLIPTextModel
140
+ # MODEL_PATH DESCRIPTION: model folder path TYPE: NoneType default: None
141
+ MODEL_PATH: ${FLUX_FILL_PATH}@text_encoder/
142
+ # HF_TOKENIZER_CLS DESCRIPTION: huggingface cls in transformers TYPE: NoneType default: None
143
+ HF_TOKENIZER_CLS: CLIPTokenizer
144
+ # TOKENIZER_PATH DESCRIPTION: tokenizer folder path TYPE: NoneType default: None
145
+ TOKENIZER_PATH: ${FLUX_FILL_PATH}@tokenizer/
146
+ # MAX_LENGTH DESCRIPTION: max length of input TYPE: int default: 77
147
+ MAX_LENGTH: 77
148
+ # OUTPUT_KEY DESCRIPTION: output key TYPE: str default: 'last_hidden_state'
149
+ OUTPUT_KEY: pooler_output
150
+ # D_TYPE DESCRIPTION: dtype TYPE: str default: 'bfloat16'
151
+ D_TYPE: bfloat16
152
+ # BATCH_INFER DESCRIPTION: batch infer TYPE: bool default: False
153
+ BATCH_INFER: True
154
+ CLEAN: whitespace
155
+
156
+ PREPROCESSOR:
157
+ - TYPE: repainting
158
+ REPAINTING_SCALE: 1.0
159
+ ANNOTATOR:
160
+ - TYPE: no_preprocess
161
+ REPAINTING_SCALE: 0.0
162
+ ANNOTATOR:
163
+ - TYPE: mosaic_repainting
164
+ REPAINTING_SCALE: 0.0
165
+ ANNOTATOR:
166
+ NAME: ColorAnnotator
167
+ RATIO: 64
168
+ - TYPE: contour_repainting
169
+ REPAINTING_SCALE: 0.0
170
+ ANNOTATOR:
171
+ NAME: InfoDrawContourAnnotator
172
+ INPUT_NC: 3
173
+ OUTPUT_NC: 1
174
+ N_RESIDUAL_BLOCKS: 3
175
+ SIGMOID: True
176
+ PRETRAINED_MODEL: "ms://iic/scepter_annotator@annotator/ckpts/informative_drawing_contour_style.pth"
177
+ - TYPE: depth_repainting
178
+ REPAINTING_SCALE: 0.0
179
+ ANNOTATOR:
180
+ NAME: MidasDetector
181
+ PRETRAINED_MODEL: "ms://iic/scepter_annotator@annotator/ckpts/dpt_hybrid-midas-501f0c75.pt"
182
+ - TYPE: recolorizing
183
+ REPAINTING_SCALE: 0.0
184
+ ANNOTATOR:
185
+ NAME: GrayAnnotator
186
+
187
+ SAMPLE_ARGS:
188
+ SAMPLE_STEPS: 28
189
+ SAMPLER: flow_euler
190
+ SEED: 42
191
+ IMAGE_SIZE: [ 1024, 1024 ]
192
+ GUIDE_SCALE: 50
models/model_zoo.yaml DELETED
@@ -1,34 +0,0 @@
1
- NAME: FaceSwapConfig
2
- MODEL:
3
- PORTRAIT:
4
- MODEL_PATH: ${PORTRAIT_MODEL_PATH}
5
- SUBJECT:
6
- MODEL_PATH: ${SUBJECT_MODEL_PATH}
7
- LOCAL_EDITING:
8
- MODEL_PATH: ${LOCAL_MODEL_PATH}
9
- REPAINTING_SCALE: 0.5
10
- PREPROCESSOR:
11
- - NAME: CannyAnnotator
12
- TYPE: canny_repaintingß
13
- LOW_THRESHOLD: 100
14
- HIGH_THRESHOLD: 200
15
- - NAME: ColorAnnotator
16
- TYPE: mosaic_repainting
17
- RATIO: 64
18
- - NAME: InfoDrawContourAnnotator
19
- TYPE: contour_repainting
20
- INPUT_NC: 3
21
- OUTPUT_NC: 1
22
- N_RESIDUAL_BLOCKS: 3
23
- SIGMOID: True
24
- PRETRAINED_MODEL: "ms://iic/scepter_annotator@annotator/ckpts/informative_drawing_contour_style.pth"
25
- - NAME: MidasDetector
26
- PRETRAINED_MODEL: "ms://iic/scepter_annotator@annotator/ckpts/dpt_hybrid-midas-501f0c75.pt"
27
- TYPE: depth_repainting
28
- - NAME: GrayAnnotator
29
- TYPE: recolorizing
30
- MAX_SEQ_LEN: 77,
31
- SAMPLE_ARGS: {
32
- prompt: "Face swap"
33
- }
34
- DTYPE: bfloat16