{
  "experiment": {
    "tokenizer_checkpoint": "tatitok_bl32_vae.bin",
    "generator_checkpoint": "maskgen_kl_l.bin"
  },
  "model": {
    "vq_model": {
      "quantize_mode": "vae",
      "token_size": 16,
      "vit_enc_model_size": "base",
      "vit_dec_model_size": "large",
      "vit_enc_patch_size": 16,
      "vit_dec_patch_size": 16,
      "num_latent_tokens": 32,
      "scale_factor": 0.7525,
      "finetune_decoder": false,
      "is_legacy": false
    },
    "maskgen": {
      "decoder_embed_dim": 1024,
      "decoder_depth": 16,
      "decoder_num_heads": 16,
      "micro_condition": true,
      "micro_condition_embed_dim": 256,
      "text_drop_prob": 0.1,
      "cfg": 3.0,
      "cfg_schedule": "linear",
      "num_iter": 32,
      "temperature": 1.0,
      "sample_aesthetic_score": 6.5
    }
  },
  "losses": {
    "diffloss_d": 8,
    "diffloss_w": 1024
  },
  "dataset": {
    "preprocessing": {
      "crop_size": 256
    }
  }
}