{ "experiment": { "tokenizer_checkpoint": "tokenizer_titok_s128.bin", "generator_checkpoint": "generator_titok_s128.bin", "output_dir": "titok_s_128" }, "model": { "vq_model": { "codebook_size": 4096, "token_size": 12, "use_l2_norm": true, "commitment_cost": 0.25, "vit_enc_model_size": "small", "vit_dec_model_size": "small", "vit_enc_patch_size": 16, "vit_dec_patch_size": 16, "num_latent_tokens": 128 }, "generator": { "model_type": "UViT", "hidden_size": 1024, "num_hidden_layers": 20, "num_attention_heads": 16, "intermediate_size": 4096, "dropout": 0.1, "attn_drop": 0.1, "num_steps": 64, "mask_schedule_strategy": "arccos", "class_label_dropout": 0.1, "image_seq_len": "${model.vq_model.num_latent_tokens}", "condition_num_classes": 1000, "randomize_temperature": 2.8, "guidance_scale": 6.9, "guidance_decay": "power-cosine" } }, "dataset": { "preprocessing": { "crop_size": 256 } } }