{
    "experiment": {
        "tokenizer_checkpoint": "tokenizer_titok_l32.bin",
        "generator_checkpoint": "generator_titok_l32.bin",
        "output_dir": "titok_l_32"
    },
    "model": {
        "vq_model": {
            "codebook_size": 4096,
            "token_size": 12,
            "use_l2_norm": true,
            "commitment_cost": 0.25,
            "vit_enc_model_size": "large",
            "vit_dec_model_size": "large",
            "vit_enc_patch_size": 16,
            "vit_dec_patch_size": 16,
            "num_latent_tokens": 32
        },
        "generator": {
            "model_type": "ViT",
            "hidden_size": 768,
            "num_hidden_layers": 24,
            "num_attention_heads": 16,
            "intermediate_size": 3072,
            "dropout": 0.1,
            "attn_drop": 0.1,
            "num_steps": 8,
            "mask_schedule_strategy": "arccos",
            "class_label_dropout": 0.1,
            "image_seq_len": "${model.vq_model.num_latent_tokens}",
            "condition_num_classes": 1000,
            "randomize_temperature": 9.5,
            "guidance_scale": 4.5,
            "guidance_decay": "linear"
        }
    },
    "dataset": {
        "preprocessing": {
            "crop_size": 256
        }
    }
}