{
    "seed": 1,
    
    "decoder": {
        "unets": [
            {
                "dim": 256,
                "cond_dim": 512,
                "image_embed_dim": 768,
                "text_embed_dim": 768,
                "cond_on_text_encodings": true,
                "channels": 3,
                "dim_mults": [1, 2, 3, 4],
                "num_resnet_blocks": 4,
                "attn_heads": 8,
                "attn_dim_head": 64,
                "sparse_attn": true,
                "memory_efficient": true,
		        "self_attn": [false, true, true, true]
            }
        ],
        "clip": {
            "make": "openai",
            "model": "ViT-L/14"
        },
        "image_sizes": [64],
        "channels": 3,
        "timesteps": 1000,
        "loss_type": "l2",
        "beta_schedule": ["cosine"],
        "learned_variance": true
    },
    "data": {
        "webdataset_base_url": "pipe:aws s3 cp --quiet s3://s-datasets/laion-aesthetic/data/laion2B-en-aesthetic/{}.tar -", 
        "embeddings_url": "s3://s-datasets/laion-aesthetic/ordered_embeddings/",
        "num_workers": 12,
        "batch_size": 60,
        "start_shard": 0,
        "end_shard": 5247,
        "shard_width": 5,
        "index_width": 4,
        "splits": {
            "train": 0.75,
            "val": 0.15,
            "test": 0.1
        },
        "shuffle_train": false,
        "resample_train": true,
        "preprocessing": {
            "RandomResizedCrop": {
                "size": [64, 64],
                "scale": [0.75, 1.0],
                "ratio": [1.0, 1.0]
            },
            "ToTensor": true
        }
    },
    "train": {
        "epochs": 1000,
        "lr":1e-4,
        "wd": 0.01,
        "max_grad_norm": 0.5,
        "save_every_n_samples": 2000000,
        "n_sample_images": 10,
        "device": "cuda:0",
        "epoch_samples": 40000000,
        "validation_samples": 100000,
        "use_ema": true,
        "ema_beta": 0.99,
        "save_all": false,
        "save_latest": true,
        "save_best": true,
        "unet_training_mask": [true]
    },
    "evaluate": {
        "n_evaluation_samples": 30,
        "FID": {
            "feature": 64
        },
        "LPIPS": {
            "net_type": "vgg",
            "reduction": "mean"
        }
    },
    "tracker": {
        "data_path": ".tracker-data-2",
        "overwrite_data_path": true,

        "log": {
            "log_type": "wandb",

            "wandb_entity": "rom1504",
            "wandb_project": "dalle2_train_decoder",
            "wandb_resume": false,

            "verbose": true
        },

        "load": {
	    "resume": true,
            "source": "url",
            "wandb_file_path": "https://api.wandb.ai/files/rom1504/dalle2_train_decoder/3tmnv289/latest.pth"
        },

        "save": [{
            "save_to": "wandb"
        },
        {
            "save_to": "huggingface",
            "huggingface_repo": "laion/DALLE2-PyTorch",

            "save_latest_to": "decoder/small_32gpus/latest.pth",
            
            "save_type": "model"
        }]
    }
}
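
A minimal sketch (not part of the config file itself) of how a training script might load and sanity-check this JSON before handing it to the decoder trainer. The filename `train_decoder_config.json` and the checks below are assumptions for illustration; only the keys shown are taken from the config above.

```python
# Hypothetical loader sketch -- the filename and checks are assumptions,
# not part of the original config file.
import json
import math

with open("train_decoder_config.json") as f:  # assumed filename
    cfg = json.load(f)

# The train/val/test fractions should partition the shard range.
splits = cfg["data"]["splits"]
assert math.isclose(sum(splits.values()), 1.0), "splits must sum to 1"

# One entry in unet_training_mask is expected per unet in the decoder.
assert len(cfg["train"]["unet_training_mask"]) == len(cfg["decoder"]["unets"])

# Shard names are zero-padded to shard_width digits before being
# substituted into the webdataset_base_url template.
first = str(cfg["data"]["start_shard"]).zfill(cfg["data"]["shard_width"])
print(cfg["data"]["webdataset_base_url"].format(first))
# -> pipe:aws s3 cp --quiet s3://.../laion2B-en-aesthetic/00000.tar -
```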