{
  "seed": 1,

  "decoder": {
    "unets": [
      {
        "dim": 576,
        "image_embed_dim": 768,
        "text_embed_dim": 768,
        "cond_on_text_encodings": false,
        "channels": 3,
        "dim_mults": [1, 2, 3, 4],
        "self_attn": [false, true, true, true],
        "num_resnet_blocks": 4,
        "attn_heads": 16,
        "attn_dim_head": 64,
        "sparse_attn": false,
        "memory_efficient": true
      }
    ],
    "image_sizes": [64],
    "channels": 3,
    "timesteps": 1000,
    "loss_type": "l2",
    "beta_schedule": ["cosine"],
    "learned_variance": false
  },
  "data": {
    "webdataset_base_url": "pipe:aws s3 cp --quiet s3://s-datasets/laion-aesthetic/data/laion2B-en-aesthetic/{}.tar -",
    "img_embeddings_url": "s3://s-datasets/laion-aesthetic/ordered_embeddings/",
    "num_workers": 6,
    "batch_size": 40,
    "start_shard": 0,
    "end_shard": 5247,
    "shard_width": 5,
    "index_width": 4,
    "splits": {
      "train": 0.75,
      "val": 0.15,
      "test": 0.1
    },
    "shuffle_train": false,
    "resample_train": true,
    "preprocessing": {
      "RandomResizedCrop": {
        "size": [224, 224],
        "scale": [0.75, 1.0],
        "ratio": [1.0, 1.0]
      },
      "ToTensor": true
    }
  },
  "train": {
    "epochs": 1000,
    "lr": 1.2e-4,
    "wd": 0.0,
    "warmup_steps": 1000,
    "max_grad_norm": 0.5,
    "save_every_n_samples": 1500000,
    "n_sample_images": 10,
    "device": "cuda:0",
    "epoch_samples": 3000000,
    "validation_samples": 100000,
    "use_ema": true,
    "ema_beta": 0.9999,
    "unet_training_mask": [true],
    "find_unused_parameters": false
  },
  "evaluate": {
    "n_evaluation_samples": 10,
    "FID": {
      "feature": 64
    },
    "LPIPS": {
      "net_type": "vgg",
      "reduction": "mean"
    }
  },
  "tracker": {
    "data_path": "/fsx/aidan/new/multinode/experiments/deepspeed_fp16/.tracker-data",
    "overwrite_data_path": true,

    "log": {
      "log_type": "wandb",

      "wandb_entity": "Veldrovive",
      "wandb_project": "dalle2_train_decoder",
      "wandb_run_name": "3B deepspeed fp16",

      "auto_resume": true,
      "verbose": true
    },

    "load": {
      "load_from": "local",
      "only_auto_resume": true,
      "file_path": "/fsx/aidan/new/multinode/experiments/deepspeed_fp16/models/checkpoints/latest.pth"
    },

    "save": [
      {
        "save_to": "huggingface",
        "huggingface_repo": "laion/DALLE2-PyTorch",

        "save_meta_to": "decoder/2.4B_fp16/",
        "save_latest_to": "decoder/2.4B_fp16/latest.pth",

        "save_type": "model"
      },
      {
        "save_to": "huggingface",
        "huggingface_repo": "laion/DALLE2-PyTorch",

        "save_latest_to": "decoder/2.4B_fp16/checkpoints/latest.pth",

        "save_type": "checkpoint"
      },
      {
        "save_to": "local",
        "save_latest_to": "/fsx/aidan/new/multinode/experiments/deepspeed_fp16/models/checkpoints/latest.pth",

        "save_type": "checkpoint"
      }
    ]
  }
}