{ "seed": 1, "decoder": { "unets": [ { "dim": 32, "cond_dim": 512, "image_embed_dim": 768, "text_embed_dim": 768, "cond_on_text_encodings": true, "channels": 3, "dim_mults": [1, 2, 3, 4], "num_resnet_blocks": 4, "attn_heads": 8, "attn_dim_head": 64, "sparse_attn": true, "memory_efficient": true, "self_attn": [false, true, true, true] }, { "dim": 32, "cond_dim": 512, "image_embed_dim": 768, "text_embed_dim": 768, "cond_on_text_encodings": true, "channels": 3, "dim_mults": [1, 2, 3, 4], "num_resnet_blocks": 4, "attn_heads": 8, "attn_dim_head": 64, "sparse_attn": true, "memory_efficient": true, "self_attn": [false, true, true, true] }, { "dim": 192, "cond_dim": 512, "image_embed_dim": 768, "text_embed_dim": 768, "cond_on_text_encodings": true, "init_cross_embed": false, "channels": 3, "dim_mults": [1, 2, 3, 4], "num_resnet_blocks": 3, "attn_heads": 8, "attn_dim_head": 64, "sparse_attn": false, "memory_efficient": true, "self_attn": [false, false, false, false] } ], "clip": { "make": "openai", "model": "ViT-L/14" }, "image_sizes": [64, 256, 1024], "random_crop_sizes": [null, null, 256], "channels": 3, "timesteps": 1000, "loss_type": "l2", "beta_schedule": ["cosine", "cosine", "cosine"], "learned_variance": true, "text_cond_drop_prob": 0.0, "image_cond_drop_prob": 0.0 }, "data": { "webdataset_base_url": "pipe:aws s3 cp --quiet s3://s-datasets/laion-high-resolution/{}.tar -", "num_workers": 6, "batch_size": 8, "start_shard": 0, "end_shard": 17535, "shard_width": 5, "index_width": 4, "splits": { "train": 0.75, "val": 0.15, "test": 0.1 }, "shuffle_train": false, "resample_train": true, "preprocessing": { "RandomResizedCrop": { "size": [1024, 1024], "scale": [0.75, 1.0], "ratio": [1.0, 1.0] }, "ToTensor": true } }, "train": { "epochs": 1000, "lr": 1.2e-4, "wd": 0.0, "max_grad_norm": 0.5, "save_every_n_samples": 2000000, "n_sample_images": 2, "device": "cuda:0", "epoch_samples": 10000000, "validation_samples": 100000, "use_ema": true, "ema_beta": 0.9999, "unet_training_mask": [false, false, true] }, "evaluate": { "n_evaluation_samples": 2, "FID": { "feature": 64 }, "LPIPS": { "net_type": "vgg", "reduction": "mean" } }, "tracker": { "data_path": "/fsx/aidan/new/multinode/experiments/decoder_1024/.tracker-data", "overwrite_data_path": true, "log": { "log_type": "wandb", "wandb_entity": "Veldrovive", "wandb_project": "upsamplers_1024", "wandb_resume": false, "auto_resume": true, "verbose": true }, "load": { "load_from": null, "only_auto_resume": true, "file_path": "/fsx/aidan/new/multinode/experiments/decoder_1024/models/checkpoints/latest.pth" }, "save": [ { "save_to": "huggingface", "huggingface_repo": "laion/DALLE2-PyTorch", "save_meta_to": "upsampler/1024/v1.0.3/", "save_latest_to": "upsampler/1024/v1.0.3/latest.pth", "save_type": "model" },{ "save_to": "huggingface", "huggingface_repo": "laion/DALLE2-PyTorch", "save_latest_to": "upsampler/1024/v1.0.2/checkpoints/latest.pth", "save_type": "checkpoint" },{ "save_to": "local", "save_latest_to": "/fsx/aidan/new/multinode/experiments/decoder_1024/models/checkpoints/latest.pth", "save_type": "checkpoint" }] } }