# clip2latent / ffhq-sg2-510.yaml
# justinpinkney's picture
# Upload . with huggingface_hub
# 91c1c23
# raw
# history blame contribute delete
# 986 Bytes
# Run configuration for the "clip2latent" project (run name: baseline_noise_1).
# Nesting below was reconstructed from the section-header keys; the
# model -> network path is fixed by the ${model.network.*} interpolations.

model:
  # Network hyper-parameters; dim and num_timesteps are reused by
  # model.diffusion through interpolation so the two sections stay in sync.
  network:
    dim: 512
    num_timesteps: 1000
    depth: 12
    dim_head: 64
    heads: 12
  diffusion:
    # ${...} is an interpolation that points at model.network.dim (512);
    # presumably resolved by an OmegaConf/Hydra loader -- confirm.
    image_embed_dim: ${model.network.dim}
    # Points at model.network.num_timesteps (1000).
    timesteps: ${model.network.num_timesteps}
    cond_drop_prob: 0.2
    image_embed_scale: 1.0
    text_embed_scale: 1.0
    beta_schedule: cosine
    predict_x_start: true

data:
  # presumably the batch size -- TODO confirm against the consumer
  bs: 512
  format: webdataset
  # Brace pattern is kept verbatim; it is expanded (if at all) by the
  # consumer, not by YAML.
  path: data/webdataset/sg2-ffhq-1024-clip/{00000..99}.tar
  embed_noise_scale: 1.0
  sg_pkl: https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-1024x1024.pkl
  clip_variant: ViT-B/32
  n_latents: 1
  latent_dim: 512
  # Single-element list.
  latent_repeats:
    - 18
  val_im_samples: 64
  val_text_samples: data/text/face-val.txt
  val_samples_per_text: 4

# NOTE(review): the flattened source does not show where the data section
# ends; the four keys below are assumed to be root-level -- confirm against
# the code that reads this config.
wandb_project: clip2latent
wandb_entity: null
name: baseline_noise_1
device: cuda:0

train:
  znorm_embed: false
  znorm_latent: true
  max_it: 1000000
  val_it: 10000
  lr: 0.0001
  weight_decay: 0.01
  ema_update_every: 1
  ema_beta: 0.99999