justinpinkney committed on
Commit
91c1c23
1 Parent(s): 8d346b5

Upload . with huggingface_hub

Browse files
.gitattributes CHANGED
@@ -30,3 +30,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
30
  *.zip filter=lfs diff=lfs merge=lfs -text
31
  *.zst filter=lfs diff=lfs merge=lfs -text
32
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
30
  *.zip filter=lfs diff=lfs merge=lfs -text
31
  *.zst filter=lfs diff=lfs merge=lfs -text
32
  *tfevents* filter=lfs diff=lfs merge=lfs -text
33
+ ffhq-sg2-510.ckpt filter=lfs diff=lfs merge=lfs -text
34
+ lhq-sg3-410-best.ckpt filter=lfs diff=lfs merge=lfs -text
ffhq-sg2-510.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:126afd0477ab874921064cf8717308d3f715c80d0e25e2b966fd7e9a1746d71d
3
+ size 390606755
ffhq-sg2-510.yaml ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ network:
3
+ dim: 512
4
+ num_timesteps: 1000
5
+ depth: 12
6
+ dim_head: 64
7
+ heads: 12
8
+ diffusion:
9
+ image_embed_dim: ${model.network.dim}
10
+ timesteps: ${model.network.num_timesteps}
11
+ cond_drop_prob: 0.2
12
+ image_embed_scale: 1.0
13
+ text_embed_scale: 1.0
14
+ beta_schedule: cosine
15
+ predict_x_start: true
16
+ data:
17
+ bs: 512
18
+ format: webdataset
19
+ path: data/webdataset/sg2-ffhq-1024-clip/{00000..99}.tar
20
+ embed_noise_scale: 1.0
21
+ sg_pkl: https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-1024x1024.pkl
22
+ clip_variant: ViT-B/32
23
+ n_latents: 1
24
+ latent_dim: 512
25
+ latent_repeats:
26
+ - 18
27
+ val_im_samples: 64
28
+ val_text_samples: data/text/face-val.txt
29
+ val_samples_per_text: 4
30
+ wandb_project: clip2latent
31
+ wandb_entity: null
32
+ name: baseline_noise_1
33
+ device: cuda:0
34
+ train:
35
+ znorm_embed: false
36
+ znorm_latent: true
37
+ max_it: 1000000
38
+ val_it: 10000
39
+ lr: 0.0001
40
+ weight_decay: 0.01
41
+ ema_update_every: 1
42
+ ema_beta: 0.99999
lhq-sg3-410-best.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:740220d88d810c362f1aa575c2d488d48d14fe6c9b5e40712049841efe92d910
3
+ size 390606755
lhq-sg3-410-best.yaml ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ network:
3
+ dim: 512
4
+ num_timesteps: 1000
5
+ depth: 12
6
+ dim_head: 64
7
+ heads: 12
8
+ diffusion:
9
+ image_embed_dim: ${model.network.dim}
10
+ timesteps: ${model.network.num_timesteps}
11
+ cond_drop_prob: 0.2
12
+ image_embed_scale: 1.0
13
+ text_embed_scale: 1.0
14
+ beta_schedule: cosine
15
+ predict_x_start: true
16
+ data:
17
+ bs: 512
18
+ format: webdataset
19
+ path: data/webdataset/sg3-lhq-256-clip/{00000..99}.tar
20
+ embed_noise_scale: 1.0
21
+ sg_pkl: /home/jpinkney/code/clip2latent/data/models/lhq-256-stylegan3-t-25Mimg.pkl
22
+ clip_variant: ViT-B/32
23
+ n_latents: 1
24
+ latent_dim: 512
25
+ latent_repeats:
26
+ - 16
27
+ val_im_samples: 64
28
+ val_text_samples: data/text/landscape-val.txt
29
+ val_samples_per_text: 4
30
+ wandb_project: clip2latent
31
+ wandb_entity: null
32
+ name: lhq_noise_1
33
+ device: cuda:0
34
+ train:
35
+ znorm_embed: false
36
+ znorm_latent: true
37
+ max_it: 1000000
38
+ val_it: 10000
39
+ lr: 0.0001
40
+ weight_decay: 0.01
41
+ ema_update_every: 1
42
+ ema_beta: 0.99999