ArthurConmy commited on
Commit
8ea403c
·
1 Parent(s): 33f9023

Upload 15 files

Browse files
Files changed (15) hide show
  1. 2rqzhpl3.pt +3 -0
  2. dg6ryo96.json +1 -0
  3. dg6ryo96.pt +3 -0
  4. dw78izqm.json +1 -0
  5. dw78izqm.pt +3 -0
  6. sjuv1psg.json +1 -0
  7. sjuv1psg.pt +3 -0
  8. t5n9rno7.json +1 -0
  9. t5n9rno7.pt +3 -0
  10. vbuiid71.json +1 -0
  11. vbuiid71.pt +3 -0
  12. wtm6jes5.json +1 -0
  13. wtm6jes5.pt +3 -0
  14. yanhe13s.json +1 -0
  15. yanhe13s.pt +3 -0
2rqzhpl3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:242d04d7d6a60b813d1abfd44d6d9ac48a2dde682cd80d9d774410ffcc9e1a83
3
+ size 537008216
dg6ryo96.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"lr": "0.0012", "d_in": "2048", "seed": "1", "beta1": "0.9", "beta2": "0.99", "d_sae": "2048", "dtype": "torch.bfloat16", "device": "cuda", "dataset": "c4", "seq_len": "128", "testing": "False", "act_name": "blocks.0.mlp.hook_post", "l1_lambda": "0.0007999999797903001", "batch_size": "32", "num_tokens": "2000000000000", "sched_type": "cosine_annealing", "test_every": "100", "buffer_size": "524288", "wandb_group": "None", "dataset_args": "['en']", "delete_cache": "False", "l1_loss_form": "l1", "l2_loss_form": "l2", "sched_epochs": "1000", "sched_finish": "True", "buffer_device": "cuda:0", "resample_mode": "anthropic", "dataset_kwargs": "{'split': 'train', 'streaming': True}", "log_everything": "False", "resample_factor": "0.01", "sched_lr_factor": "0.1", "sched_warmup_epochs": "1000", "test_set_batch_size": "100", "save_state_dict_every": "sae.utils.get_cfg.<locals>.<lambda>", "anthropic_resample_last": "7500", "resample_sae_neurons_at": "[10000, 20000, 50000, 75000, 100000]", "activation_training_order": "shuffled", "anthropic_resample_batches": "200000", "resample_sae_neurons_every": "2.050427598475984e+32", "wandb_mode_online_override": "False", "resample_sae_neurons_cutoff": "1e-06"}
dg6ryo96.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2888bb7731b875761e6d702cd1405fb2f0a9756cfb4eb994507fe721ed4cf64
3
+ size 16787544
dw78izqm.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"lr": "0.0012", "d_in": "2048", "seed": "1", "beta1": "0.9", "beta2": "0.99", "d_sae": "65536", "dtype": "torch.bfloat16", "device": "cuda", "dataset": "c4", "seq_len": "128", "testing": "False", "act_name": "blocks.0.mlp.hook_post", "l1_lambda": "0.0012000000569969416", "batch_size": "32", "num_tokens": "2000000000000", "sched_type": "cosine_annealing", "test_every": "100", "buffer_size": "2097152", "wandb_group": "None", "dataset_args": "['en']", "delete_cache": "False", "l1_loss_form": "l1", "l2_loss_form": "l2", "sched_epochs": "1000", "sched_finish": "True", "buffer_device": "cuda:0", "resample_mode": "anthropic", "dataset_kwargs": "{'split': 'train', 'streaming': True}", "log_everything": "False", "resample_factor": "0.01", "sched_lr_factor": "0.1", "sched_warmup_epochs": "1000", "test_set_batch_size": "100", "save_state_dict_every": "sae.utils.get_cfg.<locals>.<lambda>", "anthropic_resample_last": "7500", "resample_sae_neurons_at": "[10000, 20000, 50000, 75000, 100000]", "activation_training_order": "shuffled", "anthropic_resample_batches": "200000", "resample_sae_neurons_every": "2.050427598475984e+32", "wandb_mode_online_override": "False", "resample_sae_neurons_cutoff": "1e-06"}
dw78izqm.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f964bcee843ddcd2b1b18d5f30342b19226613d00fa0ec220b97612de075eefe
3
+ size 537008216
sjuv1psg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"lr": "0.0012", "d_in": "2048", "seed": "1", "beta1": "0.9", "beta2": "0.99", "d_sae": "131072", "dtype": "torch.bfloat16", "device": "cuda", "dataset": "c4", "seq_len": "128", "testing": "False", "act_name": "blocks.0.mlp.hook_post", "l1_lambda": "0.0007999999797903001", "batch_size": "32", "num_tokens": "2000000000000", "sched_type": "cosine_annealing", "test_every": "100", "buffer_size": "2097152", "wandb_group": "None", "dataset_args": "['en']", "delete_cache": "False", "l1_loss_form": "l1", "l2_loss_form": "l2", "sched_epochs": "1000", "sched_finish": "True", "buffer_device": "cuda:0", "resample_mode": "anthropic", "dataset_kwargs": "{'split': 'train', 'streaming': True}", "log_everything": "False", "resample_factor": "0.01", "sched_lr_factor": "0.1", "sched_warmup_epochs": "1000", "test_set_batch_size": "100", "save_state_dict_every": "sae.utils.get_cfg.<locals>.<lambda>", "anthropic_resample_last": "7500", "resample_sae_neurons_at": "[10000, 20000, 50000, 75000, 100000]", "activation_training_order": "shuffled", "anthropic_resample_batches": "200000", "resample_sae_neurons_every": "2.050427598475984e+32", "wandb_mode_online_override": "False", "resample_sae_neurons_cutoff": "1e-06"}
sjuv1psg.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5bcccbbed5ca4a89d694412f8692ffebdc6d0335026907ed5904a780813aa33
3
+ size 1074010200
t5n9rno7.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"lr": "0.0012", "d_in": "2048", "seed": "1", "beta1": "0.9", "beta2": "0.99", "d_sae": "16384", "dtype": "torch.bfloat16", "device": "cuda", "dataset": "c4", "seq_len": "128", "testing": "False", "act_name": "blocks.0.mlp.hook_post", "l1_lambda": "0.0015999999595806005", "batch_size": "32", "num_tokens": "2000000000000", "sched_type": "cosine_annealing", "test_every": "100", "buffer_size": "524288", "wandb_group": "None", "dataset_args": "['en']", "delete_cache": "False", "l1_loss_form": "l1", "l2_loss_form": "l2", "sched_epochs": "1000", "sched_finish": "True", "buffer_device": "cuda:0", "resample_mode": "anthropic", "dataset_kwargs": "{'split': 'train', 'streaming': True}", "log_everything": "False", "resample_factor": "0.01", "sched_lr_factor": "0.1", "sched_warmup_epochs": "1000", "test_set_batch_size": "100", "save_state_dict_every": "sae.utils.get_cfg.<locals>.<lambda>", "anthropic_resample_last": "7500", "resample_sae_neurons_at": "[10000, 20000, 50000, 75000, 100000]", "activation_training_order": "shuffled", "anthropic_resample_batches": "200000", "resample_sae_neurons_every": "2.050427598475984e+32", "wandb_mode_online_override": "False", "resample_sae_neurons_cutoff": "1e-06"}
t5n9rno7.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:def9b65eef5fae0f27ced9e927d583be23609ba6c4e6d48240dfd119def19207
3
+ size 134256728
vbuiid71.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"lr": "0.0012", "d_in": "2048", "seed": "1", "beta1": "0.9", "beta2": "0.99", "d_sae": "16384", "dtype": "torch.bfloat16", "device": "cuda", "dataset": "c4", "seq_len": "128", "testing": "False", "act_name": "blocks.0.mlp.hook_post", "l1_lambda": "0.0007999999797903001", "batch_size": "32", "num_tokens": "2000000000000", "sched_type": "cosine_annealing", "test_every": "100", "buffer_size": "524288", "wandb_group": "None", "dataset_args": "['en']", "delete_cache": "False", "l1_loss_form": "l1", "l2_loss_form": "l2", "sched_epochs": "1000", "sched_finish": "True", "buffer_device": "cuda:0", "resample_mode": "anthropic", "dataset_kwargs": "{'split': 'train', 'streaming': True}", "log_everything": "False", "resample_factor": "0.01", "sched_lr_factor": "0.1", "sched_warmup_epochs": "1000", "test_set_batch_size": "100", "save_state_dict_every": "sae.utils.get_cfg.<locals>.<lambda>", "anthropic_resample_last": "7500", "resample_sae_neurons_at": "[10000, 20000, 50000, 75000, 100000]", "activation_training_order": "shuffled", "anthropic_resample_batches": "200000", "resample_sae_neurons_every": "2.050427598475984e+32", "wandb_mode_online_override": "False", "resample_sae_neurons_cutoff": "1e-06"}
vbuiid71.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56a6c38a472ad284daf9ecab9d52fcb3663690e156a42373873a1529318acf06
3
+ size 134256728
wtm6jes5.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"lr": "0.0012", "d_in": "2048", "seed": "1", "beta1": "0.9", "beta2": "0.99", "d_sae": "131072", "dtype": "torch.bfloat16", "device": "cuda", "dataset": "c4", "seq_len": "128", "testing": "False", "act_name": "blocks.0.mlp.hook_post", "l1_lambda": "0.0012000000569969416", "batch_size": "32", "num_tokens": "2000000000000", "sched_type": "cosine_annealing", "test_every": "100", "buffer_size": "2097152", "wandb_group": "None", "dataset_args": "['en']", "delete_cache": "False", "l1_loss_form": "l1", "l2_loss_form": "l2", "sched_epochs": "1000", "sched_finish": "True", "buffer_device": "cuda:0", "resample_mode": "anthropic", "dataset_kwargs": "{'split': 'train', 'streaming': True}", "log_everything": "False", "resample_factor": "0.01", "sched_lr_factor": "0.1", "sched_warmup_epochs": "1000", "test_set_batch_size": "100", "save_state_dict_every": "sae.utils.get_cfg.<locals>.<lambda>", "anthropic_resample_last": "7500", "resample_sae_neurons_at": "[10000, 20000, 50000, 75000, 100000]", "activation_training_order": "shuffled", "anthropic_resample_batches": "200000", "resample_sae_neurons_every": "2.050427598475984e+32", "wandb_mode_online_override": "False", "resample_sae_neurons_cutoff": "1e-06"}
wtm6jes5.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da1500cdae326e67d2955c8e182bd278e087af317622f0e59b6c18a62e45b796
3
+ size 1074010200
yanhe13s.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"lr": "0.0012", "d_in": "2048", "seed": "1", "beta1": "0.9", "beta2": "0.99", "d_sae": "2048", "dtype": "torch.bfloat16", "device": "cuda", "dataset": "c4", "seq_len": "128", "testing": "False", "act_name": "blocks.0.mlp.hook_post", "l1_lambda": "0.0015999999595806005", "batch_size": "32", "num_tokens": "2000000000000", "sched_type": "cosine_annealing", "test_every": "100", "buffer_size": "524288", "wandb_group": "None", "dataset_args": "['en']", "delete_cache": "True", "l1_loss_form": "l1", "l2_loss_form": "l2", "sched_epochs": "1000", "sched_finish": "True", "buffer_device": "cuda:0", "resample_mode": "anthropic", "dataset_kwargs": "{'split': 'train', 'streaming': True}", "log_everything": "False", "resample_factor": "0.01", "sched_lr_factor": "0.1", "sched_warmup_epochs": "1000", "test_set_batch_size": "100", "save_state_dict_every": "sae.utils.get_cfg.<locals>.<lambda>", "anthropic_resample_last": "7500", "resample_sae_neurons_at": "[10000, 20000, 50000, 75000, 100000]", "activation_training_order": "shuffled", "anthropic_resample_batches": "200000", "resample_sae_neurons_every": "2.050427598475984e+32", "wandb_mode_online_override": "False", "resample_sae_neurons_cutoff": "1e-06"}
yanhe13s.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:590e78c61450632e1bdf6fe5b928116689b95fe2168e78b5902dd53b92cff374
3
+ size 16787544