ArthurConmy
commited on
Commit
·
8ea403c
1
Parent(s):
33f9023
Upload 15 files
Browse files- 2rqzhpl3.pt +3 -0
- dg6ryo96.json +1 -0
- dg6ryo96.pt +3 -0
- dw78izqm.json +1 -0
- dw78izqm.pt +3 -0
- sjuv1psg.json +1 -0
- sjuv1psg.pt +3 -0
- t5n9rno7.json +1 -0
- t5n9rno7.pt +3 -0
- vbuiid71.json +1 -0
- vbuiid71.pt +3 -0
- wtm6jes5.json +1 -0
- wtm6jes5.pt +3 -0
- yanhe13s.json +1 -0
- yanhe13s.pt +3 -0
2rqzhpl3.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:242d04d7d6a60b813d1abfd44d6d9ac48a2dde682cd80d9d774410ffcc9e1a83
|
3 |
+
size 537008216
|
dg6ryo96.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"lr": "0.0012", "d_in": "2048", "seed": "1", "beta1": "0.9", "beta2": "0.99", "d_sae": "2048", "dtype": "torch.bfloat16", "device": "cuda", "dataset": "c4", "seq_len": "128", "testing": "False", "act_name": "blocks.0.mlp.hook_post", "l1_lambda": "0.0007999999797903001", "batch_size": "32", "num_tokens": "2000000000000", "sched_type": "cosine_annealing", "test_every": "100", "buffer_size": "524288", "wandb_group": "None", "dataset_args": "['en']", "delete_cache": "False", "l1_loss_form": "l1", "l2_loss_form": "l2", "sched_epochs": "1000", "sched_finish": "True", "buffer_device": "cuda:0", "resample_mode": "anthropic", "dataset_kwargs": "{'split': 'train', 'streaming': True}", "log_everything": "False", "resample_factor": "0.01", "sched_lr_factor": "0.1", "sched_warmup_epochs": "1000", "test_set_batch_size": "100", "save_state_dict_every": "sae.utils.get_cfg.<locals>.<lambda>", "anthropic_resample_last": "7500", "resample_sae_neurons_at": "[10000, 20000, 50000, 75000, 100000]", "activation_training_order": "shuffled", "anthropic_resample_batches": "200000", "resample_sae_neurons_every": "2.050427598475984e+32", "wandb_mode_online_override": "False", "resample_sae_neurons_cutoff": "1e-06"}
|
dg6ryo96.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2888bb7731b875761e6d702cd1405fb2f0a9756cfb4eb994507fe721ed4cf64
|
3 |
+
size 16787544
|
dw78izqm.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"lr": "0.0012", "d_in": "2048", "seed": "1", "beta1": "0.9", "beta2": "0.99", "d_sae": "65536", "dtype": "torch.bfloat16", "device": "cuda", "dataset": "c4", "seq_len": "128", "testing": "False", "act_name": "blocks.0.mlp.hook_post", "l1_lambda": "0.0012000000569969416", "batch_size": "32", "num_tokens": "2000000000000", "sched_type": "cosine_annealing", "test_every": "100", "buffer_size": "2097152", "wandb_group": "None", "dataset_args": "['en']", "delete_cache": "False", "l1_loss_form": "l1", "l2_loss_form": "l2", "sched_epochs": "1000", "sched_finish": "True", "buffer_device": "cuda:0", "resample_mode": "anthropic", "dataset_kwargs": "{'split': 'train', 'streaming': True}", "log_everything": "False", "resample_factor": "0.01", "sched_lr_factor": "0.1", "sched_warmup_epochs": "1000", "test_set_batch_size": "100", "save_state_dict_every": "sae.utils.get_cfg.<locals>.<lambda>", "anthropic_resample_last": "7500", "resample_sae_neurons_at": "[10000, 20000, 50000, 75000, 100000]", "activation_training_order": "shuffled", "anthropic_resample_batches": "200000", "resample_sae_neurons_every": "2.050427598475984e+32", "wandb_mode_online_override": "False", "resample_sae_neurons_cutoff": "1e-06"}
|
dw78izqm.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f964bcee843ddcd2b1b18d5f30342b19226613d00fa0ec220b97612de075eefe
|
3 |
+
size 537008216
|
sjuv1psg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"lr": "0.0012", "d_in": "2048", "seed": "1", "beta1": "0.9", "beta2": "0.99", "d_sae": "131072", "dtype": "torch.bfloat16", "device": "cuda", "dataset": "c4", "seq_len": "128", "testing": "False", "act_name": "blocks.0.mlp.hook_post", "l1_lambda": "0.0007999999797903001", "batch_size": "32", "num_tokens": "2000000000000", "sched_type": "cosine_annealing", "test_every": "100", "buffer_size": "2097152", "wandb_group": "None", "dataset_args": "['en']", "delete_cache": "False", "l1_loss_form": "l1", "l2_loss_form": "l2", "sched_epochs": "1000", "sched_finish": "True", "buffer_device": "cuda:0", "resample_mode": "anthropic", "dataset_kwargs": "{'split': 'train', 'streaming': True}", "log_everything": "False", "resample_factor": "0.01", "sched_lr_factor": "0.1", "sched_warmup_epochs": "1000", "test_set_batch_size": "100", "save_state_dict_every": "sae.utils.get_cfg.<locals>.<lambda>", "anthropic_resample_last": "7500", "resample_sae_neurons_at": "[10000, 20000, 50000, 75000, 100000]", "activation_training_order": "shuffled", "anthropic_resample_batches": "200000", "resample_sae_neurons_every": "2.050427598475984e+32", "wandb_mode_online_override": "False", "resample_sae_neurons_cutoff": "1e-06"}
|
sjuv1psg.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d5bcccbbed5ca4a89d694412f8692ffebdc6d0335026907ed5904a780813aa33
|
3 |
+
size 1074010200
|
t5n9rno7.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"lr": "0.0012", "d_in": "2048", "seed": "1", "beta1": "0.9", "beta2": "0.99", "d_sae": "16384", "dtype": "torch.bfloat16", "device": "cuda", "dataset": "c4", "seq_len": "128", "testing": "False", "act_name": "blocks.0.mlp.hook_post", "l1_lambda": "0.0015999999595806005", "batch_size": "32", "num_tokens": "2000000000000", "sched_type": "cosine_annealing", "test_every": "100", "buffer_size": "524288", "wandb_group": "None", "dataset_args": "['en']", "delete_cache": "False", "l1_loss_form": "l1", "l2_loss_form": "l2", "sched_epochs": "1000", "sched_finish": "True", "buffer_device": "cuda:0", "resample_mode": "anthropic", "dataset_kwargs": "{'split': 'train', 'streaming': True}", "log_everything": "False", "resample_factor": "0.01", "sched_lr_factor": "0.1", "sched_warmup_epochs": "1000", "test_set_batch_size": "100", "save_state_dict_every": "sae.utils.get_cfg.<locals>.<lambda>", "anthropic_resample_last": "7500", "resample_sae_neurons_at": "[10000, 20000, 50000, 75000, 100000]", "activation_training_order": "shuffled", "anthropic_resample_batches": "200000", "resample_sae_neurons_every": "2.050427598475984e+32", "wandb_mode_online_override": "False", "resample_sae_neurons_cutoff": "1e-06"}
|
t5n9rno7.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:def9b65eef5fae0f27ced9e927d583be23609ba6c4e6d48240dfd119def19207
|
3 |
+
size 134256728
|
vbuiid71.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"lr": "0.0012", "d_in": "2048", "seed": "1", "beta1": "0.9", "beta2": "0.99", "d_sae": "16384", "dtype": "torch.bfloat16", "device": "cuda", "dataset": "c4", "seq_len": "128", "testing": "False", "act_name": "blocks.0.mlp.hook_post", "l1_lambda": "0.0007999999797903001", "batch_size": "32", "num_tokens": "2000000000000", "sched_type": "cosine_annealing", "test_every": "100", "buffer_size": "524288", "wandb_group": "None", "dataset_args": "['en']", "delete_cache": "False", "l1_loss_form": "l1", "l2_loss_form": "l2", "sched_epochs": "1000", "sched_finish": "True", "buffer_device": "cuda:0", "resample_mode": "anthropic", "dataset_kwargs": "{'split': 'train', 'streaming': True}", "log_everything": "False", "resample_factor": "0.01", "sched_lr_factor": "0.1", "sched_warmup_epochs": "1000", "test_set_batch_size": "100", "save_state_dict_every": "sae.utils.get_cfg.<locals>.<lambda>", "anthropic_resample_last": "7500", "resample_sae_neurons_at": "[10000, 20000, 50000, 75000, 100000]", "activation_training_order": "shuffled", "anthropic_resample_batches": "200000", "resample_sae_neurons_every": "2.050427598475984e+32", "wandb_mode_online_override": "False", "resample_sae_neurons_cutoff": "1e-06"}
|
vbuiid71.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:56a6c38a472ad284daf9ecab9d52fcb3663690e156a42373873a1529318acf06
|
3 |
+
size 134256728
|
wtm6jes5.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"lr": "0.0012", "d_in": "2048", "seed": "1", "beta1": "0.9", "beta2": "0.99", "d_sae": "131072", "dtype": "torch.bfloat16", "device": "cuda", "dataset": "c4", "seq_len": "128", "testing": "False", "act_name": "blocks.0.mlp.hook_post", "l1_lambda": "0.0012000000569969416", "batch_size": "32", "num_tokens": "2000000000000", "sched_type": "cosine_annealing", "test_every": "100", "buffer_size": "2097152", "wandb_group": "None", "dataset_args": "['en']", "delete_cache": "False", "l1_loss_form": "l1", "l2_loss_form": "l2", "sched_epochs": "1000", "sched_finish": "True", "buffer_device": "cuda:0", "resample_mode": "anthropic", "dataset_kwargs": "{'split': 'train', 'streaming': True}", "log_everything": "False", "resample_factor": "0.01", "sched_lr_factor": "0.1", "sched_warmup_epochs": "1000", "test_set_batch_size": "100", "save_state_dict_every": "sae.utils.get_cfg.<locals>.<lambda>", "anthropic_resample_last": "7500", "resample_sae_neurons_at": "[10000, 20000, 50000, 75000, 100000]", "activation_training_order": "shuffled", "anthropic_resample_batches": "200000", "resample_sae_neurons_every": "2.050427598475984e+32", "wandb_mode_online_override": "False", "resample_sae_neurons_cutoff": "1e-06"}
|
wtm6jes5.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da1500cdae326e67d2955c8e182bd278e087af317622f0e59b6c18a62e45b796
|
3 |
+
size 1074010200
|
yanhe13s.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"lr": "0.0012", "d_in": "2048", "seed": "1", "beta1": "0.9", "beta2": "0.99", "d_sae": "2048", "dtype": "torch.bfloat16", "device": "cuda", "dataset": "c4", "seq_len": "128", "testing": "False", "act_name": "blocks.0.mlp.hook_post", "l1_lambda": "0.0015999999595806005", "batch_size": "32", "num_tokens": "2000000000000", "sched_type": "cosine_annealing", "test_every": "100", "buffer_size": "524288", "wandb_group": "None", "dataset_args": "['en']", "delete_cache": "True", "l1_loss_form": "l1", "l2_loss_form": "l2", "sched_epochs": "1000", "sched_finish": "True", "buffer_device": "cuda:0", "resample_mode": "anthropic", "dataset_kwargs": "{'split': 'train', 'streaming': True}", "log_everything": "False", "resample_factor": "0.01", "sched_lr_factor": "0.1", "sched_warmup_epochs": "1000", "test_set_batch_size": "100", "save_state_dict_every": "sae.utils.get_cfg.<locals>.<lambda>", "anthropic_resample_last": "7500", "resample_sae_neurons_at": "[10000, 20000, 50000, 75000, 100000]", "activation_training_order": "shuffled", "anthropic_resample_batches": "200000", "resample_sae_neurons_every": "2.050427598475984e+32", "wandb_mode_online_override": "False", "resample_sae_neurons_cutoff": "1e-06"}
|
yanhe13s.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:590e78c61450632e1bdf6fe5b928116689b95fe2168e78b5902dd53b92cff374
|
3 |
+
size 16787544
|