ojuschugh1 committed on
Commit
23e5071
·
verified ·
1 Parent(s): 88b8bcc

Upload 2 files

Browse files
local_similar_lpcoeff_20_l0_layer_6.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98f0a2be51e57e880268354796c84883372348872edfe7a816f2b02572145152
3
+ size 283305416
local_similar_lpcoeff_20_l0_layer_6.yaml ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ act_frequency_n_tokens: 500000
2
+ batch_size: 8
3
+ collect_act_frequency_every_n_samples: 40000
4
+ cooldown_samples: 0
5
+ effective_batch_size: 16
6
+ eval_data:
7
+ column_name: input_ids
8
+ dataset_name: apollo-research/Skylion007-openwebtext-tokenizer-gpt2
9
+ is_tokenized: true
10
+ n_ctx: 1024
11
+ seed: null
12
+ split: train
13
+ streaming: true
14
+ tokenizer_name: gpt2
15
+ eval_every_n_samples: 40000
16
+ eval_n_samples: 500
17
+ log_every_n_grad_steps: 20
18
+ loss:
19
+ in_to_orig: null
20
+ logits_kl:
21
+ coeff: 1.0
22
+ out_to_in:
23
+ coeff: 1.0
24
+ out_to_orig: null
25
+ sparsity:
26
+ coeff: 20.0
27
+ p_norm: 1.0
28
+ lr: 0.0005
29
+ lr_schedule: cosine
30
+ max_grad_norm: 10.0
31
+ min_lr_factor: 0.1
32
+ n_samples: 400000
33
+ saes:
34
+ dict_size_to_input_ratio: 60.0
35
+ k: null
36
+ pretrained_sae_paths: null
37
+ retrain_saes: false
38
+ sae_positions:
39
+ - blocks.6.hook_resid_pre
40
+ type_of_sparsifier: sae
41
+ save_dir: /data/jordan_tensor/e2e_sae-main/e2e_sae-main/e2e_sae/scripts/train_tlens_saes/out
42
+ save_every_n_samples: null
43
+ seed: 0
44
+ tlens_model_name: gpt2-small
45
+ tlens_model_path: null
46
+ train_data:
47
+ column_name: input_ids
48
+ dataset_name: apollo-research/Skylion007-openwebtext-tokenizer-gpt2
49
+ is_tokenized: true
50
+ n_ctx: 1024
51
+ seed: null
52
+ split: train
53
+ streaming: true
54
+ tokenizer_name: gpt2
55
+ wandb_project: gpt2-e2e_play
56
+ wandb_run_name: null
57
+ wandb_run_name_prefix: ''
58
+ warmup_samples: 20000