act_frequency_n_tokens: 500000
batch_size: 8
collect_act_frequency_every_n_samples: 40000
collect_output_metrics_every_n_samples: 0
cooldown_samples: 0
effective_batch_size: 16
eval_data:
  column_name: input_ids
  dataset_name: apollo-research/Skylion007-openwebtext-tokenizer-gpt2
  is_tokenized: true
  n_ctx: 1024
  seed: 0
  split: train
  streaming: true
  tokenizer_name: gpt2
eval_every_n_samples: 40000
eval_n_samples: 500
log_every_n_grad_steps: 20
loss:
  in_to_orig: null
  logits_kl: null
  out_to_in:
    coeff: 1.0
  out_to_orig: null
  sparsity:
    coeff: 6.0
    p_norm: 1.0
lr: 0.0005
lr_schedule: cosine
max_grad_norm: 10.0
min_lr_factor: 0.1
n_samples: 400000
saes:
  dict_size_to_input_ratio: 60.0
  k: null
  pretrained_sae_paths: null
  retrain_saes: false
  sae_positions:
  - blocks.6.hook_resid_pre
  type_of_sparsifier: sae
save_dir: /mnt/ssd-interp/dan/sparsify/sparsify/scripts/train_tlens_saes/out
save_every_n_samples: null
seed: 0
tlens_model_name: gpt2-small
tlens_model_path: null
train_data:
  column_name: input_ids
  dataset_name: apollo-research/Skylion007-openwebtext-tokenizer-gpt2
  is_tokenized: true
  n_ctx: 1024
  seed: 0
  split: train
  streaming: true
  tokenizer_name: gpt2
wandb_project: gpt2-layerwise_play
wandb_run_name: null
wandb_run_name_prefix: ''
warmup_samples: 20000
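With in_to_orig, out_to_orig, and logits_kl all null, only two loss terms are active: the layerwise reconstruction term out_to_in (coeff 1.0) and an Lp sparsity penalty with p_norm 1.0, i.e. an L1 penalty weighted by 6.0. Under the usual reading of these fields (an assumption about the training code's loss definitions, not something stated in the config), the objective is roughly L(x) = ||SAE(x) - x||^2 + 6.0 * ||c(x)||_1, computed on activations at blocks.6.hook_resid_pre.

As a sanity check, here is a minimal sketch that loads the config and prints a few derived training quantities. It assumes only PyYAML; the filename config.yaml is hypothetical, and D_MODEL = 768 (the residual-stream width of gpt2-small) is an assumption not present in the config itself.

import yaml

# Residual-stream width of gpt2-small; an assumption, not a config field.
D_MODEL = 768

# "config.yaml" is a hypothetical name for the listing above.
with open("config.yaml") as f:
    cfg = yaml.safe_load(f)

# SAE dictionary size implied by dict_size_to_input_ratio: 60 * 768 = 46080.
dict_size = int(cfg["saes"]["dict_size_to_input_ratio"] * D_MODEL)

# Gradient-accumulation steps implied by the two batch sizes: 16 / 8 = 2.
accum_steps = cfg["effective_batch_size"] // cfg["batch_size"]

# Total training tokens: 400000 samples * 1024-token context = 409.6M tokens.
total_tokens = cfg["n_samples"] * cfg["train_data"]["n_ctx"]

print(f"dict_size={dict_size}  accum_steps={accum_steps}  tokens={total_tokens:,}")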