---
# Training configuration for the run "OLMo-190M-17values".
#
# Restored from a whitespace-collapsed unified diff that added this file as
# config.yaml: the diff header and "+" markers were removed and the block
# structure was rebuilt. Every key and value is unchanged.
# NOTE(review): the diff hunk header declared 465 lines, which is more than
# the keys reproduced here; further top-level keys presumably follow in the
# original file — confirm before relying on this file as complete.

run_name: OLMo-190M-17values
seed: 6198
epoch: null
dry_run: false

# Model definition.
model:
  d_model: 768
  # 768 / 12 = 64 per head, assuming d_model is split evenly across heads.
  n_heads: 12
  n_kv_heads: null
  clip_qkv: null
  n_layers: 16
  # NOTE(review): mlp_ratio * d_model would be 3072, but mlp_hidden_size is
  # also set explicitly to 4096; presumably the explicit value wins — confirm
  # in the model code.
  mlp_ratio: 4
  mlp_hidden_size: 4096
  activation_type: swiglu
  block_type: sequential
  block_group_size: 1
  # rope is enabled and alibi is disabled in this run.
  alibi: false
  alibi_bias_max: 8.0
  rope: true
  rope_full_precision: true
  rope_theta: 10000
  flash_attention: false
  # All three dropout rates (attention, residual, embedding) are 0.0.
  attention_dropout: 0.0
  multi_query_attention: false
  attention_layer_norm: false
  residual_dropout: 0.0
  embedding_dropout: 0.0
  embedding_layer_norm: false
  layer_norm_type: rms
  layer_norm_with_affine: true
  layer_norm_eps: 1.0e-05
  attention_layer_norm_with_affine: false
  max_sequence_length: 2048
  include_bias: false
  bias_for_layer_norm: false
  scale_logits: false
  # embedding_size (50304 = 393 * 128) exceeds vocab_size by 27;
  # presumably padding for alignment — confirm.
  vocab_size: 50277
  embedding_size: 50304
  weight_tying: false
  # NOTE(review): eos_token_id equals vocab_size, i.e. one past the largest
  # id if ids run 0..vocab_size-1 (it is still below embedding_size).
  # Confirm against the tokenizer.
  eos_token_id: 50277
  pad_token_id: 1
  init_device: meta
  init_fn: mitchell
  init_std: 0.02
  init_cutoff_factor: null
  precision: amp_bf16
  scale_emb_init: false
  emb_init_std: null
  norm_after: false
  # NOTE(review): linear_type and num_trilm_matrix_scales look
  # project-specific; their semantics are not visible in this file.
  linear_type: values17
  num_trilm_matrix_scales: 1

# Optimizer settings.
optimizer:
  name: adamw
  learning_rate: 0.0004
  weight_decay: 0.1
  betas:
    - 0.9
    - 0.95
  eps: 1.0e-05
  no_decay_norm_and_bias: null
  selective_updates: false
  decay_norm_and_bias: false
  decay_embeddings: false
  metrics_log_interval: 10
  record_update_metrics: false

# Learning-rate schedule.
scheduler:
  name: cosine_with_warmup
  # Presumably the unit in which t_warmup / t_max are expressed — confirm.
  units: steps
  t_warmup: 375
  t_max: null
  alpha_f: 0.1
  grad_clip_warmup_steps: null
  grad_clip_warmup_factor: null
  warmup_min_lr: null
  remove_weight_decay_in_second_half: false

# Training data.
data:
  # 310 shard paths: directories combined_1 .. combined_10, each contributing
  # the same 31 file names in the same order. All paths are relative
  # ("../slimp/..."); what they are resolved against is decided by the loader.
  paths:
    # combined_1
    - ../slimp/train/all_combined/combined_1/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_1/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    # combined_2
    - ../slimp/train/all_combined/combined_2/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_2/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    # combined_3
    - ../slimp/train/all_combined/combined_3/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_3/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    # combined_4
    - ../slimp/train/all_combined/combined_4/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_4/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    # combined_5
    - ../slimp/train/all_combined/combined_5/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_5/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    # combined_6
    - ../slimp/train/all_combined/combined_6/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_6/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    # combined_7
    - ../slimp/train/all_combined/combined_7/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_7/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    # combined_8
    - ../slimp/train/all_combined/combined_8/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_8/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    # combined_9
    - ../slimp/train/all_combined/combined_9/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_9/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    # combined_10
    - ../slimp/train/all_combined/combined_10/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
    - ../slimp/train/all_combined/combined_10/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  # uint16 holds values up to 65535, which covers embedding_size (50304).
  memmap_dtype: uint16
  datasets: null
  label_mask_paths: null
  pad_direction: right
  generate_attention_mask: false
  generate_doc_lengths: false
  # NOTE(review): num_workers is 0 while prefetch_factor and
  # persistent_workers are set. If these are passed straight to
  # torch.utils.data.DataLoader, it rejects both options when
  # num_workers == 0 — confirm the training code guards this case.
  num_workers: 0
  drop_last: true
  pin_memory: true
  prefetch_factor: 16
  persistent_workers: true
  timeout: 0
  seed: null
  instance_filter: null

restore_dataloader: true
fast_forward_batches: null

# Evaluation: no evaluators are configured (empty list).
evaluators: []
eval_interval: 500

tokenizer:
  identifier: ../spectra_tokenizer/tokenizer.json
  truncate_direction: right

# Checkpointing.
save_folder: checkpoints/olmo-190M-17values/
remote_save_folder: null
canceled_check_interval: 50
save_interval: 500
save_interval_unsharded: 10000
save_interval_ephemeral: null
save_num_checkpoints_to_keep: 4
# NOTE(review): -1 presumably means "keep all" — confirm.
save_num_unsharded_checkpoints_to_keep: -1
save_overwrite: true
force_save_unsharded: false
no_pre_train_checkpoint: false
load_path: null
load_path_sharded_checkpointer: null
try_load_latest_save: false
reset_optimizer_state: false
reset_trainer_state: false
sharded_checkpointer: torch_legacy
new_style_checkpoints: null

# NOTE(review): max_duration carries no unit suffix; presumably optimizer
# steps — confirm.
max_duration: 150000
# If the batch size counts sequences of max_sequence_length (2048) tokens,
# one global batch is 1024 * 2048 = 2,097,152 tokens — TODO confirm.
global_train_batch_size: 1024
+device_train_batch_size: 4 +device_train_microbatch_size: 4 +device_eval_batch_size: 4 +eval_subset_num_batches: -1 +eval_on_load: false +device_train_grad_accum: 1 +max_grad_norm: 1.0 +max_grad_norm_ratio: null +precision: amp_bf16 +wandb: null +speed_monitor: + window_size: 20 + gpu_flops_available: null +console_log_interval: 1 +gen1_gc_interval: 1 +compile: null +distributed_strategy: fsdp +fsdp: + use_orig_params: true + sharding_strategy: _HYBRID_SHARD_ZERO2 + wrapping_strategy: null + precision: pure + hybrid_sharding_num_model_replicas: null +ddp: null +softmax_auxiliary_loss: false +auxiliary_loss_multiplier: 0.0001 +time_limit: null +extra_steps_after_cancel: 10 +early_stopping_factor: null +save_data_indices: true +python_profiling: false +torch_profiling: false +stop_at: null +stop_after: null +activation_checkpointing: null +fused_loss: null +hf_datasets_cache_dir: null +module_outputs_save_steps: null diff --git a/data-indices/rank1.tsv.gz b/data-indices/rank1.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae62b243f77625a037b7945793beca437ffa22a0 --- /dev/null +++ b/data-indices/rank1.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17026672f47aae9706b5772b8429fbc87efafb8768299c8435ff476b8fb0905d +size 263 diff --git a/data-indices/rank10.tsv.gz b/data-indices/rank10.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d210bbd092cbea93a55ffd1f4086f02c1ac8bd4 --- /dev/null +++ b/data-indices/rank10.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1e077aada33fab55a8cc083752841484863fcfb7f131fa54dbe6f838650debd +size 262 diff --git a/data-indices/rank101.tsv.gz b/data-indices/rank101.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..51063097516e8081acbe1b85036bac5325505bdb --- /dev/null +++ b/data-indices/rank101.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:91ac3256d4d0b9a5dcd3c800d163fbb3214971accf4b25ddc2723d8f0b58693f +size 264 diff --git a/data-indices/rank103.tsv.gz b/data-indices/rank103.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..822ecd266bcf8d94a7475d0a07412e08dc1cfb08 --- /dev/null +++ b/data-indices/rank103.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6342aaa281ccb8d1cbd3c3ad0293047971fb7fbc2e6321320b42e2911d4fe15 +size 265 diff --git a/data-indices/rank105.tsv.gz b/data-indices/rank105.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..f1a89f83559b1af7b6d8a21591f0763a94e12033 --- /dev/null +++ b/data-indices/rank105.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4423c50dd5d91cb93e3cab859dd6d0310f81e74e09ac9a49fd7638f74be477b7 +size 261 diff --git a/data-indices/rank107.tsv.gz b/data-indices/rank107.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..4abf7a96b519410a3c3ce5add161db06421063ee --- /dev/null +++ b/data-indices/rank107.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d892989ce87cf58ebecf0940ed28a9769714723797df9ef97916f35cb5a6a47 +size 264 diff --git a/data-indices/rank108.tsv.gz b/data-indices/rank108.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..1c1ca8e94ac53563cea8ed8d50c367316373e5b4 --- /dev/null +++ b/data-indices/rank108.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8283ad18102bc1549fdf5fe4d4823808f13ee072fe2569eb702a173d3c94453c +size 265 diff --git a/data-indices/rank110.tsv.gz b/data-indices/rank110.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..79b62c28e3cbb2097c52540b3e99914d500542f2 --- /dev/null +++ b/data-indices/rank110.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bccd42baa21ec71d2459fea96d6ecdeac9fdd7d288abde7d31f527f902c6fab4 +size 264 diff --git 
a/data-indices/rank111.tsv.gz b/data-indices/rank111.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..f66c05696473444c5168c87c7721a02073098d17 --- /dev/null +++ b/data-indices/rank111.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b713ca7b95a7b5844cbcab23e5fe057b769c73f90e5762403ceeeb96aee37fd +size 266 diff --git a/data-indices/rank113.tsv.gz b/data-indices/rank113.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..58481ff2d806fdabc51111ba915209024513ec4f --- /dev/null +++ b/data-indices/rank113.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:388cc0f068d149e23db35bb7afe48051a0bcb5ea9fdbc9dbc3e2e9cd18b8f4f0 +size 269 diff --git a/data-indices/rank115.tsv.gz b/data-indices/rank115.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..933a69a3d80bbf1af498263b31667e2602c7ea81 --- /dev/null +++ b/data-indices/rank115.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9f28df4066ceda592e238373907527e4f163901f363d7209d5c1732ead32119 +size 262 diff --git a/data-indices/rank117.tsv.gz b/data-indices/rank117.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8fdae27d50c64fabdcaf50c4b7bbdcb110ef693 --- /dev/null +++ b/data-indices/rank117.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98d4a8767e6e0ba248994de9e0071c2885771cddaa5731132b881b4dd0e5fa25 +size 265 diff --git a/data-indices/rank118.tsv.gz b/data-indices/rank118.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..9fd37d1b09f15be19968e87d39ba15009a85c5bb --- /dev/null +++ b/data-indices/rank118.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a88d160cf6b477b951928b2f6420e258faf29f98b8b65a65a66089809aa4a81c +size 265 diff --git a/data-indices/rank12.tsv.gz b/data-indices/rank12.tsv.gz new file mode 100644 index 
0000000000000000000000000000000000000000..aa75e7e69b9a9098855ad6f326bef3e8f1c70386 --- /dev/null +++ b/data-indices/rank12.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ce02a9d5f4ddd81cfb51187d74d2c62b920680dbdd683a1984a24787f4d2ed8 +size 261 diff --git a/data-indices/rank120.tsv.gz b/data-indices/rank120.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..96a78e6b09db8f4707b5b486a237e8f8b327d970 --- /dev/null +++ b/data-indices/rank120.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4484bc50815a8bf41bfc016e57287fece4e25d53f2cdc1d382a6d94b8c401eef +size 263 diff --git a/data-indices/rank122.tsv.gz b/data-indices/rank122.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..04b4cd757b819560d4be3ea3f71f76cdf7de01e6 --- /dev/null +++ b/data-indices/rank122.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8eacf7f13836b82f18e880682e3d0271fd6ff1904f762fc2e2d39429dc4c9b22 +size 263 diff --git a/data-indices/rank124.tsv.gz b/data-indices/rank124.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..a5775f16f877d7c72aef2a7fc1573f340537b85b --- /dev/null +++ b/data-indices/rank124.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e553204bb14eb625cf93c41703febcf1e5673f137b30203b9af155fbc788e388 +size 265 diff --git a/data-indices/rank125.tsv.gz b/data-indices/rank125.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..5ae278185bc1c5f19cb739492ffded8fad051662 --- /dev/null +++ b/data-indices/rank125.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7190e4e5c446ad28781b556d2412bde6a57df148718650f89fcdeb5a75589e13 +size 266 diff --git a/data-indices/rank126.tsv.gz b/data-indices/rank126.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b984d3c2a08964b250865d4debc60ce8677e264 --- /dev/null +++ 
b/data-indices/rank126.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffb3c0f2dd47ca896782081793df3c204ad82abe144839d457d1b846f37ce21f +size 266 diff --git a/data-indices/rank129.tsv.gz b/data-indices/rank129.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..361e29713d3edd05df3dd7bb4cb7562fbee6e926 --- /dev/null +++ b/data-indices/rank129.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc3fc82f9d2f76e8de349f19efed426a23401fa09695da45cbe16d38c776bd18 +size 262 diff --git a/data-indices/rank130.tsv.gz b/data-indices/rank130.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..155bce96b4b53649963c28bc3f6a2f72130b95c9 --- /dev/null +++ b/data-indices/rank130.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d83dcf034d915768d2d1d5797fb4eadac7abd9dd55c77a7886846683e670242 +size 268 diff --git a/data-indices/rank132.tsv.gz b/data-indices/rank132.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..f87c4a06a0b124719dbbc5206c32fc601e8fd8df --- /dev/null +++ b/data-indices/rank132.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b54d224fbf3d01ce6f40336115b83d5c933bb1cf23249edc36cc3c899699e294 +size 269 diff --git a/data-indices/rank134.tsv.gz b/data-indices/rank134.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..efb2334c7a5a724093440935357797e4f86c7668 --- /dev/null +++ b/data-indices/rank134.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ac1a028bc268a1306a73a20f0ea14f119afba306b3b5558eb2bacf0c5da5a0e +size 269 diff --git a/data-indices/rank136.tsv.gz b/data-indices/rank136.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..4e01917d824a98cd2a545c430332bdfeb266e7a4 --- /dev/null +++ b/data-indices/rank136.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:e6ac2bdce73368d311428ea76fa39ae38a3d53e7d11c1f320c9031bc9210eb5e +size 263 diff --git a/data-indices/rank139.tsv.gz b/data-indices/rank139.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..331713afd4b51b47c64ea1b6e9a7075f76b0715e --- /dev/null +++ b/data-indices/rank139.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa1fb7f69a58a122e1fe37e37300bde16f2cf9a07cdc1fd34fbb61d2d8ae6a94 +size 265 diff --git a/data-indices/rank141.tsv.gz b/data-indices/rank141.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..07b2b03ec27b8e944f98933ea736fee12b0b4264 --- /dev/null +++ b/data-indices/rank141.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5b85b26e81ebef65fb2ca627cf3ea6eda0151f530dbac1c6aad7b39fe96bb01 +size 263 diff --git a/data-indices/rank143.tsv.gz b/data-indices/rank143.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..846a82a5740e8eec87fee487423cbe62263c9c60 --- /dev/null +++ b/data-indices/rank143.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3794f82cf23961a17794aa4b43a40007681492a8cbd110a5c01853656178a95f +size 257 diff --git a/data-indices/rank145.tsv.gz b/data-indices/rank145.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a5a2114dbabb838d2c2cb8a0152d05d14b9e8bf --- /dev/null +++ b/data-indices/rank145.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f138805a061806d834f1fc75657d8bb38137f46f73ade864da1bcb8db7e84ac8 +size 263 diff --git a/data-indices/rank147.tsv.gz b/data-indices/rank147.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..61ea75638d264bde69dafd61483790596a784a76 --- /dev/null +++ b/data-indices/rank147.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c605d030f2f5fa720b4bf6999f6decae731d83d7d1410c712ba3a0cb434cfa3d +size 269 diff --git 
a/data-indices/rank148.tsv.gz b/data-indices/rank148.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..9394aca4c44de34bef6a7b3c6d41c805c03fc57a --- /dev/null +++ b/data-indices/rank148.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c83ba624fc96289b87da2a53fc1d757942513874b50e40082666155fe1e6497b +size 267 diff --git a/data-indices/rank15.tsv.gz b/data-indices/rank15.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba9ac8dcaadd1383fc611021927b9c8d1cbccfda --- /dev/null +++ b/data-indices/rank15.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad6282d1fcbf33f50f54cc5bea618c316e24d419a644f4c1a39ae6a6cd637a71 +size 265 diff --git a/data-indices/rank150.tsv.gz b/data-indices/rank150.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..11018a262fb26b676fbf4031b98b2bcbe95eb301 --- /dev/null +++ b/data-indices/rank150.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ce79659850b346a85318e4702e6ea69213d2f2630996622c0af24efe49b55ff +size 266 diff --git a/data-indices/rank151.tsv.gz b/data-indices/rank151.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..db0767902cf5f2046f58f48163fbaababcf3e140 --- /dev/null +++ b/data-indices/rank151.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa8a32631c7c71e085cfe1e2ed49a98eaaf7492409ed661a3c03ea23e8426748 +size 266 diff --git a/data-indices/rank153.tsv.gz b/data-indices/rank153.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..d1616b5908b3aacd6f3605e493b07a349c28c955 --- /dev/null +++ b/data-indices/rank153.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:152b2c2342a01e86e3fa7efcb9a14a38ebe0cf6201f09e05e5065adf8b49365a +size 264 diff --git a/data-indices/rank155.tsv.gz b/data-indices/rank155.tsv.gz new file mode 100644 index 
0000000000000000000000000000000000000000..6a45bafb027f126702cf5a424d163e1ae3977d37 --- /dev/null +++ b/data-indices/rank155.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5385bf0f9f76417653296bfe0c2f93160d784fb2627c105f187f7bb87058d482 +size 266 diff --git a/data-indices/rank157.tsv.gz b/data-indices/rank157.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..e6671da02463bb6c40eeae086f89493335ade294 --- /dev/null +++ b/data-indices/rank157.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2c4131154b273540e51e2d024f0e8e8282f6181abf3ce9f636b20b6e28ebf99 +size 265 diff --git a/data-indices/rank158.tsv.gz b/data-indices/rank158.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..67cfba002741feb6b38d4ea350c61f9000185aed --- /dev/null +++ b/data-indices/rank158.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:432a5e3415ec17b9a80e9c01a372457fc27a91259885537c466057a40fbce1af +size 264 diff --git a/data-indices/rank16.tsv.gz b/data-indices/rank16.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..66b31a9a246a926eecdef555e6f918e0a9be1695 --- /dev/null +++ b/data-indices/rank16.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:261ce636493b2723a994bb2e711a76cad9bad08a4a84bbb73805e9727678c646 +size 267 diff --git a/data-indices/rank160.tsv.gz b/data-indices/rank160.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..1a4ba019da1ccfafbcde9e63db9d0741d5059d31 --- /dev/null +++ b/data-indices/rank160.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b81eded14f83b3691f313e84ea9b181ef0472a77f8175f631216b2caf27819b +size 261 diff --git a/data-indices/rank162.tsv.gz b/data-indices/rank162.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae2d4f4a6f3de32774f958ced81c51459b1770ea --- /dev/null +++ 
b/data-indices/rank162.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7784670bd5859738bc679fe9086c6b88b7160a53379ee81f0f02f4cc61c783f +size 265 diff --git a/data-indices/rank164.tsv.gz b/data-indices/rank164.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..f382b0ad6a165f953ac3c7a204cc0774e7e59e5b --- /dev/null +++ b/data-indices/rank164.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f97903e47431e4003ce959d84077acd420a595d2ddaf797f21ed7d3e0396ff4 +size 265 diff --git a/data-indices/rank166.tsv.gz b/data-indices/rank166.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..e814358691b03c118cc993bd38f859eef3c8ef24 --- /dev/null +++ b/data-indices/rank166.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4fcb0f11356b785f5b3cbf91351d9a4970c45e049bf65f25d20da9a6b3849a6 +size 264 diff --git a/data-indices/rank169.tsv.gz b/data-indices/rank169.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..de297b804d925a76ab706d97eeefa24ab906e84b --- /dev/null +++ b/data-indices/rank169.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf0c9f1ecaba1c49e0dd1593de1bd49a7dfa819362786f12865b6cca710b067d +size 268 diff --git a/data-indices/rank170.tsv.gz b/data-indices/rank170.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..b9b3ee9f26a9c587e2252412c1b61d122c463af2 --- /dev/null +++ b/data-indices/rank170.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adaaef928448bddf22255529006985bdcf616883d6ca460db0c02999aeeb8fc0 +size 265 diff --git a/data-indices/rank172.tsv.gz b/data-indices/rank172.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..542fc9a59a42af5fff72ce8e835e695c8d0b402c --- /dev/null +++ b/data-indices/rank172.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:54bff5c760e5a79e8d8b8ad411894d41e9c9f9c6e1def7435d66024632eb52f3 +size 263 diff --git a/data-indices/rank173.tsv.gz b/data-indices/rank173.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..8de04e0aca3bce4e8fcd502fc741c2c8e4893b45 --- /dev/null +++ b/data-indices/rank173.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00929dfe665250dd4563cc3259ddfe210b8ec14a269767d198b4416286023e47 +size 265 diff --git a/data-indices/rank174.tsv.gz b/data-indices/rank174.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f2ae6f1175ae1d66c993d7442612f845aba15d1 --- /dev/null +++ b/data-indices/rank174.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdc6c69f0395fce0a3971135167384b320204daf0a800fce1959e9fbdda05c61 +size 264 diff --git a/data-indices/rank176.tsv.gz b/data-indices/rank176.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f8b5738b0ecaf4c241116bf3829b4d766855c51 --- /dev/null +++ b/data-indices/rank176.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97876411612159f0dbb6a0159dde9ef898625db9d4fcf96566aacca4b712c87e +size 265 diff --git a/data-indices/rank179.tsv.gz b/data-indices/rank179.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3ef9d78fe1f889c4a5a27ed9797e5b34d9ebb05 --- /dev/null +++ b/data-indices/rank179.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d010937169af73d2229e1941c52a6e6204fbb48deff7a8a640c58511288f7865 +size 263 diff --git a/data-indices/rank181.tsv.gz b/data-indices/rank181.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..4d14e6cbb7bd960df6989ac51d3cc67c4b093346 --- /dev/null +++ b/data-indices/rank181.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b804b37e2f2af67cd547dcce436a8289e148d935fa39d52f54478937acc19e9 +size 262 diff --git 
a/data-indices/rank183.tsv.gz b/data-indices/rank183.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..bd5f64f096697f9e02405cea4d5d4a2c5bed3509 --- /dev/null +++ b/data-indices/rank183.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca943b845d23f3df21bf289447cf1fabbb4f25aeaac680efc4863dc531637dda +size 266 diff --git a/data-indices/rank186.tsv.gz b/data-indices/rank186.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..15034dc0541b2c5e3af3aa7dacd00e4e24ebe670 --- /dev/null +++ b/data-indices/rank186.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a92081e2311e178fca885f33ae2ea3bb55e1c961978fabf16b3bd023ed0684e +size 263 diff --git a/data-indices/rank187.tsv.gz b/data-indices/rank187.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..7f9cffc2c9ee175cd06eda3623349352fe88ffbd --- /dev/null +++ b/data-indices/rank187.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f82620af70ba6e39190dff225210c9a1884c1e977e17af2875f432de84a7916 +size 266 diff --git a/data-indices/rank188.tsv.gz b/data-indices/rank188.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..9c4272846baa45f034e5fe6b3797569db3abd46f --- /dev/null +++ b/data-indices/rank188.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d172b5694980adab1e4ced2e7bede17b9cac23629d32144e90674b6d33b7781d +size 264 diff --git a/data-indices/rank19.tsv.gz b/data-indices/rank19.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f9dd6c6e56449a948e0911213b7bc5f0c372202 --- /dev/null +++ b/data-indices/rank19.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89120300b61438ec12b4166ba4167aa40cea900e924ad17aa4f2809f77bd3ada +size 267 diff --git a/data-indices/rank192.tsv.gz b/data-indices/rank192.tsv.gz new file mode 100644 index 
0000000000000000000000000000000000000000..e1b48f5bd1981dc2b6f969ef3aeed6b1e6dc3159 --- /dev/null +++ b/data-indices/rank192.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03ca93f172642101f31c9c9f58bf3420060a7f9a25e9cc1f744f694e0220c309 +size 264 diff --git a/data-indices/rank193.tsv.gz b/data-indices/rank193.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..ce9c23cf18f89565fa72f752161dd0880937e1c7 --- /dev/null +++ b/data-indices/rank193.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b046b06844e9b5d10ffffa1f6653e6a5dbbd931aa2e1beea57c0c85cb86ae25 +size 268 diff --git a/data-indices/rank195.tsv.gz b/data-indices/rank195.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..1804d5ab8c99d157d87a9f541d1e050cf578d46a --- /dev/null +++ b/data-indices/rank195.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1104c34a60b2b2ef71e3eb6fad5d4dc03c5e5e494a994e994c93b08ee7942be1 +size 265 diff --git a/data-indices/rank197.tsv.gz b/data-indices/rank197.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..075351ef0849b9fec91ea06a308a160bf3550e27 --- /dev/null +++ b/data-indices/rank197.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a02a1ba1bb9e9fa133bb67ac02c6ffa675df38d6361cba5e8c2e217a3ec0443b +size 262 diff --git a/data-indices/rank198.tsv.gz b/data-indices/rank198.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..3b29456ee556a01e0bcbc0956cb17d85930ca7bf --- /dev/null +++ b/data-indices/rank198.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef906d811a4825a09a85013fc642b402924bd81d691a8be2b2c9e7ca5d7c1571 +size 263 diff --git a/data-indices/rank200.tsv.gz b/data-indices/rank200.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..a6cf44ea401fad3009740f8eb1d40e9911d1f3c6 --- /dev/null +++ 
b/data-indices/rank200.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:098d862523bdb4ca56bec0d4d4c97da5d83681c4c793e3635f1f8dbd4e582cd8 +size 267 diff --git a/data-indices/rank202.tsv.gz b/data-indices/rank202.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..c999db77fc077756a3ec39fd65d274ed98197ee4 --- /dev/null +++ b/data-indices/rank202.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0918ead967bc98f8ff91b8bbede535263b27b952456d4a1c76d66ed24ddc9ebe +size 262 diff --git a/data-indices/rank205.tsv.gz b/data-indices/rank205.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..1787e323760d1a02524e6a841b51d7142bf09f64 --- /dev/null +++ b/data-indices/rank205.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03263cf88e44f85b6a755c611456bad14b4f4557b10e085367931490208a7698 +size 264 diff --git a/data-indices/rank206.tsv.gz b/data-indices/rank206.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..08b763ab9b160ebe1eb7139228b4d27fe1f363a1 --- /dev/null +++ b/data-indices/rank206.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a817395403249977ec3a011ee7072da014ae189b8ea12b2e303110548338bf2 +size 265 diff --git a/data-indices/rank209.tsv.gz b/data-indices/rank209.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..fa1ae80a4002ef363608ea4729398b5a9bc0bbbf --- /dev/null +++ b/data-indices/rank209.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df3d484b9ace416209b4f17b72fa4fc859e687a4bda90f031079d514c942ab5b +size 262 diff --git a/data-indices/rank21.tsv.gz b/data-indices/rank21.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..07e8f6d144e6c6c8f883bbeccbef0b7be9f57286 --- /dev/null +++ b/data-indices/rank21.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:d837edee4799e8b736c32724033e59a101762913fed1dc46f0f37b0a2ac4c5df +size 261 diff --git a/data-indices/rank210.tsv.gz b/data-indices/rank210.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a529335c3f1dc38a0511c08600b0a5ef09d4ac7 --- /dev/null +++ b/data-indices/rank210.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f8c1c34abf7acf829b010f040734a417aafb6826e7e3b9bd511ac16c33812d9 +size 266 diff --git a/data-indices/rank214.tsv.gz b/data-indices/rank214.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..003dae355edbd357ab7cb20f9151129be42906fb --- /dev/null +++ b/data-indices/rank214.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c9b5c72d7efeabd8086d05550f30cf0b547b7e6d796d63112e6709592b40d42 +size 268 diff --git a/data-indices/rank216.tsv.gz b/data-indices/rank216.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..059626b0e16960d076eb5bebf407c0e5dee29bef --- /dev/null +++ b/data-indices/rank216.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:803a6baac9bf2d325a09c361ea5f03712d8a713e91a9baf330b85adaf06566da +size 265 diff --git a/data-indices/rank219.tsv.gz b/data-indices/rank219.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..0ec4839ad266772a5280772482e284eb634244c4 --- /dev/null +++ b/data-indices/rank219.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fac094a8c41eebe193bd4915933e5c5b7edfaf59a64bde00763fa146a1474492 +size 265 diff --git a/data-indices/rank221.tsv.gz b/data-indices/rank221.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..c26c5e353cdbd731f9212936b188e9f7a0a9197c --- /dev/null +++ b/data-indices/rank221.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e816856f4026fbe8adccb24b6eb04dc1a8d57babc362b9708b6e941e2699cc9b +size 264 diff --git 
a/data-indices/rank223.tsv.gz b/data-indices/rank223.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d68285fbe712cf87e9963d170cffa189214ba56 --- /dev/null +++ b/data-indices/rank223.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfa5bb8b8ae35d04dd6288b041df55fbbef50433290eb58e4f2d714f1c9909fb +size 263 diff --git a/data-indices/rank225.tsv.gz b/data-indices/rank225.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..154c7986f449454b50e8a232ac0699db42435a36 --- /dev/null +++ b/data-indices/rank225.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:837676a48f0684e160eb8ea4e8b2bbbd0a8d1f837ba5c10ea45da29d43dc4f03 +size 267 diff --git a/data-indices/rank227.tsv.gz b/data-indices/rank227.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..88787ab4eb38cbd447593e77ef1e5dd334e5be0c --- /dev/null +++ b/data-indices/rank227.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2e3594c54ed36cbeb8ad4d0ee8d5219b7adcc83ad375c5a120c93ae02ffa8ea +size 265 diff --git a/data-indices/rank228.tsv.gz b/data-indices/rank228.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..455af3038764dd0d17f39837a2444adf0ed567a7 --- /dev/null +++ b/data-indices/rank228.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89ecbc6080129f7ef6329693dfb4d437138c33e5d6f66f057ba45c5dbafd925b +size 267 diff --git a/data-indices/rank23.tsv.gz b/data-indices/rank23.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..779294ba86fac7822be817c569a17129f7c3cc98 --- /dev/null +++ b/data-indices/rank23.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36c497a2ed01ecf46c8111f054c91f5cea93008db06a580336496ccd8e181fe8 +size 266 diff --git a/data-indices/rank231.tsv.gz b/data-indices/rank231.tsv.gz new file mode 100644 index 
0000000000000000000000000000000000000000..086370e46cac983c88cbb4db06701e7b416591ed --- /dev/null +++ b/data-indices/rank231.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4164652ad61eb2102035b11a59084a3c71d68a82f27a8d292fc730f4b9e6e324 +size 264 diff --git a/data-indices/rank233.tsv.gz b/data-indices/rank233.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..af385ae110444359591b1aa322aabd925d0a3963 --- /dev/null +++ b/data-indices/rank233.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:830a7d47d5842a1462a22597574d774ff9d4073c27693873587be7de1a45fb9e +size 267 diff --git a/data-indices/rank235.tsv.gz b/data-indices/rank235.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..39ec95f7b3aa4ab46c0ea5d6f6e81cd570f2d1aa --- /dev/null +++ b/data-indices/rank235.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6eeaebd14c1125fe41f593e2bec8b3d07d3db426d7ccb1c0497a25e3c674b102 +size 266 diff --git a/data-indices/rank237.tsv.gz b/data-indices/rank237.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..d5d833182b9a89e6013cb2a0b82ae51f23232712 --- /dev/null +++ b/data-indices/rank237.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63eb352606ee6aa16c145c9c7b0cfc228ce5847769c0c6bd90d5519cfb8d917d +size 264 diff --git a/data-indices/rank238.tsv.gz b/data-indices/rank238.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff4a2d385a9d0ae9a0cd499f7c369d444daa236b --- /dev/null +++ b/data-indices/rank238.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fff9e7637ef7cf16f6fa1ee88e7b6c695ed4ac1adeb62039d5185e691f34385d +size 268 diff --git a/data-indices/rank240.tsv.gz b/data-indices/rank240.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..74e5a57aeea58a7aa1c5dff0357775fdd8bed0e9 --- /dev/null +++ 
b/data-indices/rank240.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eda56dc4a05d2237b0546a994df0fdc57b808700eb308d3a24a89c2bcf415538 +size 263 diff --git a/data-indices/rank242.tsv.gz b/data-indices/rank242.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..2f96715e225cffc416f5b6cfa2f8d7790175a6d4 --- /dev/null +++ b/data-indices/rank242.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:092e5fe16cb2d64ab98b9e6f6cabd1e6cce37001cef95e1ad023aea2acab2317 +size 262 diff --git a/data-indices/rank245.tsv.gz b/data-indices/rank245.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..de0f4c4364a81167c3ff3011a43206e964f0b41e --- /dev/null +++ b/data-indices/rank245.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1866d76e59c25c7801144d4cc24e7d2272914995c7547608e1dbb80117ff4149 +size 266 diff --git a/data-indices/rank246.tsv.gz b/data-indices/rank246.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..9df8f6e7738c05658deb8e2eb0aa69039e1412f4 --- /dev/null +++ b/data-indices/rank246.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0cee3407c24ed757701325c40f2dfe1ca84a448b8f867d151713ff466bdbf57 +size 269 diff --git a/data-indices/rank247.tsv.gz b/data-indices/rank247.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..9cc9b1a80f4ee42233038976187f16018298daf7 --- /dev/null +++ b/data-indices/rank247.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85d019a256e065aed257b048a9c3406f33c7257647ee00f1525ac94a155e1037 +size 267 diff --git a/data-indices/rank249.tsv.gz b/data-indices/rank249.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..af1e07899135f5efec6055f52508b044c22e22d6 --- /dev/null +++ b/data-indices/rank249.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:aa5ee70d55676981564b370d2413291f669321e2fe6b514f5a706662fa3c4730 +size 265 diff --git a/data-indices/rank25.tsv.gz b/data-indices/rank25.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..9fa6da23f6db6647a8a740b47cb5e390183289e2 --- /dev/null +++ b/data-indices/rank25.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d481315913603c0de366116995985a4402ad5a3ebcf3d47f1828a9efdf4d4d16 +size 263 diff --git a/data-indices/rank250.tsv.gz b/data-indices/rank250.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..a707db5fc0171d4f72293db6a00131ae9ca6838a --- /dev/null +++ b/data-indices/rank250.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fe721cd130609b3c8a50fe3929440cb63402915b58db59377180653ae0b6388 +size 265 diff --git a/data-indices/rank252.tsv.gz b/data-indices/rank252.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..077d885be8fb6fa7349a40eedec486bc0d2f7423 --- /dev/null +++ b/data-indices/rank252.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a776eda2e5f465563be3d9b6b58f2833a10296c6fe7eea121c6e9c00a2743960 +size 261 diff --git a/data-indices/rank254.tsv.gz b/data-indices/rank254.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..b1aa7871c5351e642c5b000ba0f2cf806b5d4d38 --- /dev/null +++ b/data-indices/rank254.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:955f684acdcc25a047bf3fb659e21c6e1492194b8d6d2366a66a811c355221a2 +size 263 diff --git a/data-indices/rank27.tsv.gz b/data-indices/rank27.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..c74315dadfb83686a3d09196babf6c105df47cc9 --- /dev/null +++ b/data-indices/rank27.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e46e4eaf3a1d0d04fd5a653b77a31ad95b60bf3c7fcc6254cb785dca767fedd2 +size 260 diff --git a/data-indices/rank28.tsv.gz 
b/data-indices/rank28.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..76b799a3e85e0678c201828373bad5828e4387d7 --- /dev/null +++ b/data-indices/rank28.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:014d315cb031ce4b3adb656f6c468f6787b9ac6ec4692149c38f5ce99d2d9e0d +size 262 diff --git a/data-indices/rank3.tsv.gz b/data-indices/rank3.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..310ff122de3ecc8b220a6ccb46c717962348f1a3 --- /dev/null +++ b/data-indices/rank3.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3f14def8eb4ba464b321cc2912eb1e4a3010d03a4a78fc1a9abd3c66ae64df7 +size 262 diff --git a/data-indices/rank31.tsv.gz b/data-indices/rank31.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d845b598b9912436c6af75192653d3637e06952 --- /dev/null +++ b/data-indices/rank31.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:899439ead5fb232f17091f207e7480f35279306554f40d57e636da38bd79ad34 +size 264 diff --git a/data-indices/rank33.tsv.gz b/data-indices/rank33.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..92c270811e53abc46dd9d013258c8d3337d1b9cf --- /dev/null +++ b/data-indices/rank33.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96d23cdced55f16c6d997803579c3222be9616104d21fd79f7ed0bdfea8e3e47 +size 263 diff --git a/data-indices/rank35.tsv.gz b/data-indices/rank35.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..11c0fc5a12bfa18027ae51d464bf66147aadfc8d --- /dev/null +++ b/data-indices/rank35.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:997a50ae68cc421d5d1a050bef86c0f895c3e714506ac2dcf404c8ab28252929 +size 266 diff --git a/data-indices/rank38.tsv.gz b/data-indices/rank38.tsv.gz new file mode 100644 index 
0000000000000000000000000000000000000000..9580bddc17c131596319417d3fa767b27965922a --- /dev/null +++ b/data-indices/rank38.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c151eb42b65e3172dd1450fa7d3ee63636c963c9ace4d46035dc3a8db7e87380 +size 266 diff --git a/data-indices/rank39.tsv.gz b/data-indices/rank39.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..02f13f43ae11014962a0b1154e4b24e1e920fd24 --- /dev/null +++ b/data-indices/rank39.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:766749d3ba2df0cbe73abcbe1c1107b66c96c1212d6f93bf9cc624603b748f52 +size 262 diff --git a/data-indices/rank40.tsv.gz b/data-indices/rank40.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..a1894c1a4ba831dcfc624d3605a740f5b18c6213 --- /dev/null +++ b/data-indices/rank40.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be9b4036c0ad228f2a9e1c270a9bcbe3697991306ce5d8fa99f5e0292e469bdb +size 264 diff --git a/data-indices/rank42.tsv.gz b/data-indices/rank42.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..f07e1d53f7ff69303ff2345fe3c4ad945af99816 --- /dev/null +++ b/data-indices/rank42.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0340c8a3e6c32844b1b9a566c43dfe2c03054a9f2793f43761889d69f4246b34 +size 262 diff --git a/data-indices/rank44.tsv.gz b/data-indices/rank44.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..ffc11fb19c68ac25509c18239b4e0cfbc511ad11 --- /dev/null +++ b/data-indices/rank44.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c0ce089fa4b4c10fa8ecc1e148e2aefbd7458b2f155153aae7026e1a9b1cc6c +size 268 diff --git a/data-indices/rank46.tsv.gz b/data-indices/rank46.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..61be229f7b180028733b45f3b9367ddf2147b79d --- /dev/null +++ b/data-indices/rank46.tsv.gz @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:498764efa8fb2d5821b93f6f720b1e0c4db4f9c44cf6e8751ce5cd32e7404652 +size 259 diff --git a/data-indices/rank49.tsv.gz b/data-indices/rank49.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..763830e3b2ada5d722af8786d8d8ca1cf5dd81e1 --- /dev/null +++ b/data-indices/rank49.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ad4ceba40f2c8e26a285cadfbea64a6773945d5a557a2d07190b1c564cda764 +size 267 diff --git a/data-indices/rank5.tsv.gz b/data-indices/rank5.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..17ef7675127f564cecf712739850af4b1877f4f9 --- /dev/null +++ b/data-indices/rank5.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84b654df91db59fc081ba2de8f91f6f9b735dc50bdfc338765ba1677bc1fc4a3 +size 261 diff --git a/data-indices/rank50.tsv.gz b/data-indices/rank50.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..0bbc890eb9de4b18611f6300b2eb250abf67ac49 --- /dev/null +++ b/data-indices/rank50.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:395492de00257190a35577d5d7ac0b9f6eba382ed21462964091ccdd37d0df78 +size 261 diff --git a/data-indices/rank52.tsv.gz b/data-indices/rank52.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..e3e33837b4fa7165af98fd55798097e2fd430ace --- /dev/null +++ b/data-indices/rank52.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e85ed3be0a3adaf6384dd378ce92e2973c9f93b5332937d7b18d415239e5e70 +size 264 diff --git a/data-indices/rank54.tsv.gz b/data-indices/rank54.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..d6194e9e2a43b19dbd2b99354958bc28aac025e2 --- /dev/null +++ b/data-indices/rank54.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ac9a273282ceeac08c159daef15a6ea9def65d192e0f8a318916669e194a0ce 
+size 263 diff --git a/data-indices/rank56.tsv.gz b/data-indices/rank56.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..0acf237761b0668332381b2fcda7957877f0fbf6 --- /dev/null +++ b/data-indices/rank56.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35e27589b7f5040154b18fe22508d009647336479536ec7addce3c0e847cd31a +size 267 diff --git a/data-indices/rank57.tsv.gz b/data-indices/rank57.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..7f4441170b2c086833c9ccbc7ef1af24e85bcbc8 --- /dev/null +++ b/data-indices/rank57.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d385b54e4f486405d128153d5775d33648698820ade6a0299b200183f27723d +size 262 diff --git a/data-indices/rank59.tsv.gz b/data-indices/rank59.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..56d2869507e396e2ac766623647acc15fe8977c0 --- /dev/null +++ b/data-indices/rank59.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dc30d095b667ca18816dd650f7e430e207cba99ee996b5cdb83cd30b10d2474 +size 265 diff --git a/data-indices/rank61.tsv.gz b/data-indices/rank61.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d48ba71cdb9319eb289a246ee32ce74fe2cc4b0 --- /dev/null +++ b/data-indices/rank61.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f712dcd71649b57e0f8606462fd4ab67fc19836bcfa92ce9734e3a6daab35a7 +size 264 diff --git a/data-indices/rank63.tsv.gz b/data-indices/rank63.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..5dec34025208a3a85bb81e9f060925cb752a5992 --- /dev/null +++ b/data-indices/rank63.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d83c243e463361f186f79d94fd3771435d1d61e22a8510b27561c54cf0b8e4f1 +size 262 diff --git a/data-indices/rank65.tsv.gz b/data-indices/rank65.tsv.gz new file mode 100644 index 
0000000000000000000000000000000000000000..e5aaab7d6eaa8afd3c3ffa312c0ff82ba6491789 --- /dev/null +++ b/data-indices/rank65.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5958a19ee91ebeb745c2084798223d35335136d7b8e719282f0864e2a7b6e84 +size 264 diff --git a/data-indices/rank67.tsv.gz b/data-indices/rank67.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b56000562d6ca97e56f7005d00086a8070345fc --- /dev/null +++ b/data-indices/rank67.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99ae99a7c056848826cf5f92faa9fb075bab4ce9e372f1d79a6d48c2ee0ed4a3 +size 266 diff --git a/data-indices/rank68.tsv.gz b/data-indices/rank68.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..190ec9dae2abf9c17672fb26b73c923d7f86b014 --- /dev/null +++ b/data-indices/rank68.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7b384b965bb31b32819ada7bfb7454dd7de97e1375e150e4eb4f8d0b33379f5 +size 266 diff --git a/data-indices/rank7.tsv.gz b/data-indices/rank7.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..743ee3ab85462afaf0192703d641ca80752e769d --- /dev/null +++ b/data-indices/rank7.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b4a40ba55b77d35e4868035528a9ef51aca46e7b39e4551c5f55db3d2351ed5 +size 268 diff --git a/data-indices/rank71.tsv.gz b/data-indices/rank71.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..4921ebf7dcd1e4cbc41f730a536c23be4b5594fa --- /dev/null +++ b/data-indices/rank71.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54b7173d2639f4413bff66fd91eb345c35fb781f837cce5262f0ba73d8a4c379 +size 264 diff --git a/data-indices/rank73.tsv.gz b/data-indices/rank73.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..394858a33b65f5f1848461ff7ea7d11a4f34c2df --- /dev/null +++ b/data-indices/rank73.tsv.gz @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2541e5f170dfdfbe4ef2abefef7985df9959ba452bdfe7bc4845e7dc22dbfa98 +size 262 diff --git a/data-indices/rank75.tsv.gz b/data-indices/rank75.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..5d69b98859472e271b08b2d251690cfc26468f7a --- /dev/null +++ b/data-indices/rank75.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3589b89e13a189b789d3562acccdb9a831b6c3abb64d054abea954c99747a0b +size 266 diff --git a/data-indices/rank77.tsv.gz b/data-indices/rank77.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1f92a3e878daf28ca699f7ea6f113185fb1f10b --- /dev/null +++ b/data-indices/rank77.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fc479c884b8b7242a7431d4a45db34de972c555d0970a21702c065e6054f31c +size 266 diff --git a/data-indices/rank78.tsv.gz b/data-indices/rank78.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..86e731bcfcc81e168ca46ca849dd39bc5748552a --- /dev/null +++ b/data-indices/rank78.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5841be16f864e23c8c67a4628bfc430a421314ca5eb8204a8f0ebbda7a3f8ec0 +size 267 diff --git a/data-indices/rank79.tsv.gz b/data-indices/rank79.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..36b9dd1e9d53c315a94cd48785e169f27aa33416 --- /dev/null +++ b/data-indices/rank79.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6feee3ac88f98130bb8b3714eceb0e5d5954d86884f83216347c0c95faabb7f4 +size 266 diff --git a/data-indices/rank8.tsv.gz b/data-indices/rank8.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..3592a2fbb0fbfcf1c4451d5bd8f7c151aa24317f --- /dev/null +++ b/data-indices/rank8.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e5dc0d3430d4d3e602a0ba39190cde8463dd526e80f687e62538b07dd9f2802 +size 
264 diff --git a/data-indices/rank80.tsv.gz b/data-indices/rank80.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..e09233a46a9575b747713613132de3fb198edb61 --- /dev/null +++ b/data-indices/rank80.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e94e9cdf2894ec935fcf027e9a8881f76ac0e144c3af10c49fb21883a74475b +size 265 diff --git a/data-indices/rank82.tsv.gz b/data-indices/rank82.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..f63d3b6e3b0ed924213107f9621eb82f73963465 --- /dev/null +++ b/data-indices/rank82.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1113d32ad3620a042e0780874606bbbc0839b09120b1e265182fce3d272bb825 +size 262 diff --git a/data-indices/rank83.tsv.gz b/data-indices/rank83.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..bb45a1b7618ab99f1406e969f11bb9cb1a25e5c0 --- /dev/null +++ b/data-indices/rank83.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62a72dc558624dad2d0181274e5b98f372e435e869c72f7d40c64d0a8bbe14b1 +size 263 diff --git a/data-indices/rank84.tsv.gz b/data-indices/rank84.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..3e07805fe3c7a89b4b96e805c6ff8bd1292dc9c1 --- /dev/null +++ b/data-indices/rank84.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c156606fe1ffba03ffcea1e203a354ba34f2256becd9e94b291a1572d9283c6 +size 265 diff --git a/data-indices/rank86.tsv.gz b/data-indices/rank86.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..268d033e444465d932fbb50bc38ea91b76bfa2db --- /dev/null +++ b/data-indices/rank86.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbae3d1f550f37edad954e186bea67f8e0241205905553ce5c3655660257bb05 +size 262 diff --git a/data-indices/rank89.tsv.gz b/data-indices/rank89.tsv.gz new file mode 100644 index 
0000000000000000000000000000000000000000..d0e91a13f2b1b9dbff97d0c12865b6cc6c017edb --- /dev/null +++ b/data-indices/rank89.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0948aa4a0f2714ee7cf39d6d11a63067e10e09de1c8083110c98f9cabf6b92aa +size 265 diff --git a/data-indices/rank90.tsv.gz b/data-indices/rank90.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..6d021e212640afe39b84e8ec2f7948158ebaae6d --- /dev/null +++ b/data-indices/rank90.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:095c16422dff7f75ad2728362d64c101637c208417e7aaa94ae6e51f2b50fe63 +size 267 diff --git a/data-indices/rank92.tsv.gz b/data-indices/rank92.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..80d6cffe70507f1a18323b5f83cdfc052450cf71 --- /dev/null +++ b/data-indices/rank92.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9291465ebe05b01583e40d056d8e63722ee5866c86326338fd739995aaf378f +size 266 diff --git a/data-indices/rank94.tsv.gz b/data-indices/rank94.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ca01095e1757996ce93c8c97ec51ab6fd944cd2 --- /dev/null +++ b/data-indices/rank94.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae1061e0314e4520b8a2cb3903518f58d4a618cdbfb75b447291ae494dfc16b7 +size 263 diff --git a/data-indices/rank97.tsv.gz b/data-indices/rank97.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ecba84553359591be192f342d1a825a4e9d28db --- /dev/null +++ b/data-indices/rank97.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:762fcc98080b594ff1fe0ff0926f9a0b9b719d8df8b0d75c8f6c83cf52a0ddb7 +size 265 diff --git a/data-indices/rank98.tsv.gz b/data-indices/rank98.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1f557d320b99a2191db2047ec87e96fa47d0ed4 --- /dev/null +++ b/data-indices/rank98.tsv.gz @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22c55fa0289bcdf673e992b83d2af5abaa4ea0b38334fdaf10834ee17c4e2c5b +size 264 diff --git a/data-indices/rank99.tsv.gz b/data-indices/rank99.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa833d9174165aa0bbc280c6a9758b6bc76ffc0d --- /dev/null +++ b/data-indices/rank99.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37d3c385f777456adf7e63ae40469b4ed352c13de85ba770182f75abdb6e9dfb +size 264 diff --git a/step10000-unsharded/config.yaml b/step10000-unsharded/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..864b06e6e4668656681fcd16dd315390cbb15e54 --- /dev/null +++ b/step10000-unsharded/config.yaml @@ -0,0 +1,465 @@ +run_name: OLMo-190M-17values +seed: 6198 +epoch: null +dry_run: false +model: + d_model: 768 + n_heads: 12 + n_kv_heads: null + clip_qkv: null + n_layers: 16 + mlp_ratio: 4 + mlp_hidden_size: 4096 + activation_type: swiglu + block_type: sequential + block_group_size: 1 + alibi: false + alibi_bias_max: 8.0 + rope: true + rope_full_precision: true + rope_theta: 10000 + flash_attention: false + attention_dropout: 0.0 + multi_query_attention: false + attention_layer_norm: false + residual_dropout: 0.0 + embedding_dropout: 0.0 + embedding_layer_norm: false + layer_norm_type: rms + layer_norm_with_affine: true + layer_norm_eps: 1.0e-05 + attention_layer_norm_with_affine: false + max_sequence_length: 2048 + include_bias: false + bias_for_layer_norm: false + scale_logits: false + vocab_size: 50277 + embedding_size: 50304 + weight_tying: false + eos_token_id: 50277 + pad_token_id: 1 + init_device: meta + init_fn: mitchell + init_std: 0.02 + init_cutoff_factor: null + precision: amp_bf16 + scale_emb_init: false + emb_init_std: null + norm_after: false + linear_type: values17 + num_trilm_matrix_scales: 1 +optimizer: + name: adamw + learning_rate: 0.0004 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + eps: 1.0e-05 
+ no_decay_norm_and_bias: null + selective_updates: false + decay_norm_and_bias: false + decay_embeddings: false + metrics_log_interval: 10 + record_update_metrics: false +scheduler: + name: cosine_with_warmup + units: steps + t_warmup: 375 + t_max: null + alpha_f: 0.1 + grad_clip_warmup_steps: null + grad_clip_warmup_factor: null + warmup_min_lr: null + remove_weight_decay_in_second_half: false +data: + paths: + - ../slimp/train/all_combined/combined_1/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_1/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_2/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_2/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_3/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_3/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_4/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_4/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_5/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_6/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_6/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_7/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_7/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_8/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_8/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_9/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_10/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_10/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + memmap_dtype: uint16 + datasets: null + label_mask_paths: null + pad_direction: right + generate_attention_mask: false + generate_doc_lengths: false + num_workers: 0 + drop_last: true + pin_memory: true + prefetch_factor: 16 + persistent_workers: true + timeout: 0 + seed: null + instance_filter: null +restore_dataloader: true +fast_forward_batches: null +evaluators: [] +eval_interval: 500 +tokenizer: + identifier: ../spectra_tokenizer/tokenizer.json + truncate_direction: right +save_folder: checkpoints/olmo-190M-17values/ 
+remote_save_folder: null +canceled_check_interval: 50 +save_interval: 500 +save_interval_unsharded: 10000 +save_interval_ephemeral: null +save_num_checkpoints_to_keep: 4 +save_num_unsharded_checkpoints_to_keep: -1 +save_overwrite: true +force_save_unsharded: false +no_pre_train_checkpoint: false +load_path: null +load_path_sharded_checkpointer: null +try_load_latest_save: false +reset_optimizer_state: false +reset_trainer_state: false +sharded_checkpointer: torch_legacy +new_style_checkpoints: null +max_duration: 150000 +global_train_batch_size: 1024 +device_train_batch_size: 4 +device_train_microbatch_size: 4 +device_eval_batch_size: 4 +eval_subset_num_batches: -1 +eval_on_load: false +device_train_grad_accum: 1 +max_grad_norm: 1.0 +max_grad_norm_ratio: null +precision: amp_bf16 +wandb: null +speed_monitor: + window_size: 20 + gpu_flops_available: null +console_log_interval: 1 +gen1_gc_interval: 1 +compile: null +distributed_strategy: fsdp +fsdp: + use_orig_params: true + sharding_strategy: _HYBRID_SHARD_ZERO2 + wrapping_strategy: null + precision: pure + hybrid_sharding_num_model_replicas: null +ddp: null +softmax_auxiliary_loss: false +auxiliary_loss_multiplier: 0.0001 +time_limit: null +extra_steps_after_cancel: 10 +early_stopping_factor: null +save_data_indices: true +python_profiling: false +torch_profiling: false +stop_at: 150010 +stop_after: null +activation_checkpointing: null +fused_loss: null +hf_datasets_cache_dir: null +module_outputs_save_steps: null diff --git a/step110000-unsharded/config.yaml b/step110000-unsharded/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..962355ac378c4f394b429fbf56cba8f5fa512e96 --- /dev/null +++ b/step110000-unsharded/config.yaml @@ -0,0 +1,465 @@ +run_name: OLMo-190M-17values +seed: 6198 +epoch: null +dry_run: false +model: + d_model: 768 + n_heads: 12 + n_kv_heads: null + clip_qkv: null + n_layers: 16 + mlp_ratio: 4 + mlp_hidden_size: 4096 + activation_type: swiglu + block_type: 
sequential + block_group_size: 1 + alibi: false + alibi_bias_max: 8.0 + rope: true + rope_full_precision: true + rope_theta: 10000 + flash_attention: false + attention_dropout: 0.0 + multi_query_attention: false + attention_layer_norm: false + residual_dropout: 0.0 + embedding_dropout: 0.0 + embedding_layer_norm: false + layer_norm_type: rms + layer_norm_with_affine: true + layer_norm_eps: 1.0e-05 + attention_layer_norm_with_affine: false + max_sequence_length: 2048 + include_bias: false + bias_for_layer_norm: false + scale_logits: false + vocab_size: 50277 + embedding_size: 50304 + weight_tying: false + eos_token_id: 50277 + pad_token_id: 1 + init_device: meta + init_fn: mitchell + init_std: 0.02 + init_cutoff_factor: null + precision: amp_bf16 + scale_emb_init: false + emb_init_std: null + norm_after: false + linear_type: values17 + num_trilm_matrix_scales: 1 +optimizer: + name: adamw + learning_rate: 0.0004 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + eps: 1.0e-05 + no_decay_norm_and_bias: null + selective_updates: false + decay_norm_and_bias: false + decay_embeddings: false + metrics_log_interval: 10 + record_update_metrics: false +scheduler: + name: cosine_with_warmup + units: steps + t_warmup: 375 + t_max: null + alpha_f: 0.1 + grad_clip_warmup_steps: null + grad_clip_warmup_factor: null + warmup_min_lr: null + remove_weight_decay_in_second_half: false +data: + paths: + - ../slimp/train/all_combined/combined_1/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_1/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_1/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_2/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_2/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_3/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_4/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_4/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_5/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_5/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_6/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_6/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_7/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_8/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_8/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_9/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_9/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_10/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_10/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + memmap_dtype: uint16 + datasets: null + label_mask_paths: null + pad_direction: right + generate_attention_mask: false + generate_doc_lengths: false + num_workers: 0 + drop_last: true + pin_memory: true + prefetch_factor: 16 + persistent_workers: true + timeout: 0 + seed: null + instance_filter: null +restore_dataloader: true +fast_forward_batches: null +evaluators: [] +eval_interval: 500 +tokenizer: + identifier: ../spectra_tokenizer/tokenizer.json + truncate_direction: right +save_folder: checkpoints/olmo-190M-17values/ +remote_save_folder: null +canceled_check_interval: 50 +save_interval: 500 +save_interval_unsharded: 10000 +save_interval_ephemeral: null +save_num_checkpoints_to_keep: 4 +save_num_unsharded_checkpoints_to_keep: -1 +save_overwrite: true +force_save_unsharded: false +no_pre_train_checkpoint: false +load_path: checkpoints/olmo-190M-17values//latest +load_path_sharded_checkpointer: null +try_load_latest_save: false +reset_optimizer_state: false +reset_trainer_state: false +sharded_checkpointer: torch_legacy +new_style_checkpoints: null +max_duration: 150000 +global_train_batch_size: 1024 +device_train_batch_size: 4 +device_train_microbatch_size: 4 +device_eval_batch_size: 4 +eval_subset_num_batches: -1 +eval_on_load: false +device_train_grad_accum: 1 +max_grad_norm: 1.0 +max_grad_norm_ratio: null +precision: amp_bf16 +wandb: null +speed_monitor: + window_size: 20 + gpu_flops_available: null +console_log_interval: 1 +gen1_gc_interval: 1 +compile: null +distributed_strategy: fsdp +fsdp: + use_orig_params: true + 
sharding_strategy: _HYBRID_SHARD_ZERO2 + wrapping_strategy: null + precision: pure + hybrid_sharding_num_model_replicas: null +ddp: null +softmax_auxiliary_loss: false +auxiliary_loss_multiplier: 0.0001 +time_limit: null +extra_steps_after_cancel: 10 +early_stopping_factor: null +save_data_indices: true +python_profiling: false +torch_profiling: false +stop_at: 150010 +stop_after: null +activation_checkpointing: null +fused_loss: null +hf_datasets_cache_dir: null +module_outputs_save_steps: null diff --git a/step110000-unsharded/train.pt b/step110000-unsharded/train.pt new file mode 100644 index 0000000000000000000000000000000000000000..71c61f4ad41e1c5b2336d88f1c77304b81b3c78d --- /dev/null +++ b/step110000-unsharded/train.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91e6e815190b33d6b59924f3233f2d9c7cb510bd48df8ac585f23fc4dfc325af +size 14988 diff --git a/step120000-unsharded/config.yaml b/step120000-unsharded/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..962355ac378c4f394b429fbf56cba8f5fa512e96 --- /dev/null +++ b/step120000-unsharded/config.yaml @@ -0,0 +1,465 @@ +run_name: OLMo-190M-17values +seed: 6198 +epoch: null +dry_run: false +model: + d_model: 768 + n_heads: 12 + n_kv_heads: null + clip_qkv: null + n_layers: 16 + mlp_ratio: 4 + mlp_hidden_size: 4096 + activation_type: swiglu + block_type: sequential + block_group_size: 1 + alibi: false + alibi_bias_max: 8.0 + rope: true + rope_full_precision: true + rope_theta: 10000 + flash_attention: false + attention_dropout: 0.0 + multi_query_attention: false + attention_layer_norm: false + residual_dropout: 0.0 + embedding_dropout: 0.0 + embedding_layer_norm: false + layer_norm_type: rms + layer_norm_with_affine: true + layer_norm_eps: 1.0e-05 + attention_layer_norm_with_affine: false + max_sequence_length: 2048 + include_bias: false + bias_for_layer_norm: false + scale_logits: false + vocab_size: 50277 + embedding_size: 50304 + weight_tying: false + 
eos_token_id: 50277 + pad_token_id: 1 + init_device: meta + init_fn: mitchell + init_std: 0.02 + init_cutoff_factor: null + precision: amp_bf16 + scale_emb_init: false + emb_init_std: null + norm_after: false + linear_type: values17 + num_trilm_matrix_scales: 1 +optimizer: + name: adamw + learning_rate: 0.0004 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + eps: 1.0e-05 + no_decay_norm_and_bias: null + selective_updates: false + decay_norm_and_bias: false + decay_embeddings: false + metrics_log_interval: 10 + record_update_metrics: false +scheduler: + name: cosine_with_warmup + units: steps + t_warmup: 375 + t_max: null + alpha_f: 0.1 + grad_clip_warmup_steps: null + grad_clip_warmup_factor: null + warmup_min_lr: null + remove_weight_decay_in_second_half: false +data: + paths: + - ../slimp/train/all_combined/combined_1/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_1/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_1/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_2/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_3/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_3/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_4/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_4/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_5/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_5/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_6/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_7/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_7/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_8/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_8/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_9/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_9/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_10/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + memmap_dtype: uint16 + datasets: null + label_mask_paths: null + pad_direction: right + generate_attention_mask: false + generate_doc_lengths: false + num_workers: 0 + drop_last: true + 
pin_memory: true + prefetch_factor: 16 + persistent_workers: true + timeout: 0 + seed: null + instance_filter: null +restore_dataloader: true +fast_forward_batches: null +evaluators: [] +eval_interval: 500 +tokenizer: + identifier: ../spectra_tokenizer/tokenizer.json + truncate_direction: right +save_folder: checkpoints/olmo-190M-17values/ +remote_save_folder: null +canceled_check_interval: 50 +save_interval: 500 +save_interval_unsharded: 10000 +save_interval_ephemeral: null +save_num_checkpoints_to_keep: 4 +save_num_unsharded_checkpoints_to_keep: -1 +save_overwrite: true +force_save_unsharded: false +no_pre_train_checkpoint: false +load_path: checkpoints/olmo-190M-17values//latest +load_path_sharded_checkpointer: null +try_load_latest_save: false +reset_optimizer_state: false +reset_trainer_state: false +sharded_checkpointer: torch_legacy +new_style_checkpoints: null +max_duration: 150000 +global_train_batch_size: 1024 +device_train_batch_size: 4 +device_train_microbatch_size: 4 +device_eval_batch_size: 4 +eval_subset_num_batches: -1 +eval_on_load: false +device_train_grad_accum: 1 +max_grad_norm: 1.0 +max_grad_norm_ratio: null +precision: amp_bf16 +wandb: null +speed_monitor: + window_size: 20 + gpu_flops_available: null +console_log_interval: 1 +gen1_gc_interval: 1 +compile: null +distributed_strategy: fsdp +fsdp: + use_orig_params: true + sharding_strategy: _HYBRID_SHARD_ZERO2 + wrapping_strategy: null + precision: pure + hybrid_sharding_num_model_replicas: null +ddp: null +softmax_auxiliary_loss: false +auxiliary_loss_multiplier: 0.0001 +time_limit: null +extra_steps_after_cancel: 10 +early_stopping_factor: null +save_data_indices: true +python_profiling: false +torch_profiling: false +stop_at: 150010 +stop_after: null +activation_checkpointing: null +fused_loss: null +hf_datasets_cache_dir: null +module_outputs_save_steps: null diff --git a/step130000-unsharded/config.yaml b/step130000-unsharded/config.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..962355ac378c4f394b429fbf56cba8f5fa512e96 --- /dev/null +++ b/step130000-unsharded/config.yaml @@ -0,0 +1,465 @@ +run_name: OLMo-190M-17values +seed: 6198 +epoch: null +dry_run: false +model: + d_model: 768 + n_heads: 12 + n_kv_heads: null + clip_qkv: null + n_layers: 16 + mlp_ratio: 4 + mlp_hidden_size: 4096 + activation_type: swiglu + block_type: sequential + block_group_size: 1 + alibi: false + alibi_bias_max: 8.0 + rope: true + rope_full_precision: true + rope_theta: 10000 + flash_attention: false + attention_dropout: 0.0 + multi_query_attention: false + attention_layer_norm: false + residual_dropout: 0.0 + embedding_dropout: 0.0 + embedding_layer_norm: false + layer_norm_type: rms + layer_norm_with_affine: true + layer_norm_eps: 1.0e-05 + attention_layer_norm_with_affine: false + max_sequence_length: 2048 + include_bias: false + bias_for_layer_norm: false + scale_logits: false + vocab_size: 50277 + embedding_size: 50304 + weight_tying: false + eos_token_id: 50277 + pad_token_id: 1 + init_device: meta + init_fn: mitchell + init_std: 0.02 + init_cutoff_factor: null + precision: amp_bf16 + scale_emb_init: false + emb_init_std: null + norm_after: false + linear_type: values17 + num_trilm_matrix_scales: 1 +optimizer: + name: adamw + learning_rate: 0.0004 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + eps: 1.0e-05 + no_decay_norm_and_bias: null + selective_updates: false + decay_norm_and_bias: false + decay_embeddings: false + metrics_log_interval: 10 + record_update_metrics: false +scheduler: + name: cosine_with_warmup + units: steps + t_warmup: 375 + t_max: null + alpha_f: 0.1 + grad_clip_warmup_steps: null + grad_clip_warmup_factor: null + warmup_min_lr: null + remove_weight_decay_in_second_half: false +data: + paths: + - ../slimp/train/all_combined/combined_1/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_1/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_1/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_2/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_2/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_3/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_3/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_4/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_5/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_5/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_6/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_6/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_7/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_7/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_8/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_9/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_9/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_10/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_10/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + memmap_dtype: uint16 + datasets: null + label_mask_paths: null + pad_direction: right + generate_attention_mask: false + generate_doc_lengths: false + num_workers: 0 + drop_last: true + pin_memory: true + prefetch_factor: 16 + persistent_workers: true + timeout: 0 + seed: null + instance_filter: null +restore_dataloader: true +fast_forward_batches: null +evaluators: [] +eval_interval: 500 +tokenizer: + identifier: ../spectra_tokenizer/tokenizer.json + truncate_direction: right +save_folder: checkpoints/olmo-190M-17values/ +remote_save_folder: null +canceled_check_interval: 50 +save_interval: 500 +save_interval_unsharded: 10000 +save_interval_ephemeral: null +save_num_checkpoints_to_keep: 4 +save_num_unsharded_checkpoints_to_keep: -1 +save_overwrite: true +force_save_unsharded: false +no_pre_train_checkpoint: false +load_path: checkpoints/olmo-190M-17values//latest +load_path_sharded_checkpointer: null +try_load_latest_save: false +reset_optimizer_state: false +reset_trainer_state: false +sharded_checkpointer: torch_legacy +new_style_checkpoints: null +max_duration: 150000 
+global_train_batch_size: 1024 +device_train_batch_size: 4 +device_train_microbatch_size: 4 +device_eval_batch_size: 4 +eval_subset_num_batches: -1 +eval_on_load: false +device_train_grad_accum: 1 +max_grad_norm: 1.0 +max_grad_norm_ratio: null +precision: amp_bf16 +wandb: null +speed_monitor: + window_size: 20 + gpu_flops_available: null +console_log_interval: 1 +gen1_gc_interval: 1 +compile: null +distributed_strategy: fsdp +fsdp: + use_orig_params: true + sharding_strategy: _HYBRID_SHARD_ZERO2 + wrapping_strategy: null + precision: pure + hybrid_sharding_num_model_replicas: null +ddp: null +softmax_auxiliary_loss: false +auxiliary_loss_multiplier: 0.0001 +time_limit: null +extra_steps_after_cancel: 10 +early_stopping_factor: null +save_data_indices: true +python_profiling: false +torch_profiling: false +stop_at: 150010 +stop_after: null +activation_checkpointing: null +fused_loss: null +hf_datasets_cache_dir: null +module_outputs_save_steps: null diff --git a/step140000-unsharded/config.yaml b/step140000-unsharded/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..962355ac378c4f394b429fbf56cba8f5fa512e96 --- /dev/null +++ b/step140000-unsharded/config.yaml @@ -0,0 +1,465 @@ +run_name: OLMo-190M-17values +seed: 6198 +epoch: null +dry_run: false +model: + d_model: 768 + n_heads: 12 + n_kv_heads: null + clip_qkv: null + n_layers: 16 + mlp_ratio: 4 + mlp_hidden_size: 4096 + activation_type: swiglu + block_type: sequential + block_group_size: 1 + alibi: false + alibi_bias_max: 8.0 + rope: true + rope_full_precision: true + rope_theta: 10000 + flash_attention: false + attention_dropout: 0.0 + multi_query_attention: false + attention_layer_norm: false + residual_dropout: 0.0 + embedding_dropout: 0.0 + embedding_layer_norm: false + layer_norm_type: rms + layer_norm_with_affine: true + layer_norm_eps: 1.0e-05 + attention_layer_norm_with_affine: false + max_sequence_length: 2048 + include_bias: false + bias_for_layer_norm: false + 
scale_logits: false + vocab_size: 50277 + embedding_size: 50304 + weight_tying: false + eos_token_id: 50277 + pad_token_id: 1 + init_device: meta + init_fn: mitchell + init_std: 0.02 + init_cutoff_factor: null + precision: amp_bf16 + scale_emb_init: false + emb_init_std: null + norm_after: false + linear_type: values17 + num_trilm_matrix_scales: 1 +optimizer: + name: adamw + learning_rate: 0.0004 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + eps: 1.0e-05 + no_decay_norm_and_bias: null + selective_updates: false + decay_norm_and_bias: false + decay_embeddings: false + metrics_log_interval: 10 + record_update_metrics: false +scheduler: + name: cosine_with_warmup + units: steps + t_warmup: 375 + t_max: null + alpha_f: 0.1 + grad_clip_warmup_steps: null + grad_clip_warmup_factor: null + warmup_min_lr: null + remove_weight_decay_in_second_half: false +data: + paths: + - ../slimp/train/all_combined/combined_1/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_1/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_1/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_2/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_3/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_3/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_4/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_4/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_5/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_5/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_6/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_7/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_7/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_8/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_8/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_9/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_9/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_10/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + memmap_dtype: uint16 + datasets: null + label_mask_paths: null + 
pad_direction: right + generate_attention_mask: false + generate_doc_lengths: false + num_workers: 0 + drop_last: true + pin_memory: true + prefetch_factor: 16 + persistent_workers: true + timeout: 0 + seed: null + instance_filter: null +restore_dataloader: true +fast_forward_batches: null +evaluators: [] +eval_interval: 500 +tokenizer: + identifier: ../spectra_tokenizer/tokenizer.json + truncate_direction: right +save_folder: checkpoints/olmo-190M-17values/ +remote_save_folder: null +canceled_check_interval: 50 +save_interval: 500 +save_interval_unsharded: 10000 +save_interval_ephemeral: null +save_num_checkpoints_to_keep: 4 +save_num_unsharded_checkpoints_to_keep: -1 +save_overwrite: true +force_save_unsharded: false +no_pre_train_checkpoint: false +load_path: checkpoints/olmo-190M-17values//latest +load_path_sharded_checkpointer: null +try_load_latest_save: false +reset_optimizer_state: false +reset_trainer_state: false +sharded_checkpointer: torch_legacy +new_style_checkpoints: null +max_duration: 150000 +global_train_batch_size: 1024 +device_train_batch_size: 4 +device_train_microbatch_size: 4 +device_eval_batch_size: 4 +eval_subset_num_batches: -1 +eval_on_load: false +device_train_grad_accum: 1 +max_grad_norm: 1.0 +max_grad_norm_ratio: null +precision: amp_bf16 +wandb: null +speed_monitor: + window_size: 20 + gpu_flops_available: null +console_log_interval: 1 +gen1_gc_interval: 1 +compile: null +distributed_strategy: fsdp +fsdp: + use_orig_params: true + sharding_strategy: _HYBRID_SHARD_ZERO2 + wrapping_strategy: null + precision: pure + hybrid_sharding_num_model_replicas: null +ddp: null +softmax_auxiliary_loss: false +auxiliary_loss_multiplier: 0.0001 +time_limit: null +extra_steps_after_cancel: 10 +early_stopping_factor: null +save_data_indices: true +python_profiling: false +torch_profiling: false +stop_at: 150010 +stop_after: null +activation_checkpointing: null +fused_loss: null +hf_datasets_cache_dir: null +module_outputs_save_steps: null diff --git 
a/step150000/config.yaml b/step150000/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..962355ac378c4f394b429fbf56cba8f5fa512e96 --- /dev/null +++ b/step150000/config.yaml @@ -0,0 +1,465 @@ +run_name: OLMo-190M-17values +seed: 6198 +epoch: null +dry_run: false +model: + d_model: 768 + n_heads: 12 + n_kv_heads: null + clip_qkv: null + n_layers: 16 + mlp_ratio: 4 + mlp_hidden_size: 4096 + activation_type: swiglu + block_type: sequential + block_group_size: 1 + alibi: false + alibi_bias_max: 8.0 + rope: true + rope_full_precision: true + rope_theta: 10000 + flash_attention: false + attention_dropout: 0.0 + multi_query_attention: false + attention_layer_norm: false + residual_dropout: 0.0 + embedding_dropout: 0.0 + embedding_layer_norm: false + layer_norm_type: rms + layer_norm_with_affine: true + layer_norm_eps: 1.0e-05 + attention_layer_norm_with_affine: false + max_sequence_length: 2048 + include_bias: false + bias_for_layer_norm: false + scale_logits: false + vocab_size: 50277 + embedding_size: 50304 + weight_tying: false + eos_token_id: 50277 + pad_token_id: 1 + init_device: meta + init_fn: mitchell + init_std: 0.02 + init_cutoff_factor: null + precision: amp_bf16 + scale_emb_init: false + emb_init_std: null + norm_after: false + linear_type: values17 + num_trilm_matrix_scales: 1 +optimizer: + name: adamw + learning_rate: 0.0004 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + eps: 1.0e-05 + no_decay_norm_and_bias: null + selective_updates: false + decay_norm_and_bias: false + decay_embeddings: false + metrics_log_interval: 10 + record_update_metrics: false +scheduler: + name: cosine_with_warmup + units: steps + t_warmup: 375 + t_max: null + alpha_f: 0.1 + grad_clip_warmup_steps: null + grad_clip_warmup_factor: null + warmup_min_lr: null + remove_weight_decay_in_second_half: false +data: + paths: + - ../slimp/train/all_combined/combined_1/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_1/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_1/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_2/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_2/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_3/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_3/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_4/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_5/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_5/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_6/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_6/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_7/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_7/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_8/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_9/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_9/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_10/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_10/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + memmap_dtype: uint16 + datasets: null + label_mask_paths: null + pad_direction: right + generate_attention_mask: false + generate_doc_lengths: false + num_workers: 0 + drop_last: true + pin_memory: true + prefetch_factor: 16 + persistent_workers: true + timeout: 0 + seed: null + instance_filter: null +restore_dataloader: true +fast_forward_batches: null +evaluators: [] +eval_interval: 500 +tokenizer: + identifier: ../spectra_tokenizer/tokenizer.json + truncate_direction: right +save_folder: checkpoints/olmo-190M-17values/ +remote_save_folder: null +canceled_check_interval: 50 +save_interval: 500 +save_interval_unsharded: 10000 +save_interval_ephemeral: null +save_num_checkpoints_to_keep: 4 +save_num_unsharded_checkpoints_to_keep: -1 +save_overwrite: true +force_save_unsharded: false +no_pre_train_checkpoint: false +load_path: checkpoints/olmo-190M-17values//latest +load_path_sharded_checkpointer: null +try_load_latest_save: false +reset_optimizer_state: false +reset_trainer_state: false +sharded_checkpointer: torch_legacy +new_style_checkpoints: null +max_duration: 150000 
+global_train_batch_size: 1024 +device_train_batch_size: 4 +device_train_microbatch_size: 4 +device_eval_batch_size: 4 +eval_subset_num_batches: -1 +eval_on_load: false +device_train_grad_accum: 1 +max_grad_norm: 1.0 +max_grad_norm_ratio: null +precision: amp_bf16 +wandb: null +speed_monitor: + window_size: 20 + gpu_flops_available: null +console_log_interval: 1 +gen1_gc_interval: 1 +compile: null +distributed_strategy: fsdp +fsdp: + use_orig_params: true + sharding_strategy: _HYBRID_SHARD_ZERO2 + wrapping_strategy: null + precision: pure + hybrid_sharding_num_model_replicas: null +ddp: null +softmax_auxiliary_loss: false +auxiliary_loss_multiplier: 0.0001 +time_limit: null +extra_steps_after_cancel: 10 +early_stopping_factor: null +save_data_indices: true +python_profiling: false +torch_profiling: false +stop_at: 150010 +stop_after: null +activation_checkpointing: null +fused_loss: null +hf_datasets_cache_dir: null +module_outputs_save_steps: null diff --git a/step150010-unsharded/config.yaml b/step150010-unsharded/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..962355ac378c4f394b429fbf56cba8f5fa512e96 --- /dev/null +++ b/step150010-unsharded/config.yaml @@ -0,0 +1,465 @@ +run_name: OLMo-190M-17values +seed: 6198 +epoch: null +dry_run: false +model: + d_model: 768 + n_heads: 12 + n_kv_heads: null + clip_qkv: null + n_layers: 16 + mlp_ratio: 4 + mlp_hidden_size: 4096 + activation_type: swiglu + block_type: sequential + block_group_size: 1 + alibi: false + alibi_bias_max: 8.0 + rope: true + rope_full_precision: true + rope_theta: 10000 + flash_attention: false + attention_dropout: 0.0 + multi_query_attention: false + attention_layer_norm: false + residual_dropout: 0.0 + embedding_dropout: 0.0 + embedding_layer_norm: false + layer_norm_type: rms + layer_norm_with_affine: true + layer_norm_eps: 1.0e-05 + attention_layer_norm_with_affine: false + max_sequence_length: 2048 + include_bias: false + bias_for_layer_norm: false + 
scale_logits: false + vocab_size: 50277 + embedding_size: 50304 + weight_tying: false + eos_token_id: 50277 + pad_token_id: 1 + init_device: meta + init_fn: mitchell + init_std: 0.02 + init_cutoff_factor: null + precision: amp_bf16 + scale_emb_init: false + emb_init_std: null + norm_after: false + linear_type: values17 + num_trilm_matrix_scales: 1 +optimizer: + name: adamw + learning_rate: 0.0004 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + eps: 1.0e-05 + no_decay_norm_and_bias: null + selective_updates: false + decay_norm_and_bias: false + decay_embeddings: false + metrics_log_interval: 10 + record_update_metrics: false +scheduler: + name: cosine_with_warmup + units: steps + t_warmup: 375 + t_max: null + alpha_f: 0.1 + grad_clip_warmup_steps: null + grad_clip_warmup_factor: null + warmup_min_lr: null + remove_weight_decay_in_second_half: false +data: + paths: + - ../slimp/train/all_combined/combined_1/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_1/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_1/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_2/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_3/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_3/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_4/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_4/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_5/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_5/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_6/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_7/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_7/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_8/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_8/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_9/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_9/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_10/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + memmap_dtype: uint16 + datasets: null + label_mask_paths: null + 
pad_direction: right + generate_attention_mask: false + generate_doc_lengths: false + num_workers: 0 + drop_last: true + pin_memory: true + prefetch_factor: 16 + persistent_workers: true + timeout: 0 + seed: null + instance_filter: null +restore_dataloader: true +fast_forward_batches: null +evaluators: [] +eval_interval: 500 +tokenizer: + identifier: ../spectra_tokenizer/tokenizer.json + truncate_direction: right +save_folder: checkpoints/olmo-190M-17values/ +remote_save_folder: null +canceled_check_interval: 50 +save_interval: 500 +save_interval_unsharded: 10000 +save_interval_ephemeral: null +save_num_checkpoints_to_keep: 4 +save_num_unsharded_checkpoints_to_keep: -1 +save_overwrite: true +force_save_unsharded: false +no_pre_train_checkpoint: false +load_path: checkpoints/olmo-190M-17values//latest +load_path_sharded_checkpointer: null +try_load_latest_save: false +reset_optimizer_state: false +reset_trainer_state: false +sharded_checkpointer: torch_legacy +new_style_checkpoints: null +max_duration: 150000 +global_train_batch_size: 1024 +device_train_batch_size: 4 +device_train_microbatch_size: 4 +device_eval_batch_size: 4 +eval_subset_num_batches: -1 +eval_on_load: false +device_train_grad_accum: 1 +max_grad_norm: 1.0 +max_grad_norm_ratio: null +precision: amp_bf16 +wandb: null +speed_monitor: + window_size: 20 + gpu_flops_available: null +console_log_interval: 1 +gen1_gc_interval: 1 +compile: null +distributed_strategy: fsdp +fsdp: + use_orig_params: true + sharding_strategy: _HYBRID_SHARD_ZERO2 + wrapping_strategy: null + precision: pure + hybrid_sharding_num_model_replicas: null +ddp: null +softmax_auxiliary_loss: false +auxiliary_loss_multiplier: 0.0001 +time_limit: null +extra_steps_after_cancel: 10 +early_stopping_factor: null +save_data_indices: true +python_profiling: false +torch_profiling: false +stop_at: 150010 +stop_after: null +activation_checkpointing: null +fused_loss: null +hf_datasets_cache_dir: null +module_outputs_save_steps: null diff --git 
a/step30000-unsharded/config.yaml b/step30000-unsharded/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..962355ac378c4f394b429fbf56cba8f5fa512e96 --- /dev/null +++ b/step30000-unsharded/config.yaml @@ -0,0 +1,465 @@ +run_name: OLMo-190M-17values +seed: 6198 +epoch: null +dry_run: false +model: + d_model: 768 + n_heads: 12 + n_kv_heads: null + clip_qkv: null + n_layers: 16 + mlp_ratio: 4 + mlp_hidden_size: 4096 + activation_type: swiglu + block_type: sequential + block_group_size: 1 + alibi: false + alibi_bias_max: 8.0 + rope: true + rope_full_precision: true + rope_theta: 10000 + flash_attention: false + attention_dropout: 0.0 + multi_query_attention: false + attention_layer_norm: false + residual_dropout: 0.0 + embedding_dropout: 0.0 + embedding_layer_norm: false + layer_norm_type: rms + layer_norm_with_affine: true + layer_norm_eps: 1.0e-05 + attention_layer_norm_with_affine: false + max_sequence_length: 2048 + include_bias: false + bias_for_layer_norm: false + scale_logits: false + vocab_size: 50277 + embedding_size: 50304 + weight_tying: false + eos_token_id: 50277 + pad_token_id: 1 + init_device: meta + init_fn: mitchell + init_std: 0.02 + init_cutoff_factor: null + precision: amp_bf16 + scale_emb_init: false + emb_init_std: null + norm_after: false + linear_type: values17 + num_trilm_matrix_scales: 1 +optimizer: + name: adamw + learning_rate: 0.0004 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + eps: 1.0e-05 + no_decay_norm_and_bias: null + selective_updates: false + decay_norm_and_bias: false + decay_embeddings: false + metrics_log_interval: 10 + record_update_metrics: false +scheduler: + name: cosine_with_warmup + units: steps + t_warmup: 375 + t_max: null + alpha_f: 0.1 + grad_clip_warmup_steps: null + grad_clip_warmup_factor: null + warmup_min_lr: null + remove_weight_decay_in_second_half: false +data: + paths: + - ../slimp/train/all_combined/combined_1/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_1/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_1/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_2/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_2/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_3/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_3/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_4/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_5/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_5/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_6/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_6/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_7/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_7/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_8/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_9/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_9/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_10/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_10/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + memmap_dtype: uint16 + datasets: null + label_mask_paths: null + pad_direction: right + generate_attention_mask: false + generate_doc_lengths: false + num_workers: 0 + drop_last: true + pin_memory: true + prefetch_factor: 16 + persistent_workers: true + timeout: 0 + seed: null + instance_filter: null +restore_dataloader: true +fast_forward_batches: null +evaluators: [] +eval_interval: 500 +tokenizer: + identifier: ../spectra_tokenizer/tokenizer.json + truncate_direction: right +save_folder: checkpoints/olmo-190M-17values/ +remote_save_folder: null +canceled_check_interval: 50 +save_interval: 500 +save_interval_unsharded: 10000 +save_interval_ephemeral: null +save_num_checkpoints_to_keep: 4 +save_num_unsharded_checkpoints_to_keep: -1 +save_overwrite: true +force_save_unsharded: false +no_pre_train_checkpoint: false +load_path: checkpoints/olmo-190M-17values//latest +load_path_sharded_checkpointer: null +try_load_latest_save: false +reset_optimizer_state: false +reset_trainer_state: false +sharded_checkpointer: torch_legacy +new_style_checkpoints: null +max_duration: 150000 
+global_train_batch_size: 1024 +device_train_batch_size: 4 +device_train_microbatch_size: 4 +device_eval_batch_size: 4 +eval_subset_num_batches: -1 +eval_on_load: false +device_train_grad_accum: 1 +max_grad_norm: 1.0 +max_grad_norm_ratio: null +precision: amp_bf16 +wandb: null +speed_monitor: + window_size: 20 + gpu_flops_available: null +console_log_interval: 1 +gen1_gc_interval: 1 +compile: null +distributed_strategy: fsdp +fsdp: + use_orig_params: true + sharding_strategy: _HYBRID_SHARD_ZERO2 + wrapping_strategy: null + precision: pure + hybrid_sharding_num_model_replicas: null +ddp: null +softmax_auxiliary_loss: false +auxiliary_loss_multiplier: 0.0001 +time_limit: null +extra_steps_after_cancel: 10 +early_stopping_factor: null +save_data_indices: true +python_profiling: false +torch_profiling: false +stop_at: 150010 +stop_after: null +activation_checkpointing: null +fused_loss: null +hf_datasets_cache_dir: null +module_outputs_save_steps: null diff --git a/step40000-unsharded/config.yaml b/step40000-unsharded/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..962355ac378c4f394b429fbf56cba8f5fa512e96 --- /dev/null +++ b/step40000-unsharded/config.yaml @@ -0,0 +1,465 @@ +run_name: OLMo-190M-17values +seed: 6198 +epoch: null +dry_run: false +model: + d_model: 768 + n_heads: 12 + n_kv_heads: null + clip_qkv: null + n_layers: 16 + mlp_ratio: 4 + mlp_hidden_size: 4096 + activation_type: swiglu + block_type: sequential + block_group_size: 1 + alibi: false + alibi_bias_max: 8.0 + rope: true + rope_full_precision: true + rope_theta: 10000 + flash_attention: false + attention_dropout: 0.0 + multi_query_attention: false + attention_layer_norm: false + residual_dropout: 0.0 + embedding_dropout: 0.0 + embedding_layer_norm: false + layer_norm_type: rms + layer_norm_with_affine: true + layer_norm_eps: 1.0e-05 + attention_layer_norm_with_affine: false + max_sequence_length: 2048 + include_bias: false + bias_for_layer_norm: false + scale_logits: 
false + vocab_size: 50277 + embedding_size: 50304 + weight_tying: false + eos_token_id: 50277 + pad_token_id: 1 + init_device: meta + init_fn: mitchell + init_std: 0.02 + init_cutoff_factor: null + precision: amp_bf16 + scale_emb_init: false + emb_init_std: null + norm_after: false + linear_type: values17 + num_trilm_matrix_scales: 1 +optimizer: + name: adamw + learning_rate: 0.0004 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + eps: 1.0e-05 + no_decay_norm_and_bias: null + selective_updates: false + decay_norm_and_bias: false + decay_embeddings: false + metrics_log_interval: 10 + record_update_metrics: false +scheduler: + name: cosine_with_warmup + units: steps + t_warmup: 375 + t_max: null + alpha_f: 0.1 + grad_clip_warmup_steps: null + grad_clip_warmup_factor: null + warmup_min_lr: null + remove_weight_decay_in_second_half: false +data: + paths: + - ../slimp/train/all_combined/combined_1/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_1/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_1/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_2/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_3/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_3/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_4/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_4/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_5/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_5/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_6/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_7/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_7/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_8/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_8/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_9/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_9/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_10/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + memmap_dtype: uint16 + datasets: null + label_mask_paths: null + 
pad_direction: right + generate_attention_mask: false + generate_doc_lengths: false + num_workers: 0 + drop_last: true + pin_memory: true + prefetch_factor: 16 + persistent_workers: true + timeout: 0 + seed: null + instance_filter: null +restore_dataloader: true +fast_forward_batches: null +evaluators: [] +eval_interval: 500 +tokenizer: + identifier: ../spectra_tokenizer/tokenizer.json + truncate_direction: right +save_folder: checkpoints/olmo-190M-17values/ +remote_save_folder: null +canceled_check_interval: 50 +save_interval: 500 +save_interval_unsharded: 10000 +save_interval_ephemeral: null +save_num_checkpoints_to_keep: 4 +save_num_unsharded_checkpoints_to_keep: -1 +save_overwrite: true +force_save_unsharded: false +no_pre_train_checkpoint: false +load_path: checkpoints/olmo-190M-17values//latest +load_path_sharded_checkpointer: null +try_load_latest_save: false +reset_optimizer_state: false +reset_trainer_state: false +sharded_checkpointer: torch_legacy +new_style_checkpoints: null +max_duration: 150000 +global_train_batch_size: 1024 +device_train_batch_size: 4 +device_train_microbatch_size: 4 +device_eval_batch_size: 4 +eval_subset_num_batches: -1 +eval_on_load: false +device_train_grad_accum: 1 +max_grad_norm: 1.0 +max_grad_norm_ratio: null +precision: amp_bf16 +wandb: null +speed_monitor: + window_size: 20 + gpu_flops_available: null +console_log_interval: 1 +gen1_gc_interval: 1 +compile: null +distributed_strategy: fsdp +fsdp: + use_orig_params: true + sharding_strategy: _HYBRID_SHARD_ZERO2 + wrapping_strategy: null + precision: pure + hybrid_sharding_num_model_replicas: null +ddp: null +softmax_auxiliary_loss: false +auxiliary_loss_multiplier: 0.0001 +time_limit: null +extra_steps_after_cancel: 10 +early_stopping_factor: null +save_data_indices: true +python_profiling: false +torch_profiling: false +stop_at: 150010 +stop_after: null +activation_checkpointing: null +fused_loss: null +hf_datasets_cache_dir: null +module_outputs_save_steps: null diff --git 
a/step50000-unsharded/config.yaml b/step50000-unsharded/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..962355ac378c4f394b429fbf56cba8f5fa512e96 --- /dev/null +++ b/step50000-unsharded/config.yaml @@ -0,0 +1,465 @@ +run_name: OLMo-190M-17values +seed: 6198 +epoch: null +dry_run: false +model: + d_model: 768 + n_heads: 12 + n_kv_heads: null + clip_qkv: null + n_layers: 16 + mlp_ratio: 4 + mlp_hidden_size: 4096 + activation_type: swiglu + block_type: sequential + block_group_size: 1 + alibi: false + alibi_bias_max: 8.0 + rope: true + rope_full_precision: true + rope_theta: 10000 + flash_attention: false + attention_dropout: 0.0 + multi_query_attention: false + attention_layer_norm: false + residual_dropout: 0.0 + embedding_dropout: 0.0 + embedding_layer_norm: false + layer_norm_type: rms + layer_norm_with_affine: true + layer_norm_eps: 1.0e-05 + attention_layer_norm_with_affine: false + max_sequence_length: 2048 + include_bias: false + bias_for_layer_norm: false + scale_logits: false + vocab_size: 50277 + embedding_size: 50304 + weight_tying: false + eos_token_id: 50277 + pad_token_id: 1 + init_device: meta + init_fn: mitchell + init_std: 0.02 + init_cutoff_factor: null + precision: amp_bf16 + scale_emb_init: false + emb_init_std: null + norm_after: false + linear_type: values17 + num_trilm_matrix_scales: 1 +optimizer: + name: adamw + learning_rate: 0.0004 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + eps: 1.0e-05 + no_decay_norm_and_bias: null + selective_updates: false + decay_norm_and_bias: false + decay_embeddings: false + metrics_log_interval: 10 + record_update_metrics: false +scheduler: + name: cosine_with_warmup + units: steps + t_warmup: 375 + t_max: null + alpha_f: 0.1 + grad_clip_warmup_steps: null + grad_clip_warmup_factor: null + warmup_min_lr: null + remove_weight_decay_in_second_half: false +data: + paths: + - ../slimp/train/all_combined/combined_1/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_1/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_1/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_2/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_2/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_3/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_3/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_4/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_5/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_5/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_6/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_6/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_7/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_7/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_8/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_9/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_9/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_10/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_10/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + memmap_dtype: uint16 + datasets: null + label_mask_paths: null + pad_direction: right + generate_attention_mask: false + generate_doc_lengths: false + num_workers: 0 + drop_last: true + pin_memory: true + prefetch_factor: 16 + persistent_workers: true + timeout: 0 + seed: null + instance_filter: null +restore_dataloader: true +fast_forward_batches: null +evaluators: [] +eval_interval: 500 +tokenizer: + identifier: ../spectra_tokenizer/tokenizer.json + truncate_direction: right +save_folder: checkpoints/olmo-190M-17values/ +remote_save_folder: null +canceled_check_interval: 50 +save_interval: 500 +save_interval_unsharded: 10000 +save_interval_ephemeral: null +save_num_checkpoints_to_keep: 4 +save_num_unsharded_checkpoints_to_keep: -1 +save_overwrite: true +force_save_unsharded: false +no_pre_train_checkpoint: false +load_path: checkpoints/olmo-190M-17values//latest +load_path_sharded_checkpointer: null +try_load_latest_save: false +reset_optimizer_state: false +reset_trainer_state: false +sharded_checkpointer: torch_legacy +new_style_checkpoints: null +max_duration: 150000 
+global_train_batch_size: 1024 +device_train_batch_size: 4 +device_train_microbatch_size: 4 +device_eval_batch_size: 4 +eval_subset_num_batches: -1 +eval_on_load: false +device_train_grad_accum: 1 +max_grad_norm: 1.0 +max_grad_norm_ratio: null +precision: amp_bf16 +wandb: null +speed_monitor: + window_size: 20 + gpu_flops_available: null +console_log_interval: 1 +gen1_gc_interval: 1 +compile: null +distributed_strategy: fsdp +fsdp: + use_orig_params: true + sharding_strategy: _HYBRID_SHARD_ZERO2 + wrapping_strategy: null + precision: pure + hybrid_sharding_num_model_replicas: null +ddp: null +softmax_auxiliary_loss: false +auxiliary_loss_multiplier: 0.0001 +time_limit: null +extra_steps_after_cancel: 10 +early_stopping_factor: null +save_data_indices: true +python_profiling: false +torch_profiling: false +stop_at: 150010 +stop_after: null +activation_checkpointing: null +fused_loss: null +hf_datasets_cache_dir: null +module_outputs_save_steps: null diff --git a/step60000-unsharded/config.yaml b/step60000-unsharded/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..962355ac378c4f394b429fbf56cba8f5fa512e96 --- /dev/null +++ b/step60000-unsharded/config.yaml @@ -0,0 +1,465 @@ +run_name: OLMo-190M-17values +seed: 6198 +epoch: null +dry_run: false +model: + d_model: 768 + n_heads: 12 + n_kv_heads: null + clip_qkv: null + n_layers: 16 + mlp_ratio: 4 + mlp_hidden_size: 4096 + activation_type: swiglu + block_type: sequential + block_group_size: 1 + alibi: false + alibi_bias_max: 8.0 + rope: true + rope_full_precision: true + rope_theta: 10000 + flash_attention: false + attention_dropout: 0.0 + multi_query_attention: false + attention_layer_norm: false + residual_dropout: 0.0 + embedding_dropout: 0.0 + embedding_layer_norm: false + layer_norm_type: rms + layer_norm_with_affine: true + layer_norm_eps: 1.0e-05 + attention_layer_norm_with_affine: false + max_sequence_length: 2048 + include_bias: false + bias_for_layer_norm: false + scale_logits: 
false + vocab_size: 50277 + embedding_size: 50304 + weight_tying: false + eos_token_id: 50277 + pad_token_id: 1 + init_device: meta + init_fn: mitchell + init_std: 0.02 + init_cutoff_factor: null + precision: amp_bf16 + scale_emb_init: false + emb_init_std: null + norm_after: false + linear_type: values17 + num_trilm_matrix_scales: 1 +optimizer: + name: adamw + learning_rate: 0.0004 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + eps: 1.0e-05 + no_decay_norm_and_bias: null + selective_updates: false + decay_norm_and_bias: false + decay_embeddings: false + metrics_log_interval: 10 + record_update_metrics: false +scheduler: + name: cosine_with_warmup + units: steps + t_warmup: 375 + t_max: null + alpha_f: 0.1 + grad_clip_warmup_steps: null + grad_clip_warmup_factor: null + warmup_min_lr: null + remove_weight_decay_in_second_half: false +data: + paths: + - ../slimp/train/all_combined/combined_1/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_1/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_1/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_2/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_3/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_3/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_4/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_4/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_5/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_5/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_6/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_7/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_7/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_8/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_8/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_9/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_9/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_10/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + memmap_dtype: uint16 + datasets: null + label_mask_paths: null + 
pad_direction: right + generate_attention_mask: false + generate_doc_lengths: false + num_workers: 0 + drop_last: true + pin_memory: true + prefetch_factor: 16 + persistent_workers: true + timeout: 0 + seed: null + instance_filter: null +restore_dataloader: true +fast_forward_batches: null +evaluators: [] +eval_interval: 500 +tokenizer: + identifier: ../spectra_tokenizer/tokenizer.json + truncate_direction: right +save_folder: checkpoints/olmo-190M-17values/ +remote_save_folder: null +canceled_check_interval: 50 +save_interval: 500 +save_interval_unsharded: 10000 +save_interval_ephemeral: null +save_num_checkpoints_to_keep: 4 +save_num_unsharded_checkpoints_to_keep: -1 +save_overwrite: true +force_save_unsharded: false +no_pre_train_checkpoint: false +load_path: checkpoints/olmo-190M-17values//latest +load_path_sharded_checkpointer: null +try_load_latest_save: false +reset_optimizer_state: false +reset_trainer_state: false +sharded_checkpointer: torch_legacy +new_style_checkpoints: null +max_duration: 150000 +global_train_batch_size: 1024 +device_train_batch_size: 4 +device_train_microbatch_size: 4 +device_eval_batch_size: 4 +eval_subset_num_batches: -1 +eval_on_load: false +device_train_grad_accum: 1 +max_grad_norm: 1.0 +max_grad_norm_ratio: null +precision: amp_bf16 +wandb: null +speed_monitor: + window_size: 20 + gpu_flops_available: null +console_log_interval: 1 +gen1_gc_interval: 1 +compile: null +distributed_strategy: fsdp +fsdp: + use_orig_params: true + sharding_strategy: _HYBRID_SHARD_ZERO2 + wrapping_strategy: null + precision: pure + hybrid_sharding_num_model_replicas: null +ddp: null +softmax_auxiliary_loss: false +auxiliary_loss_multiplier: 0.0001 +time_limit: null +extra_steps_after_cancel: 10 +early_stopping_factor: null +save_data_indices: true +python_profiling: false +torch_profiling: false +stop_at: 150010 +stop_after: null +activation_checkpointing: null +fused_loss: null +hf_datasets_cache_dir: null +module_outputs_save_steps: null diff --git 
a/step90000-unsharded/config.yaml b/step90000-unsharded/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..962355ac378c4f394b429fbf56cba8f5fa512e96 --- /dev/null +++ b/step90000-unsharded/config.yaml @@ -0,0 +1,465 @@ +run_name: OLMo-190M-17values +seed: 6198 +epoch: null +dry_run: false +model: + d_model: 768 + n_heads: 12 + n_kv_heads: null + clip_qkv: null + n_layers: 16 + mlp_ratio: 4 + mlp_hidden_size: 4096 + activation_type: swiglu + block_type: sequential + block_group_size: 1 + alibi: false + alibi_bias_max: 8.0 + rope: true + rope_full_precision: true + rope_theta: 10000 + flash_attention: false + attention_dropout: 0.0 + multi_query_attention: false + attention_layer_norm: false + residual_dropout: 0.0 + embedding_dropout: 0.0 + embedding_layer_norm: false + layer_norm_type: rms + layer_norm_with_affine: true + layer_norm_eps: 1.0e-05 + attention_layer_norm_with_affine: false + max_sequence_length: 2048 + include_bias: false + bias_for_layer_norm: false + scale_logits: false + vocab_size: 50277 + embedding_size: 50304 + weight_tying: false + eos_token_id: 50277 + pad_token_id: 1 + init_device: meta + init_fn: mitchell + init_std: 0.02 + init_cutoff_factor: null + precision: amp_bf16 + scale_emb_init: false + emb_init_std: null + norm_after: false + linear_type: values17 + num_trilm_matrix_scales: 1 +optimizer: + name: adamw + learning_rate: 0.0004 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + eps: 1.0e-05 + no_decay_norm_and_bias: null + selective_updates: false + decay_norm_and_bias: false + decay_embeddings: false + metrics_log_interval: 10 + record_update_metrics: false +scheduler: + name: cosine_with_warmup + units: steps + t_warmup: 375 + t_max: null + alpha_f: 0.1 + grad_clip_warmup_steps: null + grad_clip_warmup_factor: null + warmup_min_lr: null + remove_weight_decay_in_second_half: false +data: + paths: + - ../slimp/train/all_combined/combined_1/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_1/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_1/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_1/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_2/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_2/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_2/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_3/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_3/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_3/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_4/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_4/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_5/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_5/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_5/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_6/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_6/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_6/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_7/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_7/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_7/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_8/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_8/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_9/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_9/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_9/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_10/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - 
../slimp/train/all_combined/combined_10/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + - ../slimp/train/all_combined/combined_10/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy + memmap_dtype: uint16 + datasets: null + label_mask_paths: null + pad_direction: right + generate_attention_mask: false + generate_doc_lengths: false + num_workers: 0 + drop_last: true + pin_memory: true + prefetch_factor: 16 + persistent_workers: true + timeout: 0 + seed: null + instance_filter: null +restore_dataloader: true +fast_forward_batches: null +evaluators: [] +eval_interval: 500 +tokenizer: + identifier: ../spectra_tokenizer/tokenizer.json + truncate_direction: right +save_folder: checkpoints/olmo-190M-17values/ +remote_save_folder: null +canceled_check_interval: 50 +save_interval: 500 +save_interval_unsharded: 10000 +save_interval_ephemeral: null +save_num_checkpoints_to_keep: 4 +save_num_unsharded_checkpoints_to_keep: -1 +save_overwrite: true +force_save_unsharded: false +no_pre_train_checkpoint: false +load_path: checkpoints/olmo-190M-17values//latest +load_path_sharded_checkpointer: null +try_load_latest_save: false +reset_optimizer_state: false +reset_trainer_state: false +sharded_checkpointer: torch_legacy +new_style_checkpoints: null +max_duration: 150000 
+global_train_batch_size: 1024 +device_train_batch_size: 4 +device_train_microbatch_size: 4 +device_eval_batch_size: 4 +eval_subset_num_batches: -1 +eval_on_load: false +device_train_grad_accum: 1 +max_grad_norm: 1.0 +max_grad_norm_ratio: null +precision: amp_bf16 +wandb: null +speed_monitor: + window_size: 20 + gpu_flops_available: null +console_log_interval: 1 +gen1_gc_interval: 1 +compile: null +distributed_strategy: fsdp +fsdp: + use_orig_params: true + sharding_strategy: _HYBRID_SHARD_ZERO2 + wrapping_strategy: null + precision: pure + hybrid_sharding_num_model_replicas: null +ddp: null +softmax_auxiliary_loss: false +auxiliary_loss_multiplier: 0.0001 +time_limit: null +extra_steps_after_cancel: 10 +early_stopping_factor: null +save_data_indices: true +python_profiling: false +torch_profiling: false +stop_at: 150010 +stop_after: null +activation_checkpointing: null +fused_loss: null +hf_datasets_cache_dir: null +module_outputs_save_steps: null