File size: 37,709 Bytes
21bc021
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
# Top-level run settings.
# NOTE(review): the program that reads this file is not visible here, so any
# statement about how a key is interpreted is an assumption to confirm.
# The run name contains "17values", which lines up with model.linear_type
# (values17) further down in this file.
run_name: OLMo-190M-17values
seed: 6198
# epoch is explicitly null in this file (no value is set here).
epoch: null
dry_run: false
# Model settings for this run.
# NOTE(review): the code consuming these keys is not visible from this file;
# notes below are either plain arithmetic on the values shown or hedged
# assumptions that should be confirmed against the consumer.
model:
  # 768 / 12 = 64 — presumably the per-head width; confirm against model code.
  d_model: 768
  n_heads: 12
  n_kv_heads: null
  clip_qkv: null
  n_layers: 16
  # NOTE(review): mlp_ratio * d_model = 4 * 768 = 3072, which differs from
  # mlp_hidden_size (4096). Presumably the explicit hidden size takes
  # precedence over the ratio — TODO confirm.
  mlp_ratio: 4
  mlp_hidden_size: 4096
  activation_type: swiglu
  block_type: sequential
  block_group_size: 1
  # alibi is disabled while rope is enabled below; alibi_bias_max is still
  # set — presumably unused while alibi is false (verify).
  alibi: false
  alibi_bias_max: 8.0
  rope: true
  rope_full_precision: true
  rope_theta: 10000
  flash_attention: false
  # All three dropout values in this block are 0.0.
  attention_dropout: 0.0
  multi_query_attention: false
  attention_layer_norm: false
  residual_dropout: 0.0
  embedding_dropout: 0.0
  embedding_layer_norm: false
  layer_norm_type: rms
  layer_norm_with_affine: true
  layer_norm_eps: 1.0e-05
  attention_layer_norm_with_affine: false
  max_sequence_length: 2048
  include_bias: false
  bias_for_layer_norm: false
  scale_logits: false
  # embedding_size (50304 = 393 * 128) is larger than vocab_size (50277);
  # presumably the embedding table is padded up to a multiple of 128 — confirm.
  vocab_size: 50277
  embedding_size: 50304
  weight_tying: false
  # NOTE(review): eos_token_id equals vocab_size (50277). If token ids are
  # zero-based, this id lies outside 0..vocab_size-1 but still inside
  # embedding_size — confirm this is intended for the tokenizer in use.
  eos_token_id: 50277
  pad_token_id: 1
  init_device: meta
  init_fn: mitchell
  init_std: 0.02
  init_cutoff_factor: null
  precision: amp_bf16
  scale_emb_init: false
  emb_init_std: null
  norm_after: false
  # linear_type "values17" matches the "17values" suffix of run_name above.
  # NOTE(review): the meaning of this value and of num_trilm_matrix_scales is
  # defined by the consuming project and is not shown here.
  linear_type: values17
  num_trilm_matrix_scales: 1
# Optimizer settings for this run.
# NOTE(review): how each key is interpreted is decided by the consuming
# program, which is not visible from this file.
optimizer:
  name: adamw
  learning_rate: 0.0004
  weight_decay: 0.1
  # betas is a two-element sequence: 0.9 and 0.95.
  betas:
  - 0.9
  - 0.95
  eps: 1.0e-05
  # NOTE(review): no_decay_norm_and_bias (null) and decay_norm_and_bias
  # (false) have overlapping names; presumably only one of them is
  # authoritative for the consumer — TODO confirm which.
  no_decay_norm_and_bias: null
  selective_updates: false
  decay_norm_and_bias: false
  decay_embeddings: false
  metrics_log_interval: 10
  record_update_metrics: false
# Learning-rate scheduler settings for this run.
# NOTE(review): the scheduler implementation is not visible from this file;
# interpretations below are assumptions to confirm.
scheduler:
  name: cosine_with_warmup
  # units is "steps"; presumably t_warmup and t_max are expressed in this
  # unit — verify against the scheduler code.
  units: steps
  t_warmup: 375
  # t_max is explicitly null here (no value set in this file).
  t_max: null
  # NOTE(review): alpha_f presumably is the final learning-rate fraction
  # relative to optimizer.learning_rate — confirm.
  alpha_f: 0.1
  # Both grad-clip warmup keys and warmup_min_lr are null in this file.
  grad_clip_warmup_steps: null
  grad_clip_warmup_factor: null
  warmup_min_lr: null
  remove_weight_decay_in_second_half: false
data:
  paths:
  - ../slimp/train/all_combined/combined_1/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_1/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_2/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_3/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_4/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_5/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_6/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_7/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_8/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_9/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  - ../slimp/train/all_combined/combined_10/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
  memmap_dtype: uint16
  datasets: null
  label_mask_paths: null
  pad_direction: right
  generate_attention_mask: false
  generate_doc_lengths: false
  num_workers: 0
  drop_last: true
  pin_memory: true
  prefetch_factor: 16
  persistent_workers: true
  timeout: 0
  seed: null
  instance_filter: null
# Dataloader resume flags, evaluation cadence, and tokenizer selection.
restore_dataloader: true
fast_forward_batches: null
# Empty list: this config declares no evaluators.
# NOTE(review): eval_interval is still set below; presumably it has no effect
# while the evaluator list is empty - confirm against the trainer.
evaluators: []
eval_interval: 500
tokenizer:
  # Relative path ("../"); what it is resolved against is not visible in this
  # file - TODO confirm the expected working directory.
  identifier: ../spectra_tokenizer/tokenizer.json
  truncate_direction: right
# Checkpoint saving and loading settings.
# save_folder is a relative path and carries a trailing slash.
save_folder: checkpoints/olmo-190M-17values/
remote_save_folder: null
canceled_check_interval: 50
# Two save cadences are configured: 500 for the regular interval and 10000
# for the unsharded interval. Units are not stated here (presumably steps).
save_interval: 500
save_interval_unsharded: 10000
save_interval_ephemeral: null
save_num_checkpoints_to_keep: 4
# NOTE(review): -1 presumably means "keep all unsharded checkpoints" - confirm.
save_num_unsharded_checkpoints_to_keep: -1
# NOTE(review): overwrite is enabled while load_path below points inside the
# same save_folder; verify that resuming cannot clobber the checkpoint being
# loaded.
save_overwrite: true
force_save_unsharded: false
no_pre_train_checkpoint: false
# load_path is save_folder + "/latest"; because save_folder already ends in
# "/", the result contains a doubled slash ("17values//latest").
# NOTE(review): usually harmless on POSIX filesystems, but confirm the loader
# normalises the path (or drop one slash).
load_path: checkpoints/olmo-190M-17values//latest
load_path_sharded_checkpointer: null
# Left false; the "latest" checkpoint is instead referenced explicitly through
# load_path above.
try_load_latest_save: false
reset_optimizer_state: false
reset_trainer_state: false
sharded_checkpointer: torch_legacy
new_style_checkpoints: null
# Training length, batch sizing, gradient clipping and numeric precision.
# NOTE(review): max_duration has no unit suffix; presumably optimizer steps -
# confirm.
max_duration: 150000
# Batch sizing: global 1024 with a per-device batch of 4 and grad accumulation
# of 1 works out to 1024 / 4 = 256 devices.
# NOTE(review): the per-device values are presumably derived from the global
# batch size and world size at launch - confirm before editing them by hand.
global_train_batch_size: 1024
device_train_batch_size: 4
# Same value as device_train_batch_size, consistent with
# device_train_grad_accum: 1 below.
device_train_microbatch_size: 4
device_eval_batch_size: 4
# NOTE(review): -1 presumably means "use every eval batch" - confirm.
eval_subset_num_batches: -1
eval_on_load: false
device_train_grad_accum: 1
max_grad_norm: 1.0
max_grad_norm_ratio: null
precision: amp_bf16
# Null: no wandb section is configured in this file.
wandb: null
# Throughput monitoring, console logging, and distributed-training strategy.
speed_monitor:
  window_size: 20
  gpu_flops_available: null
console_log_interval: 1
gen1_gc_interval: 1
compile: null
# fsdp is selected as the strategy; the fsdp mapping below is populated and
# the ddp entry is null, which is consistent with that choice.
distributed_strategy: fsdp
fsdp:
  use_orig_params: true
  # NOTE(review): the value matches the name of PyTorch's
  # ShardingStrategy._HYBRID_SHARD_ZERO2 member; presumably mapped onto it by
  # the trainer - confirm.
  sharding_strategy: _HYBRID_SHARD_ZERO2
  wrapping_strategy: null
  # Separate key from the top-level "precision" setting elsewhere in this file.
  precision: pure
  # Left null even though a hybrid sharding strategy is selected.
  # NOTE(review): presumably a default replica count is chosen at runtime -
  # confirm.
  hybrid_sharding_num_model_replicas: null
ddp: null
# Auxiliary loss, stopping conditions, profiling and miscellaneous switches.
# The auxiliary loss is disabled; a multiplier is still set on the next key.
# NOTE(review): presumably the multiplier is ignored while the flag is false -
# confirm.
softmax_auxiliary_loss: false
auxiliary_loss_multiplier: 0.0001
time_limit: null
extra_steps_after_cancel: 10
early_stopping_factor: null
save_data_indices: true
python_profiling: false
torch_profiling: false
# 150010 equals max_duration (150000, set earlier in this file) plus
# extra_steps_after_cancel (10).
# NOTE(review): presumably a derived value rather than a hand-picked one -
# confirm before changing either input.
stop_at: 150010
stop_after: null
activation_checkpointing: null
fused_loss: null
hf_datasets_cache_dir: null
module_outputs_save_steps: null