abaddon182 committed (verified)
Commit 727969d
1 Parent(s): 757c5ed

Training in progress, step 450, checkpoint

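The files touched below are the standard contents of a Hugging Face transformers Trainer checkpoint: the adapter weights (adapter_model.safetensors), optimizer and learning-rate-scheduler state, RNG state, and trainer_state.json. As a rough illustration only, the sketch below loads the adapter onto a base model with PEFT; the base model name and the causal-LM head class are placeholders, since neither is recorded in this commit.

# A hedged sketch, not the author's own code: assumes this is a PEFT adapter checkpoint
# (adapter_model.safetensors follows PEFT's naming) and that the full last-checkpoint/
# directory, including its adapter_config.json, has been downloaded locally.
from transformers import AutoModelForCausalLM
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("org/base-model")  # placeholder; the base model is not named in this commit
model = PeftModel.from_pretrained(base, "last-checkpoint")     # attaches the adapter weights to the base model
model.eval()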
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ac54e7a7ee44983b14f6e81c3224088fde6e918a9548ecf4eb99f1f4386e6e75
+oid sha256:dba3a9c8daa4bdd1e58028a57631ab9c6677819c890282b6e771c9f4e453820e
 size 191968
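This file, like the other checkpoint files below, is stored through Git LFS, so the diff only swaps the pointer's object id (the SHA-256 of the new file contents); the recorded size is unchanged. Below is a minimal sketch for checking a downloaded object against its pointer, assuming both are available locally; the example paths are assumptions, not part of this commit.

import hashlib

def read_pointer(pointer_path):
    # Parse the "oid sha256:<hex>" and "size <bytes>" lines of a Git LFS pointer file.
    fields = {}
    with open(pointer_path) as f:
        for line in f:
            if " " in line:
                key, value = line.strip().split(" ", 1)
                fields[key] = value
    return fields["oid"].split(":", 1)[1], int(fields["size"])

def matches_pointer(pointer_path, object_path):
    # True if the resolved file's SHA-256 and byte size match the pointer's oid and size.
    expected_oid, expected_size = read_pointer(pointer_path)
    digest = hashlib.sha256()
    total = 0
    with open(object_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            total += len(chunk)
    return digest.hexdigest() == expected_oid and total == expected_size

# Example call (paths are illustrative):
# matches_pointer("pointer.txt", "last-checkpoint/adapter_model.safetensors")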
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c5328de3a04156e2ffdb90d4af3fad8b222382888acdbd4f3bd35e0f570ba23a
+oid sha256:b77f30fc7d4d2a0f3c13953f1483910cb776aeb74503dc67517e33f2a8699ffa
 size 253144
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eaf84182c5327cba99ea00da4d1ce9b4d9102b3389d1bc82bb43ffb6c8949bb6
+oid sha256:6e085fde017ec5c64b8d00529e5946a3fdd2cc2b597c7b3cf7373c02ad0c4c8b
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5d79de3c6a6d4b735b3fef56d029a72688ed64900a10e70511282f31905a33cc
+oid sha256:bb2b145cc02354c01563cd3053c6b3f03d7f93c87dfd6b3852b83f2c8fa5f1fd
 size 1064
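The trainer_state.json diff below logs training steps 310 through 450 and records a new best eval loss of about 10.3165 at checkpoint-450 (previously about 10.3195 at checkpoint-300). A minimal sketch for summarizing a downloaded copy of that file; the local path is an assumption.

import json

# Path is an assumption: point it at a local copy of this commit's trainer_state.json.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(f"step {state['global_step']} (epoch {state['epoch']:.4f})")
print(f"best eval loss {state['best_metric']} at {state['best_model_checkpoint']}")

# Most recent entries: training steps carry "loss", evaluation entries carry "eval_loss".
for entry in state["log_history"][-5:]:
    if "loss" in entry:
        print(f"  step {entry['step']}: loss={entry['loss']} lr={entry['learning_rate']}")
    elif "eval_loss" in entry:
        print(f"  step {entry['step']}: eval_loss={entry['eval_loss']}")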
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.319465637207031,
-  "best_model_checkpoint": "miner_id_24/checkpoint-300",
-  "epoch": 0.5486968449931413,
+  "best_metric": 10.3164701461792,
+  "best_model_checkpoint": "miner_id_24/checkpoint-450",
+  "epoch": 0.823045267489712,
   "eval_steps": 150,
-  "global_step": 300,
+  "global_step": 450,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -241,6 +241,119 @@
       "eval_samples_per_second": 651.966,
       "eval_steps_per_second": 163.522,
       "step": 300
+    },
+    {
+      "epoch": 0.566986739826246,
+      "grad_norm": 0.07304774969816208,
+      "learning_rate": 4.63685608183771e-05,
+      "loss": 10.3357,
+      "step": 310
+    },
+    {
+      "epoch": 0.5852766346593508,
+      "grad_norm": 0.0741581916809082,
+      "learning_rate": 4.322570304519023e-05,
+      "loss": 10.3264,
+      "step": 320
+    },
+    {
+      "epoch": 0.6035665294924554,
+      "grad_norm": 0.07260976731777191,
+      "learning_rate": 4.0109903950724134e-05,
+      "loss": 10.3205,
+      "step": 330
+    },
+    {
+      "epoch": 0.6218564243255601,
+      "grad_norm": 0.0811510682106018,
+      "learning_rate": 3.7033609019317374e-05,
+      "loss": 10.319,
+      "step": 340
+    },
+    {
+      "epoch": 0.6401463191586648,
+      "grad_norm": 0.24966461956501007,
+      "learning_rate": 3.400910594322121e-05,
+      "loss": 10.3137,
+      "step": 350
+    },
+    {
+      "epoch": 0.6584362139917695,
+      "grad_norm": 0.07861746102571487,
+      "learning_rate": 3.104847554168106e-05,
+      "loss": 10.3348,
+      "step": 360
+    },
+    {
+      "epoch": 0.6767261088248743,
+      "grad_norm": 0.06537245959043503,
+      "learning_rate": 2.816354350633411e-05,
+      "loss": 10.3262,
+      "step": 370
+    },
+    {
+      "epoch": 0.695016003657979,
+      "grad_norm": 0.08251766115427017,
+      "learning_rate": 2.5365833165666946e-05,
+      "loss": 10.3195,
+      "step": 380
+    },
+    {
+      "epoch": 0.7133058984910837,
+      "grad_norm": 0.10933877527713776,
+      "learning_rate": 2.266651945720694e-05,
+      "loss": 10.317,
+      "step": 390
+    },
+    {
+      "epoch": 0.7315957933241883,
+      "grad_norm": 0.20270827412605286,
+      "learning_rate": 2.0076384291297134e-05,
+      "loss": 10.3157,
+      "step": 400
+    },
+    {
+      "epoch": 0.7498856881572931,
+      "grad_norm": 0.07105053216218948,
+      "learning_rate": 1.7605773484745547e-05,
+      "loss": 10.3334,
+      "step": 410
+    },
+    {
+      "epoch": 0.7681755829903978,
+      "grad_norm": 0.07826591283082962,
+      "learning_rate": 1.5264555436369744e-05,
+      "loss": 10.3251,
+      "step": 420
+    },
+    {
+      "epoch": 0.7864654778235025,
+      "grad_norm": 0.07811526954174042,
+      "learning_rate": 1.3062081709499303e-05,
+      "loss": 10.3205,
+      "step": 430
+    },
+    {
+      "epoch": 0.8047553726566072,
+      "grad_norm": 0.08905858546495438,
+      "learning_rate": 1.1007149678882329e-05,
+      "loss": 10.319,
+      "step": 440
+    },
+    {
+      "epoch": 0.823045267489712,
+      "grad_norm": 0.24548448622226715,
+      "learning_rate": 9.107967391195903e-06,
+      "loss": 10.315,
+      "step": 450
+    },
+    {
+      "epoch": 0.823045267489712,
+      "eval_loss": 10.3164701461792,
+      "eval_runtime": 1.4196,
+      "eval_samples_per_second": 648.792,
+      "eval_steps_per_second": 162.726,
+      "step": 450
     }
   ],
   "logging_steps": 10,
@@ -269,7 +382,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 17360071557120.0,
+  "total_flos": 26019191586816.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null