abaddon182 committed on
Commit
d831123
·
verified ·
1 Parent(s): a3502ed

Training in progress, step 547, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dba3a9c8daa4bdd1e58028a57631ab9c6677819c890282b6e771c9f4e453820e
3
  size 191968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c56401c5e3fc86419d48de1cc99bcf3059b6b2cc7c8ac93d54e779d71019847
3
  size 191968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b77f30fc7d4d2a0f3c13953f1483910cb776aeb74503dc67517e33f2a8699ffa
3
  size 253144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86334b328c56e2d5a9e3f1ae6382f96c9d62d87b13ac1c2e5f6cf6724664fd79
3
  size 253144
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e085fde017ec5c64b8d00529e5946a3fdd2cc2b597c7b3cf7373c02ad0c4c8b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a57af447cd6f38421f2a08b9c33ca21be0092487897eea84e8bb352a10ec5de0
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb2b145cc02354c01563cd3053c6b3f03d7f93c87dfd6b3852b83f2c8fa5f1fd
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09e993d6d0decca6557382fb23f4a5c12589ee11e4e8e3addd876b8b9972e94e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 10.3164701461792,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-450",
4
- "epoch": 0.823045267489712,
5
  "eval_steps": 150,
6
- "global_step": 450,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -354,6 +354,69 @@
354
  "eval_samples_per_second": 648.792,
355
  "eval_steps_per_second": 162.726,
356
  "step": 450
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
357
  }
358
  ],
359
  "logging_steps": 10,
@@ -377,12 +440,12 @@
377
  "should_evaluate": false,
378
  "should_log": false,
379
  "should_save": true,
380
- "should_training_stop": false
381
  },
382
  "attributes": {}
383
  }
384
  },
385
- "total_flos": 26019191586816.0,
386
  "train_batch_size": 8,
387
  "trial_name": null,
388
  "trial_params": null
 
1
  {
2
  "best_metric": 10.3164701461792,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-450",
4
+ "epoch": 1.0004572473708275,
5
  "eval_steps": 150,
6
+ "global_step": 547,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
354
  "eval_samples_per_second": 648.792,
355
  "eval_steps_per_second": 162.726,
356
  "step": 450
357
+ },
358
+ {
359
+ "epoch": 0.8413351623228167,
360
+ "grad_norm": 0.06713691353797913,
361
+ "learning_rate": 7.3721207795187876e-06,
362
+ "loss": 10.3323,
363
+ "step": 460
364
+ },
365
+ {
366
+ "epoch": 0.8596250571559213,
367
+ "grad_norm": 0.06836876273155212,
368
+ "learning_rate": 5.806543362721945e-06,
369
+ "loss": 10.3244,
370
+ "step": 470
371
+ },
372
+ {
373
+ "epoch": 0.877914951989026,
374
+ "grad_norm": 0.0769273117184639,
375
+ "learning_rate": 4.417488550807386e-06,
376
+ "loss": 10.3197,
377
+ "step": 480
378
+ },
379
+ {
380
+ "epoch": 0.8962048468221308,
381
+ "grad_norm": 0.08537283539772034,
382
+ "learning_rate": 3.210504666816133e-06,
383
+ "loss": 10.3179,
384
+ "step": 490
385
+ },
386
+ {
387
+ "epoch": 0.9144947416552355,
388
+ "grad_norm": 0.20503534376621246,
389
+ "learning_rate": 2.1904127850760457e-06,
390
+ "loss": 10.314,
391
+ "step": 500
392
+ },
393
+ {
394
+ "epoch": 0.9327846364883402,
395
+ "grad_norm": 0.07405146211385727,
396
+ "learning_rate": 1.3612874743103189e-06,
397
+ "loss": 10.3337,
398
+ "step": 510
399
+ },
400
+ {
401
+ "epoch": 0.9510745313214449,
402
+ "grad_norm": 0.07247216254472733,
403
+ "learning_rate": 7.264405225248294e-07,
404
+ "loss": 10.3237,
405
+ "step": 520
406
+ },
407
+ {
408
+ "epoch": 0.9693644261545497,
409
+ "grad_norm": 0.08077077567577362,
410
+ "learning_rate": 2.8840770868230894e-07,
411
+ "loss": 10.3192,
412
+ "step": 530
413
+ },
414
+ {
415
+ "epoch": 0.9876543209876543,
416
+ "grad_norm": 0.12774144113063812,
417
+ "learning_rate": 4.89386740013198e-08,
418
+ "loss": 10.3162,
419
+ "step": 540
420
  }
421
  ],
422
  "logging_steps": 10,
 
440
  "should_evaluate": false,
441
  "should_log": false,
442
  "should_save": true,
443
+ "should_training_stop": true
444
  },
445
  "attributes": {}
446
  }
447
  },
448
+ "total_flos": 31629841219584.0,
449
  "train_batch_size": 8,
450
  "trial_name": null,
451
  "trial_params": null