abaddon182 committed on
Commit
c03c609
·
verified ·
1 Parent(s): 42c7a63

Training in progress, step 792, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de05afcc546a2ee34140cbd4c32264a9ae4e90701579048d280b564f7b4fae2d
3
  size 138995824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4dbee798d44dbcabb914ec35238d10388310cb47d10a041107ee960504806ab
3
  size 138995824
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7120ad04c657280076e854a3a45c0cd0c9d81eb27b78e6be287df6e01cdaa069
3
  size 71078228
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f8d0477825cd9026da09ca1c4553d0e044616640851cd4193c564cc41fb807c
3
  size 71078228
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07dd64b81a4a8ba6cd05967d431431caeff586cc6936097e54c47ce11512713b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33fb007f8d857f9b183129c19566dda94989de04ecbd84ee572519e630f2f12a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:031d9d4a3b196c7a36a2854f0921f12cf44faf0e25ee086b85b6802cdb926b70
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4ecdcea16334b7d49ab05a4dbe60d78446124eb14a01af7ac3ef45a43be808c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7934648990631104,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-750",
4
- "epoch": 0.946969696969697,
5
  "eval_steps": 150,
6
- "global_step": 750,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -580,6 +580,34 @@
580
  "eval_samples_per_second": 75.269,
581
  "eval_steps_per_second": 18.846,
582
  "step": 750
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
583
  }
584
  ],
585
  "logging_steps": 10,
@@ -603,12 +631,12 @@
603
  "should_evaluate": false,
604
  "should_log": false,
605
  "should_save": true,
606
- "should_training_stop": false
607
  },
608
  "attributes": {}
609
  }
610
  },
611
- "total_flos": 2.575829827584e+16,
612
  "train_batch_size": 8,
613
  "trial_name": null,
614
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7934648990631104,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-750",
4
+ "epoch": 1.0,
5
  "eval_steps": 150,
6
+ "global_step": 792,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
580
  "eval_samples_per_second": 75.269,
581
  "eval_steps_per_second": 18.846,
582
  "step": 750
583
+ },
584
+ {
585
+ "epoch": 0.9595959595959596,
586
+ "grad_norm": 0.2528385818004608,
587
+ "learning_rate": 4.582130475683011e-07,
588
+ "loss": 0.7733,
589
+ "step": 760
590
+ },
591
+ {
592
+ "epoch": 0.9722222222222222,
593
+ "grad_norm": 0.3325110673904419,
594
+ "learning_rate": 2.167520804698009e-07,
595
+ "loss": 0.8165,
596
+ "step": 770
597
+ },
598
+ {
599
+ "epoch": 0.9848484848484849,
600
+ "grad_norm": 0.3532811105251312,
601
+ "learning_rate": 6.452098811035035e-08,
602
+ "loss": 0.8199,
603
+ "step": 780
604
+ },
605
+ {
606
+ "epoch": 0.9974747474747475,
607
+ "grad_norm": 0.4770316779613495,
608
+ "learning_rate": 1.7926245497179583e-09,
609
+ "loss": 0.7625,
610
+ "step": 790
611
  }
612
  ],
613
  "logging_steps": 10,
 
631
  "should_evaluate": false,
632
  "should_log": false,
633
  "should_save": true,
634
+ "should_training_stop": true
635
  },
636
  "attributes": {}
637
  }
638
  },
639
+ "total_flos": 2.719432340471808e+16,
640
  "train_batch_size": 8,
641
  "trial_name": null,
642
  "trial_params": null