Training in progress, step 41, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +116 -4

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:97cc7577d0f902a30758c158628c10c680e0aa38c66f56123dc399a75b2a11a4
 size 559894228

 version https://git-lfs.github.com/spec/v1
+oid sha256:749b11098b51372aa97a9377e67626a9d1fd556be6e737d2c16f6424537ce16a
 size 559894228

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:99d94fbae5e40bcc4ba53e9bcb208547ba2e6d4559a8effd0e434f5324ca5d65
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:709845be6c7b0153ec33cd6dc88171bf680eeca30c3ebe33356310ed5f9df676
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:52eae71780bb50f7c2e5062daed541602ead6044daddeb7988083bc2a796e5b6
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:84fc982e8229d2121603832a24f4c07bbdbf63985ed1d38e11762164a04dad3b
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": NaN,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 1.8545454545454545,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,118 @@
       "eval_samples_per_second": 7.81,
       "eval_steps_per_second": 1.019,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -221,12 +333,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 7.01237683027968e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": NaN,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
+  "epoch": 3.090909090909091,
   "eval_steps": 25,
+  "global_step": 41,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 7.81,
       "eval_steps_per_second": 1.019,
       "step": 25
+    },
+    {
+      "epoch": 1.9272727272727272,
+      "grad_norm": NaN,
+      "learning_rate": 8.117449009293668e-05,
+      "loss": 0.0,
+      "step": 26
+    },
+    {
+      "epoch": 2.036363636363636,
+      "grad_norm": NaN,
+      "learning_rate": 7.500000000000001e-05,
+      "loss": 0.0,
+      "step": 27
+    },
+    {
+      "epoch": 2.109090909090909,
+      "grad_norm": NaN,
+      "learning_rate": 6.826705121831976e-05,
+      "loss": 0.0,
+      "step": 28
+    },
+    {
+      "epoch": 2.1818181818181817,
+      "grad_norm": NaN,
+      "learning_rate": 6.112604669781572e-05,
+      "loss": 0.0,
+      "step": 29
+    },
+    {
+      "epoch": 2.2545454545454544,
+      "grad_norm": NaN,
+      "learning_rate": 5.373650467932122e-05,
+      "loss": 0.0,
+      "step": 30
+    },
+    {
+      "epoch": 2.327272727272727,
+      "grad_norm": NaN,
+      "learning_rate": 4.626349532067879e-05,
+      "loss": 0.0,
+      "step": 31
+    },
+    {
+      "epoch": 2.4,
+      "grad_norm": NaN,
+      "learning_rate": 3.887395330218429e-05,
+      "loss": 0.0,
+      "step": 32
+    },
+    {
+      "epoch": 2.4727272727272727,
+      "grad_norm": NaN,
+      "learning_rate": 3.173294878168025e-05,
+      "loss": 0.0,
+      "step": 33
+    },
+    {
+      "epoch": 2.5454545454545454,
+      "grad_norm": NaN,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 0.0,
+      "step": 34
+    },
+    {
+      "epoch": 2.618181818181818,
+      "grad_norm": NaN,
+      "learning_rate": 1.8825509907063327e-05,
+      "loss": 0.0,
+      "step": 35
+    },
+    {
+      "epoch": 2.690909090909091,
+      "grad_norm": NaN,
+      "learning_rate": 1.3347406408508695e-05,
+      "loss": 0.0,
+      "step": 36
+    },
+    {
+      "epoch": 2.7636363636363637,
+      "grad_norm": NaN,
+      "learning_rate": 8.688061284200266e-06,
+      "loss": 0.0,
+      "step": 37
+    },
+    {
+      "epoch": 2.8363636363636364,
+      "grad_norm": NaN,
+      "learning_rate": 4.951556604879048e-06,
+      "loss": 0.0,
+      "step": 38
+    },
+    {
+      "epoch": 2.909090909090909,
+      "grad_norm": NaN,
+      "learning_rate": 2.221359710692961e-06,
+      "loss": 0.0,
+      "step": 39
+    },
+    {
+      "epoch": 3.018181818181818,
+      "grad_norm": NaN,
+      "learning_rate": 5.584586887435739e-07,
+      "loss": 0.0,
+      "step": 40
+    },
+    {
+      "epoch": 3.090909090909091,
+      "grad_norm": NaN,
+      "learning_rate": 0.0,
+      "loss": 0.0,
+      "step": 41
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.1500298001658675e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null