Training in progress, step 214, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e2f38ed21bc8c5c9fecd0f8ff925f2317d77fe97af022f9c059a7840e2a2dc67
 size 73911112
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:bb0c2c2e3f896879971c737feec6155a8ba78caf1364c2051011a8edce89aa22
 size 37965300
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:5f4cc819653ed1f55ce643a6c9060864848c25c052d08d3678103e838defc2ad
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:802f115045aeeb27907b881b744440b538c2582f39dcf8d05da0ddabad9a975b
 size 1064
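Each of the checkpoint files above is stored through Git LFS, so the diff only touches the three-line pointer file (spec version, sha256 oid, byte size); the binary payload itself does not appear in the commit. As a minimal sketch (assuming the checkpoint files have already been downloaded locally; the path below is only an example), the new oid can be checked against the actual file like this:

import hashlib

# Hypothetical local copy of the file referenced by the LFS pointer.
path = "last-checkpoint/adapter_model.safetensors"

digest = hashlib.sha256()
with open(path, "rb") as f:
    # Hash the file in 1 MiB chunks to avoid loading it all into memory.
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)

# oid recorded in this commit's pointer for adapter_model.safetensors.
expected = "e2f38ed21bc8c5c9fecd0f8ff925f2317d77fe97af022f9c059a7840e2a2dc67"
print(digest.hexdigest() == expected)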
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 1.6292288303375244,
   "best_model_checkpoint": "miner_id_24/checkpoint-200",
-  "epoch": 0.
+  "epoch": 0.9956382669380633,
   "eval_steps": 50,
-  "global_step":
+  "global_step": 214,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1447,6 +1447,104 @@
       "eval_samples_per_second": 34.61,
       "eval_steps_per_second": 8.652,
       "step": 200
+    },
+    {
+      "epoch": 0.9351555684792091,
+      "grad_norm": 1.598107933998108,
+      "learning_rate": 1.9973082526568154e-06,
+      "loss": 1.4567,
+      "step": 201
+    },
+    {
+      "epoch": 0.9398080837452748,
+      "grad_norm": 2.187147855758667,
+      "learning_rate": 1.7026900316098215e-06,
+      "loss": 1.6859,
+      "step": 202
+    },
+    {
+      "epoch": 0.9444605990113405,
+      "grad_norm": 1.804359793663025,
+      "learning_rate": 1.4313834308486097e-06,
+      "loss": 1.614,
+      "step": 203
+    },
+    {
+      "epoch": 0.9491131142774062,
+      "grad_norm": 1.7503759860992432,
+      "learning_rate": 1.1834527918740623e-06,
+      "loss": 1.68,
+      "step": 204
+    },
+    {
+      "epoch": 0.953765629543472,
+      "grad_norm": 2.0529308319091797,
+      "learning_rate": 9.589569124794916e-07,
+      "loss": 1.7563,
+      "step": 205
+    },
+    {
+      "epoch": 0.9584181448095377,
+      "grad_norm": 1.7820945978164673,
+      "learning_rate": 7.579490328064265e-07,
+      "loss": 1.516,
+      "step": 206
+    },
+    {
+      "epoch": 0.9630706600756034,
+      "grad_norm": 1.8575091361999512,
+      "learning_rate": 5.804768227185565e-07,
+      "loss": 1.6248,
+      "step": 207
+    },
+    {
+      "epoch": 0.9677231753416691,
+      "grad_norm": 1.8180886507034302,
+      "learning_rate": 4.2658237049655323e-07,
+      "loss": 1.6101,
+      "step": 208
+    },
+    {
+      "epoch": 0.9723756906077348,
+      "grad_norm": 1.6702853441238403,
+      "learning_rate": 2.963021728567106e-07,
+      "loss": 1.5597,
+      "step": 209
+    },
+    {
+      "epoch": 0.9770282058738006,
+      "grad_norm": 1.678638219833374,
+      "learning_rate": 1.8966712629558957e-07,
+      "loss": 1.5329,
+      "step": 210
+    },
+    {
+      "epoch": 0.9816807211398663,
+      "grad_norm": 1.6849240064620972,
+      "learning_rate": 1.0670251976275803e-07,
+      "loss": 1.5622,
+      "step": 211
+    },
+    {
+      "epoch": 0.986333236405932,
+      "grad_norm": 1.7889765501022339,
+      "learning_rate": 4.74280286634099e-08,
+      "loss": 1.4413,
+      "step": 212
+    },
+    {
+      "epoch": 0.9909857516719977,
+      "grad_norm": 1.7154433727264404,
+      "learning_rate": 1.1857710192308969e-08,
+      "loss": 1.5593,
+      "step": 213
+    },
+    {
+      "epoch": 0.9956382669380633,
+      "grad_norm": 1.744558572769165,
+      "learning_rate": 0.0,
+      "loss": 1.5847,
+      "step": 214
     }
   ],
   "logging_steps": 1,
@@ -1470,12 +1568,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.
+  "total_flos": 1.1181661243205222e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
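Taken together, the trainer_state.json changes mark the end of the run: log entries for steps 201 through 214 are appended, the learning rate decays to 0.0 at step 214 (epoch 0.9956382669380633), should_training_stop flips to true, and best_model_checkpoint still points at miner_id_24/checkpoint-200 with a best_metric of 1.6292288303375244. A minimal sketch (assuming a local copy of the checkpoint directory; the path is only an example) for inspecting these fields:

import json

# Hypothetical local path to the saved trainer state.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["global_step"])            # 214
print(state["best_metric"])            # 1.6292288303375244
print(state["best_model_checkpoint"])  # miner_id_24/checkpoint-200
print(state["log_history"][-1])        # last logged entry (step 214, loss 1.5847)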