Training in progress, step 161, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +81 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fb2d3d9e8d7ec675b3e43f5c283ad2675075043caad890a51a022f6369ec00d5
 size 54285928

 version https://git-lfs.github.com/spec/v1
+oid sha256:760bda12f5c5b7e15eda326544b425ccd31b0eaf38eb9b6c1e87966f3bc25e98
 size 54285928

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ab53db8c36072d9378bc162c14778de517aac278f2fb49cea4ee35e27a3febe1
 size 27753594

 version https://git-lfs.github.com/spec/v1
+oid sha256:91199b5572304c08fd902f8e3b95d8019eab276c6b12367513c75708c30d2425
 size 27753594

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:63d1865d918fcaf3d3d0a7e84913195ca00f48469c4a77ac6afd7d7a954f1151
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:689c38ea02810eca2c0a3eb99417056ffe6fbce462787c2c31ec0ab130e19490
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1695e833f91394cfc1373064c3b63226c9413c6a2dba33dce448f2a54442ccc8
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:9894ede36c8990c1d07340fd043b3108c81296de071b9f1229e5ab74cbe8b5a8
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.7004172801971436,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 2.803738317757009,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,83 @@
       "eval_samples_per_second": 147.948,
       "eval_steps_per_second": 37.809,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1189,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4461463747952640.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.7004172801971436,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 3.0093457943925235,
   "eval_steps": 50,
+  "global_step": 161,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 147.948,
       "eval_steps_per_second": 37.809,
       "step": 150
+    },
+    {
+      "epoch": 2.822429906542056,
+      "grad_norm": 1.3084477186203003,
+      "learning_rate": 6.469488982370408e-07,
+      "loss": 1.5997,
+      "step": 151
+    },
+    {
+      "epoch": 2.8411214953271027,
+      "grad_norm": 1.5699142217636108,
+      "learning_rate": 5.24388113332288e-07,
+      "loss": 1.5886,
+      "step": 152
+    },
+    {
+      "epoch": 2.8598130841121496,
+      "grad_norm": 1.3877416849136353,
+      "learning_rate": 4.1458563443553365e-07,
+      "loss": 1.514,
+      "step": 153
+    },
+    {
+      "epoch": 2.878504672897196,
+      "grad_norm": 1.6604681015014648,
+      "learning_rate": 3.1758898875779094e-07,
+      "loss": 1.5889,
+      "step": 154
+    },
+    {
+      "epoch": 2.897196261682243,
+      "grad_norm": 1.5936368703842163,
+      "learning_rate": 2.334401605975478e-07,
+      "loss": 1.6741,
+      "step": 155
+    },
+    {
+      "epoch": 2.9158878504672896,
+      "grad_norm": 1.4755374193191528,
+      "learning_rate": 1.621755731681329e-07,
+      "loss": 1.5295,
+      "step": 156
+    },
+    {
+      "epoch": 2.9345794392523366,
+      "grad_norm": 1.510498046875,
+      "learning_rate": 1.0382607283221379e-07,
+      "loss": 1.4382,
+      "step": 157
+    },
+    {
+      "epoch": 2.953271028037383,
+      "grad_norm": 1.3615158796310425,
+      "learning_rate": 5.841691575015018e-08,
+      "loss": 1.5324,
+      "step": 158
+    },
+    {
+      "epoch": 2.97196261682243,
+      "grad_norm": 1.8873556852340698,
+      "learning_rate": 2.5967756948068744e-08,
+      "loss": 1.415,
+      "step": 159
+    },
+    {
+      "epoch": 2.9906542056074765,
+      "grad_norm": 1.5102689266204834,
+      "learning_rate": 6.492641810311728e-09,
+      "loss": 1.5735,
+      "step": 160
+    },
+    {
+      "epoch": 3.0093457943925235,
+      "grad_norm": 2.594985008239746,
+      "learning_rate": 0.0,
+      "loss": 2.4443,
+      "step": 161
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4787050134896640.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null