besimray committed (verified)
Commit 453684e · Parent(s): a8e20e0

Training in progress, step 150, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ee338dc939b7c042fee22bc3023129715453e09ab7503d0fbab68f2a9d9e7429
+ oid sha256:8f9922b08d6d27775f70906696e738967a9dfda26360726a77d0336c22db2a94
  size 125048
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1685e33900566f466c94654c0999c9110defe07d1d83c288d3920ab9d12b2a82
+ oid sha256:5ac6daf0448867278648636f54dadb2e9c3630816a32c6d44b1a905f09e6668e
  size 162868
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:07dfcab91d136e6f3877a1517e4a4558c6ab33a709be94712f36072861ae0974
+ oid sha256:917aa8551bb3ce136b5c3a60b81a06542b98442edf18388bba6bc0c0d0ada806
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c049ad9892b8ae242eb26f06a6af3edec6b865f6613ddc97103e21f4231f6420
+ oid sha256:b8fa6a7a6709edeb55cdf23229934c07be2d8aae0e4056fbdb6ff2482d0eb3d3
  size 1064
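
Each of the four binary files above is stored as a Git LFS pointer (a version line, a sha256 OID, and a byte size), so only the OID line changes between checkpoints while the sizes stay fixed. Below is a minimal sketch of how one might verify a downloaded payload against such a pointer; the file paths and helper names are illustrative assumptions, not tooling from this repository.

```python
# Minimal sketch: check a downloaded checkpoint file against the sha256 OID
# and size recorded in its Git LFS pointer (three-line layout as shown above).
# Paths below are hypothetical, for illustration only.
import hashlib
from pathlib import Path


def read_lfs_pointer(pointer_path: str) -> dict:
    """Parse the pointer lines: 'version ...', 'oid sha256:<hex>', 'size <bytes>'."""
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields


def verify(pointer_path: str, payload_path: str) -> bool:
    pointer = read_lfs_pointer(pointer_path)
    expected_oid = pointer["oid"].split(":", 1)[1]
    expected_size = int(pointer["size"])
    data = Path(payload_path).read_bytes()
    return len(data) == expected_size and hashlib.sha256(data).hexdigest() == expected_oid


# Example (hypothetical local paths):
# verify("adapter_model.safetensors.pointer", "adapter_model.safetensors")
```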
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 11.026728630065918,
-  "best_model_checkpoint": "miner_id_24/checkpoint-140",
-  "epoch": 0.006327540620550948,
+  "best_metric": 11.025545120239258,
+  "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 0.006779507807733159,
   "eval_steps": 5,
-  "global_step": 140,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1219,6 +1219,92 @@
   "eval_samples_per_second": 52.878,
   "eval_steps_per_second": 26.442,
   "step": 140
+  },
+  {
+   "epoch": 0.006372737339269169,
+   "grad_norm": 0.47628021240234375,
+   "learning_rate": 0.0001667540876547148,
+   "loss": 44.1197,
+   "step": 141
+  },
+  {
+   "epoch": 0.00641793405798739,
+   "grad_norm": 0.4244273006916046,
+   "learning_rate": 0.0001662753422842123,
+   "loss": 44.0529,
+   "step": 142
+  },
+  {
+   "epoch": 0.006463130776705611,
+   "grad_norm": 0.4019363820552826,
+   "learning_rate": 0.00016579387259397127,
+   "loss": 44.107,
+   "step": 143
+  },
+  {
+   "epoch": 0.0065083274954238325,
+   "grad_norm": 0.41666439175605774,
+   "learning_rate": 0.00016530969837532487,
+   "loss": 44.1185,
+   "step": 144
+  },
+  {
+   "epoch": 0.006553524214142053,
+   "grad_norm": 0.52204829454422,
+   "learning_rate": 0.00016482283953077887,
+   "loss": 44.0868,
+   "step": 145
+  },
+  {
+   "epoch": 0.006553524214142053,
+   "eval_loss": 11.026100158691406,
+   "eval_runtime": 175.9985,
+   "eval_samples_per_second": 52.938,
+   "eval_steps_per_second": 26.472,
+   "step": 145
+  },
+  {
+   "epoch": 0.006598720932860274,
+   "grad_norm": 0.4917082190513611,
+   "learning_rate": 0.00016433331607319343,
+   "loss": 44.0786,
+   "step": 146
+  },
+  {
+   "epoch": 0.006643917651578495,
+   "grad_norm": 0.6054917573928833,
+   "learning_rate": 0.00016384114812496056,
+   "loss": 44.0952,
+   "step": 147
+  },
+  {
+   "epoch": 0.006689114370296716,
+   "grad_norm": 0.46359196305274963,
+   "learning_rate": 0.00016334635591717703,
+   "loss": 44.1401,
+   "step": 148
+  },
+  {
+   "epoch": 0.0067343110890149376,
+   "grad_norm": 0.5335073471069336,
+   "learning_rate": 0.00016284895978881236,
+   "loss": 44.0664,
+   "step": 149
+  },
+  {
+   "epoch": 0.006779507807733159,
+   "grad_norm": 0.3754950761795044,
+   "learning_rate": 0.00016234898018587337,
+   "loss": 44.1361,
+   "step": 150
+  },
+  {
+   "epoch": 0.006779507807733159,
+   "eval_loss": 11.025545120239258,
+   "eval_runtime": 176.2544,
+   "eval_samples_per_second": 52.861,
+   "eval_steps_per_second": 26.433,
+   "step": 150
   }
   ],
   "logging_steps": 1,
@@ -1247,7 +1333,7 @@
   "attributes": {}
   }
   },
-  "total_flos": 1470942412800.0,
+  "total_flos": 1576009728000.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null