Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cec26e5856981d94c03ac2e931964989a6c215066a9ed21c99329a1e94b6135d
 size 138995824

 version https://git-lfs.github.com/spec/v1
+oid sha256:309572333a72657acd0780f89ef1ec5958dbc801e9e970d81da2b7527373bb03
 size 138995824

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:19cf96a530981a93679f1bb5eab16a72407ec3667bb01036c26b6449ffd950a9
 size 71077780

 version https://git-lfs.github.com/spec/v1
+oid sha256:777e7ab1545ae0af75d3826d6293e22652c0df4f616dfa133b77bda2f07b99e3
 size 71077780

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:77e68a1404c0a7bada1c8fcbd120b2c8dc2314cf6adcc9d16846ddb8bd5be81c
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:b21e190d1ad2af6593bfbc3680746d079b8a209d965c6e6f62c1d39c3b28f616
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7431a5f5e7b667e1ad74dfbaf504ae1d5a622ff6eb54e4f385c161883d2319ba
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e1748c065d82adf475a51dba0ff56fd123a17810f07662718ca6d5704be8f9bd
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.9586992263793945,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.15471892728210418,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 39.49,
       "eval_steps_per_second": 9.945,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2618760324710400.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.9437130689620972,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.20629190304280556,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 39.49,
       "eval_steps_per_second": 9.945,
       "step": 150
+    },
+    {
+      "epoch": 0.1557503867973182,
+      "grad_norm": 0.26916560530662537,
+      "learning_rate": 2.6124736842105265e-05,
+      "loss": 1.8411,
+      "step": 151
+    },
+    {
+      "epoch": 0.15678184631253222,
+      "grad_norm": 0.2694656550884247,
+      "learning_rate": 2.5591578947368422e-05,
+      "loss": 1.8929,
+      "step": 152
+    },
+    {
+      "epoch": 0.15781330582774625,
+      "grad_norm": 0.2849469482898712,
+      "learning_rate": 2.5058421052631576e-05,
+      "loss": 1.8891,
+      "step": 153
+    },
+    {
+      "epoch": 0.1588447653429603,
+      "grad_norm": 0.38153189420700073,
+      "learning_rate": 2.4525263157894737e-05,
+      "loss": 1.7995,
+      "step": 154
+    },
+    {
+      "epoch": 0.15987622485817432,
+      "grad_norm": 0.34073978662490845,
+      "learning_rate": 2.3992105263157894e-05,
+      "loss": 1.9067,
+      "step": 155
+    },
+    {
+      "epoch": 0.16090768437338834,
+      "grad_norm": 0.3542872965335846,
+      "learning_rate": 2.345894736842105e-05,
+      "loss": 1.9616,
+      "step": 156
+    },
+    {
+      "epoch": 0.16193914388860237,
+      "grad_norm": 0.3663066625595093,
+      "learning_rate": 2.292578947368421e-05,
+      "loss": 1.9418,
+      "step": 157
+    },
+    {
+      "epoch": 0.1629706034038164,
+      "grad_norm": 0.3481178283691406,
+      "learning_rate": 2.2392631578947366e-05,
+      "loss": 1.9517,
+      "step": 158
+    },
+    {
+      "epoch": 0.16400206291903044,
+      "grad_norm": 0.3736833333969116,
+      "learning_rate": 2.1859473684210527e-05,
+      "loss": 1.8932,
+      "step": 159
+    },
+    {
+      "epoch": 0.16503352243424446,
+      "grad_norm": 0.4098275601863861,
+      "learning_rate": 2.132631578947368e-05,
+      "loss": 2.0285,
+      "step": 160
+    },
+    {
+      "epoch": 0.16606498194945848,
+      "grad_norm": 0.427293062210083,
+      "learning_rate": 2.0793157894736842e-05,
+      "loss": 1.9431,
+      "step": 161
+    },
+    {
+      "epoch": 0.1670964414646725,
+      "grad_norm": 0.407110333442688,
+      "learning_rate": 2.026e-05,
+      "loss": 1.92,
+      "step": 162
+    },
+    {
+      "epoch": 0.16812790097988653,
+      "grad_norm": 0.43119099736213684,
+      "learning_rate": 1.9726842105263157e-05,
+      "loss": 1.9463,
+      "step": 163
+    },
+    {
+      "epoch": 0.16915936049510058,
+      "grad_norm": 0.5015885233879089,
+      "learning_rate": 1.9193684210526314e-05,
+      "loss": 1.7968,
+      "step": 164
+    },
+    {
+      "epoch": 0.1701908200103146,
+      "grad_norm": 0.4589356482028961,
+      "learning_rate": 1.866052631578947e-05,
+      "loss": 1.9099,
+      "step": 165
+    },
+    {
+      "epoch": 0.17122227952552863,
+      "grad_norm": 0.46650373935699463,
+      "learning_rate": 1.8127368421052632e-05,
+      "loss": 1.9559,
+      "step": 166
+    },
+    {
+      "epoch": 0.17225373904074265,
+      "grad_norm": 0.4671856462955475,
+      "learning_rate": 1.759421052631579e-05,
+      "loss": 2.1502,
+      "step": 167
+    },
+    {
+      "epoch": 0.17328519855595667,
+      "grad_norm": 0.45730796456336975,
+      "learning_rate": 1.7061052631578947e-05,
+      "loss": 1.9763,
+      "step": 168
+    },
+    {
+      "epoch": 0.1743166580711707,
+      "grad_norm": 0.4965425729751587,
+      "learning_rate": 1.6527894736842104e-05,
+      "loss": 2.0582,
+      "step": 169
+    },
+    {
+      "epoch": 0.17534811758638474,
+      "grad_norm": 0.5197814106941223,
+      "learning_rate": 1.599473684210526e-05,
+      "loss": 2.0598,
+      "step": 170
+    },
+    {
+      "epoch": 0.17637957710159877,
+      "grad_norm": 0.5016813278198242,
+      "learning_rate": 1.546157894736842e-05,
+      "loss": 1.9687,
+      "step": 171
+    },
+    {
+      "epoch": 0.1774110366168128,
+      "grad_norm": 0.5683993697166443,
+      "learning_rate": 1.4928421052631576e-05,
+      "loss": 1.8609,
+      "step": 172
+    },
+    {
+      "epoch": 0.1784424961320268,
+      "grad_norm": 0.5386666655540466,
+      "learning_rate": 1.4395263157894735e-05,
+      "loss": 1.8949,
+      "step": 173
+    },
+    {
+      "epoch": 0.17947395564724083,
+      "grad_norm": 0.581087589263916,
+      "learning_rate": 1.3862105263157895e-05,
+      "loss": 2.0025,
+      "step": 174
+    },
+    {
+      "epoch": 0.18050541516245489,
+      "grad_norm": 0.6015302538871765,
+      "learning_rate": 1.3328947368421052e-05,
+      "loss": 1.8594,
+      "step": 175
+    },
+    {
+      "epoch": 0.1815368746776689,
+      "grad_norm": 0.5565942525863647,
+      "learning_rate": 1.2795789473684211e-05,
+      "loss": 1.8845,
+      "step": 176
+    },
+    {
+      "epoch": 0.18256833419288293,
+      "grad_norm": 0.5846641659736633,
+      "learning_rate": 1.2262631578947368e-05,
+      "loss": 1.9508,
+      "step": 177
+    },
+    {
+      "epoch": 0.18359979370809695,
+      "grad_norm": 0.6111920475959778,
+      "learning_rate": 1.1729473684210526e-05,
+      "loss": 1.944,
+      "step": 178
+    },
+    {
+      "epoch": 0.18463125322331098,
+      "grad_norm": 0.625225305557251,
+      "learning_rate": 1.1196315789473683e-05,
+      "loss": 2.035,
+      "step": 179
+    },
+    {
+      "epoch": 0.18566271273852503,
+      "grad_norm": 0.6737083196640015,
+      "learning_rate": 1.066315789473684e-05,
+      "loss": 1.9672,
+      "step": 180
+    },
+    {
+      "epoch": 0.18669417225373905,
+      "grad_norm": 0.6812677383422852,
+      "learning_rate": 1.013e-05,
+      "loss": 1.7043,
+      "step": 181
+    },
+    {
+      "epoch": 0.18772563176895307,
+      "grad_norm": 0.6897796392440796,
+      "learning_rate": 9.596842105263157e-06,
+      "loss": 1.896,
+      "step": 182
+    },
+    {
+      "epoch": 0.1887570912841671,
+      "grad_norm": 0.7232050895690918,
+      "learning_rate": 9.063684210526316e-06,
+      "loss": 1.6724,
+      "step": 183
+    },
+    {
+      "epoch": 0.18978855079938112,
+      "grad_norm": 0.7442830204963684,
+      "learning_rate": 8.530526315789473e-06,
+      "loss": 1.8291,
+      "step": 184
+    },
+    {
+      "epoch": 0.19082001031459514,
+      "grad_norm": 0.7353584170341492,
+      "learning_rate": 7.99736842105263e-06,
+      "loss": 1.9337,
+      "step": 185
+    },
+    {
+      "epoch": 0.1918514698298092,
+      "grad_norm": 0.7994479537010193,
+      "learning_rate": 7.464210526315788e-06,
+      "loss": 1.8791,
+      "step": 186
+    },
+    {
+      "epoch": 0.1928829293450232,
+      "grad_norm": 0.7931816577911377,
+      "learning_rate": 6.931052631578947e-06,
+      "loss": 1.8401,
+      "step": 187
+    },
+    {
+      "epoch": 0.19391438886023724,
+      "grad_norm": 0.7966747879981995,
+      "learning_rate": 6.3978947368421055e-06,
+      "loss": 1.9807,
+      "step": 188
+    },
+    {
+      "epoch": 0.19494584837545126,
+      "grad_norm": 0.8468846082687378,
+      "learning_rate": 5.864736842105263e-06,
+      "loss": 1.7966,
+      "step": 189
+    },
+    {
+      "epoch": 0.19597730789066528,
+      "grad_norm": 0.9058783650398254,
+      "learning_rate": 5.33157894736842e-06,
+      "loss": 2.2768,
+      "step": 190
+    },
+    {
+      "epoch": 0.19700876740587933,
+      "grad_norm": 0.8024225831031799,
+      "learning_rate": 4.7984210526315785e-06,
+      "loss": 1.7921,
+      "step": 191
+    },
+    {
+      "epoch": 0.19804022692109335,
+      "grad_norm": 0.9358747601509094,
+      "learning_rate": 4.265263157894737e-06,
+      "loss": 1.8449,
+      "step": 192
+    },
+    {
+      "epoch": 0.19907168643630738,
+      "grad_norm": 1.0498905181884766,
+      "learning_rate": 3.732105263157894e-06,
+      "loss": 1.8713,
+      "step": 193
+    },
+    {
+      "epoch": 0.2001031459515214,
+      "grad_norm": 1.086319088935852,
+      "learning_rate": 3.1989473684210527e-06,
+      "loss": 2.1394,
+      "step": 194
+    },
+    {
+      "epoch": 0.20113460546673542,
+      "grad_norm": 1.038862705230713,
+      "learning_rate": 2.66578947368421e-06,
+      "loss": 1.7978,
+      "step": 195
+    },
+    {
+      "epoch": 0.20216606498194944,
+      "grad_norm": 1.6305444240570068,
+      "learning_rate": 2.1326315789473684e-06,
+      "loss": 2.1832,
+      "step": 196
+    },
+    {
+      "epoch": 0.2031975244971635,
+      "grad_norm": 1.320383071899414,
+      "learning_rate": 1.5994736842105264e-06,
+      "loss": 1.8102,
+      "step": 197
+    },
+    {
+      "epoch": 0.20422898401237752,
+      "grad_norm": 1.3276463747024536,
+      "learning_rate": 1.0663157894736842e-06,
+      "loss": 1.8395,
+      "step": 198
+    },
+    {
+      "epoch": 0.20526044352759154,
+      "grad_norm": 1.8404937982559204,
+      "learning_rate": 5.331578947368421e-07,
+      "loss": 2.3687,
+      "step": 199
+    },
+    {
+      "epoch": 0.20629190304280556,
+      "grad_norm": 3.119647741317749,
+      "learning_rate": 0.0,
+      "loss": 2.5499,
+      "step": 200
+    },
+    {
+      "epoch": 0.20629190304280556,
+      "eval_loss": 1.9437130689620972,
+      "eval_runtime": 10.3766,
+      "eval_samples_per_second": 39.416,
+      "eval_steps_per_second": 9.926,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3494542466088960.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null