Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e697aa4a0971e096dcb2c0242a2ff96f7d351b64996acf2acf9e8b75ca50dbed
 size 1163996488

 version https://git-lfs.github.com/spec/v1
+oid sha256:957fbc70285ecf6cef92499eb209a1bcd0447c947d20c1c032fd35676ad035ad
 size 1163996488

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bd39766d4afd3b8fe56b018489dbf10a14395de24b2acad54ed09bbf5a38b5d5
 size 325339796

 version https://git-lfs.github.com/spec/v1
+oid sha256:f0089cd35ee2f0e917439400ce9ea883d6e87b88f31a251b1ec36a7b7ce236ca
 size 325339796

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:69c1d36ca3da780239ce9138d8d8e6825d08c674e14e37f4632aaaecb7b088a3
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:5b907b37a5243703a7153e6180084d50d88016d5fd99beffbee3d491b70b8bf5
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:123ecf29cfd4fe3b008c987ce1ef9f63c2ad00365e06a3691aa36827aaded381
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d703f9adb617aa1ec13556a7b7482c741f765121a5a04f3cafdcfbce6ed485ee
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.0172547101974487,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.17152658662092624,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 11.229,
       "eval_steps_per_second": 2.83,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 8.847802254124646e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.010448932647705,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.22870211549456831,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 11.229,
       "eval_steps_per_second": 2.83,
       "step": 150
+    },
+    {
+      "epoch": 0.1726700971983991,
+      "grad_norm": 0.1942352056503296,
+      "learning_rate": 2.6253684210526317e-05,
+      "loss": 0.7878,
+      "step": 151
+    },
+    {
+      "epoch": 0.17381360777587193,
+      "grad_norm": 0.24811097979545593,
+      "learning_rate": 2.5717894736842106e-05,
+      "loss": 1.1611,
+      "step": 152
+    },
+    {
+      "epoch": 0.17495711835334476,
+      "grad_norm": 0.24453826248645782,
+      "learning_rate": 2.518210526315789e-05,
+      "loss": 0.9794,
+      "step": 153
+    },
+    {
+      "epoch": 0.1761006289308176,
+      "grad_norm": 0.23959362506866455,
+      "learning_rate": 2.4646315789473683e-05,
+      "loss": 1.1261,
+      "step": 154
+    },
+    {
+      "epoch": 0.17724413950829046,
+      "grad_norm": 0.24163787066936493,
+      "learning_rate": 2.411052631578947e-05,
+      "loss": 1.017,
+      "step": 155
+    },
+    {
+      "epoch": 0.1783876500857633,
+      "grad_norm": 0.25836434960365295,
+      "learning_rate": 2.357473684210526e-05,
+      "loss": 1.2222,
+      "step": 156
+    },
+    {
+      "epoch": 0.17953116066323613,
+      "grad_norm": 0.27292728424072266,
+      "learning_rate": 2.3038947368421052e-05,
+      "loss": 1.1383,
+      "step": 157
+    },
+    {
+      "epoch": 0.180674671240709,
+      "grad_norm": 0.25207802653312683,
+      "learning_rate": 2.250315789473684e-05,
+      "loss": 1.1166,
+      "step": 158
+    },
+    {
+      "epoch": 0.18181818181818182,
+      "grad_norm": 0.25218677520751953,
+      "learning_rate": 2.196736842105263e-05,
+      "loss": 1.1572,
+      "step": 159
+    },
+    {
+      "epoch": 0.18296169239565466,
+      "grad_norm": 0.256805956363678,
+      "learning_rate": 2.1431578947368418e-05,
+      "loss": 1.1692,
+      "step": 160
+    },
+    {
+      "epoch": 0.1841052029731275,
+      "grad_norm": 0.25223690271377563,
+      "learning_rate": 2.089578947368421e-05,
+      "loss": 1.0468,
+      "step": 161
+    },
+    {
+      "epoch": 0.18524871355060035,
+      "grad_norm": 0.2556026875972748,
+      "learning_rate": 2.036e-05,
+      "loss": 1.0576,
+      "step": 162
+    },
+    {
+      "epoch": 0.1863922241280732,
+      "grad_norm": 0.25864240527153015,
+      "learning_rate": 1.9824210526315787e-05,
+      "loss": 1.0228,
+      "step": 163
+    },
+    {
+      "epoch": 0.18753573470554602,
+      "grad_norm": 0.37401801347732544,
+      "learning_rate": 1.928842105263158e-05,
+      "loss": 1.2331,
+      "step": 164
+    },
+    {
+      "epoch": 0.18867924528301888,
+      "grad_norm": 0.2586139440536499,
+      "learning_rate": 1.8752631578947367e-05,
+      "loss": 1.1819,
+      "step": 165
+    },
+    {
+      "epoch": 0.18982275586049172,
+      "grad_norm": 0.28469181060791016,
+      "learning_rate": 1.8216842105263156e-05,
+      "loss": 1.1134,
+      "step": 166
+    },
+    {
+      "epoch": 0.19096626643796455,
+      "grad_norm": 0.2891313135623932,
+      "learning_rate": 1.7681052631578948e-05,
+      "loss": 1.1394,
+      "step": 167
+    },
+    {
+      "epoch": 0.19210977701543738,
+      "grad_norm": 0.2869350016117096,
+      "learning_rate": 1.7145263157894736e-05,
+      "loss": 1.2324,
+      "step": 168
+    },
+    {
+      "epoch": 0.19325328759291024,
+      "grad_norm": 0.2780541479587555,
+      "learning_rate": 1.6609473684210525e-05,
+      "loss": 1.0539,
+      "step": 169
+    },
+    {
+      "epoch": 0.19439679817038308,
+      "grad_norm": 0.3022194802761078,
+      "learning_rate": 1.6073684210526313e-05,
+      "loss": 1.0295,
+      "step": 170
+    },
+    {
+      "epoch": 0.1955403087478559,
+      "grad_norm": 0.28873467445373535,
+      "learning_rate": 1.5537894736842105e-05,
+      "loss": 0.8893,
+      "step": 171
+    },
+    {
+      "epoch": 0.19668381932532875,
+      "grad_norm": 0.30483803153038025,
+      "learning_rate": 1.5002105263157892e-05,
+      "loss": 1.1619,
+      "step": 172
+    },
+    {
+      "epoch": 0.1978273299028016,
+      "grad_norm": 0.30440232157707214,
+      "learning_rate": 1.4466315789473684e-05,
+      "loss": 0.965,
+      "step": 173
+    },
+    {
+      "epoch": 0.19897084048027444,
+      "grad_norm": 0.687065601348877,
+      "learning_rate": 1.3930526315789474e-05,
+      "loss": 1.1708,
+      "step": 174
+    },
+    {
+      "epoch": 0.20011435105774728,
+      "grad_norm": 0.33709561824798584,
+      "learning_rate": 1.3394736842105261e-05,
+      "loss": 1.1682,
+      "step": 175
+    },
+    {
+      "epoch": 0.20125786163522014,
+      "grad_norm": 0.3644546866416931,
+      "learning_rate": 1.2858947368421053e-05,
+      "loss": 1.017,
+      "step": 176
+    },
+    {
+      "epoch": 0.20240137221269297,
+      "grad_norm": 0.27532678842544556,
+      "learning_rate": 1.2323157894736842e-05,
+      "loss": 0.7339,
+      "step": 177
+    },
+    {
+      "epoch": 0.2035448827901658,
+      "grad_norm": 0.30165594816207886,
+      "learning_rate": 1.178736842105263e-05,
+      "loss": 0.8808,
+      "step": 178
+    },
+    {
+      "epoch": 0.20468839336763864,
+      "grad_norm": 0.2191629558801651,
+      "learning_rate": 1.125157894736842e-05,
+      "loss": 0.6094,
+      "step": 179
+    },
+    {
+      "epoch": 0.2058319039451115,
+      "grad_norm": 0.3130112886428833,
+      "learning_rate": 1.0715789473684209e-05,
+      "loss": 0.9785,
+      "step": 180
+    },
+    {
+      "epoch": 0.20697541452258433,
+      "grad_norm": 0.3028048872947693,
+      "learning_rate": 1.018e-05,
+      "loss": 0.8878,
+      "step": 181
+    },
+    {
+      "epoch": 0.20811892510005717,
+      "grad_norm": 0.311773419380188,
+      "learning_rate": 9.64421052631579e-06,
+      "loss": 1.0961,
+      "step": 182
+    },
+    {
+      "epoch": 0.20926243567753003,
+      "grad_norm": 0.2816356122493744,
+      "learning_rate": 9.108421052631578e-06,
+      "loss": 1.1126,
+      "step": 183
+    },
+    {
+      "epoch": 0.21040594625500286,
+      "grad_norm": 0.28161346912384033,
+      "learning_rate": 8.572631578947368e-06,
+      "loss": 1.3081,
+      "step": 184
+    },
+    {
+      "epoch": 0.2115494568324757,
+      "grad_norm": 0.30611908435821533,
+      "learning_rate": 8.036842105263157e-06,
+      "loss": 1.0617,
+      "step": 185
+    },
+    {
+      "epoch": 0.21269296740994853,
+      "grad_norm": 0.36937642097473145,
+      "learning_rate": 7.501052631578946e-06,
+      "loss": 0.9797,
+      "step": 186
+    },
+    {
+      "epoch": 0.2138364779874214,
+      "grad_norm": 0.29416340589523315,
+      "learning_rate": 6.965263157894737e-06,
+      "loss": 0.9259,
+      "step": 187
+    },
+    {
+      "epoch": 0.21497998856489423,
+      "grad_norm": 0.2920408844947815,
+      "learning_rate": 6.4294736842105265e-06,
+      "loss": 0.62,
+      "step": 188
+    },
+    {
+      "epoch": 0.21612349914236706,
+      "grad_norm": 0.30948537588119507,
+      "learning_rate": 5.893684210526315e-06,
+      "loss": 1.0818,
+      "step": 189
+    },
+    {
+      "epoch": 0.21726700971983992,
+      "grad_norm": 0.28673791885375977,
+      "learning_rate": 5.3578947368421044e-06,
+      "loss": 0.9172,
+      "step": 190
+    },
+    {
+      "epoch": 0.21841052029731275,
+      "grad_norm": 0.30014482140541077,
+      "learning_rate": 4.822105263157895e-06,
+      "loss": 0.9343,
+      "step": 191
+    },
+    {
+      "epoch": 0.2195540308747856,
+      "grad_norm": 0.30985647439956665,
+      "learning_rate": 4.286315789473684e-06,
+      "loss": 0.901,
+      "step": 192
+    },
+    {
+      "epoch": 0.22069754145225842,
+      "grad_norm": 0.29817381501197815,
+      "learning_rate": 3.750526315789473e-06,
+      "loss": 0.9511,
+      "step": 193
+    },
+    {
+      "epoch": 0.22184105202973128,
+      "grad_norm": 0.37334078550338745,
+      "learning_rate": 3.2147368421052633e-06,
+      "loss": 0.9329,
+      "step": 194
+    },
+    {
+      "epoch": 0.22298456260720412,
+      "grad_norm": 0.4006776511669159,
+      "learning_rate": 2.6789473684210522e-06,
+      "loss": 1.0734,
+      "step": 195
+    },
+    {
+      "epoch": 0.22412807318467695,
+      "grad_norm": 0.5243512392044067,
+      "learning_rate": 2.143157894736842e-06,
+      "loss": 0.7941,
+      "step": 196
+    },
+    {
+      "epoch": 0.2252715837621498,
+      "grad_norm": 0.8219733834266663,
+      "learning_rate": 1.6073684210526316e-06,
+      "loss": 0.7681,
+      "step": 197
+    },
+    {
+      "epoch": 0.22641509433962265,
+      "grad_norm": 0.8885450959205627,
+      "learning_rate": 1.071578947368421e-06,
+      "loss": 0.8465,
+      "step": 198
+    },
+    {
+      "epoch": 0.22755860491709548,
+      "grad_norm": 1.3124046325683594,
+      "learning_rate": 5.357894736842105e-07,
+      "loss": 0.6274,
+      "step": 199
+    },
+    {
+      "epoch": 0.22870211549456831,
+      "grad_norm": 1.9900838136672974,
+      "learning_rate": 0.0,
+      "loss": 0.6783,
+      "step": 200
+    },
+    {
+      "epoch": 0.22870211549456831,
+      "eval_loss": 1.010448932647705,
+      "eval_runtime": 32.855,
+      "eval_samples_per_second": 11.231,
+      "eval_steps_per_second": 2.831,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.1791525936417997e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null