Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +372 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9d5bdf2858907cb5b0c2571e454d78a64e05cf660a6f040b717f3bbcb72ce9eb
 size 222865880

 version https://git-lfs.github.com/spec/v1
+oid sha256:2ff835f1f21989a3d4733d641cac249d4ccbc7f1520ee8112e6718c2ecf7ac5d
 size 222865880

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d14b8b0eab1204c35c183a2014820ae61879edcb1beb06127cbe0cd03e38c863
 size 445914554

 version https://git-lfs.github.com/spec/v1
+oid sha256:db032628f6c8192670cc89673b5ac60078b2e6fe29667dd88b2eb6883f60806a
 size 445914554

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:99c550f2754b8fcc0f720ea8c793843fd0f02a611c3d6eb617352a83a7b5ae17
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:0c72114d030c829f37cff9ef14eff1f310889671cf2b07411b3d653c521dea3b
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e70710c409284f74d525f8db5cfaccc22a8afd29416f19c595da9242ec92d936
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe157715eb8e05b3bab2a7f2fafac33705dc4a1a9dd7f6d860c3a7f9597d78bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.9506127238273621,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.9090909090909091,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,372 @@
       "eval_samples_per_second": 5.675,
       "eval_steps_per_second": 0.794,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1502,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.340370898944e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.9298770427703857,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 1.2121212121212122,
   "eval_steps": 25,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 5.675,
       "eval_steps_per_second": 0.794,
       "step": 150
+    },
+    {
+      "epoch": 0.9151515151515152,
+      "grad_norm": 1.1755346059799194,
+      "learning_rate": 4.659698863221513e-05,
+      "loss": 3.6364,
+      "step": 151
+    },
+    {
+      "epoch": 0.9212121212121213,
+      "grad_norm": 1.2692265510559082,
+      "learning_rate": 4.481448235912671e-05,
+      "loss": 3.727,
+      "step": 152
+    },
+    {
+      "epoch": 0.9272727272727272,
+      "grad_norm": 1.140787959098816,
+      "learning_rate": 4.306073275629044e-05,
+      "loss": 3.6425,
+      "step": 153
+    },
+    {
+      "epoch": 0.9333333333333333,
+      "grad_norm": 1.1743643283843994,
+      "learning_rate": 4.133621928133665e-05,
+      "loss": 4.3752,
+      "step": 154
+    },
+    {
+      "epoch": 0.9393939393939394,
+      "grad_norm": 1.1625086069107056,
+      "learning_rate": 3.964141339903026e-05,
+      "loss": 3.5581,
+      "step": 155
+    },
+    {
+      "epoch": 0.9454545454545454,
+      "grad_norm": 1.2037782669067383,
+      "learning_rate": 3.797677845237696e-05,
+      "loss": 3.9433,
+      "step": 156
+    },
+    {
+      "epoch": 0.9515151515151515,
+      "grad_norm": 1.213274598121643,
+      "learning_rate": 3.634276953594982e-05,
+      "loss": 4.0385,
+      "step": 157
+    },
+    {
+      "epoch": 0.9575757575757575,
+      "grad_norm": 1.2573509216308594,
+      "learning_rate": 3.473983337147118e-05,
+      "loss": 3.9195,
+      "step": 158
+    },
+    {
+      "epoch": 0.9636363636363636,
+      "grad_norm": 1.1490145921707153,
+      "learning_rate": 3.316840818568315e-05,
+      "loss": 3.9835,
+      "step": 159
+    },
+    {
+      "epoch": 0.9696969696969697,
+      "grad_norm": 1.2340681552886963,
+      "learning_rate": 3.162892359054098e-05,
+      "loss": 3.8201,
+      "step": 160
+    },
+    {
+      "epoch": 0.9757575757575757,
+      "grad_norm": 1.6921660900115967,
+      "learning_rate": 3.0121800465761293e-05,
+      "loss": 4.168,
+      "step": 161
+    },
+    {
+      "epoch": 0.9818181818181818,
+      "grad_norm": 1.3426408767700195,
+      "learning_rate": 2.8647450843757897e-05,
+      "loss": 4.4568,
+      "step": 162
+    },
+    {
+      "epoch": 0.9878787878787879,
+      "grad_norm": 1.1291062831878662,
+      "learning_rate": 2.7206277796996144e-05,
+      "loss": 2.9587,
+      "step": 163
+    },
+    {
+      "epoch": 0.9939393939393939,
+      "grad_norm": 5.067478179931641,
+      "learning_rate": 2.5798675327796993e-05,
+      "loss": 5.7136,
+      "step": 164
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.9252928495407104,
+      "learning_rate": 2.4425028260620715e-05,
+      "loss": 3.5246,
+      "step": 165
+    },
+    {
+      "epoch": 1.006060606060606,
+      "grad_norm": 0.745075523853302,
+      "learning_rate": 2.3085712136859668e-05,
+      "loss": 2.1109,
+      "step": 166
+    },
+    {
+      "epoch": 1.0121212121212122,
+      "grad_norm": 0.7394888997077942,
+      "learning_rate": 2.178109311216913e-05,
+      "loss": 2.4167,
+      "step": 167
+    },
+    {
+      "epoch": 1.018181818181818,
+      "grad_norm": 0.7494075894355774,
+      "learning_rate": 2.0511527856363912e-05,
+      "loss": 2.0683,
+      "step": 168
+    },
+    {
+      "epoch": 1.0242424242424242,
+      "grad_norm": 0.4794514775276184,
+      "learning_rate": 1.927736345590839e-05,
+      "loss": 2.6452,
+      "step": 169
+    },
+    {
+      "epoch": 1.0303030303030303,
+      "grad_norm": 0.5055110454559326,
+      "learning_rate": 1.8078937319026654e-05,
+      "loss": 3.154,
+      "step": 170
+    },
+    {
+      "epoch": 1.0363636363636364,
+      "grad_norm": 0.43595442175865173,
+      "learning_rate": 1.6916577083458228e-05,
+      "loss": 3.5276,
+      "step": 171
+    },
+    {
+      "epoch": 1.0424242424242425,
+      "grad_norm": 0.4404388666152954,
+      "learning_rate": 1.579060052688548e-05,
+      "loss": 3.8259,
+      "step": 172
+    },
+    {
+      "epoch": 1.0484848484848486,
+      "grad_norm": 0.4553149342536926,
+      "learning_rate": 1.4701315480056164e-05,
+      "loss": 3.9166,
+      "step": 173
+    },
+    {
+      "epoch": 1.0545454545454545,
+      "grad_norm": 0.5771802663803101,
+      "learning_rate": 1.3649019742625623e-05,
+      "loss": 3.8935,
+      "step": 174
+    },
+    {
+      "epoch": 1.0606060606060606,
+      "grad_norm": 0.4853135645389557,
+      "learning_rate": 1.2634001001741373e-05,
+      "loss": 4.3058,
+      "step": 175
+    },
+    {
+      "epoch": 1.0606060606060606,
+      "eval_loss": 0.929128110408783,
+      "eval_runtime": 8.8122,
+      "eval_samples_per_second": 5.674,
+      "eval_steps_per_second": 0.794,
+      "step": 175
+    },
+    {
+      "epoch": 1.0666666666666667,
+      "grad_norm": 0.4742906987667084,
+      "learning_rate": 1.1656536753392287e-05,
+      "loss": 4.0653,
+      "step": 176
+    },
+    {
+      "epoch": 1.0727272727272728,
+      "grad_norm": 0.4662216901779175,
+      "learning_rate": 1.0716894226543953e-05,
+      "loss": 4.6002,
+      "step": 177
+    },
+    {
+      "epoch": 1.0787878787878789,
+      "grad_norm": 0.4336952865123749,
+      "learning_rate": 9.815330310080887e-06,
+      "loss": 4.5435,
+      "step": 178
+    },
+    {
+      "epoch": 1.084848484848485,
+      "grad_norm": 0.472969114780426,
+      "learning_rate": 8.952091482575824e-06,
+      "loss": 4.2765,
+      "step": 179
+    },
+    {
+      "epoch": 1.0909090909090908,
+      "grad_norm": 0.5912119150161743,
+      "learning_rate": 8.127413744904804e-06,
+      "loss": 3.6061,
+      "step": 180
+    },
+    {
+      "epoch": 1.096969696969697,
+      "grad_norm": 0.8748385906219482,
+      "learning_rate": 7.34152255572697e-06,
+      "loss": 3.612,
+      "step": 181
+    },
+    {
+      "epoch": 1.103030303030303,
+      "grad_norm": 0.7963136434555054,
+      "learning_rate": 6.594632769846353e-06,
+      "loss": 2.6961,
+      "step": 182
+    },
+    {
+      "epoch": 1.1090909090909091,
+      "grad_norm": 1.059278964996338,
+      "learning_rate": 5.886948579472778e-06,
+      "loss": 2.7539,
+      "step": 183
+    },
+    {
+      "epoch": 1.1151515151515152,
+      "grad_norm": 0.8208073377609253,
+      "learning_rate": 5.218663458397715e-06,
+      "loss": 3.0603,
+      "step": 184
+    },
+    {
+      "epoch": 1.121212121212121,
+      "grad_norm": 0.8458943963050842,
+      "learning_rate": 4.589960109100444e-06,
+      "loss": 2.9592,
+      "step": 185
+    },
+    {
+      "epoch": 1.1272727272727272,
+      "grad_norm": 0.8511922359466553,
+      "learning_rate": 4.001010412799138e-06,
+      "loss": 3.3705,
+      "step": 186
+    },
+    {
+      "epoch": 1.1333333333333333,
+      "grad_norm": 0.8590157628059387,
+      "learning_rate": 3.451975382460109e-06,
+      "loss": 2.7145,
+      "step": 187
+    },
+    {
+      "epoch": 1.1393939393939394,
+      "grad_norm": 0.862501859664917,
+      "learning_rate": 2.9430051187785962e-06,
+      "loss": 3.4529,
+      "step": 188
+    },
+    {
+      "epoch": 1.1454545454545455,
+      "grad_norm": 0.8538833856582642,
+      "learning_rate": 2.4742387691426445e-06,
+      "loss": 3.336,
+      "step": 189
+    },
+    {
+      "epoch": 1.1515151515151516,
+      "grad_norm": 1.0022493600845337,
+      "learning_rate": 2.0458044895916513e-06,
+      "loss": 2.8941,
+      "step": 190
+    },
+    {
+      "epoch": 1.1575757575757575,
+      "grad_norm": 1.0205844640731812,
+      "learning_rate": 1.6578194097797258e-06,
+      "loss": 2.904,
+      "step": 191
+    },
+    {
+      "epoch": 1.1636363636363636,
+      "grad_norm": 1.2487444877624512,
+      "learning_rate": 1.3103896009537207e-06,
+      "loss": 2.4457,
+      "step": 192
+    },
+    {
+      "epoch": 1.1696969696969697,
+      "grad_norm": 1.0014578104019165,
+      "learning_rate": 1.0036100469542786e-06,
+      "loss": 2.9078,
+      "step": 193
+    },
+    {
+      "epoch": 1.1757575757575758,
+      "grad_norm": 0.9595588445663452,
+      "learning_rate": 7.375646182482875e-07,
+      "loss": 2.9862,
+      "step": 194
+    },
+    {
+      "epoch": 1.1818181818181819,
+      "grad_norm": 0.959829568862915,
+      "learning_rate": 5.123260489995229e-07,
+      "loss": 3.4768,
+      "step": 195
+    },
+    {
+      "epoch": 1.187878787878788,
+      "grad_norm": 1.1415780782699585,
+      "learning_rate": 3.2795591718381975e-07,
+      "loss": 2.9397,
+      "step": 196
+    },
+    {
+      "epoch": 1.1939393939393939,
+      "grad_norm": 1.0246975421905518,
+      "learning_rate": 1.8450462775428942e-07,
+      "loss": 3.4324,
+      "step": 197
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 1.1691887378692627,
+      "learning_rate": 8.201139886109264e-08,
+      "loss": 2.9577,
+      "step": 198
+    },
+    {
+      "epoch": 1.206060606060606,
+      "grad_norm": 1.4010579586029053,
+      "learning_rate": 2.0504251129649374e-08,
+      "loss": 3.0046,
+      "step": 199
+    },
+    {
+      "epoch": 1.2121212121212122,
+      "grad_norm": 0.9462568759918213,
+      "learning_rate": 0.0,
+      "loss": 3.2705,
+      "step": 200
+    },
+    {
+      "epoch": 1.2121212121212122,
+      "eval_loss": 0.9298770427703857,
+      "eval_runtime": 8.8168,
+      "eval_samples_per_second": 5.671,
+      "eval_steps_per_second": 0.794,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.78625695465472e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null