Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8b85eb6a3d7bda3e6dfdd6a6fd5b18b7e3567513fa7bb7e7125b4f1487431aa7
 size 639691872

 version https://git-lfs.github.com/spec/v1
+oid sha256:5be9b0d1f7049376c6ac9281f99999da158491e09a823b28d2cc2ffa41cc793c
 size 639691872

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7e06358f2fc8fad9da99f05e1d5bcf5a11df12a6fedde3df08d34cf1689daf97
 size 325339796

 version https://git-lfs.github.com/spec/v1
+oid sha256:b40dfb4665cbdcabc7db941f8f0cbc3c699cf1a3c3fe2cc4b01de89606fda463
 size 325339796

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df7396f1ae858cac6cf44d64ee7a80546fa73c7f9c0e8d765e0c769fa1156556
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:0566559177883c4194a08d8779faaa6d6b1ff550d28e94c7ac54029287fd8bc0
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:123ecf29cfd4fe3b008c987ce1ef9f63c2ad00365e06a3691aa36827aaded381
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d703f9adb617aa1ec13556a7b7482c741f765121a5a04f3cafdcfbce6ed485ee
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.6460583209991455,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.08525149190110827,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 19.779,
       "eval_steps_per_second": 4.965,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.105777533727539e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.630453109741211,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.11366865586814436,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 19.779,
       "eval_steps_per_second": 4.965,
       "step": 150
+    },
+    {
+      "epoch": 0.08581983518044899,
+      "grad_norm": 0.33179932832717896,
+      "learning_rate": 2.6253684210526317e-05,
+      "loss": 1.7188,
+      "step": 151
+    },
+    {
+      "epoch": 0.08638817845978972,
+      "grad_norm": 0.32185089588165283,
+      "learning_rate": 2.5717894736842106e-05,
+      "loss": 1.6621,
+      "step": 152
+    },
+    {
+      "epoch": 0.08695652173913043,
+      "grad_norm": 0.35517582297325134,
+      "learning_rate": 2.518210526315789e-05,
+      "loss": 1.5507,
+      "step": 153
+    },
+    {
+      "epoch": 0.08752486501847116,
+      "grad_norm": 0.43270987272262573,
+      "learning_rate": 2.4646315789473683e-05,
+      "loss": 1.7868,
+      "step": 154
+    },
+    {
+      "epoch": 0.08809320829781188,
+      "grad_norm": 0.4225779175758362,
+      "learning_rate": 2.411052631578947e-05,
+      "loss": 1.5988,
+      "step": 155
+    },
+    {
+      "epoch": 0.0886615515771526,
+      "grad_norm": 0.4306216537952423,
+      "learning_rate": 2.357473684210526e-05,
+      "loss": 1.6786,
+      "step": 156
+    },
+    {
+      "epoch": 0.08922989485649332,
+      "grad_norm": 0.44885534048080444,
+      "learning_rate": 2.3038947368421052e-05,
+      "loss": 1.5094,
+      "step": 157
+    },
+    {
+      "epoch": 0.08979823813583404,
+      "grad_norm": 0.5068619251251221,
+      "learning_rate": 2.250315789473684e-05,
+      "loss": 1.6784,
+      "step": 158
+    },
+    {
+      "epoch": 0.09036658141517477,
+      "grad_norm": 0.4916083514690399,
+      "learning_rate": 2.196736842105263e-05,
+      "loss": 1.65,
+      "step": 159
+    },
+    {
+      "epoch": 0.09093492469451549,
+      "grad_norm": 0.5132988095283508,
+      "learning_rate": 2.1431578947368418e-05,
+      "loss": 1.6544,
+      "step": 160
+    },
+    {
+      "epoch": 0.0915032679738562,
+      "grad_norm": 0.5349146127700806,
+      "learning_rate": 2.089578947368421e-05,
+      "loss": 1.6086,
+      "step": 161
+    },
+    {
+      "epoch": 0.09207161125319693,
+      "grad_norm": 0.5187984704971313,
+      "learning_rate": 2.036e-05,
+      "loss": 1.3669,
+      "step": 162
+    },
+    {
+      "epoch": 0.09263995453253765,
+      "grad_norm": 0.5901036262512207,
+      "learning_rate": 1.9824210526315787e-05,
+      "loss": 1.7017,
+      "step": 163
+    },
+    {
+      "epoch": 0.09320829781187838,
+      "grad_norm": 0.5522847175598145,
+      "learning_rate": 1.928842105263158e-05,
+      "loss": 1.4784,
+      "step": 164
+    },
+    {
+      "epoch": 0.0937766410912191,
+      "grad_norm": 0.6255651116371155,
+      "learning_rate": 1.8752631578947367e-05,
+      "loss": 1.7601,
+      "step": 165
+    },
+    {
+      "epoch": 0.09434498437055981,
+      "grad_norm": 0.586127519607544,
+      "learning_rate": 1.8216842105263156e-05,
+      "loss": 1.6422,
+      "step": 166
+    },
+    {
+      "epoch": 0.09491332764990054,
+      "grad_norm": 0.6434412598609924,
+      "learning_rate": 1.7681052631578948e-05,
+      "loss": 1.6697,
+      "step": 167
+    },
+    {
+      "epoch": 0.09548167092924126,
+      "grad_norm": 0.5888460278511047,
+      "learning_rate": 1.7145263157894736e-05,
+      "loss": 1.4801,
+      "step": 168
+    },
+    {
+      "epoch": 0.09605001420858199,
+      "grad_norm": 0.6054426431655884,
+      "learning_rate": 1.6609473684210525e-05,
+      "loss": 1.6244,
+      "step": 169
+    },
+    {
+      "epoch": 0.0966183574879227,
+      "grad_norm": 0.7235743999481201,
+      "learning_rate": 1.6073684210526313e-05,
+      "loss": 1.7011,
+      "step": 170
+    },
+    {
+      "epoch": 0.09718670076726342,
+      "grad_norm": 0.6796721816062927,
+      "learning_rate": 1.5537894736842105e-05,
+      "loss": 1.5895,
+      "step": 171
+    },
+    {
+      "epoch": 0.09775504404660415,
+      "grad_norm": 0.6769363284111023,
+      "learning_rate": 1.5002105263157892e-05,
+      "loss": 1.4874,
+      "step": 172
+    },
+    {
+      "epoch": 0.09832338732594487,
+      "grad_norm": 0.6305172443389893,
+      "learning_rate": 1.4466315789473684e-05,
+      "loss": 1.3513,
+      "step": 173
+    },
+    {
+      "epoch": 0.0988917306052856,
+      "grad_norm": 0.7441319823265076,
+      "learning_rate": 1.3930526315789474e-05,
+      "loss": 1.4042,
+      "step": 174
+    },
+    {
+      "epoch": 0.09946007388462631,
+      "grad_norm": 0.7381362318992615,
+      "learning_rate": 1.3394736842105261e-05,
+      "loss": 1.3791,
+      "step": 175
+    },
+    {
+      "epoch": 0.10002841716396704,
+      "grad_norm": 0.816709041595459,
+      "learning_rate": 1.2858947368421053e-05,
+      "loss": 1.6178,
+      "step": 176
+    },
+    {
+      "epoch": 0.10059676044330776,
+      "grad_norm": 0.7869638800621033,
+      "learning_rate": 1.2323157894736842e-05,
+      "loss": 1.3603,
+      "step": 177
+    },
+    {
+      "epoch": 0.10116510372264848,
+      "grad_norm": 0.9357262253761292,
+      "learning_rate": 1.178736842105263e-05,
+      "loss": 1.5296,
+      "step": 178
+    },
+    {
+      "epoch": 0.1017334470019892,
+      "grad_norm": 0.8037711381912231,
+      "learning_rate": 1.125157894736842e-05,
+      "loss": 1.4484,
+      "step": 179
+    },
+    {
+      "epoch": 0.10230179028132992,
+      "grad_norm": 0.9005003571510315,
+      "learning_rate": 1.0715789473684209e-05,
+      "loss": 1.4668,
+      "step": 180
+    },
+    {
+      "epoch": 0.10287013356067065,
+      "grad_norm": 0.9204942584037781,
+      "learning_rate": 1.018e-05,
+      "loss": 1.6658,
+      "step": 181
+    },
+    {
+      "epoch": 0.10343847684001137,
+      "grad_norm": 1.056218147277832,
+      "learning_rate": 9.64421052631579e-06,
+      "loss": 1.7106,
+      "step": 182
+    },
+    {
+      "epoch": 0.10400682011935208,
+      "grad_norm": 1.1173593997955322,
+      "learning_rate": 9.108421052631578e-06,
+      "loss": 1.7539,
+      "step": 183
+    },
+    {
+      "epoch": 0.10457516339869281,
+      "grad_norm": 1.0534868240356445,
+      "learning_rate": 8.572631578947368e-06,
+      "loss": 1.3405,
+      "step": 184
+    },
+    {
+      "epoch": 0.10514350667803353,
+      "grad_norm": 0.9765002131462097,
+      "learning_rate": 8.036842105263157e-06,
+      "loss": 1.5626,
+      "step": 185
+    },
+    {
+      "epoch": 0.10571184995737426,
+      "grad_norm": 1.197899341583252,
+      "learning_rate": 7.501052631578946e-06,
+      "loss": 1.596,
+      "step": 186
+    },
+    {
+      "epoch": 0.10628019323671498,
+      "grad_norm": 1.2872265577316284,
+      "learning_rate": 6.965263157894737e-06,
+      "loss": 1.5636,
+      "step": 187
+    },
+    {
+      "epoch": 0.10684853651605569,
+      "grad_norm": 1.211369276046753,
+      "learning_rate": 6.4294736842105265e-06,
+      "loss": 1.6695,
+      "step": 188
+    },
+    {
+      "epoch": 0.10741687979539642,
+      "grad_norm": 1.3407747745513916,
+      "learning_rate": 5.893684210526315e-06,
+      "loss": 1.675,
+      "step": 189
+    },
+    {
+      "epoch": 0.10798522307473714,
+      "grad_norm": 1.2818570137023926,
+      "learning_rate": 5.3578947368421044e-06,
+      "loss": 1.496,
+      "step": 190
+    },
+    {
+      "epoch": 0.10855356635407787,
+      "grad_norm": 1.1480392217636108,
+      "learning_rate": 4.822105263157895e-06,
+      "loss": 1.5432,
+      "step": 191
+    },
+    {
+      "epoch": 0.10912190963341858,
+      "grad_norm": 1.3711426258087158,
+      "learning_rate": 4.286315789473684e-06,
+      "loss": 1.4977,
+      "step": 192
+    },
+    {
+      "epoch": 0.1096902529127593,
+      "grad_norm": 1.4758083820343018,
+      "learning_rate": 3.750526315789473e-06,
+      "loss": 1.0595,
+      "step": 193
+    },
+    {
+      "epoch": 0.11025859619210003,
+      "grad_norm": 1.748803734779358,
+      "learning_rate": 3.2147368421052633e-06,
+      "loss": 1.5774,
+      "step": 194
+    },
+    {
+      "epoch": 0.11082693947144075,
+      "grad_norm": 1.7619532346725464,
+      "learning_rate": 2.6789473684210522e-06,
+      "loss": 1.5964,
+      "step": 195
+    },
+    {
+      "epoch": 0.11139528275078148,
+      "grad_norm": 1.7181921005249023,
+      "learning_rate": 2.143157894736842e-06,
+      "loss": 1.1225,
+      "step": 196
+    },
+    {
+      "epoch": 0.11196362603012219,
+      "grad_norm": 1.7596096992492676,
+      "learning_rate": 1.6073684210526316e-06,
+      "loss": 1.6769,
+      "step": 197
+    },
+    {
+      "epoch": 0.11253196930946291,
+      "grad_norm": 2.1839563846588135,
+      "learning_rate": 1.071578947368421e-06,
+      "loss": 1.4875,
+      "step": 198
+    },
+    {
+      "epoch": 0.11310031258880364,
+      "grad_norm": 2.683014154434204,
+      "learning_rate": 5.357894736842105e-07,
+      "loss": 2.4549,
+      "step": 199
+    },
+    {
+      "epoch": 0.11366865586814436,
+      "grad_norm": 3.496476888656616,
+      "learning_rate": 0.0,
+      "loss": 2.27,
+      "step": 200
+    },
+    {
+      "epoch": 0.11366865586814436,
+      "eval_loss": 1.630453109741211,
+      "eval_runtime": 37.4531,
+      "eval_samples_per_second": 19.785,
+      "eval_steps_per_second": 4.966,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6.835422040267162e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null