Training in progress, step 150, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b3864acfd1014d1a0df8fa27f82cfce48218ec637e63876f792c3bed4c0f1a5e
 size 406863720

 version https://git-lfs.github.com/spec/v1
+oid sha256:6dad9abbf8bc0bc85d7a6855eef266e8316865f25cbf437b81052bd71bb0925d
 size 406863720

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a211302c27b69a338500ec852ad9e883d840459d8a2d3446d7d8c14b02d137fa
 size 207013892

 version https://git-lfs.github.com/spec/v1
+oid sha256:3d93587f99744a48f80040138f37c810fedc14962defd778baf99b1b370e8b66
 size 207013892

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e1aa6e4674a8b4ab54dbebd0dd5739511029a591dcd8c88101728092c1e72f7
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:4573b1446910d3d1f9cc5fe520213739360e4bb421639354996d7d0e468220ce
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3388093bd4c4d331382d0016aa94fd7868af3fa4dd50305298d21879ce809e93
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c16a9a2b8856e8f64eb7194578c6fcbc8625033d1caa318cc7b80ad824088880
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.1450011283159256,
-  "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.2544529262086514,
   "eval_steps": 50,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -731,6 +731,364 @@
       "eval_samples_per_second": 16.324,
       "eval_steps_per_second": 4.13,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -759,7 +1117,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.6838380486656e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.1360974758863449,
+  "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 0.3816793893129771,
   "eval_steps": 50,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 16.324,
       "eval_steps_per_second": 4.13,
       "step": 100
+    },
+    {
+      "epoch": 0.25699745547073793,
+      "grad_norm": 0.9659016728401184,
+      "learning_rate": 5.219384210526315e-05,
+      "loss": 0.2955,
+      "step": 101
+    },
+    {
+      "epoch": 0.2595419847328244,
+      "grad_norm": 0.5893524885177612,
+      "learning_rate": 5.1666631578947374e-05,
+      "loss": 0.158,
+      "step": 102
+    },
+    {
+      "epoch": 0.26208651399491095,
+      "grad_norm": 0.561215877532959,
+      "learning_rate": 5.113942105263158e-05,
+      "loss": 0.1543,
+      "step": 103
+    },
+    {
+      "epoch": 0.26463104325699743,
+      "grad_norm": 0.5634675621986389,
+      "learning_rate": 5.061221052631579e-05,
+      "loss": 0.1578,
+      "step": 104
+    },
+    {
+      "epoch": 0.26717557251908397,
+      "grad_norm": 0.7371407747268677,
+      "learning_rate": 5.0085e-05,
+      "loss": 0.1396,
+      "step": 105
+    },
+    {
+      "epoch": 0.2697201017811705,
+      "grad_norm": 0.62689608335495,
+      "learning_rate": 4.955778947368421e-05,
+      "loss": 0.1542,
+      "step": 106
+    },
+    {
+      "epoch": 0.272264631043257,
+      "grad_norm": 0.5553103685379028,
+      "learning_rate": 4.903057894736842e-05,
+      "loss": 0.0909,
+      "step": 107
+    },
+    {
+      "epoch": 0.2748091603053435,
+      "grad_norm": 0.5198187828063965,
+      "learning_rate": 4.850336842105263e-05,
+      "loss": 0.0785,
+      "step": 108
+    },
+    {
+      "epoch": 0.27735368956743,
+      "grad_norm": 0.7179524898529053,
+      "learning_rate": 4.797615789473684e-05,
+      "loss": 0.1036,
+      "step": 109
+    },
+    {
+      "epoch": 0.27989821882951654,
+      "grad_norm": 0.44508594274520874,
+      "learning_rate": 4.744894736842105e-05,
+      "loss": 0.1104,
+      "step": 110
+    },
+    {
+      "epoch": 0.2824427480916031,
+      "grad_norm": 0.7336511015892029,
+      "learning_rate": 4.692173684210526e-05,
+      "loss": 0.1067,
+      "step": 111
+    },
+    {
+      "epoch": 0.28498727735368956,
+      "grad_norm": 0.9355735778808594,
+      "learning_rate": 4.639452631578947e-05,
+      "loss": 0.1675,
+      "step": 112
+    },
+    {
+      "epoch": 0.2875318066157761,
+      "grad_norm": 0.46843382716178894,
+      "learning_rate": 4.586731578947368e-05,
+      "loss": 0.0723,
+      "step": 113
+    },
+    {
+      "epoch": 0.2900763358778626,
+      "grad_norm": 0.5565648078918457,
+      "learning_rate": 4.5340105263157894e-05,
+      "loss": 0.0579,
+      "step": 114
+    },
+    {
+      "epoch": 0.2926208651399491,
+      "grad_norm": 0.35323649644851685,
+      "learning_rate": 4.48128947368421e-05,
+      "loss": 0.0532,
+      "step": 115
+    },
+    {
+      "epoch": 0.2951653944020356,
+      "grad_norm": 0.41509339213371277,
+      "learning_rate": 4.428568421052632e-05,
+      "loss": 0.0788,
+      "step": 116
+    },
+    {
+      "epoch": 0.29770992366412213,
+      "grad_norm": 0.4781738817691803,
+      "learning_rate": 4.3758473684210525e-05,
+      "loss": 0.0939,
+      "step": 117
+    },
+    {
+      "epoch": 0.30025445292620867,
+      "grad_norm": 0.5751485824584961,
+      "learning_rate": 4.323126315789474e-05,
+      "loss": 0.0883,
+      "step": 118
+    },
+    {
+      "epoch": 0.30279898218829515,
+      "grad_norm": 0.2596683204174042,
+      "learning_rate": 4.270405263157895e-05,
+      "loss": 0.0365,
+      "step": 119
+    },
+    {
+      "epoch": 0.3053435114503817,
+      "grad_norm": 0.572528600692749,
+      "learning_rate": 4.217684210526316e-05,
+      "loss": 0.0871,
+      "step": 120
+    },
+    {
+      "epoch": 0.30788804071246817,
+      "grad_norm": 0.5119253396987915,
+      "learning_rate": 4.164963157894737e-05,
+      "loss": 0.0973,
+      "step": 121
+    },
+    {
+      "epoch": 0.3104325699745547,
+      "grad_norm": 0.5054477453231812,
+      "learning_rate": 4.112242105263158e-05,
+      "loss": 0.0494,
+      "step": 122
+    },
+    {
+      "epoch": 0.31297709923664124,
+      "grad_norm": 0.3897090256214142,
+      "learning_rate": 4.059521052631579e-05,
+      "loss": 0.0333,
+      "step": 123
+    },
+    {
+      "epoch": 0.3155216284987277,
+      "grad_norm": 0.2573760747909546,
+      "learning_rate": 4.0068e-05,
+      "loss": 0.0229,
+      "step": 124
+    },
+    {
+      "epoch": 0.31806615776081426,
+      "grad_norm": 0.28332197666168213,
+      "learning_rate": 3.954078947368421e-05,
+      "loss": 0.0257,
+      "step": 125
+    },
+    {
+      "epoch": 0.32061068702290074,
+      "grad_norm": 0.446418434381485,
+      "learning_rate": 3.901357894736842e-05,
+      "loss": 0.0825,
+      "step": 126
+    },
+    {
+      "epoch": 0.3231552162849873,
+      "grad_norm": 0.29756420850753784,
+      "learning_rate": 3.848636842105263e-05,
+      "loss": 0.0242,
+      "step": 127
+    },
+    {
+      "epoch": 0.3256997455470738,
+      "grad_norm": 0.5935866236686707,
+      "learning_rate": 3.795915789473684e-05,
+      "loss": 0.0925,
+      "step": 128
+    },
+    {
+      "epoch": 0.3282442748091603,
+      "grad_norm": 0.2986157536506653,
+      "learning_rate": 3.743194736842105e-05,
+      "loss": 0.0154,
+      "step": 129
+    },
+    {
+      "epoch": 0.33078880407124683,
+      "grad_norm": 0.03564433753490448,
+      "learning_rate": 3.690473684210526e-05,
+      "loss": 0.0024,
+      "step": 130
+    },
+    {
+      "epoch": 0.3333333333333333,
+      "grad_norm": 0.021472949534654617,
+      "learning_rate": 3.6377526315789475e-05,
+      "loss": 0.0011,
+      "step": 131
+    },
+    {
+      "epoch": 0.33587786259541985,
+      "grad_norm": 1.2343424558639526,
+      "learning_rate": 3.585031578947368e-05,
+      "loss": 0.3226,
+      "step": 132
+    },
+    {
+      "epoch": 0.3384223918575064,
+      "grad_norm": 1.3323383331298828,
+      "learning_rate": 3.532310526315789e-05,
+      "loss": 0.2967,
+      "step": 133
+    },
+    {
+      "epoch": 0.34096692111959287,
+      "grad_norm": 1.58578360080719,
+      "learning_rate": 3.4795894736842106e-05,
+      "loss": 0.2687,
+      "step": 134
+    },
+    {
+      "epoch": 0.3435114503816794,
+      "grad_norm": 1.3783105611801147,
+      "learning_rate": 3.4268684210526314e-05,
+      "loss": 0.2798,
+      "step": 135
+    },
+    {
+      "epoch": 0.3460559796437659,
+      "grad_norm": 1.470922827720642,
+      "learning_rate": 3.374147368421052e-05,
+      "loss": 0.3177,
+      "step": 136
+    },
+    {
+      "epoch": 0.3486005089058524,
+      "grad_norm": 1.449453592300415,
+      "learning_rate": 3.321426315789473e-05,
+      "loss": 0.1875,
+      "step": 137
+    },
+    {
+      "epoch": 0.3511450381679389,
+      "grad_norm": 1.273271083831787,
+      "learning_rate": 3.2687052631578946e-05,
+      "loss": 0.2517,
+      "step": 138
+    },
+    {
+      "epoch": 0.35368956743002544,
+      "grad_norm": 1.2989132404327393,
+      "learning_rate": 3.2159842105263154e-05,
+      "loss": 0.1741,
+      "step": 139
+    },
+    {
+      "epoch": 0.356234096692112,
+      "grad_norm": 1.1349838972091675,
+      "learning_rate": 3.163263157894737e-05,
+      "loss": 0.2073,
+      "step": 140
+    },
+    {
+      "epoch": 0.35877862595419846,
+      "grad_norm": 1.2873899936676025,
+      "learning_rate": 3.110542105263158e-05,
+      "loss": 0.1692,
+      "step": 141
+    },
+    {
+      "epoch": 0.361323155216285,
+      "grad_norm": 1.297892689704895,
+      "learning_rate": 3.057821052631579e-05,
+      "loss": 0.1529,
+      "step": 142
+    },
+    {
+      "epoch": 0.3638676844783715,
+      "grad_norm": 1.0262969732284546,
+      "learning_rate": 3.0050999999999997e-05,
+      "loss": 0.1269,
+      "step": 143
+    },
+    {
+      "epoch": 0.366412213740458,
+      "grad_norm": 1.489499807357788,
+      "learning_rate": 2.9523789473684206e-05,
+      "loss": 0.2182,
+      "step": 144
+    },
+    {
+      "epoch": 0.36895674300254455,
+      "grad_norm": 2.6656413078308105,
+      "learning_rate": 2.899657894736842e-05,
+      "loss": 0.1811,
+      "step": 145
+    },
+    {
+      "epoch": 0.37150127226463103,
+      "grad_norm": 1.5800155401229858,
+      "learning_rate": 2.846936842105263e-05,
+      "loss": 0.1364,
+      "step": 146
+    },
+    {
+      "epoch": 0.37404580152671757,
+      "grad_norm": 0.6563022136688232,
+      "learning_rate": 2.794215789473684e-05,
+      "loss": 0.0965,
+      "step": 147
+    },
+    {
+      "epoch": 0.37659033078880405,
+      "grad_norm": 1.1012194156646729,
+      "learning_rate": 2.7414947368421056e-05,
+      "loss": 0.097,
+      "step": 148
+    },
+    {
+      "epoch": 0.3791348600508906,
+      "grad_norm": 1.3474540710449219,
+      "learning_rate": 2.6887736842105264e-05,
+      "loss": 0.1278,
+      "step": 149
+    },
+    {
+      "epoch": 0.3816793893129771,
+      "grad_norm": 1.2162439823150635,
+      "learning_rate": 2.6360526315789472e-05,
+      "loss": 0.1464,
+      "step": 150
+    },
+    {
+      "epoch": 0.3816793893129771,
+      "eval_loss": 0.1360974758863449,
+      "eval_runtime": 10.1762,
+      "eval_samples_per_second": 16.313,
+      "eval_steps_per_second": 4.127,
+      "step": 150
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 2.5257570729984e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null