Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7a0b314709ac39deea9ebc5eab56fdd6bc7371487ae26a6f9dd07243eafdd234
 size 295488936

 version https://git-lfs.github.com/spec/v1
+oid sha256:e1ae0ab8ecd2934b6e70a7adcd2600ffd2233968f80f6545663962fd28220b92
 size 295488936

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:449f0a1035e0ffd145c266ac5b7bd405bbb6f417197232619da0404e634d5d3f
 size 591203178

 version https://git-lfs.github.com/spec/v1
+oid sha256:1cec4294daf8ea20b863aee64e06529bab850a7433ab64ba876965cb4b43f3fe
 size 591203178

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f479335c3ae7a11ab4e36ff93d500c4b5520057cb82bf80528ad5587c31ec3ca
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:778e2175a87c1aec44c33904920c591b2a56327aea48f90e830a2c7157b01988
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dfd59dca009004df561617f8f6994512d029a952a68609cac24b36df5a0757ce
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.7458124756813049,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.08421052631578947,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 24.435,
       "eval_steps_per_second": 12.218,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.946230480207872e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.7339931726455688,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.11228070175438597,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 24.435,
       "eval_steps_per_second": 12.218,
       "step": 150
+    },
+    {
+      "epoch": 0.0847719298245614,
+      "grad_norm": 0.3122590482234955,
+      "learning_rate": 1.9136088935510362e-05,
+      "loss": 0.8948,
+      "step": 151
+    },
+    {
+      "epoch": 0.08533333333333333,
+      "grad_norm": 0.3245391845703125,
+      "learning_rate": 1.8414449687337464e-05,
+      "loss": 0.7903,
+      "step": 152
+    },
+    {
+      "epoch": 0.08589473684210526,
+      "grad_norm": 0.28587210178375244,
+      "learning_rate": 1.7703596875660645e-05,
+      "loss": 0.7252,
+      "step": 153
+    },
+    {
+      "epoch": 0.08645614035087719,
+      "grad_norm": 0.3171529173851013,
+      "learning_rate": 1.700377325606388e-05,
+      "loss": 0.6687,
+      "step": 154
+    },
+    {
+      "epoch": 0.08701754385964912,
+      "grad_norm": 0.2921241223812103,
+      "learning_rate": 1.631521781767214e-05,
+      "loss": 0.6201,
+      "step": 155
+    },
+    {
+      "epoch": 0.08757894736842105,
+      "grad_norm": 0.26473841071128845,
+      "learning_rate": 1.5638165701536868e-05,
+      "loss": 0.6518,
+      "step": 156
+    },
+    {
+      "epoch": 0.08814035087719298,
+      "grad_norm": 0.33115243911743164,
+      "learning_rate": 1.4972848120335453e-05,
+      "loss": 0.8362,
+      "step": 157
+    },
+    {
+      "epoch": 0.0887017543859649,
+      "grad_norm": 0.2714337110519409,
+      "learning_rate": 1.4319492279412388e-05,
+      "loss": 0.6746,
+      "step": 158
+    },
+    {
+      "epoch": 0.08926315789473684,
+      "grad_norm": 0.2988300025463104,
+      "learning_rate": 1.3678321299188801e-05,
+      "loss": 0.7037,
+      "step": 159
+    },
+    {
+      "epoch": 0.08982456140350877,
+      "grad_norm": 0.31018441915512085,
+      "learning_rate": 1.3049554138967051e-05,
+      "loss": 0.7861,
+      "step": 160
+    },
+    {
+      "epoch": 0.09038596491228071,
+      "grad_norm": 0.2746640145778656,
+      "learning_rate": 1.2433405522156332e-05,
+      "loss": 0.7249,
+      "step": 161
+    },
+    {
+      "epoch": 0.09094736842105264,
+      "grad_norm": 0.2654082179069519,
+      "learning_rate": 1.183008586294485e-05,
+      "loss": 0.7455,
+      "step": 162
+    },
+    {
+      "epoch": 0.09150877192982457,
+      "grad_norm": 0.24473796784877777,
+      "learning_rate": 1.1239801194443506e-05,
+      "loss": 0.6227,
+      "step": 163
+    },
+    {
+      "epoch": 0.0920701754385965,
+      "grad_norm": 0.2504828870296478,
+      "learning_rate": 1.066275309832584e-05,
+      "loss": 0.7073,
+      "step": 164
+    },
+    {
+      "epoch": 0.09263157894736843,
+      "grad_norm": 0.30534666776657104,
+      "learning_rate": 1.0099138635988026e-05,
+      "loss": 0.6981,
+      "step": 165
+    },
+    {
+      "epoch": 0.09319298245614036,
+      "grad_norm": 0.30247023701667786,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.6736,
+      "step": 166
+    },
+    {
+      "epoch": 0.09375438596491228,
+      "grad_norm": 0.4104022979736328,
+      "learning_rate": 9.012975854638949e-06,
+      "loss": 0.754,
+      "step": 167
+    },
+    {
+      "epoch": 0.09431578947368421,
+      "grad_norm": 0.3200322091579437,
+      "learning_rate": 8.490798459222476e-06,
+      "loss": 0.5872,
+      "step": 168
+    },
+    {
+      "epoch": 0.09487719298245614,
+      "grad_norm": 0.353659063577652,
+      "learning_rate": 7.982796418105371e-06,
+      "loss": 0.8617,
+      "step": 169
+    },
+    {
+      "epoch": 0.09543859649122807,
+      "grad_norm": 0.3442855179309845,
+      "learning_rate": 7.489143213519301e-06,
+      "loss": 0.7441,
+      "step": 170
+    },
+    {
+      "epoch": 0.096,
+      "grad_norm": 0.3479902744293213,
+      "learning_rate": 7.010007427581378e-06,
+      "loss": 0.7429,
+      "step": 171
+    },
+    {
+      "epoch": 0.09656140350877193,
+      "grad_norm": 0.3734584152698517,
+      "learning_rate": 6.5455526847235825e-06,
+      "loss": 0.7839,
+      "step": 172
+    },
+    {
+      "epoch": 0.09712280701754386,
+      "grad_norm": 0.3731192648410797,
+      "learning_rate": 6.0959375958151045e-06,
+      "loss": 0.8681,
+      "step": 173
+    },
+    {
+      "epoch": 0.09768421052631579,
+      "grad_norm": 0.4046560525894165,
+      "learning_rate": 5.6613157039969055e-06,
+      "loss": 0.9747,
+      "step": 174
+    },
+    {
+      "epoch": 0.09824561403508772,
+      "grad_norm": 0.5003874897956848,
+      "learning_rate": 5.241835432246889e-06,
+      "loss": 0.7606,
+      "step": 175
+    },
+    {
+      "epoch": 0.09880701754385965,
+      "grad_norm": 0.5039113759994507,
+      "learning_rate": 4.837640032693558e-06,
+      "loss": 0.7671,
+      "step": 176
+    },
+    {
+      "epoch": 0.09936842105263158,
+      "grad_norm": 0.5854450464248657,
+      "learning_rate": 4.448867537695578e-06,
+      "loss": 0.7763,
+      "step": 177
+    },
+    {
+      "epoch": 0.09992982456140351,
+      "grad_norm": 0.5254133343696594,
+      "learning_rate": 4.075650712703849e-06,
+      "loss": 0.7363,
+      "step": 178
+    },
+    {
+      "epoch": 0.10049122807017544,
+      "grad_norm": 0.5278907418251038,
+      "learning_rate": 3.71811701092219e-06,
+      "loss": 0.7107,
+      "step": 179
+    },
+    {
+      "epoch": 0.10105263157894737,
+      "grad_norm": 0.6159325242042542,
+      "learning_rate": 3.376388529782215e-06,
+      "loss": 0.6757,
+      "step": 180
+    },
+    {
+      "epoch": 0.1016140350877193,
+      "grad_norm": 0.6477799415588379,
+      "learning_rate": 3.0505819692471792e-06,
+      "loss": 0.6614,
+      "step": 181
+    },
+    {
+      "epoch": 0.10217543859649122,
+      "grad_norm": 0.5958523154258728,
+      "learning_rate": 2.7408085919590264e-06,
+      "loss": 0.7575,
+      "step": 182
+    },
+    {
+      "epoch": 0.10273684210526315,
+      "grad_norm": 0.6421895027160645,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.4513,
+      "step": 183
+    },
+    {
+      "epoch": 0.10329824561403508,
+      "grad_norm": 0.7550863027572632,
+      "learning_rate": 2.1697790249779636e-06,
+      "loss": 0.6119,
+      "step": 184
+    },
+    {
+      "epoch": 0.10385964912280701,
+      "grad_norm": 0.7374166250228882,
+      "learning_rate": 1.908717841359048e-06,
+      "loss": 0.5968,
+      "step": 185
+    },
+    {
+      "epoch": 0.10442105263157894,
+      "grad_norm": 0.755599319934845,
+      "learning_rate": 1.6640797865406288e-06,
+      "loss": 0.5619,
+      "step": 186
+    },
+    {
+      "epoch": 0.10498245614035087,
+      "grad_norm": 0.8369899988174438,
+      "learning_rate": 1.4359484041943038e-06,
+      "loss": 0.5447,
+      "step": 187
+    },
+    {
+      "epoch": 0.1055438596491228,
+      "grad_norm": 0.875713050365448,
+      "learning_rate": 1.2244016009781701e-06,
+      "loss": 0.5324,
+      "step": 188
+    },
+    {
+      "epoch": 0.10610526315789473,
+      "grad_norm": 0.8864160776138306,
+      "learning_rate": 1.0295116199317057e-06,
+      "loss": 0.5216,
+      "step": 189
+    },
+    {
+      "epoch": 0.10666666666666667,
+      "grad_norm": 0.9906832575798035,
+      "learning_rate": 8.513450158049108e-07,
+      "loss": 0.6048,
+      "step": 190
+    },
+    {
+      "epoch": 0.1072280701754386,
+      "grad_norm": 1.3534337282180786,
+      "learning_rate": 6.899626323298713e-07,
+      "loss": 0.5319,
+      "step": 191
+    },
+    {
+      "epoch": 0.10778947368421053,
+      "grad_norm": 1.093711256980896,
+      "learning_rate": 5.454195814427021e-07,
+      "loss": 0.4393,
+      "step": 192
+    },
+    {
+      "epoch": 0.10835087719298246,
+      "grad_norm": 0.874680757522583,
+      "learning_rate": 4.177652244628627e-07,
+      "loss": 0.5225,
+      "step": 193
+    },
+    {
+      "epoch": 0.10891228070175439,
+      "grad_norm": 0.8974077105522156,
+      "learning_rate": 3.0704315523631953e-07,
+      "loss": 0.7351,
+      "step": 194
+    },
+    {
+      "epoch": 0.10947368421052632,
+      "grad_norm": 0.8559723496437073,
+      "learning_rate": 2.1329118524827662e-07,
+      "loss": 0.7442,
+      "step": 195
+    },
+    {
+      "epoch": 0.11003508771929825,
+      "grad_norm": 0.8494490385055542,
+      "learning_rate": 1.3654133071059893e-07,
+      "loss": 0.638,
+      "step": 196
+    },
+    {
+      "epoch": 0.11059649122807018,
+      "grad_norm": 0.8759176731109619,
+      "learning_rate": 7.681980162830282e-08,
+      "loss": 0.8537,
+      "step": 197
+    },
+    {
+      "epoch": 0.11115789473684211,
+      "grad_norm": 1.0472686290740967,
+      "learning_rate": 3.4146992848854695e-08,
+      "loss": 0.909,
+      "step": 198
+    },
+    {
+      "epoch": 0.11171929824561404,
+      "grad_norm": 1.3545598983764648,
+      "learning_rate": 8.537477097364522e-09,
+      "loss": 1.1257,
+      "step": 199
+    },
+    {
+      "epoch": 0.11228070175438597,
+      "grad_norm": 1.787885069847107,
+      "learning_rate": 0.0,
+      "loss": 1.7459,
+      "step": 200
+    },
+    {
+      "epoch": 0.11228070175438597,
+      "eval_loss": 0.7339931726455688,
+      "eval_runtime": 122.6295,
+      "eval_samples_per_second": 24.464,
+      "eval_steps_per_second": 12.232,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6.579098864320512e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null