Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:245cfb0e3c67cc5b15c36a6addb7a9a87f7b80bc5b05c497a012863188d6a793
 size 639691872

 version https://git-lfs.github.com/spec/v1
+oid sha256:99b9eda75dbdde8aefa10a17da2cb05fefce7bc42c0fdfd9a08c11ef9094ddeb
 size 639691872

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ee245058381e6aff9ec8f7bbc5edf46706363b55a562d17c47e5adcae5a5ea57
 size 1279641042

 version https://git-lfs.github.com/spec/v1
+oid sha256:30f2f42236462338e9b838455fd0adbc45db1674e9d0fa40534e7a7984afd5d3
 size 1279641042

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2d561c013e5d918a234ffe067988ca500e7e91f04bfab6a34a74a15f83c177b1
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:faff995763bc4e40a36c0b4f84b69116005fe95c667b33e55d6c85c5745755b1
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dfd59dca009004df561617f8f6994512d029a952a68609cac24b36df5a0757ce
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.7522867918014526,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.20429009193054137,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 11.95,
       "eval_steps_per_second": 5.98,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.10612630061056e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.7441057562828064,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.27238678924072185,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 11.95,
       "eval_steps_per_second": 5.98,
       "step": 150
+    },
+    {
+      "epoch": 0.20565202587674497,
+      "grad_norm": 0.17255501449108124,
+      "learning_rate": 1.9136088935510362e-05,
+      "loss": 0.6241,
+      "step": 151
+    },
+    {
+      "epoch": 0.2070139598229486,
+      "grad_norm": 0.15434518456459045,
+      "learning_rate": 1.8414449687337464e-05,
+      "loss": 0.5175,
+      "step": 152
+    },
+    {
+      "epoch": 0.2083758937691522,
+      "grad_norm": 0.16057954728603363,
+      "learning_rate": 1.7703596875660645e-05,
+      "loss": 0.5843,
+      "step": 153
+    },
+    {
+      "epoch": 0.20973782771535582,
+      "grad_norm": 0.16260911524295807,
+      "learning_rate": 1.700377325606388e-05,
+      "loss": 0.6183,
+      "step": 154
+    },
+    {
+      "epoch": 0.21109976166155942,
+      "grad_norm": 0.180234894156456,
+      "learning_rate": 1.631521781767214e-05,
+      "loss": 0.6693,
+      "step": 155
+    },
+    {
+      "epoch": 0.212461695607763,
+      "grad_norm": 0.21156199276447296,
+      "learning_rate": 1.5638165701536868e-05,
+      "loss": 0.726,
+      "step": 156
+    },
+    {
+      "epoch": 0.21382362955396664,
+      "grad_norm": 0.18673358857631683,
+      "learning_rate": 1.4972848120335453e-05,
+      "loss": 0.6639,
+      "step": 157
+    },
+    {
+      "epoch": 0.21518556350017023,
+      "grad_norm": 0.19483909010887146,
+      "learning_rate": 1.4319492279412388e-05,
+      "loss": 0.6797,
+      "step": 158
+    },
+    {
+      "epoch": 0.21654749744637386,
+      "grad_norm": 0.2238985300064087,
+      "learning_rate": 1.3678321299188801e-05,
+      "loss": 0.6316,
+      "step": 159
+    },
+    {
+      "epoch": 0.21790943139257746,
+      "grad_norm": 0.22773568332195282,
+      "learning_rate": 1.3049554138967051e-05,
+      "loss": 0.7293,
+      "step": 160
+    },
+    {
+      "epoch": 0.21927136533878108,
+      "grad_norm": 0.22139477729797363,
+      "learning_rate": 1.2433405522156332e-05,
+      "loss": 0.5769,
+      "step": 161
+    },
+    {
+      "epoch": 0.22063329928498468,
+      "grad_norm": 0.2327103167772293,
+      "learning_rate": 1.183008586294485e-05,
+      "loss": 0.7907,
+      "step": 162
+    },
+    {
+      "epoch": 0.22199523323118828,
+      "grad_norm": 0.22104878723621368,
+      "learning_rate": 1.1239801194443506e-05,
+      "loss": 0.6322,
+      "step": 163
+    },
+    {
+      "epoch": 0.2233571671773919,
+      "grad_norm": 0.2553716003894806,
+      "learning_rate": 1.066275309832584e-05,
+      "loss": 0.7341,
+      "step": 164
+    },
+    {
+      "epoch": 0.2247191011235955,
+      "grad_norm": 0.2540958523750305,
+      "learning_rate": 1.0099138635988026e-05,
+      "loss": 0.7212,
+      "step": 165
+    },
+    {
+      "epoch": 0.22608103506979912,
+      "grad_norm": 0.24824371933937073,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.7148,
+      "step": 166
+    },
+    {
+      "epoch": 0.22744296901600272,
+      "grad_norm": 0.2560056447982788,
+      "learning_rate": 9.012975854638949e-06,
+      "loss": 0.8558,
+      "step": 167
+    },
+    {
+      "epoch": 0.22880490296220635,
+      "grad_norm": 0.2578204274177551,
+      "learning_rate": 8.490798459222476e-06,
+      "loss": 0.7938,
+      "step": 168
+    },
+    {
+      "epoch": 0.23016683690840994,
+      "grad_norm": 0.23950396478176117,
+      "learning_rate": 7.982796418105371e-06,
+      "loss": 0.7336,
+      "step": 169
+    },
+    {
+      "epoch": 0.23152877085461354,
+      "grad_norm": 0.2819407880306244,
+      "learning_rate": 7.489143213519301e-06,
+      "loss": 0.8381,
+      "step": 170
+    },
+    {
+      "epoch": 0.23289070480081717,
+      "grad_norm": 0.2772591710090637,
+      "learning_rate": 7.010007427581378e-06,
+      "loss": 0.6564,
+      "step": 171
+    },
+    {
+      "epoch": 0.23425263874702076,
+      "grad_norm": 0.28909778594970703,
+      "learning_rate": 6.5455526847235825e-06,
+      "loss": 0.7016,
+      "step": 172
+    },
+    {
+      "epoch": 0.2356145726932244,
+      "grad_norm": 0.2975641191005707,
+      "learning_rate": 6.0959375958151045e-06,
+      "loss": 0.7454,
+      "step": 173
+    },
+    {
+      "epoch": 0.236976506639428,
+      "grad_norm": 0.3052116334438324,
+      "learning_rate": 5.6613157039969055e-06,
+      "loss": 0.7788,
+      "step": 174
+    },
+    {
+      "epoch": 0.23833844058563158,
+      "grad_norm": 0.3118731677532196,
+      "learning_rate": 5.241835432246889e-06,
+      "loss": 0.68,
+      "step": 175
+    },
+    {
+      "epoch": 0.2397003745318352,
+      "grad_norm": 0.35767170786857605,
+      "learning_rate": 4.837640032693558e-06,
+      "loss": 0.7429,
+      "step": 176
+    },
+    {
+      "epoch": 0.2410623084780388,
+      "grad_norm": 0.361837238073349,
+      "learning_rate": 4.448867537695578e-06,
+      "loss": 0.8067,
+      "step": 177
+    },
+    {
+      "epoch": 0.24242424242424243,
+      "grad_norm": 0.3700478971004486,
+      "learning_rate": 4.075650712703849e-06,
+      "loss": 0.7809,
+      "step": 178
+    },
+    {
+      "epoch": 0.24378617637044603,
+      "grad_norm": 0.39388328790664673,
+      "learning_rate": 3.71811701092219e-06,
+      "loss": 0.6992,
+      "step": 179
+    },
+    {
+      "epoch": 0.24514811031664965,
+      "grad_norm": 0.43889668583869934,
+      "learning_rate": 3.376388529782215e-06,
+      "loss": 0.8435,
+      "step": 180
+    },
+    {
+      "epoch": 0.24651004426285325,
+      "grad_norm": 0.45104825496673584,
+      "learning_rate": 3.0505819692471792e-06,
+      "loss": 0.6705,
+      "step": 181
+    },
+    {
+      "epoch": 0.24787197820905685,
+      "grad_norm": 0.4363878071308136,
+      "learning_rate": 2.7408085919590264e-06,
+      "loss": 0.7322,
+      "step": 182
+    },
+    {
+      "epoch": 0.24923391215526047,
+      "grad_norm": 0.35879090428352356,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.722,
+      "step": 183
+    },
+    {
+      "epoch": 0.25059584610146407,
+      "grad_norm": 0.43597957491874695,
+      "learning_rate": 2.1697790249779636e-06,
+      "loss": 0.7197,
+      "step": 184
+    },
+    {
+      "epoch": 0.25195778004766767,
+      "grad_norm": 0.49777480959892273,
+      "learning_rate": 1.908717841359048e-06,
+      "loss": 0.827,
+      "step": 185
+    },
+    {
+      "epoch": 0.2533197139938713,
+      "grad_norm": 0.41167500615119934,
+      "learning_rate": 1.6640797865406288e-06,
+      "loss": 0.6546,
+      "step": 186
+    },
+    {
+      "epoch": 0.2546816479400749,
+      "grad_norm": 0.4394213557243347,
+      "learning_rate": 1.4359484041943038e-06,
+      "loss": 0.8141,
+      "step": 187
+    },
+    {
+      "epoch": 0.2560435818862785,
+      "grad_norm": 0.556496798992157,
+      "learning_rate": 1.2244016009781701e-06,
+      "loss": 0.8927,
+      "step": 188
+    },
+    {
+      "epoch": 0.2574055158324821,
+      "grad_norm": 0.5145028829574585,
+      "learning_rate": 1.0295116199317057e-06,
+      "loss": 0.7661,
+      "step": 189
+    },
+    {
+      "epoch": 0.2587674497786857,
+      "grad_norm": 0.5240097045898438,
+      "learning_rate": 8.513450158049108e-07,
+      "loss": 0.7814,
+      "step": 190
+    },
+    {
+      "epoch": 0.26012938372488936,
+      "grad_norm": 0.4952259063720703,
+      "learning_rate": 6.899626323298713e-07,
+      "loss": 0.76,
+      "step": 191
+    },
+    {
+      "epoch": 0.26149131767109296,
+      "grad_norm": 0.44523516297340393,
+      "learning_rate": 5.454195814427021e-07,
+      "loss": 0.7334,
+      "step": 192
+    },
+    {
+      "epoch": 0.26285325161729656,
+      "grad_norm": 0.5299931764602661,
+      "learning_rate": 4.177652244628627e-07,
+      "loss": 0.7903,
+      "step": 193
+    },
+    {
+      "epoch": 0.26421518556350015,
+      "grad_norm": 0.4894885718822479,
+      "learning_rate": 3.0704315523631953e-07,
+      "loss": 0.7987,
+      "step": 194
+    },
+    {
+      "epoch": 0.26557711950970375,
+      "grad_norm": 0.5092527270317078,
+      "learning_rate": 2.1329118524827662e-07,
+      "loss": 0.7906,
+      "step": 195
+    },
+    {
+      "epoch": 0.2669390534559074,
+      "grad_norm": 0.48224949836730957,
+      "learning_rate": 1.3654133071059893e-07,
+      "loss": 0.8502,
+      "step": 196
+    },
+    {
+      "epoch": 0.268300987402111,
+      "grad_norm": 0.6226413249969482,
+      "learning_rate": 7.681980162830282e-08,
+      "loss": 0.9195,
+      "step": 197
+    },
+    {
+      "epoch": 0.2696629213483146,
+      "grad_norm": 0.5972543954849243,
+      "learning_rate": 3.4146992848854695e-08,
+      "loss": 0.8265,
+      "step": 198
+    },
+    {
+      "epoch": 0.2710248552945182,
+      "grad_norm": 0.6523533463478088,
+      "learning_rate": 8.537477097364522e-09,
+      "loss": 0.5512,
+      "step": 199
+    },
+    {
+      "epoch": 0.27238678924072185,
+      "grad_norm": 1.3673629760742188,
+      "learning_rate": 0.0,
+      "loss": 1.0819,
+      "step": 200
+    },
+    {
+      "epoch": 0.27238678924072185,
+      "eval_loss": 0.7441057562828064,
+      "eval_runtime": 103.3871,
+      "eval_samples_per_second": 11.965,
+      "eval_steps_per_second": 5.987,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.8092898738176e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null