Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7348054981fb2011924b590abb8e0633418112f07004d373ed7804b81b17a497
 size 338298

 version https://git-lfs.github.com/spec/v1
+oid sha256:aec5b26cba16128c74ba7a1a5729499a4761b866a468ba4ac68d424eab2dea48
 size 338298

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:19dc736042d9073f25fb9d08d200480f2938272ba16becd5abb982c2aaa4ac9c
 size 418030

 version https://git-lfs.github.com/spec/v1
+oid sha256:8d4d22503118a9becac03e86f000715af0dfc6b3af7c99dacc3bed945fb2bf69
 size 418030

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f708827b1e3d5ff7cf2230f8a433643ae0535c072a3e859a5815d6d25eaa4dbd
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:031103b268cf61237a979d0b97ca6b53ed878d4ac672557db5812dac9a8c5bb9
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8ce05761f46e7cf72fb17a02e3a0ca15c9d25ce3babf590eeb40568923b8bac
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 6.8596086502075195,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.025283384602418777,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 643.584,
       "eval_steps_per_second": 160.896,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2998719479808.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 6.858746528625488,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.0337111794698917,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 643.584,
       "eval_steps_per_second": 160.896,
       "step": 150
+    },
+    {
+      "epoch": 0.025451940499768236,
+      "grad_norm": 0.23164325952529907,
+      "learning_rate": 1.553232954407171e-05,
+      "loss": 6.8592,
+      "step": 151
+    },
+    {
+      "epoch": 0.025620496397117692,
+      "grad_norm": 0.18157918751239777,
+      "learning_rate": 1.4938160786375572e-05,
+      "loss": 6.8511,
+      "step": 152
+    },
+    {
+      "epoch": 0.025789052294467152,
+      "grad_norm": 0.1954401731491089,
+      "learning_rate": 1.435357758543015e-05,
+      "loss": 6.849,
+      "step": 153
+    },
+    {
+      "epoch": 0.02595760819181661,
+      "grad_norm": 0.18820726871490479,
+      "learning_rate": 1.3778739760445552e-05,
+      "loss": 6.8537,
+      "step": 154
+    },
+    {
+      "epoch": 0.02612616408916607,
+      "grad_norm": 0.15359963476657867,
+      "learning_rate": 1.3213804466343421e-05,
+      "loss": 6.8479,
+      "step": 155
+    },
+    {
+      "epoch": 0.026294719986515527,
+      "grad_norm": 0.19926409423351288,
+      "learning_rate": 1.2658926150792322e-05,
+      "loss": 6.8536,
+      "step": 156
+    },
+    {
+      "epoch": 0.026463275883864986,
+      "grad_norm": 0.11309228092432022,
+      "learning_rate": 1.2114256511983274e-05,
+      "loss": 6.86,
+      "step": 157
+    },
+    {
+      "epoch": 0.026631831781214445,
+      "grad_norm": 0.15824587643146515,
+      "learning_rate": 1.157994445715706e-05,
+      "loss": 6.8548,
+      "step": 158
+    },
+    {
+      "epoch": 0.026800387678563905,
+      "grad_norm": 0.1380545049905777,
+      "learning_rate": 1.1056136061894384e-05,
+      "loss": 6.8611,
+      "step": 159
+    },
+    {
+      "epoch": 0.02696894357591336,
+      "grad_norm": 0.2048022598028183,
+      "learning_rate": 1.0542974530180327e-05,
+      "loss": 6.8438,
+      "step": 160
+    },
+    {
+      "epoch": 0.02713749947326282,
+      "grad_norm": 0.1299951672554016,
+      "learning_rate": 1.0040600155253765e-05,
+      "loss": 6.8549,
+      "step": 161
+    },
+    {
+      "epoch": 0.02730605537061228,
+      "grad_norm": 0.11497420817613602,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 6.8614,
+      "step": 162
+    },
+    {
+      "epoch": 0.02747461126796174,
+      "grad_norm": 0.12729859352111816,
+      "learning_rate": 9.068759265665384e-06,
+      "loss": 6.8578,
+      "step": 163
+    },
+    {
+      "epoch": 0.027643167165311195,
+      "grad_norm": 0.11626887321472168,
+      "learning_rate": 8.599558442598998e-06,
+      "loss": 6.8629,
+      "step": 164
+    },
+    {
+      "epoch": 0.027811723062660654,
+      "grad_norm": 0.13181337714195251,
+      "learning_rate": 8.141676086873572e-06,
+      "loss": 6.8508,
+      "step": 165
+    },
+    {
+      "epoch": 0.027980278960010114,
+      "grad_norm": 0.1246551051735878,
+      "learning_rate": 7.695237378953223e-06,
+      "loss": 6.8631,
+      "step": 166
+    },
+    {
+      "epoch": 0.028148834857359573,
+      "grad_norm": 0.12475062161684036,
+      "learning_rate": 7.260364370723044e-06,
+      "loss": 6.8556,
+      "step": 167
+    },
+    {
+      "epoch": 0.02831739075470903,
+      "grad_norm": 0.12965960800647736,
+      "learning_rate": 6.837175952121306e-06,
+      "loss": 6.8542,
+      "step": 168
+    },
+    {
+      "epoch": 0.02848594665205849,
+      "grad_norm": 0.1600303202867508,
+      "learning_rate": 6.425787818636131e-06,
+      "loss": 6.8493,
+      "step": 169
+    },
+    {
+      "epoch": 0.028654502549407948,
+      "grad_norm": 0.12555383145809174,
+      "learning_rate": 6.026312439675552e-06,
+      "loss": 6.8618,
+      "step": 170
+    },
+    {
+      "epoch": 0.028823058446757407,
+      "grad_norm": 0.12346375733613968,
+      "learning_rate": 5.6388590278194096e-06,
+      "loss": 6.8596,
+      "step": 171
+    },
+    {
+      "epoch": 0.028991614344106863,
+      "grad_norm": 0.1225038692355156,
+      "learning_rate": 5.263533508961827e-06,
+      "loss": 6.8499,
+      "step": 172
+    },
+    {
+      "epoch": 0.029160170241456323,
+      "grad_norm": 0.1317131370306015,
+      "learning_rate": 4.900438493352055e-06,
+      "loss": 6.8587,
+      "step": 173
+    },
+    {
+      "epoch": 0.029328726138805782,
+      "grad_norm": 0.11677898466587067,
+      "learning_rate": 4.549673247541875e-06,
+      "loss": 6.8579,
+      "step": 174
+    },
+    {
+      "epoch": 0.02949728203615524,
+      "grad_norm": 0.12473507225513458,
+      "learning_rate": 4.2113336672471245e-06,
+      "loss": 6.8596,
+      "step": 175
+    },
+    {
+      "epoch": 0.029665837933504698,
+      "grad_norm": 0.12394051253795624,
+      "learning_rate": 3.885512251130763e-06,
+      "loss": 6.8598,
+      "step": 176
+    },
+    {
+      "epoch": 0.029834393830854157,
+      "grad_norm": 0.11931053549051285,
+      "learning_rate": 3.5722980755146517e-06,
+      "loss": 6.8553,
+      "step": 177
+    },
+    {
+      "epoch": 0.030002949728203616,
+      "grad_norm": 0.1377364993095398,
+      "learning_rate": 3.271776770026963e-06,
+      "loss": 6.8594,
+      "step": 178
+    },
+    {
+      "epoch": 0.030171505625553072,
+      "grad_norm": 0.11901822686195374,
+      "learning_rate": 2.9840304941919415e-06,
+      "loss": 6.862,
+      "step": 179
+    },
+    {
+      "epoch": 0.030340061522902532,
+      "grad_norm": 0.14693230390548706,
+      "learning_rate": 2.7091379149682685e-06,
+      "loss": 6.8631,
+      "step": 180
+    },
+    {
+      "epoch": 0.03050861742025199,
+      "grad_norm": 0.12631940841674805,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 6.8491,
+      "step": 181
+    },
+    {
+      "epoch": 0.03067717331760145,
+      "grad_norm": 0.1178191602230072,
+      "learning_rate": 2.1982109232821178e-06,
+      "loss": 6.8565,
+      "step": 182
+    },
+    {
+      "epoch": 0.030845729214950907,
+      "grad_norm": 0.1283978372812271,
+      "learning_rate": 1.962316193157593e-06,
+      "loss": 6.8597,
+      "step": 183
+    },
+    {
+      "epoch": 0.031014285112300366,
+      "grad_norm": 0.13060475885868073,
+      "learning_rate": 1.7395544861325718e-06,
+      "loss": 6.8603,
+      "step": 184
+    },
+    {
+      "epoch": 0.031182841009649825,
+      "grad_norm": 0.1462021917104721,
+      "learning_rate": 1.5299867030334814e-06,
+      "loss": 6.8643,
+      "step": 185
+    },
+    {
+      "epoch": 0.031351396906999285,
+      "grad_norm": 0.1487695276737213,
+      "learning_rate": 1.333670137599713e-06,
+      "loss": 6.8584,
+      "step": 186
+    },
+    {
+      "epoch": 0.031519952804348744,
+      "grad_norm": 0.12561187148094177,
+      "learning_rate": 1.1506584608200367e-06,
+      "loss": 6.8626,
+      "step": 187
+    },
+    {
+      "epoch": 0.031688508701698204,
+      "grad_norm": 0.12780500948429108,
+      "learning_rate": 9.810017062595322e-07,
+      "loss": 6.8598,
+      "step": 188
+    },
+    {
+      "epoch": 0.031857064599047656,
+      "grad_norm": 0.14706137776374817,
+      "learning_rate": 8.247462563808817e-07,
+      "loss": 6.8638,
+      "step": 189
+    },
+    {
+      "epoch": 0.032025620496397116,
+      "grad_norm": 0.12453869730234146,
+      "learning_rate": 6.819348298638839e-07,
+      "loss": 6.8556,
+      "step": 190
+    },
+    {
+      "epoch": 0.032194176393746575,
+      "grad_norm": 0.1434018611907959,
+      "learning_rate": 5.526064699265753e-07,
+      "loss": 6.8602,
+      "step": 191
+    },
+    {
+      "epoch": 0.032362732291096034,
+      "grad_norm": 0.15538664162158966,
+      "learning_rate": 4.367965336512403e-07,
+      "loss": 6.862,
+      "step": 192
+    },
+    {
+      "epoch": 0.032531288188445494,
+      "grad_norm": 0.14758512377738953,
+      "learning_rate": 3.3453668231809286e-07,
+      "loss": 6.8665,
+      "step": 193
+    },
+    {
+      "epoch": 0.03269984408579495,
+      "grad_norm": 0.1664230078458786,
+      "learning_rate": 2.458548727494292e-07,
+      "loss": 6.8697,
+      "step": 194
+    },
+    {
+      "epoch": 0.03286839998314441,
+      "grad_norm": 0.17496484518051147,
+      "learning_rate": 1.7077534966650766e-07,
+      "loss": 6.8706,
+      "step": 195
+    },
+    {
+      "epoch": 0.03303695588049387,
+      "grad_norm": 0.1683475226163864,
+      "learning_rate": 1.0931863906127327e-07,
+      "loss": 6.8645,
+      "step": 196
+    },
+    {
+      "epoch": 0.033205511777843325,
+      "grad_norm": 0.16963422298431396,
+      "learning_rate": 6.150154258476315e-08,
+      "loss": 6.8615,
+      "step": 197
+    },
+    {
+      "epoch": 0.033374067675192784,
+      "grad_norm": 0.18305006623268127,
+      "learning_rate": 2.7337132953697554e-08,
+      "loss": 6.8713,
+      "step": 198
+    },
+    {
+      "epoch": 0.03354262357254224,
+      "grad_norm": 0.1892130821943283,
+      "learning_rate": 6.834750376549792e-09,
+      "loss": 6.8654,
+      "step": 199
+    },
+    {
+      "epoch": 0.0337111794698917,
+      "grad_norm": 0.23540045320987701,
+      "learning_rate": 0.0,
+      "loss": 6.8734,
+      "step": 200
+    },
+    {
+      "epoch": 0.0337111794698917,
+      "eval_loss": 6.858746528625488,
+      "eval_runtime": 15.5992,
+      "eval_samples_per_second": 640.546,
+      "eval_steps_per_second": 160.136,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4007903920128.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null