Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0c20b12d70ee5100b8376be29fe9ec43e6c022c72cec53196f8ca6e3b2f3c3ed
 size 138995824

 version https://git-lfs.github.com/spec/v1
+oid sha256:7eb550648af3face1593abba74f24bb2577e8c861c8bf92841db554a98bec7c0
 size 138995824

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:644802706ffd9cafe29a13877727438a055a91ae33168908b2deab0631bf7b58
 size 71077780

 version https://git-lfs.github.com/spec/v1
+oid sha256:0be9bcce9af0ee6726ef0f186f26db6ec182d5bd50f038ce20ce9802ba47d9f8
 size 71077780

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2b11f03ae21d39be3f1f941e5510af2aa8839c66c87f19317481335b391cdbef
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:0f9eec096e2282034ce045740e2b21ca23776e09a9c74792f4838510420a4959
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8ce05761f46e7cf72fb17a02e3a0ca15c9d25ce3babf590eeb40568923b8bac
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.0355226956307888,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 1.0118043844856661,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 59.921,
       "eval_steps_per_second": 15.1,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.030117278547968e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.030804481357336044,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 1.3490725126475547,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 59.921,
       "eval_steps_per_second": 15.1,
       "step": 150
+    },
+    {
+      "epoch": 1.0185497470489038,
+      "grad_norm": 0.3283449113368988,
+      "learning_rate": 1.553232954407171e-05,
+      "loss": 0.0724,
+      "step": 151
+    },
+    {
+      "epoch": 1.0252951096121417,
+      "grad_norm": 0.13863329589366913,
+      "learning_rate": 1.4938160786375572e-05,
+      "loss": 0.0232,
+      "step": 152
+    },
+    {
+      "epoch": 1.0320404721753795,
+      "grad_norm": 0.21840208768844604,
+      "learning_rate": 1.435357758543015e-05,
+      "loss": 0.0189,
+      "step": 153
+    },
+    {
+      "epoch": 1.0387858347386172,
+      "grad_norm": 0.14212460815906525,
+      "learning_rate": 1.3778739760445552e-05,
+      "loss": 0.0182,
+      "step": 154
+    },
+    {
+      "epoch": 1.045531197301855,
+      "grad_norm": 0.21400751173496246,
+      "learning_rate": 1.3213804466343421e-05,
+      "loss": 0.0124,
+      "step": 155
+    },
+    {
+      "epoch": 1.0522765598650927,
+      "grad_norm": 0.006335023324936628,
+      "learning_rate": 1.2658926150792322e-05,
+      "loss": 0.0003,
+      "step": 156
+    },
+    {
+      "epoch": 1.0590219224283306,
+      "grad_norm": 0.044015780091285706,
+      "learning_rate": 1.2114256511983274e-05,
+      "loss": 0.0012,
+      "step": 157
+    },
+    {
+      "epoch": 1.0657672849915683,
+      "grad_norm": 0.019701050594449043,
+      "learning_rate": 1.157994445715706e-05,
+      "loss": 0.0009,
+      "step": 158
+    },
+    {
+      "epoch": 1.0725126475548061,
+      "grad_norm": 0.018881501629948616,
+      "learning_rate": 1.1056136061894384e-05,
+      "loss": 0.0007,
+      "step": 159
+    },
+    {
+      "epoch": 1.0792580101180438,
+      "grad_norm": 0.24259497225284576,
+      "learning_rate": 1.0542974530180327e-05,
+      "loss": 0.0272,
+      "step": 160
+    },
+    {
+      "epoch": 1.0860033726812817,
+      "grad_norm": 0.0803951844573021,
+      "learning_rate": 1.0040600155253765e-05,
+      "loss": 0.0116,
+      "step": 161
+    },
+    {
+      "epoch": 1.0927487352445193,
+      "grad_norm": 0.138003408908844,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.0214,
+      "step": 162
+    },
+    {
+      "epoch": 1.0994940978077572,
+      "grad_norm": 0.1587391346693039,
+      "learning_rate": 9.068759265665384e-06,
+      "loss": 0.0123,
+      "step": 163
+    },
+    {
+      "epoch": 1.1062394603709949,
+      "grad_norm": 0.09421160817146301,
+      "learning_rate": 8.599558442598998e-06,
+      "loss": 0.0186,
+      "step": 164
+    },
+    {
+      "epoch": 1.1129848229342327,
+      "grad_norm": 0.05252014100551605,
+      "learning_rate": 8.141676086873572e-06,
+      "loss": 0.0019,
+      "step": 165
+    },
+    {
+      "epoch": 1.1197301854974704,
+      "grad_norm": 0.061075758188962936,
+      "learning_rate": 7.695237378953223e-06,
+      "loss": 0.0022,
+      "step": 166
+    },
+    {
+      "epoch": 1.1264755480607083,
+      "grad_norm": 0.1525970995426178,
+      "learning_rate": 7.260364370723044e-06,
+      "loss": 0.0243,
+      "step": 167
+    },
+    {
+      "epoch": 1.1332209106239461,
+      "grad_norm": 0.08765783160924911,
+      "learning_rate": 6.837175952121306e-06,
+      "loss": 0.0028,
+      "step": 168
+    },
+    {
+      "epoch": 1.1399662731871838,
+      "grad_norm": 0.05461001396179199,
+      "learning_rate": 6.425787818636131e-06,
+      "loss": 0.0018,
+      "step": 169
+    },
+    {
+      "epoch": 1.1467116357504217,
+      "grad_norm": 0.031027011573314667,
+      "learning_rate": 6.026312439675552e-06,
+      "loss": 0.0012,
+      "step": 170
+    },
+    {
+      "epoch": 1.1534569983136593,
+      "grad_norm": 0.11484523862600327,
+      "learning_rate": 5.6388590278194096e-06,
+      "loss": 0.0131,
+      "step": 171
+    },
+    {
+      "epoch": 1.1602023608768972,
+      "grad_norm": 0.10547629743814468,
+      "learning_rate": 5.263533508961827e-06,
+      "loss": 0.0035,
+      "step": 172
+    },
+    {
+      "epoch": 1.1669477234401349,
+      "grad_norm": 0.07820610702037811,
+      "learning_rate": 4.900438493352055e-06,
+      "loss": 0.0087,
+      "step": 173
+    },
+    {
+      "epoch": 1.1736930860033727,
+      "grad_norm": 0.13314871490001678,
+      "learning_rate": 4.549673247541875e-06,
+      "loss": 0.0086,
+      "step": 174
+    },
+    {
+      "epoch": 1.1804384485666104,
+      "grad_norm": 0.0992496982216835,
+      "learning_rate": 4.2113336672471245e-06,
+      "loss": 0.0035,
+      "step": 175
+    },
+    {
+      "epoch": 1.1871838111298483,
+      "grad_norm": 0.07356826961040497,
+      "learning_rate": 3.885512251130763e-06,
+      "loss": 0.0026,
+      "step": 176
+    },
+    {
+      "epoch": 1.193929173693086,
+      "grad_norm": 0.11367491632699966,
+      "learning_rate": 3.5722980755146517e-06,
+      "loss": 0.0038,
+      "step": 177
+    },
+    {
+      "epoch": 1.2006745362563238,
+      "grad_norm": 0.17552682757377625,
+      "learning_rate": 3.271776770026963e-06,
+      "loss": 0.006,
+      "step": 178
+    },
+    {
+      "epoch": 1.2074198988195615,
+      "grad_norm": 0.18896107375621796,
+      "learning_rate": 2.9840304941919415e-06,
+      "loss": 0.0089,
+      "step": 179
+    },
+    {
+      "epoch": 1.2141652613827993,
+      "grad_norm": 0.1438591331243515,
+      "learning_rate": 2.7091379149682685e-06,
+      "loss": 0.0095,
+      "step": 180
+    },
+    {
+      "epoch": 1.220910623946037,
+      "grad_norm": 0.11014795303344727,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.0036,
+      "step": 181
+    },
+    {
+      "epoch": 1.2276559865092749,
+      "grad_norm": 0.20106233656406403,
+      "learning_rate": 2.1982109232821178e-06,
+      "loss": 0.007,
+      "step": 182
+    },
+    {
+      "epoch": 1.2344013490725128,
+      "grad_norm": 0.13474810123443604,
+      "learning_rate": 1.962316193157593e-06,
+      "loss": 0.0216,
+      "step": 183
+    },
+    {
+      "epoch": 1.2411467116357504,
+      "grad_norm": 0.12335095554590225,
+      "learning_rate": 1.7395544861325718e-06,
+      "loss": 0.0044,
+      "step": 184
+    },
+    {
+      "epoch": 1.2478920741989883,
+      "grad_norm": 0.13575774431228638,
+      "learning_rate": 1.5299867030334814e-06,
+      "loss": 0.0114,
+      "step": 185
+    },
+    {
+      "epoch": 1.254637436762226,
+      "grad_norm": 0.490744024515152,
+      "learning_rate": 1.333670137599713e-06,
+      "loss": 0.2586,
+      "step": 186
+    },
+    {
+      "epoch": 1.2613827993254638,
+      "grad_norm": 0.36557716131210327,
+      "learning_rate": 1.1506584608200367e-06,
+      "loss": 0.0905,
+      "step": 187
+    },
+    {
+      "epoch": 1.2681281618887015,
+      "grad_norm": 0.17767265439033508,
+      "learning_rate": 9.810017062595322e-07,
+      "loss": 0.0353,
+      "step": 188
+    },
+    {
+      "epoch": 1.2748735244519394,
+      "grad_norm": 0.056860972195863724,
+      "learning_rate": 8.247462563808817e-07,
+      "loss": 0.0127,
+      "step": 189
+    },
+    {
+      "epoch": 1.281618887015177,
+      "grad_norm": 0.2886894643306732,
+      "learning_rate": 6.819348298638839e-07,
+      "loss": 0.0757,
+      "step": 190
+    },
+    {
+      "epoch": 1.2883642495784149,
+      "grad_norm": 0.013288192451000214,
+      "learning_rate": 5.526064699265753e-07,
+      "loss": 0.0006,
+      "step": 191
+    },
+    {
+      "epoch": 1.2951096121416525,
+      "grad_norm": 0.2898249626159668,
+      "learning_rate": 4.367965336512403e-07,
+      "loss": 0.0343,
+      "step": 192
+    },
+    {
+      "epoch": 1.3018549747048904,
+      "grad_norm": 0.010432682000100613,
+      "learning_rate": 3.3453668231809286e-07,
+      "loss": 0.0004,
+      "step": 193
+    },
+    {
+      "epoch": 1.3086003372681283,
+      "grad_norm": 0.16382138431072235,
+      "learning_rate": 2.458548727494292e-07,
+      "loss": 0.0074,
+      "step": 194
+    },
+    {
+      "epoch": 1.315345699831366,
+      "grad_norm": 0.11828618496656418,
+      "learning_rate": 1.7077534966650766e-07,
+      "loss": 0.0156,
+      "step": 195
+    },
+    {
+      "epoch": 1.3220910623946036,
+      "grad_norm": 0.03025907278060913,
+      "learning_rate": 1.0931863906127327e-07,
+      "loss": 0.001,
+      "step": 196
+    },
+    {
+      "epoch": 1.3288364249578415,
+      "grad_norm": 0.018650656566023827,
+      "learning_rate": 6.150154258476315e-08,
+      "loss": 0.0007,
+      "step": 197
+    },
+    {
+      "epoch": 1.3355817875210794,
+      "grad_norm": 0.01551801897585392,
+      "learning_rate": 2.7337132953697554e-08,
+      "loss": 0.0006,
+      "step": 198
+    },
+    {
+      "epoch": 1.342327150084317,
+      "grad_norm": 0.024330761283636093,
+      "learning_rate": 6.834750376549792e-09,
+      "loss": 0.0009,
+      "step": 199
+    },
+    {
+      "epoch": 1.3490725126475547,
+      "grad_norm": 0.052249450236558914,
+      "learning_rate": 0.0,
+      "loss": 0.0048,
+      "step": 200
+    },
+    {
+      "epoch": 1.3490725126475547,
+      "eval_loss": 0.030804481357336044,
+      "eval_runtime": 4.1742,
+      "eval_samples_per_second": 59.892,
+      "eval_steps_per_second": 15.093,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.373561255559168e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null