beast33 committed
Commit baad276 · verified · 1 Parent(s): eeb6969

Training in progress, step 361, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:db1af205eff0d1c3981f58af318310bd969f0ccee8e313b79ebead6997c843b5
+ oid sha256:cc0496d758ec86c37d1971c49b2e7b5876067a9ce8635cb3d8c4520d38cd938b
  size 639691872
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8cf0977855e53e7a960c7caf7aa4b8b6575b7d1cea78d007041fe7b74b13d404
+ oid sha256:de404a495901928b91622cc6faceee097ce90f7963c9310ed4a92168b38a33d6
  size 325340244
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a30f2d89b79bfb2d9929f2dc3ffeef086eb1500788503382d980f99cbe057e80
+ oid sha256:0f761d844d5f040bacf1f99959e3a5fd3da1b16fd7877660f2d80d9193f6afa1
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:825ef3e3db682363455a0d008a860e7cc4412a53aa533791e6b37fda9dca6312
+ oid sha256:b857be7b95ff8324c4727de3c0f481a268cea8c6e2533b10d776846f18e23993
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": 1.556799054145813,
  "best_model_checkpoint": "miner_id_24/checkpoint-300",
- "epoch": 0.8310249307479224,
+ "epoch": 1.0,
  "eval_steps": 100,
- "global_step": 300,
+ "global_step": 361,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -2139,6 +2139,433 @@
  "eval_samples_per_second": 6.356,
  "eval_steps_per_second": 1.589,
  "step": 300
+ },
+ {
+ "epoch": 0.8337950138504155,
+ "grad_norm": 0.38641664385795593,
+ "learning_rate": 7.446405675168938e-06,
+ "loss": 1.6862,
+ "step": 301
+ },
+ {
+ "epoch": 0.8365650969529086,
+ "grad_norm": 0.386618435382843,
+ "learning_rate": 7.206354198753862e-06,
+ "loss": 1.6566,
+ "step": 302
+ },
+ {
+ "epoch": 0.8393351800554016,
+ "grad_norm": 0.4190298318862915,
+ "learning_rate": 6.969934901005809e-06,
+ "loss": 1.6447,
+ "step": 303
+ },
+ {
+ "epoch": 0.8421052631578947,
+ "grad_norm": 0.40949639678001404,
+ "learning_rate": 6.7371678483882264e-06,
+ "loss": 1.6331,
+ "step": 304
+ },
+ {
+ "epoch": 0.8448753462603878,
+ "grad_norm": 0.42585617303848267,
+ "learning_rate": 6.508072797374454e-06,
+ "loss": 1.6439,
+ "step": 305
+ },
+ {
+ "epoch": 0.8476454293628809,
+ "grad_norm": 0.41650789976119995,
+ "learning_rate": 6.282669192770896e-06,
+ "loss": 1.616,
+ "step": 306
+ },
+ {
+ "epoch": 0.850415512465374,
+ "grad_norm": 0.4084133803844452,
+ "learning_rate": 6.060976166066546e-06,
+ "loss": 1.587,
+ "step": 307
+ },
+ {
+ "epoch": 0.853185595567867,
+ "grad_norm": 0.4127242863178253,
+ "learning_rate": 5.8430125338092115e-06,
+ "loss": 1.6102,
+ "step": 308
+ },
+ {
+ "epoch": 0.8559556786703602,
+ "grad_norm": 0.42294085025787354,
+ "learning_rate": 5.628796796008434e-06,
+ "loss": 1.6542,
+ "step": 309
+ },
+ {
+ "epoch": 0.8587257617728532,
+ "grad_norm": 0.41091540455818176,
+ "learning_rate": 5.418347134565249e-06,
+ "loss": 1.5955,
+ "step": 310
+ },
+ {
+ "epoch": 0.8614958448753463,
+ "grad_norm": 0.41265037655830383,
+ "learning_rate": 5.211681411728969e-06,
+ "loss": 1.6711,
+ "step": 311
+ },
+ {
+ "epoch": 0.8642659279778393,
+ "grad_norm": 0.44204774498939514,
+ "learning_rate": 5.008817168581137e-06,
+ "loss": 1.5871,
+ "step": 312
+ },
+ {
+ "epoch": 0.8670360110803325,
+ "grad_norm": 0.4217774271965027,
+ "learning_rate": 4.809771623546627e-06,
+ "loss": 1.5632,
+ "step": 313
+ },
+ {
+ "epoch": 0.8698060941828255,
+ "grad_norm": 0.43789371848106384,
+ "learning_rate": 4.614561670932288e-06,
+ "loss": 1.5672,
+ "step": 314
+ },
+ {
+ "epoch": 0.8725761772853186,
+ "grad_norm": 0.44473928213119507,
+ "learning_rate": 4.423203879492943e-06,
+ "loss": 1.5175,
+ "step": 315
+ },
+ {
+ "epoch": 0.8753462603878116,
+ "grad_norm": 0.4420805871486664,
+ "learning_rate": 4.2357144910251e-06,
+ "loss": 1.5502,
+ "step": 316
+ },
+ {
+ "epoch": 0.8781163434903048,
+ "grad_norm": 0.448369562625885,
+ "learning_rate": 4.05210941898847e-06,
+ "loss": 1.5517,
+ "step": 317
+ },
+ {
+ "epoch": 0.8808864265927978,
+ "grad_norm": 0.44154343008995056,
+ "learning_rate": 3.872404247155193e-06,
+ "loss": 1.4863,
+ "step": 318
+ },
+ {
+ "epoch": 0.8836565096952909,
+ "grad_norm": 0.44638606905937195,
+ "learning_rate": 3.696614228287187e-06,
+ "loss": 1.5786,
+ "step": 319
+ },
+ {
+ "epoch": 0.8864265927977839,
+ "grad_norm": 0.43334829807281494,
+ "learning_rate": 3.5247542828415747e-06,
+ "loss": 1.5491,
+ "step": 320
+ },
+ {
+ "epoch": 0.889196675900277,
+ "grad_norm": 0.4540192484855652,
+ "learning_rate": 3.356838997704226e-06,
+ "loss": 1.5194,
+ "step": 321
+ },
+ {
+ "epoch": 0.8919667590027701,
+ "grad_norm": 0.4565938115119934,
+ "learning_rate": 3.1928826249516987e-06,
+ "loss": 1.5226,
+ "step": 322
+ },
+ {
+ "epoch": 0.8947368421052632,
+ "grad_norm": 0.4570363461971283,
+ "learning_rate": 3.0328990806415934e-06,
+ "loss": 1.5702,
+ "step": 323
+ },
+ {
+ "epoch": 0.8975069252077562,
+ "grad_norm": 0.4690124988555908,
+ "learning_rate": 2.8769019436313715e-06,
+ "loss": 1.5384,
+ "step": 324
+ },
+ {
+ "epoch": 0.9002770083102493,
+ "grad_norm": 0.47177836298942566,
+ "learning_rate": 2.7249044544258363e-06,
+ "loss": 1.5368,
+ "step": 325
+ },
+ {
+ "epoch": 0.9030470914127424,
+ "grad_norm": 0.4627552628517151,
+ "learning_rate": 2.576919514053355e-06,
+ "loss": 1.5475,
+ "step": 326
+ },
+ {
+ "epoch": 0.9058171745152355,
+ "grad_norm": 0.4692405164241791,
+ "learning_rate": 2.4329596829708144e-06,
+ "loss": 1.4843,
+ "step": 327
+ },
+ {
+ "epoch": 0.9085872576177285,
+ "grad_norm": 0.5105109214782715,
+ "learning_rate": 2.2930371799975594e-06,
+ "loss": 1.5008,
+ "step": 328
+ },
+ {
+ "epoch": 0.9113573407202216,
+ "grad_norm": 0.48938921093940735,
+ "learning_rate": 2.157163881278312e-06,
+ "loss": 1.5492,
+ "step": 329
+ },
+ {
+ "epoch": 0.9141274238227147,
+ "grad_norm": 0.485343337059021,
+ "learning_rate": 2.0253513192751373e-06,
+ "loss": 1.5183,
+ "step": 330
+ },
+ {
+ "epoch": 0.9168975069252078,
+ "grad_norm": 0.4986342489719391,
+ "learning_rate": 1.8976106817886196e-06,
+ "loss": 1.5177,
+ "step": 331
+ },
+ {
+ "epoch": 0.9196675900277008,
+ "grad_norm": 0.49586573243141174,
+ "learning_rate": 1.7739528110083004e-06,
+ "loss": 1.519,
+ "step": 332
+ },
+ {
+ "epoch": 0.9224376731301939,
+ "grad_norm": 0.5050438642501831,
+ "learning_rate": 1.6543882025923886e-06,
+ "loss": 1.4329,
+ "step": 333
+ },
+ {
+ "epoch": 0.925207756232687,
+ "grad_norm": 0.5025500655174255,
+ "learning_rate": 1.5389270047769578e-06,
+ "loss": 1.486,
+ "step": 334
+ },
+ {
+ "epoch": 0.9279778393351801,
+ "grad_norm": 0.50665682554245,
+ "learning_rate": 1.4275790175145908e-06,
+ "loss": 1.4943,
+ "step": 335
+ },
+ {
+ "epoch": 0.9307479224376731,
+ "grad_norm": 0.518688440322876,
+ "learning_rate": 1.3203536916425841e-06,
+ "loss": 1.526,
+ "step": 336
+ },
+ {
+ "epoch": 0.9335180055401662,
+ "grad_norm": 0.5425696969032288,
+ "learning_rate": 1.217260128080816e-06,
+ "loss": 1.513,
+ "step": 337
+ },
+ {
+ "epoch": 0.9362880886426593,
+ "grad_norm": 0.5442622303962708,
+ "learning_rate": 1.1183070770592441e-06,
+ "loss": 1.5735,
+ "step": 338
+ },
+ {
+ "epoch": 0.9390581717451524,
+ "grad_norm": 0.5330432057380676,
+ "learning_rate": 1.0235029373752758e-06,
+ "loss": 1.5291,
+ "step": 339
+ },
+ {
+ "epoch": 0.9418282548476454,
+ "grad_norm": 0.5193708539009094,
+ "learning_rate": 9.32855755680867e-07,
+ "loss": 1.418,
+ "step": 340
+ },
+ {
+ "epoch": 0.9445983379501385,
+ "grad_norm": 0.5180433392524719,
+ "learning_rate": 8.463732257995571e-07,
+ "loss": 1.495,
+ "step": 341
+ },
+ {
+ "epoch": 0.9473684210526315,
+ "grad_norm": 0.5407363772392273,
+ "learning_rate": 7.640626880734581e-07,
+ "loss": 1.5243,
+ "step": 342
+ },
+ {
+ "epoch": 0.9501385041551247,
+ "grad_norm": 0.5722284913063049,
+ "learning_rate": 6.859311287402081e-07,
+ "loss": 1.4204,
+ "step": 343
+ },
+ {
+ "epoch": 0.9529085872576177,
+ "grad_norm": 0.5504374504089355,
+ "learning_rate": 6.119851793400189e-07,
+ "loss": 1.4763,
+ "step": 344
+ },
+ {
+ "epoch": 0.9556786703601108,
+ "grad_norm": 0.5620555281639099,
+ "learning_rate": 5.422311161528193e-07,
+ "loss": 1.4551,
+ "step": 345
+ },
+ {
+ "epoch": 0.9584487534626038,
+ "grad_norm": 0.5651283860206604,
+ "learning_rate": 4.7667485966552685e-07,
+ "loss": 1.4817,
+ "step": 346
+ },
+ {
+ "epoch": 0.961218836565097,
+ "grad_norm": 0.6057314872741699,
+ "learning_rate": 4.153219740695435e-07,
+ "loss": 1.5466,
+ "step": 347
+ },
+ {
+ "epoch": 0.96398891966759,
+ "grad_norm": 0.6597415208816528,
+ "learning_rate": 3.5817766678850615e-07,
+ "loss": 1.5562,
+ "step": 348
+ },
+ {
+ "epoch": 0.9667590027700831,
+ "grad_norm": 0.7001429796218872,
+ "learning_rate": 3.052467880362675e-07,
+ "loss": 1.899,
+ "step": 349
+ },
+ {
+ "epoch": 0.9695290858725761,
+ "grad_norm": 0.8798688054084778,
+ "learning_rate": 2.5653383040524227e-07,
+ "loss": 1.96,
+ "step": 350
+ },
+ {
+ "epoch": 0.9722991689750693,
+ "grad_norm": 0.3499889671802521,
+ "learning_rate": 2.1204292848509555e-07,
+ "loss": 1.6123,
+ "step": 351
+ },
+ {
+ "epoch": 0.9750692520775623,
+ "grad_norm": 0.38690975308418274,
+ "learning_rate": 1.717778585118013e-07,
+ "loss": 1.5579,
+ "step": 352
+ },
+ {
+ "epoch": 0.9778393351800554,
+ "grad_norm": 0.41460373997688293,
+ "learning_rate": 1.3574203804713747e-07,
+ "loss": 1.585,
+ "step": 353
+ },
+ {
+ "epoch": 0.9806094182825484,
+ "grad_norm": 0.4353157877922058,
+ "learning_rate": 1.0393852568860719e-07,
+ "loss": 1.5611,
+ "step": 354
+ },
+ {
+ "epoch": 0.9833795013850416,
+ "grad_norm": 0.450083464384079,
+ "learning_rate": 7.637002080985168e-08,
+ "loss": 1.5568,
+ "step": 355
+ },
+ {
+ "epoch": 0.9861495844875346,
+ "grad_norm": 0.47905081510543823,
+ "learning_rate": 5.303886333151153e-08,
+ "loss": 1.5438,
+ "step": 356
+ },
+ {
+ "epoch": 0.9889196675900277,
+ "grad_norm": 0.4843379855155945,
+ "learning_rate": 3.394703352263551e-08,
+ "loss": 1.4646,
+ "step": 357
+ },
+ {
+ "epoch": 0.9916897506925207,
+ "grad_norm": 0.5109671950340271,
+ "learning_rate": 1.9096151832609375e-08,
+ "loss": 1.513,
+ "step": 358
+ },
+ {
+ "epoch": 0.9944598337950139,
+ "grad_norm": 0.5229476690292358,
+ "learning_rate": 8.487478753615997e-09,
+ "loss": 1.4675,
+ "step": 359
+ },
+ {
+ "epoch": 0.997229916897507,
+ "grad_norm": 0.5755655765533447,
+ "learning_rate": 2.1219147136264382e-09,
+ "loss": 1.5564,
+ "step": 360
+ },
+ {
+ "epoch": 1.0,
+ "grad_norm": 0.7581580877304077,
+ "learning_rate": 0.0,
+ "loss": 1.8149,
+ "step": 361
  }
  ],
  "logging_steps": 1,
@@ -2162,12 +2589,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 4.281474924675072e+17,
+ "total_flos": 5.1502575448070554e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null