Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7a3acefe9fc9a017341a3fcdb08189f6277c08ef5bb797413ff81ab1e9cb1280
 size 50899792

 version https://git-lfs.github.com/spec/v1
+oid sha256:7241015963b4e9312d8dda017ea8d135bae01bb1075895ed4565e38143f1395c
 size 50899792

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9a57e50049278c9221ba316fc09ef34f20f0a6dd392f7614b71a60efd7fc8fa1
 size 102009558

 version https://git-lfs.github.com/spec/v1
+oid sha256:544e079ad3d0910b3d99cf66ec12234cca5639883a74f4aab1937f1e70a088f1
 size 102009558

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e25b0b35db85155f4f5ce92cda063932982c55e93655b0da451f131e3511b5b
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:378ebc1116a6ae28136fa27fe8b7439b5379e4d44a0fb7663a874d3084c60ed6
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a89ffc445067fef9d6d02bb3ff9e61d5e3209e6fa67c7259b3b364b90dbaa2cd
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:49d60a69e2379be2053e816cbaff31e6c931b5922dd86c71c9eaf473299cbf62
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.8736505508422852,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.0011810628384483195,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 15.607,
       "eval_steps_per_second": 7.803,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +754,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 6744900269506560.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.812772274017334,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.002362125676896639,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 15.607,
       "eval_steps_per_second": 7.803,
       "step": 50
+    },
+    {
+      "epoch": 0.001204684095217286,
+      "grad_norm": 1.6444913148880005,
+      "learning_rate": 5.695865504800327e-05,
+      "loss": 1.8085,
+      "step": 51
+    },
+    {
+      "epoch": 0.0012283053519862525,
+      "grad_norm": 1.6485252380371094,
+      "learning_rate": 5.522642316338268e-05,
+      "loss": 1.9418,
+      "step": 52
+    },
+    {
+      "epoch": 0.0012519266087552188,
+      "grad_norm": 1.3757036924362183,
+      "learning_rate": 5.348782368720626e-05,
+      "loss": 1.8188,
+      "step": 53
+    },
+    {
+      "epoch": 0.0012755478655241852,
+      "grad_norm": 1.4265061616897583,
+      "learning_rate": 5.174497483512506e-05,
+      "loss": 1.8015,
+      "step": 54
+    },
+    {
+      "epoch": 0.0012991691222931517,
+      "grad_norm": 1.2143784761428833,
+      "learning_rate": 5e-05,
+      "loss": 2.0403,
+      "step": 55
+    },
+    {
+      "epoch": 0.001322790379062118,
+      "grad_norm": 1.1464849710464478,
+      "learning_rate": 4.825502516487497e-05,
+      "loss": 1.9537,
+      "step": 56
+    },
+    {
+      "epoch": 0.0013464116358310844,
+      "grad_norm": 1.017458200454712,
+      "learning_rate": 4.6512176312793736e-05,
+      "loss": 1.8258,
+      "step": 57
+    },
+    {
+      "epoch": 0.0013700328926000507,
+      "grad_norm": 1.0252606868743896,
+      "learning_rate": 4.477357683661734e-05,
+      "loss": 1.7314,
+      "step": 58
+    },
+    {
+      "epoch": 0.0013936541493690172,
+      "grad_norm": 1.1397935152053833,
+      "learning_rate": 4.3041344951996746e-05,
+      "loss": 1.7815,
+      "step": 59
+    },
+    {
+      "epoch": 0.0014172754061379837,
+      "grad_norm": 1.2537386417388916,
+      "learning_rate": 4.131759111665349e-05,
+      "loss": 2.0948,
+      "step": 60
+    },
+    {
+      "epoch": 0.00144089666290695,
+      "grad_norm": 1.226378083229065,
+      "learning_rate": 3.960441545911204e-05,
+      "loss": 1.7523,
+      "step": 61
+    },
+    {
+      "epoch": 0.0014645179196759164,
+      "grad_norm": 1.1963748931884766,
+      "learning_rate": 3.790390522001662e-05,
+      "loss": 1.6124,
+      "step": 62
+    },
+    {
+      "epoch": 0.0014881391764448827,
+      "grad_norm": 1.2817221879959106,
+      "learning_rate": 3.6218132209150045e-05,
+      "loss": 1.7639,
+      "step": 63
+    },
+    {
+      "epoch": 0.0015117604332138491,
+      "grad_norm": 1.320666790008545,
+      "learning_rate": 3.4549150281252636e-05,
+      "loss": 1.815,
+      "step": 64
+    },
+    {
+      "epoch": 0.0015353816899828156,
+      "grad_norm": 1.1580308675765991,
+      "learning_rate": 3.289899283371657e-05,
+      "loss": 1.6898,
+      "step": 65
+    },
+    {
+      "epoch": 0.0015590029467517819,
+      "grad_norm": 1.3005945682525635,
+      "learning_rate": 3.12696703292044e-05,
+      "loss": 1.7134,
+      "step": 66
+    },
+    {
+      "epoch": 0.0015826242035207484,
+      "grad_norm": 1.460551142692566,
+      "learning_rate": 2.9663167846209998e-05,
+      "loss": 1.9224,
+      "step": 67
+    },
+    {
+      "epoch": 0.0016062454602897146,
+      "grad_norm": 1.084309458732605,
+      "learning_rate": 2.8081442660546125e-05,
+      "loss": 1.6219,
+      "step": 68
+    },
+    {
+      "epoch": 0.001629866717058681,
+      "grad_norm": 1.6280590295791626,
+      "learning_rate": 2.6526421860705473e-05,
+      "loss": 1.9883,
+      "step": 69
+    },
+    {
+      "epoch": 0.0016534879738276476,
+      "grad_norm": 1.6045172214508057,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 1.9836,
+      "step": 70
+    },
+    {
+      "epoch": 0.0016771092305966138,
+      "grad_norm": 1.6871895790100098,
+      "learning_rate": 2.350403678833976e-05,
+      "loss": 2.1157,
+      "step": 71
+    },
+    {
+      "epoch": 0.0017007304873655803,
+      "grad_norm": 1.8046764135360718,
+      "learning_rate": 2.2040354826462668e-05,
+      "loss": 2.2028,
+      "step": 72
+    },
+    {
+      "epoch": 0.0017243517441345468,
+      "grad_norm": 1.3781118392944336,
+      "learning_rate": 2.061073738537635e-05,
+      "loss": 1.8658,
+      "step": 73
+    },
+    {
+      "epoch": 0.001747973000903513,
+      "grad_norm": 1.5428242683410645,
+      "learning_rate": 1.9216926233717085e-05,
+      "loss": 2.1846,
+      "step": 74
+    },
+    {
+      "epoch": 0.0017715942576724795,
+      "grad_norm": 1.931896686553955,
+      "learning_rate": 1.7860619515673033e-05,
+      "loss": 1.8872,
+      "step": 75
+    },
+    {
+      "epoch": 0.0017952155144414458,
+      "grad_norm": 1.6994593143463135,
+      "learning_rate": 1.6543469682057106e-05,
+      "loss": 2.0523,
+      "step": 76
+    },
+    {
+      "epoch": 0.0018188367712104123,
+      "grad_norm": 1.7547457218170166,
+      "learning_rate": 1.526708147705013e-05,
+      "loss": 1.982,
+      "step": 77
+    },
+    {
+      "epoch": 0.0018424580279793787,
+      "grad_norm": 1.5750082731246948,
+      "learning_rate": 1.4033009983067452e-05,
+      "loss": 1.9124,
+      "step": 78
+    },
+    {
+      "epoch": 0.001866079284748345,
+      "grad_norm": 1.8795514106750488,
+      "learning_rate": 1.2842758726130283e-05,
+      "loss": 2.1477,
+      "step": 79
+    },
+    {
+      "epoch": 0.0018897005415173115,
+      "grad_norm": 2.480574369430542,
+      "learning_rate": 1.1697777844051105e-05,
+      "loss": 1.9591,
+      "step": 80
+    },
+    {
+      "epoch": 0.0019133217982862777,
+      "grad_norm": 1.3811838626861572,
+      "learning_rate": 1.0599462319663905e-05,
+      "loss": 1.5521,
+      "step": 81
+    },
+    {
+      "epoch": 0.0019369430550552442,
+      "grad_norm": 1.6117287874221802,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 1.6701,
+      "step": 82
+    },
+    {
+      "epoch": 0.0019605643118242107,
+      "grad_norm": 1.5689414739608765,
+      "learning_rate": 8.548121372247918e-06,
+      "loss": 1.4656,
+      "step": 83
+    },
+    {
+      "epoch": 0.001984185568593177,
+      "grad_norm": 1.580367088317871,
+      "learning_rate": 7.597595192178702e-06,
+      "loss": 1.6255,
+      "step": 84
+    },
+    {
+      "epoch": 0.002007806825362143,
+      "grad_norm": 1.5394359827041626,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 1.6227,
+      "step": 85
+    },
+    {
+      "epoch": 0.0020314280821311097,
+      "grad_norm": 1.3796682357788086,
+      "learning_rate": 5.852620357053651e-06,
+      "loss": 1.4377,
+      "step": 86
+    },
+    {
+      "epoch": 0.002055049338900076,
+      "grad_norm": 1.5555745363235474,
+      "learning_rate": 5.060297685041659e-06,
+      "loss": 1.7109,
+      "step": 87
+    },
+    {
+      "epoch": 0.0020786705956690426,
+      "grad_norm": 1.5756012201309204,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 1.913,
+      "step": 88
+    },
+    {
+      "epoch": 0.002102291852438009,
+      "grad_norm": 1.368099331855774,
+      "learning_rate": 3.6408072716606346e-06,
+      "loss": 1.446,
+      "step": 89
+    },
+    {
+      "epoch": 0.002125913109206975,
+      "grad_norm": 1.5308266878128052,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": 1.6731,
+      "step": 90
+    },
+    {
+      "epoch": 0.0021495343659759416,
+      "grad_norm": 1.6399803161621094,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 1.9263,
+      "step": 91
+    },
+    {
+      "epoch": 0.002173155622744908,
+      "grad_norm": 1.7179704904556274,
+      "learning_rate": 1.9369152030840556e-06,
+      "loss": 1.4546,
+      "step": 92
+    },
+    {
+      "epoch": 0.0021967768795138746,
+      "grad_norm": 1.9310886859893799,
+      "learning_rate": 1.4852136862001764e-06,
+      "loss": 1.7588,
+      "step": 93
+    },
+    {
+      "epoch": 0.002220398136282841,
+      "grad_norm": 1.8719663619995117,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 1.7902,
+      "step": 94
+    },
+    {
+      "epoch": 0.002244019393051807,
+      "grad_norm": 2.372739791870117,
+      "learning_rate": 7.596123493895991e-07,
+      "loss": 1.6732,
+      "step": 95
+    },
+    {
+      "epoch": 0.0022676406498207736,
+      "grad_norm": 1.6990795135498047,
+      "learning_rate": 4.865965629214819e-07,
+      "loss": 1.8265,
+      "step": 96
+    },
+    {
+      "epoch": 0.00229126190658974,
+      "grad_norm": 1.6857136487960815,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 1.5841,
+      "step": 97
+    },
+    {
+      "epoch": 0.0023148831633587066,
+      "grad_norm": 1.8094323873519897,
+      "learning_rate": 1.2179748700879012e-07,
+      "loss": 1.6073,
+      "step": 98
+    },
+    {
+      "epoch": 0.002338504420127673,
+      "grad_norm": 1.8924447298049927,
+      "learning_rate": 3.04586490452119e-08,
+      "loss": 1.5823,
+      "step": 99
+    },
+    {
+      "epoch": 0.002362125676896639,
+      "grad_norm": 1.920559287071228,
+      "learning_rate": 0.0,
+      "loss": 1.6138,
+      "step": 100
+    },
+    {
+      "epoch": 0.002362125676896639,
+      "eval_loss": 1.812772274017334,
+      "eval_runtime": 1141.2879,
+      "eval_samples_per_second": 15.619,
+      "eval_steps_per_second": 7.81,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.334629202264064e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null