Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:95780ad94a5db1cbd6b519f40a7e9aba907320f1b6d7b2ba3b86b0ef1ba11143
 size 97728

 version https://git-lfs.github.com/spec/v1
+oid sha256:e08c10061058c435cc376c329fc3aca3c37929aa5bbbb77dd86d4b1b14dd486a
 size 97728

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8a7a162cdd4a1625075dcbb0329018f699eb2f982170898d63b37162d9b17b10
 size 205350

 version https://git-lfs.github.com/spec/v1
+oid sha256:f640e4b420238641c1e69654d694392551e7da0ed31e86fe926192687a304f4a
 size 205350

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1c6e7276a2f1bb0919034b00b18c6ec9ec6a1e66261c4ba3cc9135e6e70d966b
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:7d2042752f1f49e38179d2b9ca9233856ff37ed72c5319341ace36487c88c82a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a89ffc445067fef9d6d02bb3ff9e61d5e3209e6fa67c7259b3b364b90dbaa2cd
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:49d60a69e2379be2053e816cbaff31e6c931b5922dd86c71c9eaf473299cbf62
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.351405143737793,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.6006006006006006,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 420.431,
       "eval_steps_per_second": 105.108,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +754,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5346007449600.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 10.348917007446289,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 1.2012012012012012,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 420.431,
       "eval_steps_per_second": 105.108,
       "step": 50
+    },
+    {
+      "epoch": 0.6126126126126126,
+      "grad_norm": 0.1385074406862259,
+      "learning_rate": 5.695865504800327e-05,
+      "loss": 10.3523,
+      "step": 51
+    },
+    {
+      "epoch": 0.6246246246246246,
+      "grad_norm": 0.11994314938783646,
+      "learning_rate": 5.522642316338268e-05,
+      "loss": 10.3496,
+      "step": 52
+    },
+    {
+      "epoch": 0.6366366366366366,
+      "grad_norm": 0.12939238548278809,
+      "learning_rate": 5.348782368720626e-05,
+      "loss": 10.35,
+      "step": 53
+    },
+    {
+      "epoch": 0.6486486486486487,
+      "grad_norm": 0.13329431414604187,
+      "learning_rate": 5.174497483512506e-05,
+      "loss": 10.3485,
+      "step": 54
+    },
+    {
+      "epoch": 0.6606606606606606,
+      "grad_norm": 0.15696164965629578,
+      "learning_rate": 5e-05,
+      "loss": 10.3508,
+      "step": 55
+    },
+    {
+      "epoch": 0.6726726726726727,
+      "grad_norm": 0.11817120760679245,
+      "learning_rate": 4.825502516487497e-05,
+      "loss": 10.3475,
+      "step": 56
+    },
+    {
+      "epoch": 0.6846846846846847,
+      "grad_norm": 0.17037023603916168,
+      "learning_rate": 4.6512176312793736e-05,
+      "loss": 10.3456,
+      "step": 57
+    },
+    {
+      "epoch": 0.6966966966966966,
+      "grad_norm": 0.27570074796676636,
+      "learning_rate": 4.477357683661734e-05,
+      "loss": 10.3418,
+      "step": 58
+    },
+    {
+      "epoch": 0.7087087087087087,
+      "grad_norm": 0.25951486825942993,
+      "learning_rate": 4.3041344951996746e-05,
+      "loss": 10.3458,
+      "step": 59
+    },
+    {
+      "epoch": 0.7207207207207207,
+      "grad_norm": 0.25024840235710144,
+      "learning_rate": 4.131759111665349e-05,
+      "loss": 10.3361,
+      "step": 60
+    },
+    {
+      "epoch": 0.7327327327327328,
+      "grad_norm": 0.15343418717384338,
+      "learning_rate": 3.960441545911204e-05,
+      "loss": 10.3538,
+      "step": 61
+    },
+    {
+      "epoch": 0.7447447447447447,
+      "grad_norm": 0.12202432751655579,
+      "learning_rate": 3.790390522001662e-05,
+      "loss": 10.3493,
+      "step": 62
+    },
+    {
+      "epoch": 0.7567567567567568,
+      "grad_norm": 0.11715935170650482,
+      "learning_rate": 3.6218132209150045e-05,
+      "loss": 10.3577,
+      "step": 63
+    },
+    {
+      "epoch": 0.7687687687687688,
+      "grad_norm": 0.12855777144432068,
+      "learning_rate": 3.4549150281252636e-05,
+      "loss": 10.3495,
+      "step": 64
+    },
+    {
+      "epoch": 0.7807807807807807,
+      "grad_norm": 0.12814295291900635,
+      "learning_rate": 3.289899283371657e-05,
+      "loss": 10.3517,
+      "step": 65
+    },
+    {
+      "epoch": 0.7927927927927928,
+      "grad_norm": 0.14796307682991028,
+      "learning_rate": 3.12696703292044e-05,
+      "loss": 10.3543,
+      "step": 66
+    },
+    {
+      "epoch": 0.8048048048048048,
+      "grad_norm": 0.10517235845327377,
+      "learning_rate": 2.9663167846209998e-05,
+      "loss": 10.3526,
+      "step": 67
+    },
+    {
+      "epoch": 0.8168168168168168,
+      "grad_norm": 0.10510523617267609,
+      "learning_rate": 2.8081442660546125e-05,
+      "loss": 10.3511,
+      "step": 68
+    },
+    {
+      "epoch": 0.8288288288288288,
+      "grad_norm": 0.12766772508621216,
+      "learning_rate": 2.6526421860705473e-05,
+      "loss": 10.3528,
+      "step": 69
+    },
+    {
+      "epoch": 0.8408408408408409,
+      "grad_norm": 0.09015192091464996,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 10.352,
+      "step": 70
+    },
+    {
+      "epoch": 0.8528528528528528,
+      "grad_norm": 0.08759531378746033,
+      "learning_rate": 2.350403678833976e-05,
+      "loss": 10.3496,
+      "step": 71
+    },
+    {
+      "epoch": 0.8648648648648649,
+      "grad_norm": 0.08307502418756485,
+      "learning_rate": 2.2040354826462668e-05,
+      "loss": 10.3479,
+      "step": 72
+    },
+    {
+      "epoch": 0.8768768768768769,
+      "grad_norm": 0.128950834274292,
+      "learning_rate": 2.061073738537635e-05,
+      "loss": 10.3421,
+      "step": 73
+    },
+    {
+      "epoch": 0.8888888888888888,
+      "grad_norm": 0.12842462956905365,
+      "learning_rate": 1.9216926233717085e-05,
+      "loss": 10.3464,
+      "step": 74
+    },
+    {
+      "epoch": 0.9009009009009009,
+      "grad_norm": 0.1564466953277588,
+      "learning_rate": 1.7860619515673033e-05,
+      "loss": 10.3513,
+      "step": 75
+    },
+    {
+      "epoch": 0.9129129129129129,
+      "grad_norm": 0.10344736278057098,
+      "learning_rate": 1.6543469682057106e-05,
+      "loss": 10.347,
+      "step": 76
+    },
+    {
+      "epoch": 0.924924924924925,
+      "grad_norm": 0.17611975967884064,
+      "learning_rate": 1.526708147705013e-05,
+      "loss": 10.3424,
+      "step": 77
+    },
+    {
+      "epoch": 0.9369369369369369,
+      "grad_norm": 0.17288321256637573,
+      "learning_rate": 1.4033009983067452e-05,
+      "loss": 10.3423,
+      "step": 78
+    },
+    {
+      "epoch": 0.948948948948949,
+      "grad_norm": 0.24021244049072266,
+      "learning_rate": 1.2842758726130283e-05,
+      "loss": 10.3399,
+      "step": 79
+    },
+    {
+      "epoch": 0.960960960960961,
+      "grad_norm": 0.22334186732769012,
+      "learning_rate": 1.1697777844051105e-05,
+      "loss": 10.3372,
+      "step": 80
+    },
+    {
+      "epoch": 0.972972972972973,
+      "grad_norm": 0.10378078371286392,
+      "learning_rate": 1.0599462319663905e-05,
+      "loss": 10.3536,
+      "step": 81
+    },
+    {
+      "epoch": 0.984984984984985,
+      "grad_norm": 0.11866383254528046,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 10.356,
+      "step": 82
+    },
+    {
+      "epoch": 0.996996996996997,
+      "grad_norm": 0.11081431061029434,
+      "learning_rate": 8.548121372247918e-06,
+      "loss": 10.3428,
+      "step": 83
+    },
+    {
+      "epoch": 1.009009009009009,
+      "grad_norm": 0.3789661228656769,
+      "learning_rate": 7.597595192178702e-06,
+      "loss": 18.3146,
+      "step": 84
+    },
+    {
+      "epoch": 1.021021021021021,
+      "grad_norm": 0.11683385819196701,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 10.2473,
+      "step": 85
+    },
+    {
+      "epoch": 1.033033033033033,
+      "grad_norm": 0.14324098825454712,
+      "learning_rate": 5.852620357053651e-06,
+      "loss": 10.2616,
+      "step": 86
+    },
+    {
+      "epoch": 1.045045045045045,
+      "grad_norm": 0.1403503715991974,
+      "learning_rate": 5.060297685041659e-06,
+      "loss": 10.3972,
+      "step": 87
+    },
+    {
+      "epoch": 1.057057057057057,
+      "grad_norm": 0.10031662881374359,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 10.2818,
+      "step": 88
+    },
+    {
+      "epoch": 1.069069069069069,
+      "grad_norm": 0.12696465849876404,
+      "learning_rate": 3.6408072716606346e-06,
+      "loss": 10.498,
+      "step": 89
+    },
+    {
+      "epoch": 1.0810810810810811,
+      "grad_norm": 0.11515042185783386,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": 10.3381,
+      "step": 90
+    },
+    {
+      "epoch": 1.093093093093093,
+      "grad_norm": 0.09645378589630127,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 10.093,
+      "step": 91
+    },
+    {
+      "epoch": 1.105105105105105,
+      "grad_norm": 0.11583787947893143,
+      "learning_rate": 1.9369152030840556e-06,
+      "loss": 10.5066,
+      "step": 92
+    },
+    {
+      "epoch": 1.117117117117117,
+      "grad_norm": 0.10351304709911346,
+      "learning_rate": 1.4852136862001764e-06,
+      "loss": 10.373,
+      "step": 93
+    },
+    {
+      "epoch": 1.1291291291291292,
+      "grad_norm": 0.08083759248256683,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 10.389,
+      "step": 94
+    },
+    {
+      "epoch": 1.1411411411411412,
+      "grad_norm": 0.13066591322422028,
+      "learning_rate": 7.596123493895991e-07,
+      "loss": 10.7377,
+      "step": 95
+    },
+    {
+      "epoch": 1.1531531531531531,
+      "grad_norm": 0.10491601377725601,
+      "learning_rate": 4.865965629214819e-07,
+      "loss": 9.8073,
+      "step": 96
+    },
+    {
+      "epoch": 1.165165165165165,
+      "grad_norm": 0.11174864321947098,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 10.6638,
+      "step": 97
+    },
+    {
+      "epoch": 1.1771771771771773,
+      "grad_norm": 0.13597334921360016,
+      "learning_rate": 1.2179748700879012e-07,
+      "loss": 10.1348,
+      "step": 98
+    },
+    {
+      "epoch": 1.1891891891891893,
+      "grad_norm": 0.13292589783668518,
+      "learning_rate": 3.04586490452119e-08,
+      "loss": 10.5918,
+      "step": 99
+    },
+    {
+      "epoch": 1.2012012012012012,
+      "grad_norm": 0.16977031528949738,
+      "learning_rate": 0.0,
+      "loss": 10.1499,
+      "step": 100
+    },
+    {
+      "epoch": 1.2012012012012012,
+      "eval_loss": 10.348917007446289,
+      "eval_runtime": 0.3124,
+      "eval_samples_per_second": 448.121,
+      "eval_steps_per_second": 112.03,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 10678649880576.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null