Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:844ca6a710741dbae123f11124fd45ebbd775591069ae1c095be0531420e8c4e
 size 201892112

 version https://git-lfs.github.com/spec/v1
+oid sha256:a6f3f00a37526f73aa9a46d560965c6e512d7db6f8250c623eb7a8320f53366f
 size 201892112

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bb6c68918be405ebbaf472903b2638500b74510dcf4a42e3b722d651b33f0c3a
 size 403961210

 version https://git-lfs.github.com/spec/v1
+oid sha256:f0dfc927ce63e88f3d2b41db98fac65d0659fb38ae2b36aa9d40314117c7af06
 size 403961210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f02a89b1300864a6f89aa0f8d373a538621b90c0581e0d6ef037d60eb8b9092d
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:04287158be3dd25edf2b56f89647fe89f21b744c8af7c8a445ffcfef6d76d93f
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1900bd8021f13c38b942ed30aea6e2cea1b47664e4ce28d0276b142334732307
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f2c4a11c3ec7ace2e963dc6e2b0b5b6372cc0250cefb36d5f7289475908638cb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 4.852181434631348,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 1.415492957746479,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 48.819,
       "eval_steps_per_second": 6.509,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +754,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.06657392623616e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 4.300358295440674,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 2.830985915492958,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 48.819,
       "eval_steps_per_second": 6.509,
       "step": 50
+    },
+    {
+      "epoch": 1.443661971830986,
+      "grad_norm": 5.88961124420166,
+      "learning_rate": 2.847932752400164e-06,
+      "loss": 5.3644,
+      "step": 51
+    },
+    {
+      "epoch": 1.471830985915493,
+      "grad_norm": 4.989941120147705,
+      "learning_rate": 2.761321158169134e-06,
+      "loss": 5.3129,
+      "step": 52
+    },
+    {
+      "epoch": 1.5,
+      "grad_norm": 5.352567672729492,
+      "learning_rate": 2.6743911843603134e-06,
+      "loss": 5.1404,
+      "step": 53
+    },
+    {
+      "epoch": 1.528169014084507,
+      "grad_norm": 5.720556259155273,
+      "learning_rate": 2.587248741756253e-06,
+      "loss": 4.8448,
+      "step": 54
+    },
+    {
+      "epoch": 1.556338028169014,
+      "grad_norm": 5.738869667053223,
+      "learning_rate": 2.5e-06,
+      "loss": 5.2918,
+      "step": 55
+    },
+    {
+      "epoch": 1.584507042253521,
+      "grad_norm": 5.720866680145264,
+      "learning_rate": 2.4127512582437486e-06,
+      "loss": 4.7,
+      "step": 56
+    },
+    {
+      "epoch": 1.612676056338028,
+      "grad_norm": 5.672741413116455,
+      "learning_rate": 2.325608815639687e-06,
+      "loss": 5.124,
+      "step": 57
+    },
+    {
+      "epoch": 1.6408450704225352,
+      "grad_norm": 5.708094120025635,
+      "learning_rate": 2.238678841830867e-06,
+      "loss": 5.3825,
+      "step": 58
+    },
+    {
+      "epoch": 1.6690140845070423,
+      "grad_norm": 4.969758033752441,
+      "learning_rate": 2.1520672475998374e-06,
+      "loss": 5.2906,
+      "step": 59
+    },
+    {
+      "epoch": 1.6971830985915493,
+      "grad_norm": 4.994139194488525,
+      "learning_rate": 2.0658795558326745e-06,
+      "loss": 5.1099,
+      "step": 60
+    },
+    {
+      "epoch": 1.7253521126760565,
+      "grad_norm": 5.776417255401611,
+      "learning_rate": 1.9802207729556023e-06,
+      "loss": 4.8853,
+      "step": 61
+    },
+    {
+      "epoch": 1.7535211267605635,
+      "grad_norm": 5.504635334014893,
+      "learning_rate": 1.895195261000831e-06,
+      "loss": 4.8793,
+      "step": 62
+    },
+    {
+      "epoch": 1.7816901408450705,
+      "grad_norm": 5.382895469665527,
+      "learning_rate": 1.8109066104575023e-06,
+      "loss": 4.9927,
+      "step": 63
+    },
+    {
+      "epoch": 1.8098591549295775,
+      "grad_norm": 5.304967403411865,
+      "learning_rate": 1.7274575140626318e-06,
+      "loss": 4.8314,
+      "step": 64
+    },
+    {
+      "epoch": 1.8380281690140845,
+      "grad_norm": 5.1869893074035645,
+      "learning_rate": 1.6449496416858285e-06,
+      "loss": 5.233,
+      "step": 65
+    },
+    {
+      "epoch": 1.8661971830985915,
+      "grad_norm": 6.1123366355896,
+      "learning_rate": 1.56348351646022e-06,
+      "loss": 4.9092,
+      "step": 66
+    },
+    {
+      "epoch": 1.8943661971830985,
+      "grad_norm": 5.411812782287598,
+      "learning_rate": 1.4831583923105e-06,
+      "loss": 5.1397,
+      "step": 67
+    },
+    {
+      "epoch": 1.9225352112676055,
+      "grad_norm": 5.654954433441162,
+      "learning_rate": 1.4040721330273063e-06,
+      "loss": 4.6659,
+      "step": 68
+    },
+    {
+      "epoch": 1.9507042253521125,
+      "grad_norm": 5.411698341369629,
+      "learning_rate": 1.3263210930352737e-06,
+      "loss": 4.8395,
+      "step": 69
+    },
+    {
+      "epoch": 1.9788732394366197,
+      "grad_norm": 5.4398064613342285,
+      "learning_rate": 1.2500000000000007e-06,
+      "loss": 5.1385,
+      "step": 70
+    },
+    {
+      "epoch": 2.0140845070422535,
+      "grad_norm": 10.192140579223633,
+      "learning_rate": 1.1752018394169882e-06,
+      "loss": 7.7338,
+      "step": 71
+    },
+    {
+      "epoch": 2.0422535211267605,
+      "grad_norm": 5.421514511108398,
+      "learning_rate": 1.1020177413231334e-06,
+      "loss": 4.567,
+      "step": 72
+    },
+    {
+      "epoch": 2.0704225352112675,
+      "grad_norm": 5.557625770568848,
+      "learning_rate": 1.0305368692688175e-06,
+      "loss": 4.3131,
+      "step": 73
+    },
+    {
+      "epoch": 2.0985915492957745,
+      "grad_norm": 5.216914653778076,
+      "learning_rate": 9.608463116858544e-07,
+      "loss": 4.5585,
+      "step": 74
+    },
+    {
+      "epoch": 2.1267605633802815,
+      "grad_norm": 5.993343830108643,
+      "learning_rate": 8.930309757836517e-07,
+      "loss": 4.6337,
+      "step": 75
+    },
+    {
+      "epoch": 2.1549295774647885,
+      "grad_norm": 5.094235420227051,
+      "learning_rate": 8.271734841028553e-07,
+      "loss": 4.8541,
+      "step": 76
+    },
+    {
+      "epoch": 2.183098591549296,
+      "grad_norm": 5.717433929443359,
+      "learning_rate": 7.633540738525066e-07,
+      "loss": 5.2967,
+      "step": 77
+    },
+    {
+      "epoch": 2.211267605633803,
+      "grad_norm": 5.293563365936279,
+      "learning_rate": 7.016504991533727e-07,
+      "loss": 4.8659,
+      "step": 78
+    },
+    {
+      "epoch": 2.23943661971831,
+      "grad_norm": 5.073064804077148,
+      "learning_rate": 6.421379363065142e-07,
+      "loss": 4.9172,
+      "step": 79
+    },
+    {
+      "epoch": 2.267605633802817,
+      "grad_norm": 5.354030609130859,
+      "learning_rate": 5.848888922025553e-07,
+      "loss": 4.6161,
+      "step": 80
+    },
+    {
+      "epoch": 2.295774647887324,
+      "grad_norm": 5.201179504394531,
+      "learning_rate": 5.299731159831953e-07,
+      "loss": 4.7591,
+      "step": 81
+    },
+    {
+      "epoch": 2.323943661971831,
+      "grad_norm": 5.040406227111816,
+      "learning_rate": 4.774575140626317e-07,
+      "loss": 4.7533,
+      "step": 82
+    },
+    {
+      "epoch": 2.352112676056338,
+      "grad_norm": 5.279065132141113,
+      "learning_rate": 4.27406068612396e-07,
+      "loss": 4.6149,
+      "step": 83
+    },
+    {
+      "epoch": 2.380281690140845,
+      "grad_norm": 5.657581329345703,
+      "learning_rate": 3.798797596089351e-07,
+      "loss": 4.4146,
+      "step": 84
+    },
+    {
+      "epoch": 2.408450704225352,
+      "grad_norm": 5.0008931159973145,
+      "learning_rate": 3.3493649053890325e-07,
+      "loss": 4.374,
+      "step": 85
+    },
+    {
+      "epoch": 2.436619718309859,
+      "grad_norm": 5.283320426940918,
+      "learning_rate": 2.9263101785268253e-07,
+      "loss": 4.5699,
+      "step": 86
+    },
+    {
+      "epoch": 2.464788732394366,
+      "grad_norm": 5.353503704071045,
+      "learning_rate": 2.53014884252083e-07,
+      "loss": 4.6215,
+      "step": 87
+    },
+    {
+      "epoch": 2.492957746478873,
+      "grad_norm": 5.431240081787109,
+      "learning_rate": 2.1613635589349756e-07,
+      "loss": 4.5881,
+      "step": 88
+    },
+    {
+      "epoch": 2.52112676056338,
+      "grad_norm": 5.6447858810424805,
+      "learning_rate": 1.8204036358303173e-07,
+      "loss": 4.2597,
+      "step": 89
+    },
+    {
+      "epoch": 2.5492957746478875,
+      "grad_norm": 5.344186305999756,
+      "learning_rate": 1.507684480352292e-07,
+      "loss": 4.4999,
+      "step": 90
+    },
+    {
+      "epoch": 2.5774647887323945,
+      "grad_norm": 5.2609477043151855,
+      "learning_rate": 1.223587092621162e-07,
+      "loss": 4.5457,
+      "step": 91
+    },
+    {
+      "epoch": 2.6056338028169015,
+      "grad_norm": 5.623130798339844,
+      "learning_rate": 9.684576015420277e-08,
+      "loss": 4.4276,
+      "step": 92
+    },
+    {
+      "epoch": 2.6338028169014085,
+      "grad_norm": 5.679134845733643,
+      "learning_rate": 7.426068431000883e-08,
+      "loss": 4.7285,
+      "step": 93
+    },
+    {
+      "epoch": 2.6619718309859155,
+      "grad_norm": 5.343556880950928,
+      "learning_rate": 5.463099816548578e-08,
+      "loss": 4.7053,
+      "step": 94
+    },
+    {
+      "epoch": 2.6901408450704225,
+      "grad_norm": 4.940552711486816,
+      "learning_rate": 3.798061746947995e-08,
+      "loss": 4.4612,
+      "step": 95
+    },
+    {
+      "epoch": 2.7183098591549295,
+      "grad_norm": 5.611880779266357,
+      "learning_rate": 2.4329828146074096e-08,
+      "loss": 4.37,
+      "step": 96
+    },
+    {
+      "epoch": 2.7464788732394365,
+      "grad_norm": 5.197037220001221,
+      "learning_rate": 1.3695261579316776e-08,
+      "loss": 4.5959,
+      "step": 97
+    },
+    {
+      "epoch": 2.7746478873239435,
+      "grad_norm": 5.572286605834961,
+      "learning_rate": 6.089874350439507e-09,
+      "loss": 4.5331,
+      "step": 98
+    },
+    {
+      "epoch": 2.802816901408451,
+      "grad_norm": 5.335962772369385,
+      "learning_rate": 1.5229324522605949e-09,
+      "loss": 4.3457,
+      "step": 99
+    },
+    {
+      "epoch": 2.830985915492958,
+      "grad_norm": 5.259434223175049,
+      "learning_rate": 0.0,
+      "loss": 4.7794,
+      "step": 100
+    },
+    {
+      "epoch": 2.830985915492958,
+      "eval_loss": 4.300358295440674,
+      "eval_runtime": 1.2256,
+      "eval_samples_per_second": 48.955,
+      "eval_steps_per_second": 6.527,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.13314785247232e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null