Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e075b6709016e331e5bafe702050cc78ad9252fd5770521bccfaa37cba8a06d1
 size 50624

 version https://git-lfs.github.com/spec/v1
+oid sha256:ff33f992fb27383e85fe93b11989f3c7636456acf6498e5ef658e1718aeb9331
 size 50624

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c426216524b1c9128375d0bd15970869bdcd36f213db99cd18f034ef2482835a
 size 118090

 version https://git-lfs.github.com/spec/v1
+oid sha256:5e2c29918aef0f6daf84b8a52c47ed39176c229d030db14ddebdea59bfbce04f
 size 118090

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2191b05dbdb045edad9efa08fb329880cfba67a1b03475ce1f64f0fa420e4e2a
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:32c0575a4e86cdb910863db98f1f30ff2a9c0a1f845a4da4f00ef74648322528
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1900bd8021f13c38b942ed30aea6e2cea1b47664e4ce28d0276b142334732307
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f2c4a11c3ec7ace2e963dc6e2b0b5b6372cc0250cefb36d5f7289475908638cb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.343100547790527,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.03797228023542814,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 491.624,
       "eval_steps_per_second": 123.017,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +754,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5243320270848.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 10.342756271362305,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.07594456047085628,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 491.624,
       "eval_steps_per_second": 123.017,
       "step": 50
+    },
+    {
+      "epoch": 0.0387317258401367,
+      "grad_norm": 0.1932212859392166,
+      "learning_rate": 2.847932752400164e-06,
+      "loss": 10.3335,
+      "step": 51
+    },
+    {
+      "epoch": 0.03949117144484526,
+      "grad_norm": 0.20674271881580353,
+      "learning_rate": 2.761321158169134e-06,
+      "loss": 10.3403,
+      "step": 52
+    },
+    {
+      "epoch": 0.04025061704955383,
+      "grad_norm": 0.16978555917739868,
+      "learning_rate": 2.6743911843603134e-06,
+      "loss": 10.3549,
+      "step": 53
+    },
+    {
+      "epoch": 0.04101006265426239,
+      "grad_norm": 0.2252632975578308,
+      "learning_rate": 2.587248741756253e-06,
+      "loss": 10.3375,
+      "step": 54
+    },
+    {
+      "epoch": 0.04176950825897095,
+      "grad_norm": 0.21015048027038574,
+      "learning_rate": 2.5e-06,
+      "loss": 10.3468,
+      "step": 55
+    },
+    {
+      "epoch": 0.04252895386367951,
+      "grad_norm": 0.19843299686908722,
+      "learning_rate": 2.4127512582437486e-06,
+      "loss": 10.3563,
+      "step": 56
+    },
+    {
+      "epoch": 0.04328839946838808,
+      "grad_norm": 0.20154882967472076,
+      "learning_rate": 2.325608815639687e-06,
+      "loss": 10.3559,
+      "step": 57
+    },
+    {
+      "epoch": 0.04404784507309664,
+      "grad_norm": 0.2109687328338623,
+      "learning_rate": 2.238678841830867e-06,
+      "loss": 10.3398,
+      "step": 58
+    },
+    {
+      "epoch": 0.0448072906778052,
+      "grad_norm": 0.2115778774023056,
+      "learning_rate": 2.1520672475998374e-06,
+      "loss": 10.3424,
+      "step": 59
+    },
+    {
+      "epoch": 0.045566736282513766,
+      "grad_norm": 0.23688730597496033,
+      "learning_rate": 2.0658795558326745e-06,
+      "loss": 10.3374,
+      "step": 60
+    },
+    {
+      "epoch": 0.046326181887222324,
+      "grad_norm": 0.20714813470840454,
+      "learning_rate": 1.9802207729556023e-06,
+      "loss": 10.3575,
+      "step": 61
+    },
+    {
+      "epoch": 0.04708562749193089,
+      "grad_norm": 0.2432180792093277,
+      "learning_rate": 1.895195261000831e-06,
+      "loss": 10.3398,
+      "step": 62
+    },
+    {
+      "epoch": 0.047845073096639455,
+      "grad_norm": 0.19799984991550446,
+      "learning_rate": 1.8109066104575023e-06,
+      "loss": 10.353,
+      "step": 63
+    },
+    {
+      "epoch": 0.048604518701348014,
+      "grad_norm": 0.20568235218524933,
+      "learning_rate": 1.7274575140626318e-06,
+      "loss": 10.3497,
+      "step": 64
+    },
+    {
+      "epoch": 0.04936396430605658,
+      "grad_norm": 0.20056724548339844,
+      "learning_rate": 1.6449496416858285e-06,
+      "loss": 10.3471,
+      "step": 65
+    },
+    {
+      "epoch": 0.050123409910765145,
+      "grad_norm": 0.18637068569660187,
+      "learning_rate": 1.56348351646022e-06,
+      "loss": 10.3384,
+      "step": 66
+    },
+    {
+      "epoch": 0.0508828555154737,
+      "grad_norm": 0.22628478705883026,
+      "learning_rate": 1.4831583923105e-06,
+      "loss": 10.3308,
+      "step": 67
+    },
+    {
+      "epoch": 0.05164230112018227,
+      "grad_norm": 0.2501058578491211,
+      "learning_rate": 1.4040721330273063e-06,
+      "loss": 10.341,
+      "step": 68
+    },
+    {
+      "epoch": 0.05240174672489083,
+      "grad_norm": 0.22660444676876068,
+      "learning_rate": 1.3263210930352737e-06,
+      "loss": 10.3273,
+      "step": 69
+    },
+    {
+      "epoch": 0.05316119232959939,
+      "grad_norm": 0.20599226653575897,
+      "learning_rate": 1.2500000000000007e-06,
+      "loss": 10.3412,
+      "step": 70
+    },
+    {
+      "epoch": 0.05392063793430796,
+      "grad_norm": 0.21871422231197357,
+      "learning_rate": 1.1752018394169882e-06,
+      "loss": 10.3326,
+      "step": 71
+    },
+    {
+      "epoch": 0.054680083539016516,
+      "grad_norm": 0.237649068236351,
+      "learning_rate": 1.1020177413231334e-06,
+      "loss": 10.348,
+      "step": 72
+    },
+    {
+      "epoch": 0.05543952914372508,
+      "grad_norm": 0.23702602088451385,
+      "learning_rate": 1.0305368692688175e-06,
+      "loss": 10.3462,
+      "step": 73
+    },
+    {
+      "epoch": 0.05619897474843364,
+      "grad_norm": 0.22213907539844513,
+      "learning_rate": 9.608463116858544e-07,
+      "loss": 10.3396,
+      "step": 74
+    },
+    {
+      "epoch": 0.056958420353142206,
+      "grad_norm": 0.2493429332971573,
+      "learning_rate": 8.930309757836517e-07,
+      "loss": 10.3436,
+      "step": 75
+    },
+    {
+      "epoch": 0.05771786595785077,
+      "grad_norm": 0.21368595957756042,
+      "learning_rate": 8.271734841028553e-07,
+      "loss": 10.3512,
+      "step": 76
+    },
+    {
+      "epoch": 0.05847731156255933,
+      "grad_norm": 0.2322058379650116,
+      "learning_rate": 7.633540738525066e-07,
+      "loss": 10.3496,
+      "step": 77
+    },
+    {
+      "epoch": 0.059236757167267895,
+      "grad_norm": 0.23094195127487183,
+      "learning_rate": 7.016504991533727e-07,
+      "loss": 10.3491,
+      "step": 78
+    },
+    {
+      "epoch": 0.05999620277197646,
+      "grad_norm": 0.261568158864975,
+      "learning_rate": 6.421379363065142e-07,
+      "loss": 10.3489,
+      "step": 79
+    },
+    {
+      "epoch": 0.06075564837668502,
+      "grad_norm": 0.23905989527702332,
+      "learning_rate": 5.848888922025553e-07,
+      "loss": 10.3485,
+      "step": 80
+    },
+    {
+      "epoch": 0.061515093981393584,
+      "grad_norm": 0.2564074695110321,
+      "learning_rate": 5.299731159831953e-07,
+      "loss": 10.3419,
+      "step": 81
+    },
+    {
+      "epoch": 0.06227453958610214,
+      "grad_norm": 0.24325355887413025,
+      "learning_rate": 4.774575140626317e-07,
+      "loss": 10.3518,
+      "step": 82
+    },
+    {
+      "epoch": 0.0630339851908107,
+      "grad_norm": 0.21921157836914062,
+      "learning_rate": 4.27406068612396e-07,
+      "loss": 10.3405,
+      "step": 83
+    },
+    {
+      "epoch": 0.06379343079551927,
+      "grad_norm": 0.21654434502124786,
+      "learning_rate": 3.798797596089351e-07,
+      "loss": 10.3553,
+      "step": 84
+    },
+    {
+      "epoch": 0.06455287640022783,
+      "grad_norm": 0.2687203586101532,
+      "learning_rate": 3.3493649053890325e-07,
+      "loss": 10.3464,
+      "step": 85
+    },
+    {
+      "epoch": 0.06531232200493639,
+      "grad_norm": 0.21483434736728668,
+      "learning_rate": 2.9263101785268253e-07,
+      "loss": 10.3408,
+      "step": 86
+    },
+    {
+      "epoch": 0.06607176760964496,
+      "grad_norm": 0.21876099705696106,
+      "learning_rate": 2.53014884252083e-07,
+      "loss": 10.3406,
+      "step": 87
+    },
+    {
+      "epoch": 0.06683121321435352,
+      "grad_norm": 0.22782814502716064,
+      "learning_rate": 2.1613635589349756e-07,
+      "loss": 10.3377,
+      "step": 88
+    },
+    {
+      "epoch": 0.06759065881906208,
+      "grad_norm": 0.3331179618835449,
+      "learning_rate": 1.8204036358303173e-07,
+      "loss": 10.3426,
+      "step": 89
+    },
+    {
+      "epoch": 0.06835010442377065,
+      "grad_norm": 0.24991817772388458,
+      "learning_rate": 1.507684480352292e-07,
+      "loss": 10.3352,
+      "step": 90
+    },
+    {
+      "epoch": 0.06910955002847921,
+      "grad_norm": 0.2640218734741211,
+      "learning_rate": 1.223587092621162e-07,
+      "loss": 10.3305,
+      "step": 91
+    },
+    {
+      "epoch": 0.06986899563318777,
+      "grad_norm": 0.2834155559539795,
+      "learning_rate": 9.684576015420277e-08,
+      "loss": 10.3433,
+      "step": 92
+    },
+    {
+      "epoch": 0.07062844123789634,
+      "grad_norm": 0.283803790807724,
+      "learning_rate": 7.426068431000883e-08,
+      "loss": 10.3395,
+      "step": 93
+    },
+    {
+      "epoch": 0.0713878868426049,
+      "grad_norm": 0.2507743239402771,
+      "learning_rate": 5.463099816548578e-08,
+      "loss": 10.3413,
+      "step": 94
+    },
+    {
+      "epoch": 0.07214733244731346,
+      "grad_norm": 0.2620439827442169,
+      "learning_rate": 3.798061746947995e-08,
+      "loss": 10.3463,
+      "step": 95
+    },
+    {
+      "epoch": 0.07290677805202202,
+      "grad_norm": 0.2948286235332489,
+      "learning_rate": 2.4329828146074096e-08,
+      "loss": 10.3418,
+      "step": 96
+    },
+    {
+      "epoch": 0.07366622365673059,
+      "grad_norm": 0.3654595613479614,
+      "learning_rate": 1.3695261579316776e-08,
+      "loss": 10.3411,
+      "step": 97
+    },
+    {
+      "epoch": 0.07442566926143915,
+      "grad_norm": 0.3371923863887787,
+      "learning_rate": 6.089874350439507e-09,
+      "loss": 10.3374,
+      "step": 98
+    },
+    {
+      "epoch": 0.0751851148661477,
+      "grad_norm": 0.3659217357635498,
+      "learning_rate": 1.5229324522605949e-09,
+      "loss": 10.3461,
+      "step": 99
+    },
+    {
+      "epoch": 0.07594456047085628,
+      "grad_norm": 0.2947441041469574,
+      "learning_rate": 0.0,
+      "loss": 10.3475,
+      "step": 100
+    },
+    {
+      "epoch": 0.07594456047085628,
+      "eval_loss": 10.342756271362305,
+      "eval_runtime": 4.5134,
+      "eval_samples_per_second": 491.43,
+      "eval_steps_per_second": 122.968,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 10473564930048.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null