Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:26e8628a8d7bea74ee851ff66b97cb50b9897f2507a54775053e8e5e3b0c8425
 size 147770496

 version https://git-lfs.github.com/spec/v1
+oid sha256:2bc786fd137e09b42f1ccfff32915b3bb732495edc3bacbaeda33207762528c7
 size 147770496

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:32881490352b36bdc3f694c997afe2d8466743bcd9aff85fdb4556d2981c7ae7
 size 75471860

 version https://git-lfs.github.com/spec/v1
+oid sha256:12c899ec00fa2783dcf6cbb846a5ccfa74c560c421e734ad7e72314d67c8815d
 size 75471860

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8172f37e559a8de245dc068991bed2f9eed8dd993dcff974d4b7d2f03fbe5b72
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:44e951fbacd8bcfef4738cd1ec0e619c6182c9946d6b1b168b9c160d68fc01ff
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a89ffc445067fef9d6d02bb3ff9e61d5e3209e6fa67c7259b3b364b90dbaa2cd
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:49d60a69e2379be2053e816cbaff31e6c931b5922dd86c71c9eaf473299cbf62
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.702011823654175,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.00898593700858157,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 25.39,
       "eval_steps_per_second": 6.347,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +754,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.7945896993947648e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 2.543463706970215,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.01797187401716314,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 25.39,
       "eval_steps_per_second": 6.347,
       "step": 50
+    },
+    {
+      "epoch": 0.009165655748753202,
+      "grad_norm": 4.447997570037842,
+      "learning_rate": 5.695865504800327e-05,
+      "loss": 3.1439,
+      "step": 51
+    },
+    {
+      "epoch": 0.009345374488924833,
+      "grad_norm": 4.879358291625977,
+      "learning_rate": 5.522642316338268e-05,
+      "loss": 3.0015,
+      "step": 52
+    },
+    {
+      "epoch": 0.009525093229096464,
+      "grad_norm": 3.736008882522583,
+      "learning_rate": 5.348782368720626e-05,
+      "loss": 2.9797,
+      "step": 53
+    },
+    {
+      "epoch": 0.009704811969268095,
+      "grad_norm": 3.3615059852600098,
+      "learning_rate": 5.174497483512506e-05,
+      "loss": 3.3752,
+      "step": 54
+    },
+    {
+      "epoch": 0.009884530709439726,
+      "grad_norm": 2.5511536598205566,
+      "learning_rate": 5e-05,
+      "loss": 2.8825,
+      "step": 55
+    },
+    {
+      "epoch": 0.010064249449611358,
+      "grad_norm": 2.8620405197143555,
+      "learning_rate": 4.825502516487497e-05,
+      "loss": 2.9289,
+      "step": 56
+    },
+    {
+      "epoch": 0.010243968189782989,
+      "grad_norm": 2.905618190765381,
+      "learning_rate": 4.6512176312793736e-05,
+      "loss": 3.1637,
+      "step": 57
+    },
+    {
+      "epoch": 0.010423686929954622,
+      "grad_norm": 3.0646848678588867,
+      "learning_rate": 4.477357683661734e-05,
+      "loss": 3.0125,
+      "step": 58
+    },
+    {
+      "epoch": 0.010603405670126253,
+      "grad_norm": 3.3001129627227783,
+      "learning_rate": 4.3041344951996746e-05,
+      "loss": 3.1014,
+      "step": 59
+    },
+    {
+      "epoch": 0.010783124410297884,
+      "grad_norm": 3.306954860687256,
+      "learning_rate": 4.131759111665349e-05,
+      "loss": 3.4818,
+      "step": 60
+    },
+    {
+      "epoch": 0.010962843150469515,
+      "grad_norm": 3.376537561416626,
+      "learning_rate": 3.960441545911204e-05,
+      "loss": 3.5095,
+      "step": 61
+    },
+    {
+      "epoch": 0.011142561890641146,
+      "grad_norm": 3.216480016708374,
+      "learning_rate": 3.790390522001662e-05,
+      "loss": 3.4539,
+      "step": 62
+    },
+    {
+      "epoch": 0.011322280630812778,
+      "grad_norm": 3.4549150466918945,
+      "learning_rate": 3.6218132209150045e-05,
+      "loss": 3.3442,
+      "step": 63
+    },
+    {
+      "epoch": 0.011501999370984409,
+      "grad_norm": 3.040135145187378,
+      "learning_rate": 3.4549150281252636e-05,
+      "loss": 3.3466,
+      "step": 64
+    },
+    {
+      "epoch": 0.011681718111156042,
+      "grad_norm": 3.064269542694092,
+      "learning_rate": 3.289899283371657e-05,
+      "loss": 3.2623,
+      "step": 65
+    },
+    {
+      "epoch": 0.011861436851327673,
+      "grad_norm": 3.851158618927002,
+      "learning_rate": 3.12696703292044e-05,
+      "loss": 3.8449,
+      "step": 66
+    },
+    {
+      "epoch": 0.012041155591499304,
+      "grad_norm": 3.46075177192688,
+      "learning_rate": 2.9663167846209998e-05,
+      "loss": 3.4347,
+      "step": 67
+    },
+    {
+      "epoch": 0.012220874331670935,
+      "grad_norm": 3.6675915718078613,
+      "learning_rate": 2.8081442660546125e-05,
+      "loss": 4.0072,
+      "step": 68
+    },
+    {
+      "epoch": 0.012400593071842566,
+      "grad_norm": 3.480902671813965,
+      "learning_rate": 2.6526421860705473e-05,
+      "loss": 2.4953,
+      "step": 69
+    },
+    {
+      "epoch": 0.012580311812014197,
+      "grad_norm": 4.579424858093262,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 2.6752,
+      "step": 70
+    },
+    {
+      "epoch": 0.012760030552185829,
+      "grad_norm": 3.595804214477539,
+      "learning_rate": 2.350403678833976e-05,
+      "loss": 2.4067,
+      "step": 71
+    },
+    {
+      "epoch": 0.01293974929235746,
+      "grad_norm": 3.2332661151885986,
+      "learning_rate": 2.2040354826462668e-05,
+      "loss": 2.1416,
+      "step": 72
+    },
+    {
+      "epoch": 0.013119468032529093,
+      "grad_norm": 3.2234134674072266,
+      "learning_rate": 2.061073738537635e-05,
+      "loss": 2.7848,
+      "step": 73
+    },
+    {
+      "epoch": 0.013299186772700724,
+      "grad_norm": 3.04959774017334,
+      "learning_rate": 1.9216926233717085e-05,
+      "loss": 2.432,
+      "step": 74
+    },
+    {
+      "epoch": 0.013478905512872355,
+      "grad_norm": 2.643514633178711,
+      "learning_rate": 1.7860619515673033e-05,
+      "loss": 2.0934,
+      "step": 75
+    },
+    {
+      "epoch": 0.013658624253043986,
+      "grad_norm": 3.055098056793213,
+      "learning_rate": 1.6543469682057106e-05,
+      "loss": 2.3272,
+      "step": 76
+    },
+    {
+      "epoch": 0.013838342993215617,
+      "grad_norm": 2.4809956550598145,
+      "learning_rate": 1.526708147705013e-05,
+      "loss": 2.2483,
+      "step": 77
+    },
+    {
+      "epoch": 0.014018061733387249,
+      "grad_norm": 2.64147686958313,
+      "learning_rate": 1.4033009983067452e-05,
+      "loss": 2.3055,
+      "step": 78
+    },
+    {
+      "epoch": 0.01419778047355888,
+      "grad_norm": 2.2932167053222656,
+      "learning_rate": 1.2842758726130283e-05,
+      "loss": 2.0433,
+      "step": 79
+    },
+    {
+      "epoch": 0.014377499213730511,
+      "grad_norm": 2.3609931468963623,
+      "learning_rate": 1.1697777844051105e-05,
+      "loss": 2.3359,
+      "step": 80
+    },
+    {
+      "epoch": 0.014557217953902144,
+      "grad_norm": 2.249638080596924,
+      "learning_rate": 1.0599462319663905e-05,
+      "loss": 2.1387,
+      "step": 81
+    },
+    {
+      "epoch": 0.014736936694073775,
+      "grad_norm": 2.8411879539489746,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 2.463,
+      "step": 82
+    },
+    {
+      "epoch": 0.014916655434245406,
+      "grad_norm": 2.662808418273926,
+      "learning_rate": 8.548121372247918e-06,
+      "loss": 2.5389,
+      "step": 83
+    },
+    {
+      "epoch": 0.015096374174417037,
+      "grad_norm": 2.8071436882019043,
+      "learning_rate": 7.597595192178702e-06,
+      "loss": 2.1706,
+      "step": 84
+    },
+    {
+      "epoch": 0.015276092914588668,
+      "grad_norm": 2.656956911087036,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 2.3759,
+      "step": 85
+    },
+    {
+      "epoch": 0.0154558116547603,
+      "grad_norm": 3.031149387359619,
+      "learning_rate": 5.852620357053651e-06,
+      "loss": 2.2298,
+      "step": 86
+    },
+    {
+      "epoch": 0.015635530394931933,
+      "grad_norm": 2.755890369415283,
+      "learning_rate": 5.060297685041659e-06,
+      "loss": 2.4255,
+      "step": 87
+    },
+    {
+      "epoch": 0.015815249135103564,
+      "grad_norm": 2.5731019973754883,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 2.2818,
+      "step": 88
+    },
+    {
+      "epoch": 0.015994967875275195,
+      "grad_norm": 2.7685189247131348,
+      "learning_rate": 3.6408072716606346e-06,
+      "loss": 2.3691,
+      "step": 89
+    },
+    {
+      "epoch": 0.016174686615446826,
+      "grad_norm": 2.6368041038513184,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": 2.4944,
+      "step": 90
+    },
+    {
+      "epoch": 0.016354405355618457,
+      "grad_norm": 2.858224630355835,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 2.2883,
+      "step": 91
+    },
+    {
+      "epoch": 0.01653412409579009,
+      "grad_norm": 2.793339252471924,
+      "learning_rate": 1.9369152030840556e-06,
+      "loss": 2.4953,
+      "step": 92
+    },
+    {
+      "epoch": 0.01671384283596172,
+      "grad_norm": 2.8187801837921143,
+      "learning_rate": 1.4852136862001764e-06,
+      "loss": 2.2414,
+      "step": 93
+    },
+    {
+      "epoch": 0.01689356157613335,
+      "grad_norm": 2.9361603260040283,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 2.4021,
+      "step": 94
+    },
+    {
+      "epoch": 0.017073280316304982,
+      "grad_norm": 2.7744662761688232,
+      "learning_rate": 7.596123493895991e-07,
+      "loss": 2.1991,
+      "step": 95
+    },
+    {
+      "epoch": 0.017252999056476613,
+      "grad_norm": 2.6500556468963623,
+      "learning_rate": 4.865965629214819e-07,
+      "loss": 2.2625,
+      "step": 96
+    },
+    {
+      "epoch": 0.017432717796648244,
+      "grad_norm": 2.4501571655273438,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 2.1335,
+      "step": 97
+    },
+    {
+      "epoch": 0.017612436536819875,
+      "grad_norm": 2.76723575592041,
+      "learning_rate": 1.2179748700879012e-07,
+      "loss": 2.0047,
+      "step": 98
+    },
+    {
+      "epoch": 0.017792155276991507,
+      "grad_norm": 2.838305711746216,
+      "learning_rate": 3.04586490452119e-08,
+      "loss": 2.3863,
+      "step": 99
+    },
+    {
+      "epoch": 0.01797187401716314,
+      "grad_norm": 3.0587007999420166,
+      "learning_rate": 0.0,
+      "loss": 2.2002,
+      "step": 100
+    },
+    {
+      "epoch": 0.01797187401716314,
+      "eval_loss": 2.543463706970215,
+      "eval_runtime": 368.7,
+      "eval_samples_per_second": 25.419,
+      "eval_steps_per_second": 6.355,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.609045705424896e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null