Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1ad034299d165264b8b41a122c56d71a999010ba87b11d35abc3d2ee85a97b1e
 size 639691872

 version https://git-lfs.github.com/spec/v1
+oid sha256:ec2d752a02af8402b9dfe36b8cb850ef9275254e5fbd85c32d97aa7ca7fee947
 size 639691872

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:756425e312de41936aec7f4180fa852f994da9ce4947fa1da921d364519220dd
 size 1279641042

 version https://git-lfs.github.com/spec/v1
+oid sha256:0374069369e8a7217661d09ac8547dd9fdfe8c4e569774331d4eb8f7a7f44627
 size 1279641042

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2111f77cfa041296a40136fcee994c66ac99a1fb2d045ace1987fa2742bd23e5
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1e0deb8643bb1a683474955de4e095d8a8dbd40933ca79a7c1191a275e516ab5
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d90116c540b4ff0066495fbccc9c914a568905fb44c6564f227952cc4231b00
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:03ad66011cfc1fc727a51190602a41adc332b48eeef62a5ee87c2ca9f9b90b2b
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.1654907464981079,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.04177109440267335,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 12.884,
       "eval_steps_per_second": 3.221,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +754,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.6990316003328e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.15805459022521973,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.0835421888053467,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 12.884,
       "eval_steps_per_second": 3.221,
       "step": 50
+    },
+    {
+      "epoch": 0.042606516290726815,
+      "grad_norm": 0.3080545663833618,
+      "learning_rate": 2.3816778784387097e-05,
+      "loss": 0.1177,
+      "step": 51
+    },
+    {
+      "epoch": 0.04344193817878028,
+      "grad_norm": 0.40985429286956787,
+      "learning_rate": 2.3263454721781537e-05,
+      "loss": 0.1433,
+      "step": 52
+    },
+    {
+      "epoch": 0.04427736006683375,
+      "grad_norm": 0.15664848685264587,
+      "learning_rate": 2.2693489161088592e-05,
+      "loss": 0.0884,
+      "step": 53
+    },
+    {
+      "epoch": 0.045112781954887216,
+      "grad_norm": 0.15345360338687897,
+      "learning_rate": 2.210802993709498e-05,
+      "loss": 0.1044,
+      "step": 54
+    },
+    {
+      "epoch": 0.04594820384294068,
+      "grad_norm": 0.20342934131622314,
+      "learning_rate": 2.1508256086763372e-05,
+      "loss": 0.1088,
+      "step": 55
+    },
+    {
+      "epoch": 0.04678362573099415,
+      "grad_norm": 0.16992981731891632,
+      "learning_rate": 2.0895375474808857e-05,
+      "loss": 0.1459,
+      "step": 56
+    },
+    {
+      "epoch": 0.047619047619047616,
+      "grad_norm": 0.17031916975975037,
+      "learning_rate": 2.0270622361220143e-05,
+      "loss": 0.1282,
+      "step": 57
+    },
+    {
+      "epoch": 0.04845446950710108,
+      "grad_norm": 0.2852511703968048,
+      "learning_rate": 1.963525491562421e-05,
+      "loss": 0.1329,
+      "step": 58
+    },
+    {
+      "epoch": 0.04928989139515455,
+      "grad_norm": 0.4877017140388489,
+      "learning_rate": 1.8990552683500128e-05,
+      "loss": 0.1365,
+      "step": 59
+    },
+    {
+      "epoch": 0.05012531328320802,
+      "grad_norm": 0.3902769684791565,
+      "learning_rate": 1.8337814009344716e-05,
+      "loss": 0.1584,
+      "step": 60
+    },
+    {
+      "epoch": 0.050960735171261484,
+      "grad_norm": 0.2292199432849884,
+      "learning_rate": 1.767835342197955e-05,
+      "loss": 0.1402,
+      "step": 61
+    },
+    {
+      "epoch": 0.05179615705931495,
+      "grad_norm": 0.2017497420310974,
+      "learning_rate": 1.7013498987264832e-05,
+      "loss": 0.1482,
+      "step": 62
+    },
+    {
+      "epoch": 0.05263157894736842,
+      "grad_norm": 0.2939629852771759,
+      "learning_rate": 1.6344589633551502e-05,
+      "loss": 0.1461,
+      "step": 63
+    },
+    {
+      "epoch": 0.053467000835421885,
+      "grad_norm": 0.1926645189523697,
+      "learning_rate": 1.5672972455257726e-05,
+      "loss": 0.1044,
+      "step": 64
+    },
+    {
+      "epoch": 0.05430242272347535,
+      "grad_norm": 0.3081521987915039,
+      "learning_rate": 1.5e-05,
+      "loss": 0.1596,
+      "step": 65
+    },
+    {
+      "epoch": 0.05513784461152882,
+      "grad_norm": 0.4025512933731079,
+      "learning_rate": 1.4327027544742281e-05,
+      "loss": 0.1735,
+      "step": 66
+    },
+    {
+      "epoch": 0.055973266499582286,
+      "grad_norm": 0.20314837992191315,
+      "learning_rate": 1.36554103664485e-05,
+      "loss": 0.125,
+      "step": 67
+    },
+    {
+      "epoch": 0.05680868838763575,
+      "grad_norm": 0.20744721591472626,
+      "learning_rate": 1.2986501012735174e-05,
+      "loss": 0.1095,
+      "step": 68
+    },
+    {
+      "epoch": 0.05764411027568922,
+      "grad_norm": 0.2043089121580124,
+      "learning_rate": 1.2321646578020452e-05,
+      "loss": 0.1544,
+      "step": 69
+    },
+    {
+      "epoch": 0.05847953216374269,
+      "grad_norm": 0.2829361855983734,
+      "learning_rate": 1.1662185990655285e-05,
+      "loss": 0.1347,
+      "step": 70
+    },
+    {
+      "epoch": 0.059314954051796154,
+      "grad_norm": 0.2790044844150543,
+      "learning_rate": 1.1009447316499875e-05,
+      "loss": 0.1564,
+      "step": 71
+    },
+    {
+      "epoch": 0.06015037593984962,
+      "grad_norm": 2.54337739944458,
+      "learning_rate": 1.036474508437579e-05,
+      "loss": 0.1629,
+      "step": 72
+    },
+    {
+      "epoch": 0.06098579782790309,
+      "grad_norm": 0.279619425535202,
+      "learning_rate": 9.729377638779859e-06,
+      "loss": 0.1506,
+      "step": 73
+    },
+    {
+      "epoch": 0.061821219715956555,
+      "grad_norm": 0.2148342877626419,
+      "learning_rate": 9.104624525191147e-06,
+      "loss": 0.1222,
+      "step": 74
+    },
+    {
+      "epoch": 0.06265664160401002,
+      "grad_norm": 3.0420451164245605,
+      "learning_rate": 8.491743913236629e-06,
+      "loss": 0.1404,
+      "step": 75
+    },
+    {
+      "epoch": 0.06349206349206349,
+      "grad_norm": 0.2432384490966797,
+      "learning_rate": 7.89197006290502e-06,
+      "loss": 0.1076,
+      "step": 76
+    },
+    {
+      "epoch": 0.06432748538011696,
+      "grad_norm": 3.1877377033233643,
+      "learning_rate": 7.30651083891141e-06,
+      "loss": 0.152,
+      "step": 77
+    },
+    {
+      "epoch": 0.06516290726817042,
+      "grad_norm": 0.4180755019187927,
+      "learning_rate": 6.736545278218464e-06,
+      "loss": 0.1127,
+      "step": 78
+    },
+    {
+      "epoch": 0.06599832915622389,
+      "grad_norm": 0.25477147102355957,
+      "learning_rate": 6.1832212156129045e-06,
+      "loss": 0.1277,
+      "step": 79
+    },
+    {
+      "epoch": 0.06683375104427736,
+      "grad_norm": 0.399618536233902,
+      "learning_rate": 5.647652972118998e-06,
+      "loss": 0.1418,
+      "step": 80
+    },
+    {
+      "epoch": 0.06766917293233082,
+      "grad_norm": 0.27185890078544617,
+      "learning_rate": 5.130919110904311e-06,
+      "loss": 0.155,
+      "step": 81
+    },
+    {
+      "epoch": 0.06850459482038429,
+      "grad_norm": 1.3203309774398804,
+      "learning_rate": 4.6340602651970304e-06,
+      "loss": 0.3033,
+      "step": 82
+    },
+    {
+      "epoch": 0.06934001670843776,
+      "grad_norm": 0.3346821367740631,
+      "learning_rate": 4.158077042589129e-06,
+      "loss": 0.1639,
+      "step": 83
+    },
+    {
+      "epoch": 0.07017543859649122,
+      "grad_norm": 0.3132845163345337,
+      "learning_rate": 3.7039280099458373e-06,
+      "loss": 0.1417,
+      "step": 84
+    },
+    {
+      "epoch": 0.07101086048454469,
+      "grad_norm": 0.30599430203437805,
+      "learning_rate": 3.272527762979553e-06,
+      "loss": 0.1666,
+      "step": 85
+    },
+    {
+      "epoch": 0.07184628237259816,
+      "grad_norm": 0.5425288677215576,
+      "learning_rate": 2.86474508437579e-06,
+      "loss": 0.1837,
+      "step": 86
+    },
+    {
+      "epoch": 0.07268170426065163,
+      "grad_norm": 0.2609088122844696,
+      "learning_rate": 2.4814011941804603e-06,
+      "loss": 0.1981,
+      "step": 87
+    },
+    {
+      "epoch": 0.07351712614870509,
+      "grad_norm": 0.2561860680580139,
+      "learning_rate": 2.1232680959720085e-06,
+      "loss": 0.1169,
+      "step": 88
+    },
+    {
+      "epoch": 0.07435254803675856,
+      "grad_norm": 0.26870644092559814,
+      "learning_rate": 1.79106702214893e-06,
+      "loss": 0.162,
+      "step": 89
+    },
+    {
+      "epoch": 0.07518796992481203,
+      "grad_norm": 0.35758742690086365,
+      "learning_rate": 1.4854669814637145e-06,
+      "loss": 0.1946,
+      "step": 90
+    },
+    {
+      "epoch": 0.07602339181286549,
+      "grad_norm": 0.40720826387405396,
+      "learning_rate": 1.2070834117282414e-06,
+      "loss": 0.1676,
+      "step": 91
+    },
+    {
+      "epoch": 0.07685881370091896,
+      "grad_norm": 0.2641846835613251,
+      "learning_rate": 9.56476940403942e-07,
+      "loss": 0.1419,
+      "step": 92
+    },
+    {
+      "epoch": 0.07769423558897243,
+      "grad_norm": 0.32390695810317993,
+      "learning_rate": 7.341522555726971e-07,
+      "loss": 0.1727,
+      "step": 93
+    },
+    {
+      "epoch": 0.0785296574770259,
+      "grad_norm": 0.17990685999393463,
+      "learning_rate": 5.405570895622014e-07,
+      "loss": 0.1278,
+      "step": 94
+    },
+    {
+      "epoch": 0.07936507936507936,
+      "grad_norm": 0.5421126484870911,
+      "learning_rate": 3.760813172726457e-07,
+      "loss": 0.1988,
+      "step": 95
+    },
+    {
+      "epoch": 0.08020050125313283,
+      "grad_norm": 0.22047364711761475,
+      "learning_rate": 2.41056171020555e-07,
+      "loss": 0.1636,
+      "step": 96
+    },
+    {
+      "epoch": 0.0810359231411863,
+      "grad_norm": 0.2979952096939087,
+      "learning_rate": 1.357535734809795e-07,
+      "loss": 0.1766,
+      "step": 97
+    },
+    {
+      "epoch": 0.08187134502923976,
+      "grad_norm": 0.4420830309391022,
+      "learning_rate": 6.038559007141397e-08,
+      "loss": 0.2073,
+      "step": 98
+    },
+    {
+      "epoch": 0.08270676691729323,
+      "grad_norm": 0.8136470913887024,
+      "learning_rate": 1.510400188028116e-08,
+      "loss": 0.2351,
+      "step": 99
+    },
+    {
+      "epoch": 0.0835421888053467,
+      "grad_norm": 0.37833961844444275,
+      "learning_rate": 0.0,
+      "loss": 0.1832,
+      "step": 100
+    },
+    {
+      "epoch": 0.0835421888053467,
+      "eval_loss": 0.15805459022521973,
+      "eval_runtime": 39.1836,
+      "eval_samples_per_second": 12.863,
+      "eval_steps_per_second": 3.216,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.3812411056128e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null