Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c8c02aa708c291e0eea70feb5e9f38cf9913e491c1f5818047df88b837b8e48a
 size 639691872

 version https://git-lfs.github.com/spec/v1
+oid sha256:11a8a134e697760cd920b025f558e56e5322cd52591e028f4c9dc8de18e2a890
 size 639691872

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cd251ba2660823f5a7c3937cc857fdf02f30399abfa5eaff4f414225121ffd04
 size 1279641042

 version https://git-lfs.github.com/spec/v1
+oid sha256:573957c3c709c22721d2676e7cde32d079e87daf4d70374a360a95e6084e7fae
 size 1279641042

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:21279ccb5d0a5f42f8bbcd3e43f95f672c3da9da4b8cd0f63d1a5329b035a175
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:4f99d60c34c5e0afbaf90da0043467577d559b315976c49cee303ef279be3fcc
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d689b2d605789fff660b4cfbb1ce6889a5251953118f474a71b33d74e2d19be7
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f2c4a11c3ec7ace2e963dc6e2b0b5b6372cc0250cefb36d5f7289475908638cb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.611772060394287,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.18570102135561745,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 13.409,
       "eval_steps_per_second": 1.684,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +754,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 6.7624822112256e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.974936306476593,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.3714020427112349,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 13.409,
       "eval_steps_per_second": 1.684,
       "step": 50
+    },
+    {
+      "epoch": 0.1894150417827298,
+      "grad_norm": 1.2142438888549805,
+      "learning_rate": 3.969463130731183e-06,
+      "loss": 0.8321,
+      "step": 51
+    },
+    {
+      "epoch": 0.19312906220984216,
+      "grad_norm": 2.127845048904419,
+      "learning_rate": 3.8772424536302565e-06,
+      "loss": 1.1498,
+      "step": 52
+    },
+    {
+      "epoch": 0.1968430826369545,
+      "grad_norm": 2.3324849605560303,
+      "learning_rate": 3.782248193514766e-06,
+      "loss": 1.2167,
+      "step": 53
+    },
+    {
+      "epoch": 0.20055710306406685,
+      "grad_norm": 2.875474691390991,
+      "learning_rate": 3.684671656182497e-06,
+      "loss": 1.3003,
+      "step": 54
+    },
+    {
+      "epoch": 0.2042711234911792,
+      "grad_norm": 2.1670145988464355,
+      "learning_rate": 3.5847093477938955e-06,
+      "loss": 1.3164,
+      "step": 55
+    },
+    {
+      "epoch": 0.20798514391829154,
+      "grad_norm": 2.674121379852295,
+      "learning_rate": 3.4825625791348093e-06,
+      "loss": 1.2361,
+      "step": 56
+    },
+    {
+      "epoch": 0.2116991643454039,
+      "grad_norm": 2.671863555908203,
+      "learning_rate": 3.3784370602033572e-06,
+      "loss": 1.2381,
+      "step": 57
+    },
+    {
+      "epoch": 0.21541318477251625,
+      "grad_norm": 2.4426980018615723,
+      "learning_rate": 3.272542485937369e-06,
+      "loss": 1.3495,
+      "step": 58
+    },
+    {
+      "epoch": 0.2191272051996286,
+      "grad_norm": 2.743527412414551,
+      "learning_rate": 3.165092113916688e-06,
+      "loss": 1.3194,
+      "step": 59
+    },
+    {
+      "epoch": 0.22284122562674094,
+      "grad_norm": 2.485083818435669,
+      "learning_rate": 3.056302334890786e-06,
+      "loss": 1.3947,
+      "step": 60
+    },
+    {
+      "epoch": 0.2265552460538533,
+      "grad_norm": 2.3887181282043457,
+      "learning_rate": 2.946392236996592e-06,
+      "loss": 1.2135,
+      "step": 61
+    },
+    {
+      "epoch": 0.23026926648096566,
+      "grad_norm": 2.7468771934509277,
+      "learning_rate": 2.835583164544139e-06,
+      "loss": 1.2523,
+      "step": 62
+    },
+    {
+      "epoch": 0.233983286908078,
+      "grad_norm": 2.5111334323883057,
+      "learning_rate": 2.724098272258584e-06,
+      "loss": 1.2471,
+      "step": 63
+    },
+    {
+      "epoch": 0.23769730733519034,
+      "grad_norm": 2.4568049907684326,
+      "learning_rate": 2.6121620758762877e-06,
+      "loss": 1.1975,
+      "step": 64
+    },
+    {
+      "epoch": 0.2414113277623027,
+      "grad_norm": 2.5363759994506836,
+      "learning_rate": 2.5e-06,
+      "loss": 1.1698,
+      "step": 65
+    },
+    {
+      "epoch": 0.24512534818941503,
+      "grad_norm": 2.9774973392486572,
+      "learning_rate": 2.3878379241237136e-06,
+      "loss": 0.9025,
+      "step": 66
+    },
+    {
+      "epoch": 0.2488393686165274,
+      "grad_norm": 3.5985605716705322,
+      "learning_rate": 2.2759017277414165e-06,
+      "loss": 1.22,
+      "step": 67
+    },
+    {
+      "epoch": 0.2525533890436397,
+      "grad_norm": 2.659881830215454,
+      "learning_rate": 2.1644168354558623e-06,
+      "loss": 1.1345,
+      "step": 68
+    },
+    {
+      "epoch": 0.2562674094707521,
+      "grad_norm": 2.756131172180176,
+      "learning_rate": 2.053607763003409e-06,
+      "loss": 1.1833,
+      "step": 69
+    },
+    {
+      "epoch": 0.25998142989786444,
+      "grad_norm": 2.8494410514831543,
+      "learning_rate": 1.9436976651092143e-06,
+      "loss": 1.3538,
+      "step": 70
+    },
+    {
+      "epoch": 0.26369545032497677,
+      "grad_norm": 2.196274518966675,
+      "learning_rate": 1.8349078860833125e-06,
+      "loss": 0.8695,
+      "step": 71
+    },
+    {
+      "epoch": 0.26740947075208915,
+      "grad_norm": 2.421745538711548,
+      "learning_rate": 1.7274575140626318e-06,
+      "loss": 1.1275,
+      "step": 72
+    },
+    {
+      "epoch": 0.2711234911792015,
+      "grad_norm": 3.1137969493865967,
+      "learning_rate": 1.6215629397966432e-06,
+      "loss": 1.2171,
+      "step": 73
+    },
+    {
+      "epoch": 0.2748375116063138,
+      "grad_norm": 2.646501064300537,
+      "learning_rate": 1.5174374208651913e-06,
+      "loss": 1.234,
+      "step": 74
+    },
+    {
+      "epoch": 0.2785515320334262,
+      "grad_norm": 2.5165648460388184,
+      "learning_rate": 1.415290652206105e-06,
+      "loss": 1.1426,
+      "step": 75
+    },
+    {
+      "epoch": 0.2822655524605385,
+      "grad_norm": 2.521559476852417,
+      "learning_rate": 1.3153283438175036e-06,
+      "loss": 1.0068,
+      "step": 76
+    },
+    {
+      "epoch": 0.28597957288765086,
+      "grad_norm": 2.283813238143921,
+      "learning_rate": 1.217751806485235e-06,
+      "loss": 1.1615,
+      "step": 77
+    },
+    {
+      "epoch": 0.28969359331476324,
+      "grad_norm": 3.4305078983306885,
+      "learning_rate": 1.122757546369744e-06,
+      "loss": 1.1356,
+      "step": 78
+    },
+    {
+      "epoch": 0.2934076137418756,
+      "grad_norm": 2.714026927947998,
+      "learning_rate": 1.0305368692688175e-06,
+      "loss": 1.086,
+      "step": 79
+    },
+    {
+      "epoch": 0.2971216341689879,
+      "grad_norm": 2.8863515853881836,
+      "learning_rate": 9.412754953531664e-07,
+      "loss": 0.854,
+      "step": 80
+    },
+    {
+      "epoch": 0.3008356545961003,
+      "grad_norm": 2.6695327758789062,
+      "learning_rate": 8.551531851507186e-07,
+      "loss": 1.1173,
+      "step": 81
+    },
+    {
+      "epoch": 0.3045496750232126,
+      "grad_norm": 2.6120901107788086,
+      "learning_rate": 7.723433775328385e-07,
+      "loss": 0.9034,
+      "step": 82
+    },
+    {
+      "epoch": 0.308263695450325,
+      "grad_norm": 2.606943368911743,
+      "learning_rate": 6.930128404315214e-07,
+      "loss": 1.0242,
+      "step": 83
+    },
+    {
+      "epoch": 0.31197771587743733,
+      "grad_norm": 2.955063819885254,
+      "learning_rate": 6.17321334990973e-07,
+      "loss": 0.9499,
+      "step": 84
+    },
+    {
+      "epoch": 0.31569173630454966,
+      "grad_norm": 2.4737842082977295,
+      "learning_rate": 5.454212938299256e-07,
+      "loss": 1.0119,
+      "step": 85
+    },
+    {
+      "epoch": 0.31940575673166205,
+      "grad_norm": 3.265795946121216,
+      "learning_rate": 4.774575140626317e-07,
+      "loss": 0.855,
+      "step": 86
+    },
+    {
+      "epoch": 0.3231197771587744,
+      "grad_norm": 3.479175090789795,
+      "learning_rate": 4.1356686569674344e-07,
+      "loss": 0.8147,
+      "step": 87
+    },
+    {
+      "epoch": 0.3268337975858867,
+      "grad_norm": 3.1530840396881104,
+      "learning_rate": 3.538780159953348e-07,
+      "loss": 1.1186,
+      "step": 88
+    },
+    {
+      "epoch": 0.3305478180129991,
+      "grad_norm": 4.578067779541016,
+      "learning_rate": 2.98511170358155e-07,
+      "loss": 0.9307,
+      "step": 89
+    },
+    {
+      "epoch": 0.3342618384401114,
+      "grad_norm": 3.7903590202331543,
+      "learning_rate": 2.4757783024395244e-07,
+      "loss": 0.8859,
+      "step": 90
+    },
+    {
+      "epoch": 0.33797585886722376,
+      "grad_norm": 4.344192028045654,
+      "learning_rate": 2.0118056862137358e-07,
+      "loss": 0.9719,
+      "step": 91
+    },
+    {
+      "epoch": 0.34168987929433614,
+      "grad_norm": 6.286164283752441,
+      "learning_rate": 1.59412823400657e-07,
+      "loss": 1.1164,
+      "step": 92
+    },
+    {
+      "epoch": 0.34540389972144847,
+      "grad_norm": 4.787858963012695,
+      "learning_rate": 1.223587092621162e-07,
+      "loss": 1.0283,
+      "step": 93
+    },
+    {
+      "epoch": 0.3491179201485608,
+      "grad_norm": 7.135058403015137,
+      "learning_rate": 9.00928482603669e-08,
+      "loss": 0.9365,
+      "step": 94
+    },
+    {
+      "epoch": 0.3528319405756732,
+      "grad_norm": 7.054929256439209,
+      "learning_rate": 6.268021954544095e-08,
+      "loss": 1.0428,
+      "step": 95
+    },
+    {
+      "epoch": 0.3565459610027855,
+      "grad_norm": 8.841050148010254,
+      "learning_rate": 4.017602850342584e-08,
+      "loss": 0.9227,
+      "step": 96
+    },
+    {
+      "epoch": 0.36025998142989785,
+      "grad_norm": 9.116321563720703,
+      "learning_rate": 2.262559558016325e-08,
+      "loss": 1.0278,
+      "step": 97
+    },
+    {
+      "epoch": 0.36397400185701023,
+      "grad_norm": 8.069734573364258,
+      "learning_rate": 1.006426501190233e-08,
+      "loss": 1.2367,
+      "step": 98
+    },
+    {
+      "epoch": 0.36768802228412256,
+      "grad_norm": 10.388524055480957,
+      "learning_rate": 2.5173336467135266e-09,
+      "loss": 1.0777,
+      "step": 99
+    },
+    {
+      "epoch": 0.3714020427112349,
+      "grad_norm": 9.496840476989746,
+      "learning_rate": 0.0,
+      "loss": 1.8257,
+      "step": 100
+    },
+    {
+      "epoch": 0.3714020427112349,
+      "eval_loss": 0.974936306476593,
+      "eval_runtime": 33.8821,
+      "eval_samples_per_second": 13.399,
+      "eval_steps_per_second": 1.682,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.35249644224512e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null