Training in progress, step 150, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0cc3a50c6ff5cc65ed81da623d743ab51532136fb0c3ad190dc345cd0d0906cb
 size 138995824

 version https://git-lfs.github.com/spec/v1
+oid sha256:f0ee0b5fde164e42dbc7adba78901d6eb3c4117a5748ae53b15e11c2589b77b3
 size 138995824

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0e15a193a7e404868906ac97c436783ef62ed80e77f1b141fc438701e46464ba
 size 278249554

 version https://git-lfs.github.com/spec/v1
+oid sha256:f5b6d93cba01fb7ab1dd004b8ea6e549a0b8723689347a558da13b3cb8570836
 size 278249554

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:882cadf0f16f6fed17a22db46b2b6d413a832cc42c4b260fdf6270fc45427d58
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:edc53f004550fbfb938771f96829dd9089de4bfcabc4ffac804d6825d2a023f5
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5c9fd70aa4cf68daad2242bc04a8a03f22adc681e42e4ebf5294902cea9d0a87
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:dfd59dca009004df561617f8f6994512d029a952a68609cac24b36df5a0757ce
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.2660377025604248,
-  "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.34305317324185247,
   "eval_steps": 50,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -731,6 +731,364 @@
       "eval_samples_per_second": 35.487,
       "eval_steps_per_second": 17.78,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -759,7 +1117,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7212323517235200.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.2121704816818237,
+  "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 0.5145797598627787,
   "eval_steps": 50,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 35.487,
       "eval_steps_per_second": 17.78,
       "step": 100
+    },
+    {
+      "epoch": 0.346483704974271,
+      "grad_norm": 0.46960487961769104,
+      "learning_rate": 6.279213887972179e-05,
+      "loss": 1.3384,
+      "step": 101
+    },
+    {
+      "epoch": 0.34991423670668953,
+      "grad_norm": 0.4278581142425537,
+      "learning_rate": 6.189675975213094e-05,
+      "loss": 1.5108,
+      "step": 102
+    },
+    {
+      "epoch": 0.35334476843910806,
+      "grad_norm": 0.3242987394332886,
+      "learning_rate": 6.099731789198344e-05,
+      "loss": 1.4282,
+      "step": 103
+    },
+    {
+      "epoch": 0.3567753001715266,
+      "grad_norm": 0.2774030864238739,
+      "learning_rate": 6.009412045785051e-05,
+      "loss": 1.3264,
+      "step": 104
+    },
+    {
+      "epoch": 0.3602058319039451,
+      "grad_norm": 0.28905799984931946,
+      "learning_rate": 5.918747589082853e-05,
+      "loss": 1.3673,
+      "step": 105
+    },
+    {
+      "epoch": 0.36363636363636365,
+      "grad_norm": 0.286334753036499,
+      "learning_rate": 5.82776938092065e-05,
+      "loss": 1.3391,
+      "step": 106
+    },
+    {
+      "epoch": 0.3670668953687822,
+      "grad_norm": 0.28761571645736694,
+      "learning_rate": 5.736508490273188e-05,
+      "loss": 1.3136,
+      "step": 107
+    },
+    {
+      "epoch": 0.3704974271012007,
+      "grad_norm": 0.23601886630058289,
+      "learning_rate": 5.644996082651017e-05,
+      "loss": 1.4489,
+      "step": 108
+    },
+    {
+      "epoch": 0.37392795883361923,
+      "grad_norm": 0.26613518595695496,
+      "learning_rate": 5.553263409457504e-05,
+      "loss": 1.2763,
+      "step": 109
+    },
+    {
+      "epoch": 0.37735849056603776,
+      "grad_norm": 0.24539977312088013,
+      "learning_rate": 5.4613417973165106e-05,
+      "loss": 1.2729,
+      "step": 110
+    },
+    {
+      "epoch": 0.38078902229845624,
+      "grad_norm": 0.23666392266750336,
+      "learning_rate": 5.3692626373743706e-05,
+      "loss": 1.3449,
+      "step": 111
+    },
+    {
+      "epoch": 0.38421955403087477,
+      "grad_norm": 0.2275010645389557,
+      "learning_rate": 5.27705737457985e-05,
+      "loss": 1.2318,
+      "step": 112
+    },
+    {
+      "epoch": 0.3876500857632933,
+      "grad_norm": 0.24695216119289398,
+      "learning_rate": 5.184757496945726e-05,
+      "loss": 1.3218,
+      "step": 113
+    },
+    {
+      "epoch": 0.3910806174957118,
+      "grad_norm": 0.25049513578414917,
+      "learning_rate": 5.092394524795649e-05,
+      "loss": 1.3459,
+      "step": 114
+    },
+    {
+      "epoch": 0.39451114922813035,
+      "grad_norm": 0.2567203640937805,
+      "learning_rate": 5e-05,
+      "loss": 1.2598,
+      "step": 115
+    },
+    {
+      "epoch": 0.3979416809605489,
+      "grad_norm": 0.22766773402690887,
+      "learning_rate": 4.907605475204352e-05,
+      "loss": 1.217,
+      "step": 116
+    },
+    {
+      "epoch": 0.4013722126929674,
+      "grad_norm": 0.24498119950294495,
+      "learning_rate": 4.8152425030542766e-05,
+      "loss": 1.2414,
+      "step": 117
+    },
+    {
+      "epoch": 0.40480274442538594,
+      "grad_norm": 0.2613143026828766,
+      "learning_rate": 4.72294262542015e-05,
+      "loss": 1.2934,
+      "step": 118
+    },
+    {
+      "epoch": 0.40823327615780447,
+      "grad_norm": 0.2593250870704651,
+      "learning_rate": 4.6307373626256306e-05,
+      "loss": 1.2766,
+      "step": 119
+    },
+    {
+      "epoch": 0.411663807890223,
+      "grad_norm": 0.23566867411136627,
+      "learning_rate": 4.5386582026834906e-05,
+      "loss": 1.221,
+      "step": 120
+    },
+    {
+      "epoch": 0.41509433962264153,
+      "grad_norm": 0.2302766591310501,
+      "learning_rate": 4.446736590542497e-05,
+      "loss": 1.2444,
+      "step": 121
+    },
+    {
+      "epoch": 0.41852487135506006,
+      "grad_norm": 0.22572791576385498,
+      "learning_rate": 4.3550039173489845e-05,
+      "loss": 1.2369,
+      "step": 122
+    },
+    {
+      "epoch": 0.4219554030874786,
+      "grad_norm": 0.2492562234401703,
+      "learning_rate": 4.2634915097268115e-05,
+      "loss": 1.1794,
+      "step": 123
+    },
+    {
+      "epoch": 0.42538593481989706,
+      "grad_norm": 0.26300421357154846,
+      "learning_rate": 4.1722306190793495e-05,
+      "loss": 1.2073,
+      "step": 124
+    },
+    {
+      "epoch": 0.4288164665523156,
+      "grad_norm": 0.25651299953460693,
+      "learning_rate": 4.0812524109171476e-05,
+      "loss": 1.2113,
+      "step": 125
+    },
+    {
+      "epoch": 0.4322469982847341,
+      "grad_norm": 0.25439882278442383,
+      "learning_rate": 3.99058795421495e-05,
+      "loss": 1.3366,
+      "step": 126
+    },
+    {
+      "epoch": 0.43567753001715265,
+      "grad_norm": 0.25761932134628296,
+      "learning_rate": 3.9002682108016585e-05,
+      "loss": 1.2507,
+      "step": 127
+    },
+    {
+      "epoch": 0.4391080617495712,
+      "grad_norm": 0.25570839643478394,
+      "learning_rate": 3.8103240247869075e-05,
+      "loss": 1.2225,
+      "step": 128
+    },
+    {
+      "epoch": 0.4425385934819897,
+      "grad_norm": 0.28426146507263184,
+      "learning_rate": 3.720786112027822e-05,
+      "loss": 1.261,
+      "step": 129
+    },
+    {
+      "epoch": 0.44596912521440824,
+      "grad_norm": 0.2929293215274811,
+      "learning_rate": 3.631685049639586e-05,
+      "loss": 1.1955,
+      "step": 130
+    },
+    {
+      "epoch": 0.44939965694682676,
+      "grad_norm": 0.26186859607696533,
+      "learning_rate": 3.543051265553377e-05,
+      "loss": 1.2179,
+      "step": 131
+    },
+    {
+      "epoch": 0.4528301886792453,
+      "grad_norm": 0.2666632831096649,
+      "learning_rate": 3.4549150281252636e-05,
+      "loss": 1.1792,
+      "step": 132
+    },
+    {
+      "epoch": 0.4562607204116638,
+      "grad_norm": 0.30045852065086365,
+      "learning_rate": 3.367306435799584e-05,
+      "loss": 1.2296,
+      "step": 133
+    },
+    {
+      "epoch": 0.45969125214408235,
+      "grad_norm": 0.2867948114871979,
+      "learning_rate": 3.2802554068303596e-05,
+      "loss": 1.166,
+      "step": 134
+    },
+    {
+      "epoch": 0.4631217838765009,
+      "grad_norm": 0.3068961501121521,
+      "learning_rate": 3.1937916690642356e-05,
+      "loss": 1.2112,
+      "step": 135
+    },
+    {
+      "epoch": 0.4665523156089194,
+      "grad_norm": 0.31013593077659607,
+      "learning_rate": 3.107944749788449e-05,
+      "loss": 1.2374,
+      "step": 136
+    },
+    {
+      "epoch": 0.4699828473413379,
+      "grad_norm": 0.3226790726184845,
+      "learning_rate": 3.0227439656472877e-05,
+      "loss": 1.2555,
+      "step": 137
+    },
+    {
+      "epoch": 0.4734133790737564,
+      "grad_norm": 0.30750301480293274,
+      "learning_rate": 2.9382184126304834e-05,
+      "loss": 1.2291,
+      "step": 138
+    },
+    {
+      "epoch": 0.47684391080617494,
+      "grad_norm": 0.3350450396537781,
+      "learning_rate": 2.8543969561369556e-05,
+      "loss": 1.2717,
+      "step": 139
+    },
+    {
+      "epoch": 0.48027444253859347,
+      "grad_norm": 0.33156758546829224,
+      "learning_rate": 2.771308221117309e-05,
+      "loss": 1.2334,
+      "step": 140
+    },
+    {
+      "epoch": 0.483704974271012,
+      "grad_norm": 0.3310351073741913,
+      "learning_rate": 2.688980582298435e-05,
+      "loss": 1.1668,
+      "step": 141
+    },
+    {
+      "epoch": 0.48713550600343053,
+      "grad_norm": 0.3672282099723816,
+      "learning_rate": 2.607442154493568e-05,
+      "loss": 1.1896,
+      "step": 142
+    },
+    {
+      "epoch": 0.49056603773584906,
+      "grad_norm": 0.38358932733535767,
+      "learning_rate": 2.5267207830011068e-05,
+      "loss": 1.2141,
+      "step": 143
+    },
+    {
+      "epoch": 0.4939965694682676,
+      "grad_norm": 0.3809894025325775,
+      "learning_rate": 2.446844034095466e-05,
+      "loss": 1.2576,
+      "step": 144
+    },
+    {
+      "epoch": 0.4974271012006861,
+      "grad_norm": 0.3670910894870758,
+      "learning_rate": 2.3678391856132204e-05,
+      "loss": 1.199,
+      "step": 145
+    },
+    {
+      "epoch": 0.5008576329331046,
+      "grad_norm": 0.4124976098537445,
+      "learning_rate": 2.2897332176377528e-05,
+      "loss": 1.169,
+      "step": 146
+    },
+    {
+      "epoch": 0.5042881646655232,
+      "grad_norm": 0.4204322099685669,
+      "learning_rate": 2.2125528032855724e-05,
+      "loss": 1.1429,
+      "step": 147
+    },
+    {
+      "epoch": 0.5077186963979416,
+      "grad_norm": 0.4493495523929596,
+      "learning_rate": 2.136324299597474e-05,
+      "loss": 1.2079,
+      "step": 148
+    },
+    {
+      "epoch": 0.5111492281303602,
+      "grad_norm": 0.5025396943092346,
+      "learning_rate": 2.061073738537635e-05,
+      "loss": 1.3403,
+      "step": 149
+    },
+    {
+      "epoch": 0.5145797598627787,
+      "grad_norm": 0.6223259568214417,
+      "learning_rate": 1.9868268181037185e-05,
+      "loss": 1.3662,
+      "step": 150
+    },
+    {
+      "epoch": 0.5145797598627787,
+      "eval_loss": 1.2121704816818237,
+      "eval_runtime": 13.7988,
+      "eval_samples_per_second": 35.583,
+      "eval_steps_per_second": 17.828,
+      "step": 150
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.080131307700224e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null