Training in progress, step 1956, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +312 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e893d538403d4ac222e2baaf746a33535ee8031c07cf1939cc3355ea15106a0
 size 2503003904

 version https://git-lfs.github.com/spec/v1
+oid sha256:5e0a562a914d2be2b4ee279fe187629b69bd04971e3acab4f1c60939e5ec5996
 size 2503003904

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dbf245997dbc83cd89bcfb5067dfa742724b5f13ff1993cd0ad6d3d60a4c987a
 size 5006244836

 version https://git-lfs.github.com/spec/v1
+oid sha256:9234c66d2cf43b48b78afca3f04cf7c13b9d3436c6cfdb169398c3ecd80cfe02
 size 5006244836

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:12f09fa1a152c2febaa1b0be3c98d7abd70a22c5965d994af5b7173cc3e6ff7f
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:48c25ffa179744c0719c7b65566206a3ffbc025b1b73bf62d6945f9035c21dfa
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c767cedc54b733779ba8a20f635d848598fd89e5cfee0706f6c63df8c1e6b2d8
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:98f465f8ef34d3200760108c9ddb9bd27e97b140bce5b4d84a91de037dadb420
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9775051124744376,
   "eval_steps": 500,
-  "global_step": 1912,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -13391,6 +13391,314 @@
       "learning_rate": 1.3860803461989146e-07,
       "loss": 0.8676,
       "step": 1912
     }
   ],
   "logging_steps": 1,
@@ -13405,12 +13713,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 7.310480002095514e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0,
   "eval_steps": 500,
+  "global_step": 1956,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.3860803461989146e-07,
       "loss": 0.8676,
       "step": 1912
+    },
+    {
+      "epoch": 0.9780163599182005,
+      "grad_norm": 2.835423469543457,
+      "learning_rate": 1.3238201455040844e-07,
+      "loss": 0.8435,
+      "step": 1913
+    },
+    {
+      "epoch": 0.9785276073619632,
+      "grad_norm": 3.2276833057403564,
+      "learning_rate": 1.2629887148061792e-07,
+      "loss": 0.9043,
+      "step": 1914
+    },
+    {
+      "epoch": 0.9790388548057259,
+      "grad_norm": 3.260972261428833,
+      "learning_rate": 1.203586228395004e-07,
+      "loss": 0.9502,
+      "step": 1915
+    },
+    {
+      "epoch": 0.9795501022494888,
+      "grad_norm": 3.3064229488372803,
+      "learning_rate": 1.1456128564660273e-07,
+      "loss": 0.9968,
+      "step": 1916
+    },
+    {
+      "epoch": 0.9800613496932515,
+      "grad_norm": 3.274178981781006,
+      "learning_rate": 1.0890687651203823e-07,
+      "loss": 0.8302,
+      "step": 1917
+    },
+    {
+      "epoch": 0.9805725971370143,
+      "grad_norm": 3.076536178588867,
+      "learning_rate": 1.0339541163639776e-07,
+      "loss": 0.9421,
+      "step": 1918
+    },
+    {
+      "epoch": 0.9810838445807771,
+      "grad_norm": 3.247903823852539,
+      "learning_rate": 9.802690681071647e-08,
+      "loss": 0.9819,
+      "step": 1919
+    },
+    {
+      "epoch": 0.9815950920245399,
+      "grad_norm": 3.3538260459899902,
+      "learning_rate": 9.280137741643491e-08,
+      "loss": 0.8744,
+      "step": 1920
+    },
+    {
+      "epoch": 0.9821063394683026,
+      "grad_norm": 3.515782356262207,
+      "learning_rate": 8.771883842536021e-08,
+      "loss": 0.9124,
+      "step": 1921
+    },
+    {
+      "epoch": 0.9826175869120655,
+      "grad_norm": 3.6226806640625,
+      "learning_rate": 8.277930439959946e-08,
+      "loss": 0.9011,
+      "step": 1922
+    },
+    {
+      "epoch": 0.9831288343558282,
+      "grad_norm": 3.3394203186035156,
+      "learning_rate": 7.798278949154303e-08,
+      "loss": 0.8316,
+      "step": 1923
+    },
+    {
+      "epoch": 0.983640081799591,
+      "grad_norm": 3.246371030807495,
+      "learning_rate": 7.332930744380906e-08,
+      "loss": 0.8556,
+      "step": 1924
+    },
+    {
+      "epoch": 0.9841513292433538,
+      "grad_norm": 3.402927875518799,
+      "learning_rate": 6.881887158920464e-08,
+      "loss": 0.7978,
+      "step": 1925
+    },
+    {
+      "epoch": 0.9846625766871165,
+      "grad_norm": 3.8112809658050537,
+      "learning_rate": 6.445149485070357e-08,
+      "loss": 0.9133,
+      "step": 1926
+    },
+    {
+      "epoch": 0.9851738241308794,
+      "grad_norm": 3.5460119247436523,
+      "learning_rate": 6.022718974137975e-08,
+      "loss": 0.8158,
+      "step": 1927
+    },
+    {
+      "epoch": 0.9856850715746421,
+      "grad_norm": 3.341395854949951,
+      "learning_rate": 5.614596836440722e-08,
+      "loss": 0.8246,
+      "step": 1928
+    },
+    {
+      "epoch": 0.9861963190184049,
+      "grad_norm": 3.6873090267181396,
+      "learning_rate": 5.2207842412999034e-08,
+      "loss": 0.8714,
+      "step": 1929
+    },
+    {
+      "epoch": 0.9867075664621677,
+      "grad_norm": 3.4815688133239746,
+      "learning_rate": 4.841282317037399e-08,
+      "loss": 0.8948,
+      "step": 1930
+    },
+    {
+      "epoch": 0.9872188139059305,
+      "grad_norm": 3.5316038131713867,
+      "learning_rate": 4.476092150975109e-08,
+      "loss": 0.8622,
+      "step": 1931
+    },
+    {
+      "epoch": 0.9877300613496932,
+      "grad_norm": 3.5975794792175293,
+      "learning_rate": 4.1252147894277336e-08,
+      "loss": 0.881,
+      "step": 1932
+    },
+    {
+      "epoch": 0.9882413087934561,
+      "grad_norm": 3.441171646118164,
+      "learning_rate": 3.7886512377033334e-08,
+      "loss": 0.8396,
+      "step": 1933
+    },
+    {
+      "epoch": 0.9887525562372188,
+      "grad_norm": 3.8511383533477783,
+      "learning_rate": 3.4664024600988835e-08,
+      "loss": 0.9208,
+      "step": 1934
+    },
+    {
+      "epoch": 0.9892638036809815,
+      "grad_norm": 3.8687822818756104,
+      "learning_rate": 3.158469379898055e-08,
+      "loss": 0.9135,
+      "step": 1935
+    },
+    {
+      "epoch": 0.9897750511247444,
+      "grad_norm": 3.593276023864746,
+      "learning_rate": 2.8648528793673302e-08,
+      "loss": 0.8474,
+      "step": 1936
+    },
+    {
+      "epoch": 0.9902862985685071,
+      "grad_norm": 4.0986127853393555,
+      "learning_rate": 2.5855537997548917e-08,
+      "loss": 0.8883,
+      "step": 1937
+    },
+    {
+      "epoch": 0.99079754601227,
+      "grad_norm": 4.03285551071167,
+      "learning_rate": 2.3205729412884016e-08,
+      "loss": 0.7779,
+      "step": 1938
+    },
+    {
+      "epoch": 0.9913087934560327,
+      "grad_norm": 4.346153736114502,
+      "learning_rate": 2.0699110631711148e-08,
+      "loss": 0.8757,
+      "step": 1939
+    },
+    {
+      "epoch": 0.9918200408997955,
+      "grad_norm": 4.283609390258789,
+      "learning_rate": 1.8335688835802167e-08,
+      "loss": 0.8173,
+      "step": 1940
+    },
+    {
+      "epoch": 0.9923312883435583,
+      "grad_norm": 4.301876068115234,
+      "learning_rate": 1.6115470796662647e-08,
+      "loss": 0.9134,
+      "step": 1941
+    },
+    {
+      "epoch": 0.9928425357873211,
+      "grad_norm": 4.885223865509033,
+      "learning_rate": 1.4038462875504143e-08,
+      "loss": 0.8289,
+      "step": 1942
+    },
+    {
+      "epoch": 0.9933537832310838,
+      "grad_norm": 4.63042688369751,
+      "learning_rate": 1.2104671023199787e-08,
+      "loss": 0.8625,
+      "step": 1943
+    },
+    {
+      "epoch": 0.9938650306748467,
+      "grad_norm": 4.702084064483643,
+      "learning_rate": 1.0314100780317581e-08,
+      "loss": 0.9342,
+      "step": 1944
+    },
+    {
+      "epoch": 0.9943762781186094,
+      "grad_norm": 4.4585771560668945,
+      "learning_rate": 8.666757277064897e-09,
+      "loss": 0.6828,
+      "step": 1945
+    },
+    {
+      "epoch": 0.9948875255623721,
+      "grad_norm": 4.869369029998779,
+      "learning_rate": 7.162645233282916e-09,
+      "loss": 0.8505,
+      "step": 1946
+    },
+    {
+      "epoch": 0.995398773006135,
+      "grad_norm": 4.623004913330078,
+      "learning_rate": 5.8017689584521915e-09,
+      "loss": 0.6772,
+      "step": 1947
+    },
+    {
+      "epoch": 0.9959100204498977,
+      "grad_norm": 5.718740940093994,
+      "learning_rate": 4.584132351642678e-09,
+      "loss": 0.8251,
+      "step": 1948
+    },
+    {
+      "epoch": 0.9964212678936605,
+      "grad_norm": 5.196649551391602,
+      "learning_rate": 3.509738901547044e-09,
+      "loss": 0.6039,
+      "step": 1949
+    },
+    {
+      "epoch": 0.9969325153374233,
+      "grad_norm": 6.253082752227783,
+      "learning_rate": 2.5785916864307092e-09,
+      "loss": 0.5829,
+      "step": 1950
+    },
+    {
+      "epoch": 0.9974437627811861,
+      "grad_norm": 2.393533229827881,
+      "learning_rate": 1.7906933741484999e-09,
+      "loss": 0.869,
+      "step": 1951
+    },
+    {
+      "epoch": 0.9979550102249489,
+      "grad_norm": 2.994673728942871,
+      "learning_rate": 1.1460462221279944e-09,
+      "loss": 0.9103,
+      "step": 1952
+    },
+    {
+      "epoch": 0.9984662576687117,
+      "grad_norm": 3.5846893787384033,
+      "learning_rate": 6.446520773695231e-10,
+      "loss": 0.8366,
+      "step": 1953
+    },
+    {
+      "epoch": 0.9989775051124744,
+      "grad_norm": 3.5298497676849365,
+      "learning_rate": 2.8651237642396414e-10,
+      "loss": 0.7694,
+      "step": 1954
+    },
+    {
+      "epoch": 0.9994887525562373,
+      "grad_norm": 3.686530828475952,
+      "learning_rate": 7.162814541494811e-11,
+      "loss": 0.67,
+      "step": 1955
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 5.693774223327637,
+      "learning_rate": 0.0,
+      "loss": 0.7087,
+      "step": 1956
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 7.478583213010452e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null