Training in progress, step 3000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +713 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d06292f4cfff6108e76a84ff1846a1d32e89d62f13082913264c5b615dd9e7c9
 size 50624

 version https://git-lfs.github.com/spec/v1
+oid sha256:3b0a789e71e02f4f859c128d91086b86c69bb7ae9f01c97e4f195af72afff004
 size 50624

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a6b690b46af234d211b11a61647aa50170554cf8da968c06e331c64d763222fb
 size 111142

 version https://git-lfs.github.com/spec/v1
+oid sha256:0fb78d46fffae00a13d0526987c7b047340e8e343db76d12fc0a5600e3b3b861
 size 111142

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:46638a04f2f3ae6fac02cab0b9deac56e4cbf6a64649d198c1bccbdb5254d0c1
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c53a9630e6978c05d301e5d7e5200b5d2c8eea7ef124884409824d8ad349fc02
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e65de98652405ddd50931cd2bce741ba57ff1258497a35536784d2df1ef3b1af
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:1b7a58285d5e0bbfbdededdbf4101ad8a6d7c67b1022483e5d546b1b401edbd2
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 11.642865180969238,
   "best_model_checkpoint": "miner_id_24/checkpoint-2800",
-  "epoch": 0.8363072708265744,
   "eval_steps": 100,
-  "global_step": 2900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -20547,6 +20547,714 @@
       "eval_samples_per_second": 123.96,
       "eval_steps_per_second": 30.99,
       "step": 2900
     }
   ],
   "logging_steps": 1,
@@ -20561,7 +21269,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -20570,12 +21278,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2369846309289984.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 11.642865180969238,
   "best_model_checkpoint": "miner_id_24/checkpoint-2800",
+  "epoch": 0.8651454525792149,
   "eval_steps": 100,
+  "global_step": 3000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 123.96,
       "eval_steps_per_second": 30.99,
       "step": 2900
+    },
+    {
+      "epoch": 0.8365956526441007,
+      "grad_norm": 0.18284933269023895,
+      "learning_rate": 4.48232259229554e-05,
+      "loss": 11.6659,
+      "step": 2901
+    },
+    {
+      "epoch": 0.8368840344616272,
+      "grad_norm": 0.13595181703567505,
+      "learning_rate": 4.476106562329155e-05,
+      "loss": 11.6795,
+      "step": 2902
+    },
+    {
+      "epoch": 0.8371724162791536,
+      "grad_norm": 0.1296217292547226,
+      "learning_rate": 4.46989360248704e-05,
+      "loss": 11.6749,
+      "step": 2903
+    },
+    {
+      "epoch": 0.8374607980966801,
+      "grad_norm": 0.18848179280757904,
+      "learning_rate": 4.4636837162222965e-05,
+      "loss": 11.6528,
+      "step": 2904
+    },
+    {
+      "epoch": 0.8377491799142064,
+      "grad_norm": 0.16293784976005554,
+      "learning_rate": 4.4574769069863075e-05,
+      "loss": 11.6722,
+      "step": 2905
+    },
+    {
+      "epoch": 0.8380375617317328,
+      "grad_norm": 0.18424579501152039,
+      "learning_rate": 4.451273178228764e-05,
+      "loss": 11.6811,
+      "step": 2906
+    },
+    {
+      "epoch": 0.8383259435492593,
+      "grad_norm": 0.17344294488430023,
+      "learning_rate": 4.445072533397635e-05,
+      "loss": 11.6816,
+      "step": 2907
+    },
+    {
+      "epoch": 0.8386143253667856,
+      "grad_norm": 0.13144327700138092,
+      "learning_rate": 4.4388749759391754e-05,
+      "loss": 11.6759,
+      "step": 2908
+    },
+    {
+      "epoch": 0.838902707184312,
+      "grad_norm": 0.18985804915428162,
+      "learning_rate": 4.432680509297924e-05,
+      "loss": 11.6346,
+      "step": 2909
+    },
+    {
+      "epoch": 0.8391910890018385,
+      "grad_norm": 0.16098152101039886,
+      "learning_rate": 4.426489136916704e-05,
+      "loss": 11.6683,
+      "step": 2910
+    },
+    {
+      "epoch": 0.8394794708193648,
+      "grad_norm": 0.1585586965084076,
+      "learning_rate": 4.420300862236617e-05,
+      "loss": 11.6843,
+      "step": 2911
+    },
+    {
+      "epoch": 0.8397678526368912,
+      "grad_norm": 0.25072965025901794,
+      "learning_rate": 4.4141156886970445e-05,
+      "loss": 11.6972,
+      "step": 2912
+    },
+    {
+      "epoch": 0.8400562344544177,
+      "grad_norm": 0.1607411801815033,
+      "learning_rate": 4.4079336197356436e-05,
+      "loss": 11.6893,
+      "step": 2913
+    },
+    {
+      "epoch": 0.840344616271944,
+      "grad_norm": 0.16909179091453552,
+      "learning_rate": 4.401754658788344e-05,
+      "loss": 11.683,
+      "step": 2914
+    },
+    {
+      "epoch": 0.8406329980894705,
+      "grad_norm": 0.14870120584964752,
+      "learning_rate": 4.395578809289349e-05,
+      "loss": 11.7093,
+      "step": 2915
+    },
+    {
+      "epoch": 0.8409213799069969,
+      "grad_norm": 0.2617393136024475,
+      "learning_rate": 4.389406074671143e-05,
+      "loss": 11.6255,
+      "step": 2916
+    },
+    {
+      "epoch": 0.8412097617245232,
+      "grad_norm": 0.12288763374090195,
+      "learning_rate": 4.383236458364455e-05,
+      "loss": 11.6889,
+      "step": 2917
+    },
+    {
+      "epoch": 0.8414981435420497,
+      "grad_norm": 0.17696166038513184,
+      "learning_rate": 4.377069963798309e-05,
+      "loss": 11.6541,
+      "step": 2918
+    },
+    {
+      "epoch": 0.8417865253595761,
+      "grad_norm": 0.165409654378891,
+      "learning_rate": 4.3709065943999696e-05,
+      "loss": 11.6684,
+      "step": 2919
+    },
+    {
+      "epoch": 0.8420749071771025,
+      "grad_norm": 0.11674615740776062,
+      "learning_rate": 4.364746353594985e-05,
+      "loss": 11.6983,
+      "step": 2920
+    },
+    {
+      "epoch": 0.8423632889946289,
+      "grad_norm": 0.16131404042243958,
+      "learning_rate": 4.358589244807144e-05,
+      "loss": 11.6456,
+      "step": 2921
+    },
+    {
+      "epoch": 0.8426516708121553,
+      "grad_norm": 0.15852144360542297,
+      "learning_rate": 4.352435271458516e-05,
+      "loss": 11.6479,
+      "step": 2922
+    },
+    {
+      "epoch": 0.8429400526296817,
+      "grad_norm": 0.1303740292787552,
+      "learning_rate": 4.3462844369694124e-05,
+      "loss": 11.6934,
+      "step": 2923
+    },
+    {
+      "epoch": 0.8432284344472081,
+      "grad_norm": 0.08299141377210617,
+      "learning_rate": 4.3401367447584065e-05,
+      "loss": 11.7121,
+      "step": 2924
+    },
+    {
+      "epoch": 0.8435168162647345,
+      "grad_norm": 0.13092194497585297,
+      "learning_rate": 4.3339921982423214e-05,
+      "loss": 11.6822,
+      "step": 2925
+    },
+    {
+      "epoch": 0.843805198082261,
+      "grad_norm": 0.17801184952259064,
+      "learning_rate": 4.327850800836236e-05,
+      "loss": 11.6495,
+      "step": 2926
+    },
+    {
+      "epoch": 0.8440935798997873,
+      "grad_norm": 0.12021403759717941,
+      "learning_rate": 4.3217125559534764e-05,
+      "loss": 11.7029,
+      "step": 2927
+    },
+    {
+      "epoch": 0.8443819617173137,
+      "grad_norm": 0.12242776900529861,
+      "learning_rate": 4.315577467005618e-05,
+      "loss": 11.6713,
+      "step": 2928
+    },
+    {
+      "epoch": 0.8446703435348402,
+      "grad_norm": 0.11881870031356812,
+      "learning_rate": 4.30944553740248e-05,
+      "loss": 11.6996,
+      "step": 2929
+    },
+    {
+      "epoch": 0.8449587253523665,
+      "grad_norm": 0.11722824722528458,
+      "learning_rate": 4.3033167705521284e-05,
+      "loss": 11.6695,
+      "step": 2930
+    },
+    {
+      "epoch": 0.8452471071698929,
+      "grad_norm": 0.16650351881980896,
+      "learning_rate": 4.297191169860865e-05,
+      "loss": 11.6721,
+      "step": 2931
+    },
+    {
+      "epoch": 0.8455354889874194,
+      "grad_norm": 0.21880216896533966,
+      "learning_rate": 4.291068738733248e-05,
+      "loss": 11.6653,
+      "step": 2932
+    },
+    {
+      "epoch": 0.8458238708049457,
+      "grad_norm": 0.16557948291301727,
+      "learning_rate": 4.28494948057205e-05,
+      "loss": 11.6768,
+      "step": 2933
+    },
+    {
+      "epoch": 0.8461122526224721,
+      "grad_norm": 0.17281831800937653,
+      "learning_rate": 4.278833398778306e-05,
+      "loss": 11.6481,
+      "step": 2934
+    },
+    {
+      "epoch": 0.8464006344399986,
+      "grad_norm": 0.232573002576828,
+      "learning_rate": 4.272720496751258e-05,
+      "loss": 11.627,
+      "step": 2935
+    },
+    {
+      "epoch": 0.846689016257525,
+      "grad_norm": 0.21352550387382507,
+      "learning_rate": 4.2666107778884065e-05,
+      "loss": 11.6415,
+      "step": 2936
+    },
+    {
+      "epoch": 0.8469773980750513,
+      "grad_norm": 0.16628918051719666,
+      "learning_rate": 4.260504245585469e-05,
+      "loss": 11.6933,
+      "step": 2937
+    },
+    {
+      "epoch": 0.8472657798925778,
+      "grad_norm": 0.1400359719991684,
+      "learning_rate": 4.254400903236394e-05,
+      "loss": 11.6807,
+      "step": 2938
+    },
+    {
+      "epoch": 0.8475541617101042,
+      "grad_norm": 0.1728946715593338,
+      "learning_rate": 4.24830075423336e-05,
+      "loss": 11.6956,
+      "step": 2939
+    },
+    {
+      "epoch": 0.8478425435276306,
+      "grad_norm": 0.17397397756576538,
+      "learning_rate": 4.242203801966759e-05,
+      "loss": 11.6632,
+      "step": 2940
+    },
+    {
+      "epoch": 0.848130925345157,
+      "grad_norm": 0.20304632186889648,
+      "learning_rate": 4.236110049825228e-05,
+      "loss": 11.6562,
+      "step": 2941
+    },
+    {
+      "epoch": 0.8484193071626834,
+      "grad_norm": 0.15627586841583252,
+      "learning_rate": 4.230019501195601e-05,
+      "loss": 11.6637,
+      "step": 2942
+    },
+    {
+      "epoch": 0.8487076889802098,
+      "grad_norm": 0.14984051883220673,
+      "learning_rate": 4.223932159462954e-05,
+      "loss": 11.6232,
+      "step": 2943
+    },
+    {
+      "epoch": 0.8489960707977362,
+      "grad_norm": 0.1769644021987915,
+      "learning_rate": 4.2178480280105645e-05,
+      "loss": 11.6534,
+      "step": 2944
+    },
+    {
+      "epoch": 0.8492844526152626,
+      "grad_norm": 0.12905630469322205,
+      "learning_rate": 4.211767110219934e-05,
+      "loss": 11.7013,
+      "step": 2945
+    },
+    {
+      "epoch": 0.849572834432789,
+      "grad_norm": 0.15312866866588593,
+      "learning_rate": 4.2056894094707734e-05,
+      "loss": 11.6644,
+      "step": 2946
+    },
+    {
+      "epoch": 0.8498612162503154,
+      "grad_norm": 0.1332063376903534,
+      "learning_rate": 4.199614929141008e-05,
+      "loss": 11.6627,
+      "step": 2947
+    },
+    {
+      "epoch": 0.8501495980678418,
+      "grad_norm": 0.18677252531051636,
+      "learning_rate": 4.1935436726067735e-05,
+      "loss": 11.6428,
+      "step": 2948
+    },
+    {
+      "epoch": 0.8504379798853682,
+      "grad_norm": 0.15126672387123108,
+      "learning_rate": 4.1874756432424123e-05,
+      "loss": 11.6711,
+      "step": 2949
+    },
+    {
+      "epoch": 0.8507263617028946,
+      "grad_norm": 0.17418427765369415,
+      "learning_rate": 4.181410844420474e-05,
+      "loss": 11.6944,
+      "step": 2950
+    },
+    {
+      "epoch": 0.8510147435204211,
+      "grad_norm": 0.19993005692958832,
+      "learning_rate": 4.175349279511712e-05,
+      "loss": 11.6651,
+      "step": 2951
+    },
+    {
+      "epoch": 0.8513031253379475,
+      "grad_norm": 0.18031910061836243,
+      "learning_rate": 4.169290951885081e-05,
+      "loss": 11.6374,
+      "step": 2952
+    },
+    {
+      "epoch": 0.8515915071554738,
+      "grad_norm": 0.1512380689382553,
+      "learning_rate": 4.1632358649077475e-05,
+      "loss": 11.6633,
+      "step": 2953
+    },
+    {
+      "epoch": 0.8518798889730003,
+      "grad_norm": 0.1253197342157364,
+      "learning_rate": 4.157184021945054e-05,
+      "loss": 11.6864,
+      "step": 2954
+    },
+    {
+      "epoch": 0.8521682707905267,
+      "grad_norm": 0.1121377944946289,
+      "learning_rate": 4.1511354263605684e-05,
+      "loss": 11.6855,
+      "step": 2955
+    },
+    {
+      "epoch": 0.852456652608053,
+      "grad_norm": 0.23518823087215424,
+      "learning_rate": 4.145090081516024e-05,
+      "loss": 11.668,
+      "step": 2956
+    },
+    {
+      "epoch": 0.8527450344255795,
+      "grad_norm": 0.15132634341716766,
+      "learning_rate": 4.139047990771378e-05,
+      "loss": 11.6768,
+      "step": 2957
+    },
+    {
+      "epoch": 0.8530334162431059,
+      "grad_norm": 0.11952768266201019,
+      "learning_rate": 4.1330091574847496e-05,
+      "loss": 11.7125,
+      "step": 2958
+    },
+    {
+      "epoch": 0.8533217980606322,
+      "grad_norm": 0.11574079841375351,
+      "learning_rate": 4.1269735850124704e-05,
+      "loss": 11.7085,
+      "step": 2959
+    },
+    {
+      "epoch": 0.8536101798781587,
+      "grad_norm": 0.1615985929965973,
+      "learning_rate": 4.1209412767090484e-05,
+      "loss": 11.6949,
+      "step": 2960
+    },
+    {
+      "epoch": 0.8538985616956851,
+      "grad_norm": 0.15752598643302917,
+      "learning_rate": 4.11491223592718e-05,
+      "loss": 11.6571,
+      "step": 2961
+    },
+    {
+      "epoch": 0.8541869435132114,
+      "grad_norm": 0.2088017612695694,
+      "learning_rate": 4.1088864660177425e-05,
+      "loss": 11.6527,
+      "step": 2962
+    },
+    {
+      "epoch": 0.8544753253307379,
+      "grad_norm": 0.12710094451904297,
+      "learning_rate": 4.1028639703298025e-05,
+      "loss": 11.6452,
+      "step": 2963
+    },
+    {
+      "epoch": 0.8547637071482643,
+      "grad_norm": 0.15736031532287598,
+      "learning_rate": 4.096844752210598e-05,
+      "loss": 11.6585,
+      "step": 2964
+    },
+    {
+      "epoch": 0.8550520889657907,
+      "grad_norm": 0.21575695276260376,
+      "learning_rate": 4.090828815005553e-05,
+      "loss": 11.6362,
+      "step": 2965
+    },
+    {
+      "epoch": 0.8553404707833171,
+      "grad_norm": 0.15497878193855286,
+      "learning_rate": 4.0848161620582625e-05,
+      "loss": 11.7088,
+      "step": 2966
+    },
+    {
+      "epoch": 0.8556288526008435,
+      "grad_norm": 0.17160211503505707,
+      "learning_rate": 4.078806796710499e-05,
+      "loss": 11.6504,
+      "step": 2967
+    },
+    {
+      "epoch": 0.85591723441837,
+      "grad_norm": 0.19605527818202972,
+      "learning_rate": 4.072800722302206e-05,
+      "loss": 11.6268,
+      "step": 2968
+    },
+    {
+      "epoch": 0.8562056162358963,
+      "grad_norm": 0.137605220079422,
+      "learning_rate": 4.0667979421715065e-05,
+      "loss": 11.666,
+      "step": 2969
+    },
+    {
+      "epoch": 0.8564939980534227,
+      "grad_norm": 0.23607651889324188,
+      "learning_rate": 4.060798459654672e-05,
+      "loss": 11.6532,
+      "step": 2970
+    },
+    {
+      "epoch": 0.8567823798709492,
+      "grad_norm": 0.16709445416927338,
+      "learning_rate": 4.054802278086168e-05,
+      "loss": 11.7033,
+      "step": 2971
+    },
+    {
+      "epoch": 0.8570707616884755,
+      "grad_norm": 0.17614519596099854,
+      "learning_rate": 4.0488094007986e-05,
+      "loss": 11.6642,
+      "step": 2972
+    },
+    {
+      "epoch": 0.857359143506002,
+      "grad_norm": 0.15115833282470703,
+      "learning_rate": 4.042819831122761e-05,
+      "loss": 11.7041,
+      "step": 2973
+    },
+    {
+      "epoch": 0.8576475253235284,
+      "grad_norm": 0.1491180956363678,
+      "learning_rate": 4.036833572387578e-05,
+      "loss": 11.6556,
+      "step": 2974
+    },
+    {
+      "epoch": 0.8579359071410547,
+      "grad_norm": 0.16634421050548553,
+      "learning_rate": 4.030850627920166e-05,
+      "loss": 11.6163,
+      "step": 2975
+    },
+    {
+      "epoch": 0.8582242889585812,
+      "grad_norm": 0.12945958971977234,
+      "learning_rate": 4.024871001045785e-05,
+      "loss": 11.6816,
+      "step": 2976
+    },
+    {
+      "epoch": 0.8585126707761076,
+      "grad_norm": 0.1281767189502716,
+      "learning_rate": 4.0188946950878404e-05,
+      "loss": 11.6574,
+      "step": 2977
+    },
+    {
+      "epoch": 0.8588010525936339,
+      "grad_norm": 0.18251970410346985,
+      "learning_rate": 4.012921713367916e-05,
+      "loss": 11.6412,
+      "step": 2978
+    },
+    {
+      "epoch": 0.8590894344111604,
+      "grad_norm": 0.10154789686203003,
+      "learning_rate": 4.006952059205722e-05,
+      "loss": 11.6922,
+      "step": 2979
+    },
+    {
+      "epoch": 0.8593778162286868,
+      "grad_norm": 0.15670567750930786,
+      "learning_rate": 4.000985735919143e-05,
+      "loss": 11.6708,
+      "step": 2980
+    },
+    {
+      "epoch": 0.8596661980462131,
+      "grad_norm": 0.13284581899642944,
+      "learning_rate": 3.995022746824195e-05,
+      "loss": 11.6794,
+      "step": 2981
+    },
+    {
+      "epoch": 0.8599545798637396,
+      "grad_norm": 0.16967470943927765,
+      "learning_rate": 3.989063095235049e-05,
+      "loss": 11.6267,
+      "step": 2982
+    },
+    {
+      "epoch": 0.860242961681266,
+      "grad_norm": 0.2072441130876541,
+      "learning_rate": 3.983106784464021e-05,
+      "loss": 11.6351,
+      "step": 2983
+    },
+    {
+      "epoch": 0.8605313434987925,
+      "grad_norm": 0.13071005046367645,
+      "learning_rate": 3.977153817821566e-05,
+      "loss": 11.6827,
+      "step": 2984
+    },
+    {
+      "epoch": 0.8608197253163188,
+      "grad_norm": 0.19142098724842072,
+      "learning_rate": 3.971204198616284e-05,
+      "loss": 11.612,
+      "step": 2985
+    },
+    {
+      "epoch": 0.8611081071338452,
+      "grad_norm": 0.19070138037204742,
+      "learning_rate": 3.965257930154912e-05,
+      "loss": 11.6318,
+      "step": 2986
+    },
+    {
+      "epoch": 0.8613964889513717,
+      "grad_norm": 0.12564696371555328,
+      "learning_rate": 3.959315015742328e-05,
+      "loss": 11.6543,
+      "step": 2987
+    },
+    {
+      "epoch": 0.861684870768898,
+      "grad_norm": 0.14110830426216125,
+      "learning_rate": 3.953375458681542e-05,
+      "loss": 11.6678,
+      "step": 2988
+    },
+    {
+      "epoch": 0.8619732525864244,
+      "grad_norm": 0.12227228283882141,
+      "learning_rate": 3.9474392622736963e-05,
+      "loss": 11.7028,
+      "step": 2989
+    },
+    {
+      "epoch": 0.8622616344039509,
+      "grad_norm": 0.14122307300567627,
+      "learning_rate": 3.941506429818083e-05,
+      "loss": 11.6789,
+      "step": 2990
+    },
+    {
+      "epoch": 0.8625500162214772,
+      "grad_norm": 0.17069974541664124,
+      "learning_rate": 3.935576964612092e-05,
+      "loss": 11.6435,
+      "step": 2991
+    },
+    {
+      "epoch": 0.8628383980390036,
+      "grad_norm": 0.1299677938222885,
+      "learning_rate": 3.929650869951278e-05,
+      "loss": 11.6771,
+      "step": 2992
+    },
+    {
+      "epoch": 0.8631267798565301,
+      "grad_norm": 0.13179805874824524,
+      "learning_rate": 3.923728149129288e-05,
+      "loss": 11.6816,
+      "step": 2993
+    },
+    {
+      "epoch": 0.8634151616740564,
+      "grad_norm": 0.11812865734100342,
+      "learning_rate": 3.9178088054379255e-05,
+      "loss": 11.6985,
+      "step": 2994
+    },
+    {
+      "epoch": 0.8637035434915828,
+      "grad_norm": 0.15860992670059204,
+      "learning_rate": 3.911892842167089e-05,
+      "loss": 11.7053,
+      "step": 2995
+    },
+    {
+      "epoch": 0.8639919253091093,
+      "grad_norm": 0.21352651715278625,
+      "learning_rate": 3.905980262604819e-05,
+      "loss": 11.648,
+      "step": 2996
+    },
+    {
+      "epoch": 0.8642803071266356,
+      "grad_norm": 0.1386694610118866,
+      "learning_rate": 3.900071070037267e-05,
+      "loss": 11.6742,
+      "step": 2997
+    },
+    {
+      "epoch": 0.8645686889441621,
+      "grad_norm": 0.14939409494400024,
+      "learning_rate": 3.894165267748702e-05,
+      "loss": 11.6969,
+      "step": 2998
+    },
+    {
+      "epoch": 0.8648570707616885,
+      "grad_norm": 0.12354502826929092,
+      "learning_rate": 3.8882628590215074e-05,
+      "loss": 11.6665,
+      "step": 2999
+    },
+    {
+      "epoch": 0.8651454525792149,
+      "grad_norm": 0.19499656558036804,
+      "learning_rate": 3.8823638471361844e-05,
+      "loss": 11.666,
+      "step": 3000
+    },
+    {
+      "epoch": 0.8651454525792149,
+      "eval_loss": 11.642943382263184,
+      "eval_runtime": 40.4397,
+      "eval_samples_per_second": 123.641,
+      "eval_steps_per_second": 30.91,
+      "step": 3000
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2451631876866048.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null