minimario's picture
add checkpoint-200
1d223b8
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.12367030270238935,
"global_step": 800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 9.999226963512678e-06,
"loss": 1.1361,
"step": 5
},
{
"epoch": 0.0,
"learning_rate": 9.998453927025357e-06,
"loss": 0.7353,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 9.997680890538034e-06,
"loss": 0.6909,
"step": 15
},
{
"epoch": 0.0,
"learning_rate": 9.996907854050712e-06,
"loss": 0.6498,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 9.99613481756339e-06,
"loss": 0.6414,
"step": 25
},
{
"epoch": 0.0,
"learning_rate": 9.995361781076068e-06,
"loss": 0.6415,
"step": 30
},
{
"epoch": 0.01,
"learning_rate": 9.994588744588745e-06,
"loss": 0.6317,
"step": 35
},
{
"epoch": 0.01,
"learning_rate": 9.993815708101423e-06,
"loss": 0.6378,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 9.9930426716141e-06,
"loss": 0.6347,
"step": 45
},
{
"epoch": 0.01,
"learning_rate": 9.992269635126779e-06,
"loss": 0.5924,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 9.991496598639456e-06,
"loss": 0.6046,
"step": 55
},
{
"epoch": 0.01,
"learning_rate": 9.990723562152135e-06,
"loss": 0.6045,
"step": 60
},
{
"epoch": 0.01,
"learning_rate": 9.989950525664813e-06,
"loss": 0.6,
"step": 65
},
{
"epoch": 0.01,
"learning_rate": 9.98917748917749e-06,
"loss": 0.5504,
"step": 70
},
{
"epoch": 0.01,
"learning_rate": 9.988404452690169e-06,
"loss": 0.5747,
"step": 75
},
{
"epoch": 0.01,
"learning_rate": 9.987631416202846e-06,
"loss": 0.5526,
"step": 80
},
{
"epoch": 0.01,
"learning_rate": 9.986858379715523e-06,
"loss": 0.5958,
"step": 85
},
{
"epoch": 0.01,
"learning_rate": 9.9860853432282e-06,
"loss": 0.608,
"step": 90
},
{
"epoch": 0.01,
"learning_rate": 9.985312306740878e-06,
"loss": 0.5988,
"step": 95
},
{
"epoch": 0.02,
"learning_rate": 9.984539270253557e-06,
"loss": 0.5861,
"step": 100
},
{
"epoch": 0.02,
"learning_rate": 9.983766233766234e-06,
"loss": 0.5749,
"step": 105
},
{
"epoch": 0.02,
"learning_rate": 9.982993197278913e-06,
"loss": 0.5498,
"step": 110
},
{
"epoch": 0.02,
"learning_rate": 9.98222016079159e-06,
"loss": 0.5841,
"step": 115
},
{
"epoch": 0.02,
"learning_rate": 9.981447124304268e-06,
"loss": 0.5973,
"step": 120
},
{
"epoch": 0.02,
"learning_rate": 9.980674087816947e-06,
"loss": 0.5954,
"step": 125
},
{
"epoch": 0.02,
"learning_rate": 9.979901051329624e-06,
"loss": 0.527,
"step": 130
},
{
"epoch": 0.02,
"learning_rate": 9.979128014842301e-06,
"loss": 0.5321,
"step": 135
},
{
"epoch": 0.02,
"learning_rate": 9.978354978354979e-06,
"loss": 0.5781,
"step": 140
},
{
"epoch": 0.02,
"learning_rate": 9.977581941867656e-06,
"loss": 0.5119,
"step": 145
},
{
"epoch": 0.02,
"learning_rate": 9.976808905380335e-06,
"loss": 0.5271,
"step": 150
},
{
"epoch": 0.02,
"learning_rate": 9.976035868893012e-06,
"loss": 0.5814,
"step": 155
},
{
"epoch": 0.02,
"learning_rate": 9.97526283240569e-06,
"loss": 0.518,
"step": 160
},
{
"epoch": 0.03,
"learning_rate": 9.974489795918369e-06,
"loss": 0.5335,
"step": 165
},
{
"epoch": 0.03,
"learning_rate": 9.973716759431046e-06,
"loss": 0.5062,
"step": 170
},
{
"epoch": 0.03,
"learning_rate": 9.972943722943725e-06,
"loss": 0.5253,
"step": 175
},
{
"epoch": 0.03,
"learning_rate": 9.972170686456402e-06,
"loss": 0.5856,
"step": 180
},
{
"epoch": 0.03,
"learning_rate": 9.97139764996908e-06,
"loss": 0.5196,
"step": 185
},
{
"epoch": 0.03,
"learning_rate": 9.970624613481757e-06,
"loss": 0.4764,
"step": 190
},
{
"epoch": 0.03,
"learning_rate": 9.969851576994434e-06,
"loss": 0.5254,
"step": 195
},
{
"epoch": 0.03,
"learning_rate": 9.969078540507111e-06,
"loss": 0.5442,
"step": 200
},
{
"epoch": 0.03,
"eval_accuracy": 0.5804400673190799,
"eval_accuracy_sklearn": 0.5804400673190799,
"eval_f1": 0.5294915349019279,
"eval_loss": 0.7918509840965271,
"eval_precision": 0.6370946036872561,
"eval_recall": 0.45298409281186464,
"eval_runtime": 4914.2737,
"eval_samples_per_second": 16.323,
"eval_steps_per_second": 2.04,
"step": 200
},
{
"epoch": 0.03,
"learning_rate": 9.96830550401979e-06,
"loss": 0.5163,
"step": 205
},
{
"epoch": 0.03,
"learning_rate": 9.967532467532468e-06,
"loss": 0.5044,
"step": 210
},
{
"epoch": 0.03,
"learning_rate": 9.966759431045147e-06,
"loss": 0.5078,
"step": 215
},
{
"epoch": 0.03,
"learning_rate": 9.965986394557824e-06,
"loss": 0.4623,
"step": 220
},
{
"epoch": 0.03,
"learning_rate": 9.965213358070501e-06,
"loss": 0.5359,
"step": 225
},
{
"epoch": 0.04,
"learning_rate": 9.96444032158318e-06,
"loss": 0.5068,
"step": 230
},
{
"epoch": 0.04,
"learning_rate": 9.963667285095858e-06,
"loss": 0.5029,
"step": 235
},
{
"epoch": 0.04,
"learning_rate": 9.962894248608535e-06,
"loss": 0.5084,
"step": 240
},
{
"epoch": 0.04,
"learning_rate": 9.962121212121212e-06,
"loss": 0.4783,
"step": 245
},
{
"epoch": 0.04,
"learning_rate": 9.96134817563389e-06,
"loss": 0.5216,
"step": 250
},
{
"epoch": 0.04,
"learning_rate": 9.960575139146569e-06,
"loss": 0.54,
"step": 255
},
{
"epoch": 0.04,
"learning_rate": 9.959802102659246e-06,
"loss": 0.5494,
"step": 260
},
{
"epoch": 0.04,
"learning_rate": 9.959029066171925e-06,
"loss": 0.5401,
"step": 265
},
{
"epoch": 0.04,
"learning_rate": 9.958256029684602e-06,
"loss": 0.5073,
"step": 270
},
{
"epoch": 0.04,
"learning_rate": 9.95748299319728e-06,
"loss": 0.4598,
"step": 275
},
{
"epoch": 0.04,
"learning_rate": 9.956709956709958e-06,
"loss": 0.4913,
"step": 280
},
{
"epoch": 0.04,
"learning_rate": 9.955936920222636e-06,
"loss": 0.4947,
"step": 285
},
{
"epoch": 0.04,
"learning_rate": 9.955163883735313e-06,
"loss": 0.4806,
"step": 290
},
{
"epoch": 0.05,
"learning_rate": 9.95439084724799e-06,
"loss": 0.4659,
"step": 295
},
{
"epoch": 0.05,
"learning_rate": 9.953617810760668e-06,
"loss": 0.4555,
"step": 300
},
{
"epoch": 0.05,
"learning_rate": 9.952844774273347e-06,
"loss": 0.4606,
"step": 305
},
{
"epoch": 0.05,
"learning_rate": 9.952071737786024e-06,
"loss": 0.4905,
"step": 310
},
{
"epoch": 0.05,
"learning_rate": 9.951298701298701e-06,
"loss": 0.4423,
"step": 315
},
{
"epoch": 0.05,
"learning_rate": 9.95052566481138e-06,
"loss": 0.4855,
"step": 320
},
{
"epoch": 0.05,
"learning_rate": 9.949752628324058e-06,
"loss": 0.486,
"step": 325
},
{
"epoch": 0.05,
"learning_rate": 9.948979591836737e-06,
"loss": 0.4774,
"step": 330
},
{
"epoch": 0.05,
"learning_rate": 9.948206555349414e-06,
"loss": 0.4909,
"step": 335
},
{
"epoch": 0.05,
"learning_rate": 9.947433518862091e-06,
"loss": 0.47,
"step": 340
},
{
"epoch": 0.05,
"learning_rate": 9.946660482374768e-06,
"loss": 0.4496,
"step": 345
},
{
"epoch": 0.05,
"learning_rate": 9.945887445887446e-06,
"loss": 0.5146,
"step": 350
},
{
"epoch": 0.05,
"learning_rate": 9.945114409400125e-06,
"loss": 0.4876,
"step": 355
},
{
"epoch": 0.06,
"learning_rate": 9.944341372912802e-06,
"loss": 0.4747,
"step": 360
},
{
"epoch": 0.06,
"learning_rate": 9.94356833642548e-06,
"loss": 0.4614,
"step": 365
},
{
"epoch": 0.06,
"learning_rate": 9.942795299938158e-06,
"loss": 0.4755,
"step": 370
},
{
"epoch": 0.06,
"learning_rate": 9.942022263450836e-06,
"loss": 0.4785,
"step": 375
},
{
"epoch": 0.06,
"learning_rate": 9.941249226963513e-06,
"loss": 0.4581,
"step": 380
},
{
"epoch": 0.06,
"learning_rate": 9.940476190476192e-06,
"loss": 0.4671,
"step": 385
},
{
"epoch": 0.06,
"learning_rate": 9.93970315398887e-06,
"loss": 0.4327,
"step": 390
},
{
"epoch": 0.06,
"learning_rate": 9.938930117501547e-06,
"loss": 0.4906,
"step": 395
},
{
"epoch": 0.06,
"learning_rate": 9.938157081014226e-06,
"loss": 0.5006,
"step": 400
},
{
"epoch": 0.06,
"eval_accuracy": 0.5519790562862308,
"eval_accuracy_sklearn": 0.5519790562862308,
"eval_f1": 0.39240549130993435,
"eval_loss": 0.969095766544342,
"eval_precision": 0.6691460531626593,
"eval_recall": 0.27759837340031096,
"eval_runtime": 4903.2377,
"eval_samples_per_second": 16.36,
"eval_steps_per_second": 2.045,
"step": 400
},
{
"epoch": 0.06,
"learning_rate": 9.937384044526903e-06,
"loss": 0.4531,
"step": 405
},
{
"epoch": 0.06,
"learning_rate": 9.93661100803958e-06,
"loss": 0.4467,
"step": 410
},
{
"epoch": 0.06,
"learning_rate": 9.935837971552257e-06,
"loss": 0.4753,
"step": 415
},
{
"epoch": 0.06,
"learning_rate": 9.935064935064936e-06,
"loss": 0.428,
"step": 420
},
{
"epoch": 0.07,
"learning_rate": 9.934291898577614e-06,
"loss": 0.4418,
"step": 425
},
{
"epoch": 0.07,
"learning_rate": 9.933518862090291e-06,
"loss": 0.5087,
"step": 430
},
{
"epoch": 0.07,
"learning_rate": 9.93274582560297e-06,
"loss": 0.4775,
"step": 435
},
{
"epoch": 0.07,
"learning_rate": 9.931972789115647e-06,
"loss": 0.4923,
"step": 440
},
{
"epoch": 0.07,
"learning_rate": 9.931199752628325e-06,
"loss": 0.4721,
"step": 445
},
{
"epoch": 0.07,
"learning_rate": 9.930426716141004e-06,
"loss": 0.4718,
"step": 450
},
{
"epoch": 0.07,
"learning_rate": 9.929653679653681e-06,
"loss": 0.4392,
"step": 455
},
{
"epoch": 0.07,
"learning_rate": 9.928880643166358e-06,
"loss": 0.4315,
"step": 460
},
{
"epoch": 0.07,
"learning_rate": 9.928107606679036e-06,
"loss": 0.4641,
"step": 465
},
{
"epoch": 0.07,
"learning_rate": 9.927334570191713e-06,
"loss": 0.4417,
"step": 470
},
{
"epoch": 0.07,
"learning_rate": 9.926561533704392e-06,
"loss": 0.454,
"step": 475
},
{
"epoch": 0.07,
"learning_rate": 9.925788497217069e-06,
"loss": 0.4414,
"step": 480
},
{
"epoch": 0.07,
"learning_rate": 9.925015460729748e-06,
"loss": 0.4802,
"step": 485
},
{
"epoch": 0.08,
"learning_rate": 9.924242424242425e-06,
"loss": 0.4262,
"step": 490
},
{
"epoch": 0.08,
"learning_rate": 9.923469387755103e-06,
"loss": 0.4543,
"step": 495
},
{
"epoch": 0.08,
"learning_rate": 9.922696351267782e-06,
"loss": 0.407,
"step": 500
},
{
"epoch": 0.08,
"learning_rate": 9.921923314780459e-06,
"loss": 0.4328,
"step": 505
},
{
"epoch": 0.08,
"learning_rate": 9.921150278293136e-06,
"loss": 0.4589,
"step": 510
},
{
"epoch": 0.08,
"learning_rate": 9.920377241805814e-06,
"loss": 0.4588,
"step": 515
},
{
"epoch": 0.08,
"learning_rate": 9.919604205318491e-06,
"loss": 0.4273,
"step": 520
},
{
"epoch": 0.08,
"learning_rate": 9.91883116883117e-06,
"loss": 0.4688,
"step": 525
},
{
"epoch": 0.08,
"learning_rate": 9.918058132343847e-06,
"loss": 0.4324,
"step": 530
},
{
"epoch": 0.08,
"learning_rate": 9.917285095856525e-06,
"loss": 0.4846,
"step": 535
},
{
"epoch": 0.08,
"learning_rate": 9.916512059369204e-06,
"loss": 0.4366,
"step": 540
},
{
"epoch": 0.08,
"learning_rate": 9.91573902288188e-06,
"loss": 0.442,
"step": 545
},
{
"epoch": 0.09,
"learning_rate": 9.91496598639456e-06,
"loss": 0.4241,
"step": 550
},
{
"epoch": 0.09,
"learning_rate": 9.914192949907237e-06,
"loss": 0.4574,
"step": 555
},
{
"epoch": 0.09,
"learning_rate": 9.913419913419914e-06,
"loss": 0.4173,
"step": 560
},
{
"epoch": 0.09,
"learning_rate": 9.912646876932592e-06,
"loss": 0.4202,
"step": 565
},
{
"epoch": 0.09,
"learning_rate": 9.911873840445269e-06,
"loss": 0.4694,
"step": 570
},
{
"epoch": 0.09,
"learning_rate": 9.911100803957948e-06,
"loss": 0.4595,
"step": 575
},
{
"epoch": 0.09,
"learning_rate": 9.910327767470625e-06,
"loss": 0.464,
"step": 580
},
{
"epoch": 0.09,
"learning_rate": 9.909554730983303e-06,
"loss": 0.4688,
"step": 585
},
{
"epoch": 0.09,
"learning_rate": 9.908781694495982e-06,
"loss": 0.4932,
"step": 590
},
{
"epoch": 0.09,
"learning_rate": 9.908008658008659e-06,
"loss": 0.4648,
"step": 595
},
{
"epoch": 0.09,
"learning_rate": 9.907235621521336e-06,
"loss": 0.5136,
"step": 600
},
{
"epoch": 0.09,
"eval_accuracy": 0.595349996883376,
"eval_accuracy_sklearn": 0.595349996883376,
"eval_f1": 0.6681389238209163,
"eval_loss": 0.8079590201377869,
"eval_precision": 0.5834404685379616,
"eval_recall": 0.7816050711637363,
"eval_runtime": 4915.3389,
"eval_samples_per_second": 16.319,
"eval_steps_per_second": 2.04,
"step": 600
},
{
"epoch": 0.09,
"learning_rate": 9.906462585034015e-06,
"loss": 0.5246,
"step": 605
},
{
"epoch": 0.09,
"learning_rate": 9.905689548546693e-06,
"loss": 0.4751,
"step": 610
},
{
"epoch": 0.1,
"learning_rate": 9.90491651205937e-06,
"loss": 0.4224,
"step": 615
},
{
"epoch": 0.1,
"learning_rate": 9.904143475572047e-06,
"loss": 0.3842,
"step": 620
},
{
"epoch": 0.1,
"learning_rate": 9.903370439084724e-06,
"loss": 0.4524,
"step": 625
},
{
"epoch": 0.1,
"learning_rate": 9.902597402597403e-06,
"loss": 0.4657,
"step": 630
},
{
"epoch": 0.1,
"learning_rate": 9.90182436611008e-06,
"loss": 0.4596,
"step": 635
},
{
"epoch": 0.1,
"learning_rate": 9.90105132962276e-06,
"loss": 0.4383,
"step": 640
},
{
"epoch": 0.1,
"learning_rate": 9.900278293135437e-06,
"loss": 0.4351,
"step": 645
},
{
"epoch": 0.1,
"learning_rate": 9.899505256648114e-06,
"loss": 0.4263,
"step": 650
},
{
"epoch": 0.1,
"learning_rate": 9.898732220160793e-06,
"loss": 0.448,
"step": 655
},
{
"epoch": 0.1,
"learning_rate": 9.89795918367347e-06,
"loss": 0.4372,
"step": 660
},
{
"epoch": 0.1,
"learning_rate": 9.897186147186148e-06,
"loss": 0.4681,
"step": 665
},
{
"epoch": 0.1,
"learning_rate": 9.896413110698825e-06,
"loss": 0.4783,
"step": 670
},
{
"epoch": 0.1,
"learning_rate": 9.895640074211502e-06,
"loss": 0.4155,
"step": 675
},
{
"epoch": 0.11,
"learning_rate": 9.894867037724181e-06,
"loss": 0.4398,
"step": 680
},
{
"epoch": 0.11,
"learning_rate": 9.894094001236859e-06,
"loss": 0.4364,
"step": 685
},
{
"epoch": 0.11,
"learning_rate": 9.893320964749536e-06,
"loss": 0.4175,
"step": 690
},
{
"epoch": 0.11,
"learning_rate": 9.892547928262215e-06,
"loss": 0.4763,
"step": 695
},
{
"epoch": 0.11,
"learning_rate": 9.891774891774892e-06,
"loss": 0.4629,
"step": 700
},
{
"epoch": 0.11,
"learning_rate": 9.891001855287571e-06,
"loss": 0.5083,
"step": 705
},
{
"epoch": 0.11,
"learning_rate": 9.890228818800249e-06,
"loss": 0.4867,
"step": 710
},
{
"epoch": 0.11,
"learning_rate": 9.889455782312926e-06,
"loss": 0.4855,
"step": 715
},
{
"epoch": 0.11,
"learning_rate": 9.888682745825603e-06,
"loss": 0.4219,
"step": 720
},
{
"epoch": 0.11,
"learning_rate": 9.88790970933828e-06,
"loss": 0.4676,
"step": 725
},
{
"epoch": 0.11,
"learning_rate": 9.88713667285096e-06,
"loss": 0.3963,
"step": 730
},
{
"epoch": 0.11,
"learning_rate": 9.886363636363637e-06,
"loss": 0.4347,
"step": 735
},
{
"epoch": 0.11,
"learning_rate": 9.885590599876314e-06,
"loss": 0.4721,
"step": 740
},
{
"epoch": 0.12,
"learning_rate": 9.884817563388993e-06,
"loss": 0.3992,
"step": 745
},
{
"epoch": 0.12,
"learning_rate": 9.88404452690167e-06,
"loss": 0.413,
"step": 750
},
{
"epoch": 0.12,
"learning_rate": 9.883271490414348e-06,
"loss": 0.4881,
"step": 755
},
{
"epoch": 0.12,
"learning_rate": 9.882498453927027e-06,
"loss": 0.4478,
"step": 760
},
{
"epoch": 0.12,
"learning_rate": 9.881725417439704e-06,
"loss": 0.4453,
"step": 765
},
{
"epoch": 0.12,
"learning_rate": 9.880952380952381e-06,
"loss": 0.4252,
"step": 770
},
{
"epoch": 0.12,
"learning_rate": 9.88017934446506e-06,
"loss": 0.411,
"step": 775
},
{
"epoch": 0.12,
"learning_rate": 9.879406307977738e-06,
"loss": 0.435,
"step": 780
},
{
"epoch": 0.12,
"learning_rate": 9.878633271490415e-06,
"loss": 0.3948,
"step": 785
},
{
"epoch": 0.12,
"learning_rate": 9.877860235003092e-06,
"loss": 0.4004,
"step": 790
},
{
"epoch": 0.12,
"learning_rate": 9.877087198515771e-06,
"loss": 0.4515,
"step": 795
},
{
"epoch": 0.12,
"learning_rate": 9.876314162028449e-06,
"loss": 0.4117,
"step": 800
},
{
"epoch": 0.12,
"eval_accuracy": 0.5725113756778657,
"eval_accuracy_sklearn": 0.5725113756778657,
"eval_f1": 0.5080130274465917,
"eval_loss": 0.9323258996009827,
"eval_precision": 0.6346884634688463,
"eval_recall": 0.423490013156321,
"eval_runtime": 4928.5167,
"eval_samples_per_second": 16.276,
"eval_steps_per_second": 2.034,
"step": 800
}
],
"max_steps": 64680,
"num_train_epochs": 10,
"total_flos": 9.50142335188992e+16,
"trial_name": null,
"trial_params": null
}