sara-nabhani's picture
Training in progress, step 1400
f2fcb6e
{
"best_metric": 0.40153906255014676,
"best_model_checkpoint": "/home2/s5432073/language-tech-project/results/ltp-roberta-large-defaultltp-roberta-large-default-0/checkpoint-1200",
"epoch": 8.284023668639053,
"global_step": 1400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.18,
"learning_rate": 9.28348909657321e-06,
"loss": 0.4442,
"step": 200
},
{
"epoch": 1.18,
"eval_f1": 0.14004634931160595,
"eval_f1_all": [
0.11111111111111112,
0.19823008849557522,
0.0,
0.0,
0.5077262693156732,
0.0,
0.015037593984962407,
0.0,
0.7197452229299364,
0.6306954436450838,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.5973799126637556,
0.015625,
0.0,
0.005376344086021506
],
"eval_loss": 0.3573361933231354,
"eval_runtime": 3.7892,
"eval_samples_per_second": 500.363,
"eval_steps_per_second": 15.834,
"step": 200
},
{
"epoch": 2.37,
"learning_rate": 8.037383177570094e-06,
"loss": 0.3303,
"step": 400
},
{
"epoch": 2.37,
"eval_f1": 0.26419617591690636,
"eval_f1_all": [
0.43804034582132567,
0.31333333333333335,
0.0,
0.0,
0.616,
0.03592814371257485,
0.3128491620111732,
0.0,
0.7331058020477816,
0.5398601398601399,
0.16580310880829016,
0.391025641025641,
0.0,
0.0,
0.13119533527696794,
0.0,
0.6373056994818653,
0.6169154228855722,
0.02643171806167401,
0.32612966601178783
],
"eval_loss": 0.32742321491241455,
"eval_runtime": 3.2193,
"eval_samples_per_second": 588.954,
"eval_steps_per_second": 18.638,
"step": 400
},
{
"epoch": 3.55,
"learning_rate": 6.791277258566978e-06,
"loss": 0.2955,
"step": 600
},
{
"epoch": 3.55,
"eval_f1": 0.33856925982901875,
"eval_f1_all": [
0.5025125628140703,
0.5899632802937577,
0.0,
0.07407407407407407,
0.6084275436793423,
0.0588235294117647,
0.3567567567567568,
0.0,
0.7440944881889764,
0.6120481927710844,
0.3916666666666666,
0.49431818181818177,
0.0,
0.015625,
0.5071868583162218,
0.06382978723404255,
0.6857597454256166,
0.6403940886699507,
0.10526315789473682,
0.3206412825651303
],
"eval_loss": 0.31045234203338623,
"eval_runtime": 3.1516,
"eval_samples_per_second": 601.599,
"eval_steps_per_second": 19.038,
"step": 600
},
{
"epoch": 4.73,
"learning_rate": 5.545171339563863e-06,
"loss": 0.275,
"step": 800
},
{
"epoch": 4.73,
"eval_f1": 0.35946421270983664,
"eval_f1_all": [
0.5432692307692307,
0.5728770595690748,
0.055944055944055944,
0.125,
0.6434782608695652,
0.11891891891891894,
0.33513513513513515,
0.0,
0.7636594663278272,
0.638655462184874,
0.41071428571428564,
0.5251396648044693,
0.0,
0.015503875968992248,
0.5346733668341709,
0.07017543859649122,
0.657762938230384,
0.6666666666666667,
0.11244979919678717,
0.39926062846580407
],
"eval_loss": 0.3056282103061676,
"eval_runtime": 3.1101,
"eval_samples_per_second": 609.632,
"eval_steps_per_second": 19.292,
"step": 800
},
{
"epoch": 5.92,
"learning_rate": 4.299065420560748e-06,
"loss": 0.2582,
"step": 1000
},
{
"epoch": 5.92,
"eval_f1": 0.3779236416443889,
"eval_f1_all": [
0.5119617224880383,
0.5549872122762148,
0.09523809523809525,
0.24793388429752064,
0.6457739791073124,
0.10112359550561797,
0.31351351351351353,
0.0,
0.7588785046728972,
0.6312056737588652,
0.44999999999999996,
0.5300546448087431,
0.0,
0.015384615384615385,
0.5868995633187772,
0.17857142857142858,
0.6494401378122309,
0.7022222222222223,
0.12851405622489961,
0.45676998368678634
],
"eval_loss": 0.30811235308647156,
"eval_runtime": 3.5499,
"eval_samples_per_second": 534.102,
"eval_steps_per_second": 16.902,
"step": 1000
},
{
"epoch": 7.1,
"learning_rate": 3.0529595015576325e-06,
"loss": 0.2446,
"step": 1200
},
{
"epoch": 7.1,
"eval_f1": 0.40153906255014676,
"eval_f1_all": [
0.5172413793103449,
0.5871121718377088,
0.12,
0.3759398496240602,
0.630952380952381,
0.17258883248730966,
0.3768115942028986,
0.0,
0.7563683866753755,
0.6462513199577613,
0.4758064516129032,
0.5527369826435248,
0.0,
0.015037593984962405,
0.5758683729433272,
0.21203438395415472,
0.6746794871794872,
0.7136563876651982,
0.17777777777777778,
0.44991789819376027
],
"eval_loss": 0.30632713437080383,
"eval_runtime": 3.2144,
"eval_samples_per_second": 589.85,
"eval_steps_per_second": 18.666,
"step": 1200
},
{
"epoch": 8.28,
"learning_rate": 1.8068535825545173e-06,
"loss": 0.2344,
"step": 1400
},
{
"epoch": 8.28,
"eval_f1": 0.39818355623683893,
"eval_f1_all": [
0.5059665871121718,
0.6030267753201397,
0.13071895424836602,
0.3609022556390977,
0.6425073457394711,
0.17616580310880828,
0.3827751196172249,
0.0,
0.7609942638623327,
0.6363636363636364,
0.4672131147540983,
0.5517241379310345,
0.0,
0.015151515151515152,
0.5907099035933392,
0.14012738853503187,
0.7006960556844547,
0.7053571428571428,
0.1484375,
0.4448336252189142
],
"eval_loss": 0.30558517575263977,
"eval_runtime": 3.0733,
"eval_samples_per_second": 616.921,
"eval_steps_per_second": 19.523,
"step": 1400
}
],
"max_steps": 1690,
"num_train_epochs": 10,
"total_flos": 7696217086325736.0,
"trial_name": null,
"trial_params": null
}