cpm.in.lstm.inclusive.seed66 / trainer_state.json
KarelDO's picture
commit files to HF hub
a3bc850
{
"best_metric": 0.44485729932785034,
"best_model_checkpoint": "model_output/e2e_opentable_5_way__19684-shot__seed-66__lstm/checkpoint-1650",
"epoch": 10.714285714285714,
"global_step": 1650,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.13,
"learning_rate": 4.9783362218370885e-05,
"loss": 19.0082,
"step": 20
},
{
"epoch": 0.26,
"learning_rate": 4.956672443674177e-05,
"loss": 18.4578,
"step": 40
},
{
"epoch": 0.32,
"eval_distillation_accuracy_counterfactual": 0.4474089276552078,
"eval_distillation_accuracy_factual": 0.87891226269882,
"eval_distillation_f1_counterfactual": 0.4340733851900615,
"eval_distillation_f1_factual": 0.8800573438030022,
"eval_groundtruth_accuracy_counterfactual": 0.36685479733196513,
"eval_groundtruth_f1_counterfactual": 0.3475149282833413,
"eval_groundtruth_f1_factual": 0.5718715108033396,
"eval_icace_cosine": 0.6489389538764954,
"eval_icace_l2": 0.7266151905059814,
"eval_icace_normdiff": 0.6456053256988525,
"eval_loss": 19.274616241455078,
"eval_runtime": 4.1804,
"eval_samples_per_second": 932.439,
"eval_steps_per_second": 7.416,
"step": 50
},
{
"epoch": 0.39,
"learning_rate": 4.935008665511265e-05,
"loss": 16.6971,
"step": 60
},
{
"epoch": 0.52,
"learning_rate": 4.913344887348354e-05,
"loss": 16.0217,
"step": 80
},
{
"epoch": 0.65,
"learning_rate": 4.891681109185442e-05,
"loss": 14.7054,
"step": 100
},
{
"epoch": 0.65,
"eval_distillation_accuracy_counterfactual": 0.4663930220625962,
"eval_distillation_accuracy_factual": 0.8155464340687532,
"eval_distillation_f1_counterfactual": 0.4524610795367727,
"eval_distillation_f1_factual": 0.8181025319127461,
"eval_groundtruth_accuracy_counterfactual": 0.387891226269882,
"eval_groundtruth_f1_counterfactual": 0.3650946763135223,
"eval_groundtruth_f1_factual": 0.5588856066889839,
"eval_icace_cosine": 0.5872419476509094,
"eval_icace_l2": 0.6969975233078003,
"eval_icace_normdiff": 0.5919448733329773,
"eval_loss": 15.743273735046387,
"eval_runtime": 4.4635,
"eval_samples_per_second": 873.3,
"eval_steps_per_second": 6.945,
"step": 100
},
{
"epoch": 0.78,
"learning_rate": 4.8700173310225307e-05,
"loss": 13.7184,
"step": 120
},
{
"epoch": 0.91,
"learning_rate": 4.848353552859619e-05,
"loss": 12.0381,
"step": 140
},
{
"epoch": 0.97,
"eval_distillation_accuracy_counterfactual": 0.49640841457157514,
"eval_distillation_accuracy_factual": 0.7883530015392509,
"eval_distillation_f1_counterfactual": 0.4823375562217772,
"eval_distillation_f1_factual": 0.7879840021980078,
"eval_groundtruth_accuracy_counterfactual": 0.4089276552077989,
"eval_groundtruth_f1_counterfactual": 0.38657714790711345,
"eval_groundtruth_f1_factual": 0.5442379284746408,
"eval_icace_cosine": 0.5305299758911133,
"eval_icace_l2": 0.6551415920257568,
"eval_icace_normdiff": 0.5328993201255798,
"eval_loss": 12.449591636657715,
"eval_runtime": 4.0979,
"eval_samples_per_second": 951.214,
"eval_steps_per_second": 7.565,
"step": 150
},
{
"epoch": 1.04,
"learning_rate": 4.826689774696707e-05,
"loss": 10.7403,
"step": 160
},
{
"epoch": 1.17,
"learning_rate": 4.8050259965337955e-05,
"loss": 10.1961,
"step": 180
},
{
"epoch": 1.3,
"learning_rate": 4.7833622183708845e-05,
"loss": 9.0642,
"step": 200
},
{
"epoch": 1.3,
"eval_distillation_accuracy_counterfactual": 0.5608004104669061,
"eval_distillation_accuracy_factual": 0.7837352488455618,
"eval_distillation_f1_counterfactual": 0.5400836383519623,
"eval_distillation_f1_factual": 0.7725193659488229,
"eval_groundtruth_accuracy_counterfactual": 0.4789635710620831,
"eval_groundtruth_f1_counterfactual": 0.44404241289909907,
"eval_groundtruth_f1_factual": 0.5322559226557246,
"eval_icace_cosine": 0.48327478766441345,
"eval_icace_l2": 0.5759322643280029,
"eval_icace_normdiff": 0.43174561858177185,
"eval_loss": 9.551604270935059,
"eval_runtime": 4.2248,
"eval_samples_per_second": 922.636,
"eval_steps_per_second": 7.338,
"step": 200
},
{
"epoch": 1.43,
"learning_rate": 4.761698440207972e-05,
"loss": 8.7283,
"step": 220
},
{
"epoch": 1.56,
"learning_rate": 4.740034662045061e-05,
"loss": 8.4523,
"step": 240
},
{
"epoch": 1.62,
"eval_distillation_accuracy_counterfactual": 0.5695228322216521,
"eval_distillation_accuracy_factual": 0.8114417650076963,
"eval_distillation_f1_counterfactual": 0.5467305836862474,
"eval_distillation_f1_factual": 0.8027820833992848,
"eval_groundtruth_accuracy_counterfactual": 0.4953822473063109,
"eval_groundtruth_f1_counterfactual": 0.4591082174445691,
"eval_groundtruth_f1_factual": 0.550223520551177,
"eval_icace_cosine": 0.4666258990764618,
"eval_icace_l2": 0.5574371814727783,
"eval_icace_normdiff": 0.3956855237483978,
"eval_loss": 8.846331596374512,
"eval_runtime": 4.3676,
"eval_samples_per_second": 892.49,
"eval_steps_per_second": 7.098,
"step": 250
},
{
"epoch": 1.69,
"learning_rate": 4.7183708838821494e-05,
"loss": 7.9309,
"step": 260
},
{
"epoch": 1.82,
"learning_rate": 4.6967071057192376e-05,
"loss": 8.1515,
"step": 280
},
{
"epoch": 1.95,
"learning_rate": 4.675043327556326e-05,
"loss": 8.2538,
"step": 300
},
{
"epoch": 1.95,
"eval_distillation_accuracy_counterfactual": 0.5785017957927142,
"eval_distillation_accuracy_factual": 0.8099025141097999,
"eval_distillation_f1_counterfactual": 0.5510551496999538,
"eval_distillation_f1_factual": 0.8034508877679645,
"eval_groundtruth_accuracy_counterfactual": 0.5192406362237044,
"eval_groundtruth_f1_counterfactual": 0.4773009173751467,
"eval_groundtruth_f1_factual": 0.5601908475598169,
"eval_icace_cosine": 0.46449822187423706,
"eval_icace_l2": 0.5464323163032532,
"eval_icace_normdiff": 0.3835403621196747,
"eval_loss": 8.654885292053223,
"eval_runtime": 4.2096,
"eval_samples_per_second": 925.989,
"eval_steps_per_second": 7.364,
"step": 300
},
{
"epoch": 2.08,
"learning_rate": 4.653379549393415e-05,
"loss": 7.67,
"step": 320
},
{
"epoch": 2.21,
"learning_rate": 4.6317157712305025e-05,
"loss": 7.2333,
"step": 340
},
{
"epoch": 2.27,
"eval_distillation_accuracy_counterfactual": 0.5828630066700872,
"eval_distillation_accuracy_factual": 0.8129810159055926,
"eval_distillation_f1_counterfactual": 0.5580116722898498,
"eval_distillation_f1_factual": 0.8059470822588534,
"eval_groundtruth_accuracy_counterfactual": 0.5161621344279117,
"eval_groundtruth_f1_counterfactual": 0.47873711691724863,
"eval_groundtruth_f1_factual": 0.5669218861407466,
"eval_icace_cosine": 0.4622446298599243,
"eval_icace_l2": 0.5438258051872253,
"eval_icace_normdiff": 0.3780592679977417,
"eval_loss": 8.574095726013184,
"eval_runtime": 4.5769,
"eval_samples_per_second": 851.663,
"eval_steps_per_second": 6.773,
"step": 350
},
{
"epoch": 2.34,
"learning_rate": 4.6100519930675915e-05,
"loss": 7.5403,
"step": 360
},
{
"epoch": 2.47,
"learning_rate": 4.58838821490468e-05,
"loss": 7.5941,
"step": 380
},
{
"epoch": 2.6,
"learning_rate": 4.566724436741768e-05,
"loss": 7.4074,
"step": 400
},
{
"epoch": 2.6,
"eval_distillation_accuracy_counterfactual": 0.5913288866085172,
"eval_distillation_accuracy_factual": 0.8127244740892765,
"eval_distillation_f1_counterfactual": 0.5655321920235192,
"eval_distillation_f1_factual": 0.8043989295732594,
"eval_groundtruth_accuracy_counterfactual": 0.5236018471010775,
"eval_groundtruth_f1_counterfactual": 0.483629889760966,
"eval_groundtruth_f1_factual": 0.566873960250893,
"eval_icace_cosine": 0.4574372470378876,
"eval_icace_l2": 0.5330100059509277,
"eval_icace_normdiff": 0.3700498342514038,
"eval_loss": 8.39880084991455,
"eval_runtime": 4.3152,
"eval_samples_per_second": 903.312,
"eval_steps_per_second": 7.184,
"step": 400
},
{
"epoch": 2.73,
"learning_rate": 4.5450606585788563e-05,
"loss": 7.4824,
"step": 420
},
{
"epoch": 2.86,
"learning_rate": 4.5233968804159446e-05,
"loss": 7.3475,
"step": 440
},
{
"epoch": 2.92,
"eval_distillation_accuracy_counterfactual": 0.5872242175474602,
"eval_distillation_accuracy_factual": 0.8124679322729605,
"eval_distillation_f1_counterfactual": 0.5625259640226865,
"eval_distillation_f1_factual": 0.8042766212220256,
"eval_groundtruth_accuracy_counterfactual": 0.5246280143663418,
"eval_groundtruth_f1_counterfactual": 0.48658870418382777,
"eval_groundtruth_f1_factual": 0.5685471377307885,
"eval_icace_cosine": 0.45905160903930664,
"eval_icace_l2": 0.5355878472328186,
"eval_icace_normdiff": 0.36660289764404297,
"eval_loss": 8.368142127990723,
"eval_runtime": 4.1481,
"eval_samples_per_second": 939.704,
"eval_steps_per_second": 7.473,
"step": 450
},
{
"epoch": 2.99,
"learning_rate": 4.501733102253033e-05,
"loss": 7.0594,
"step": 460
},
{
"epoch": 3.12,
"learning_rate": 4.480069324090121e-05,
"loss": 6.9375,
"step": 480
},
{
"epoch": 3.25,
"learning_rate": 4.45840554592721e-05,
"loss": 6.7366,
"step": 500
},
{
"epoch": 3.25,
"eval_distillation_accuracy_counterfactual": 0.595177013853258,
"eval_distillation_accuracy_factual": 0.8070805541303232,
"eval_distillation_f1_counterfactual": 0.5692331323641199,
"eval_distillation_f1_factual": 0.7998207924147414,
"eval_groundtruth_accuracy_counterfactual": 0.5361723961005644,
"eval_groundtruth_f1_counterfactual": 0.4974673759555204,
"eval_groundtruth_f1_factual": 0.5562996925682391,
"eval_icace_cosine": 0.4534068703651428,
"eval_icace_l2": 0.526775062084198,
"eval_icace_normdiff": 0.3587745130062103,
"eval_loss": 8.27935791015625,
"eval_runtime": 4.1869,
"eval_samples_per_second": 931.008,
"eval_steps_per_second": 7.404,
"step": 500
},
{
"epoch": 3.38,
"learning_rate": 4.436741767764298e-05,
"loss": 7.1133,
"step": 520
},
{
"epoch": 3.51,
"learning_rate": 4.415077989601387e-05,
"loss": 6.8301,
"step": 540
},
{
"epoch": 3.57,
"eval_distillation_accuracy_counterfactual": 0.5974858902001026,
"eval_distillation_accuracy_factual": 0.801693175987686,
"eval_distillation_f1_counterfactual": 0.5726258996562326,
"eval_distillation_f1_factual": 0.7907913618161765,
"eval_groundtruth_accuracy_counterfactual": 0.5320677270395074,
"eval_groundtruth_f1_counterfactual": 0.4923485191341066,
"eval_groundtruth_f1_factual": 0.5537961164314067,
"eval_icace_cosine": 0.45169007778167725,
"eval_icace_l2": 0.5279433727264404,
"eval_icace_normdiff": 0.36275714635849,
"eval_loss": 8.32939338684082,
"eval_runtime": 5.2077,
"eval_samples_per_second": 748.509,
"eval_steps_per_second": 5.953,
"step": 550
},
{
"epoch": 3.64,
"learning_rate": 4.393414211438475e-05,
"loss": 6.7981,
"step": 560
},
{
"epoch": 3.77,
"learning_rate": 4.371750433275563e-05,
"loss": 7.1438,
"step": 580
},
{
"epoch": 3.9,
"learning_rate": 4.3500866551126516e-05,
"loss": 6.8997,
"step": 600
},
{
"epoch": 3.9,
"eval_distillation_accuracy_counterfactual": 0.5926115956900975,
"eval_distillation_accuracy_factual": 0.8004104669061057,
"eval_distillation_f1_counterfactual": 0.5673896946111567,
"eval_distillation_f1_factual": 0.7916000675958308,
"eval_groundtruth_accuracy_counterfactual": 0.534633145202668,
"eval_groundtruth_f1_counterfactual": 0.49769412855789863,
"eval_groundtruth_f1_factual": 0.5643847406573682,
"eval_icace_cosine": 0.456226110458374,
"eval_icace_l2": 0.529480516910553,
"eval_icace_normdiff": 0.3591635823249817,
"eval_loss": 8.281085014343262,
"eval_runtime": 4.129,
"eval_samples_per_second": 944.045,
"eval_steps_per_second": 7.508,
"step": 600
},
{
"epoch": 4.03,
"learning_rate": 4.3284228769497406e-05,
"loss": 6.9036,
"step": 620
},
{
"epoch": 4.16,
"learning_rate": 4.306759098786828e-05,
"loss": 6.5021,
"step": 640
},
{
"epoch": 4.22,
"eval_distillation_accuracy_counterfactual": 0.6010774756285274,
"eval_distillation_accuracy_factual": 0.8068240123140071,
"eval_distillation_f1_counterfactual": 0.5748719382246691,
"eval_distillation_f1_factual": 0.7986215841545606,
"eval_groundtruth_accuracy_counterfactual": 0.5366854797331965,
"eval_groundtruth_f1_counterfactual": 0.496750865177183,
"eval_groundtruth_f1_factual": 0.5667016917523627,
"eval_icace_cosine": 0.4511929750442505,
"eval_icace_l2": 0.5219342708587646,
"eval_icace_normdiff": 0.35563668608665466,
"eval_loss": 8.183207511901855,
"eval_runtime": 4.6568,
"eval_samples_per_second": 837.051,
"eval_steps_per_second": 6.657,
"step": 650
},
{
"epoch": 4.29,
"learning_rate": 4.285095320623917e-05,
"loss": 6.6648,
"step": 660
},
{
"epoch": 4.42,
"learning_rate": 4.2634315424610055e-05,
"loss": 6.5765,
"step": 680
},
{
"epoch": 4.55,
"learning_rate": 4.241767764298094e-05,
"loss": 6.8262,
"step": 700
},
{
"epoch": 4.55,
"eval_distillation_accuracy_counterfactual": 0.6021036428937917,
"eval_distillation_accuracy_factual": 0.8011800923550538,
"eval_distillation_f1_counterfactual": 0.5782623323950351,
"eval_distillation_f1_factual": 0.792234544848666,
"eval_groundtruth_accuracy_counterfactual": 0.534633145202668,
"eval_groundtruth_f1_counterfactual": 0.4980566768981932,
"eval_groundtruth_f1_factual": 0.5659870209728001,
"eval_icace_cosine": 0.45206764340400696,
"eval_icace_l2": 0.5196635723114014,
"eval_icace_normdiff": 0.3521099090576172,
"eval_loss": 8.192151069641113,
"eval_runtime": 4.2168,
"eval_samples_per_second": 924.404,
"eval_steps_per_second": 7.352,
"step": 700
},
{
"epoch": 4.68,
"learning_rate": 4.220103986135182e-05,
"loss": 6.3849,
"step": 720
},
{
"epoch": 4.81,
"learning_rate": 4.198440207972271e-05,
"loss": 6.2939,
"step": 740
},
{
"epoch": 4.87,
"eval_distillation_accuracy_counterfactual": 0.6015905592611596,
"eval_distillation_accuracy_factual": 0.7947665469471524,
"eval_distillation_f1_counterfactual": 0.5720518381601882,
"eval_distillation_f1_factual": 0.7847422669196575,
"eval_groundtruth_accuracy_counterfactual": 0.5443817342226783,
"eval_groundtruth_f1_counterfactual": 0.5024275126053424,
"eval_groundtruth_f1_factual": 0.5643444437069502,
"eval_icace_cosine": 0.45017117261886597,
"eval_icace_l2": 0.52012699842453,
"eval_icace_normdiff": 0.3526724576950073,
"eval_loss": 8.209163665771484,
"eval_runtime": 4.3873,
"eval_samples_per_second": 888.48,
"eval_steps_per_second": 7.066,
"step": 750
},
{
"epoch": 4.94,
"learning_rate": 4.1767764298093586e-05,
"loss": 6.5551,
"step": 760
},
{
"epoch": 5.06,
"learning_rate": 4.1551126516464476e-05,
"loss": 6.1778,
"step": 780
},
{
"epoch": 5.19,
"learning_rate": 4.133448873483536e-05,
"loss": 6.4037,
"step": 800
},
{
"epoch": 5.19,
"eval_distillation_accuracy_counterfactual": 0.5987685992816829,
"eval_distillation_accuracy_factual": 0.7981015905592611,
"eval_distillation_f1_counterfactual": 0.5722989846682422,
"eval_distillation_f1_factual": 0.7868708739280531,
"eval_groundtruth_accuracy_counterfactual": 0.538481272447409,
"eval_groundtruth_f1_counterfactual": 0.5003432328028673,
"eval_groundtruth_f1_factual": 0.5649331255060308,
"eval_icace_cosine": 0.4531785547733307,
"eval_icace_l2": 0.5209774374961853,
"eval_icace_normdiff": 0.35254132747650146,
"eval_loss": 8.21574878692627,
"eval_runtime": 4.3077,
"eval_samples_per_second": 904.888,
"eval_steps_per_second": 7.196,
"step": 800
},
{
"epoch": 5.32,
"learning_rate": 4.111785095320624e-05,
"loss": 6.3784,
"step": 820
},
{
"epoch": 5.45,
"learning_rate": 4.0901213171577124e-05,
"loss": 6.2024,
"step": 840
},
{
"epoch": 5.52,
"eval_distillation_accuracy_counterfactual": 0.6033863519753719,
"eval_distillation_accuracy_factual": 0.78450487429451,
"eval_distillation_f1_counterfactual": 0.5779296645132648,
"eval_distillation_f1_factual": 0.7716967495103348,
"eval_groundtruth_accuracy_counterfactual": 0.5425859415084658,
"eval_groundtruth_f1_counterfactual": 0.5062616807207319,
"eval_groundtruth_f1_factual": 0.5623510278704009,
"eval_icace_cosine": 0.45046135783195496,
"eval_icace_l2": 0.5194692611694336,
"eval_icace_normdiff": 0.349165141582489,
"eval_loss": 8.216791152954102,
"eval_runtime": 4.7068,
"eval_samples_per_second": 828.166,
"eval_steps_per_second": 6.586,
"step": 850
},
{
"epoch": 5.58,
"learning_rate": 4.068457538994801e-05,
"loss": 6.0701,
"step": 860
},
{
"epoch": 5.71,
"learning_rate": 4.04679376083189e-05,
"loss": 6.347,
"step": 880
},
{
"epoch": 5.84,
"learning_rate": 4.025129982668977e-05,
"loss": 6.2456,
"step": 900
},
{
"epoch": 5.84,
"eval_distillation_accuracy_counterfactual": 0.6059517701385326,
"eval_distillation_accuracy_factual": 0.7880964597229349,
"eval_distillation_f1_counterfactual": 0.5782256234051215,
"eval_distillation_f1_factual": 0.7772220422916071,
"eval_groundtruth_accuracy_counterfactual": 0.5448948178553105,
"eval_groundtruth_f1_counterfactual": 0.5055672923847118,
"eval_groundtruth_f1_factual": 0.5609910180857884,
"eval_icace_cosine": 0.448850154876709,
"eval_icace_l2": 0.5155501365661621,
"eval_icace_normdiff": 0.34565335512161255,
"eval_loss": 8.134432792663574,
"eval_runtime": 4.6313,
"eval_samples_per_second": 841.656,
"eval_steps_per_second": 6.694,
"step": 900
},
{
"epoch": 5.97,
"learning_rate": 4.003466204506066e-05,
"loss": 6.2184,
"step": 920
},
{
"epoch": 6.1,
"learning_rate": 3.9818024263431546e-05,
"loss": 6.167,
"step": 940
},
{
"epoch": 6.17,
"eval_distillation_accuracy_counterfactual": 0.607747562852745,
"eval_distillation_accuracy_factual": 0.7904053360697794,
"eval_distillation_f1_counterfactual": 0.5830284801765655,
"eval_distillation_f1_factual": 0.7803493947200735,
"eval_groundtruth_accuracy_counterfactual": 0.5410466906105695,
"eval_groundtruth_f1_counterfactual": 0.5040873537566849,
"eval_groundtruth_f1_factual": 0.5644313788434017,
"eval_icace_cosine": 0.4496186673641205,
"eval_icace_l2": 0.5139620304107666,
"eval_icace_normdiff": 0.34691381454467773,
"eval_loss": 8.117876052856445,
"eval_runtime": 4.5117,
"eval_samples_per_second": 863.979,
"eval_steps_per_second": 6.871,
"step": 950
},
{
"epoch": 6.23,
"learning_rate": 3.960138648180243e-05,
"loss": 6.0826,
"step": 960
},
{
"epoch": 6.36,
"learning_rate": 3.938474870017331e-05,
"loss": 6.0767,
"step": 980
},
{
"epoch": 6.49,
"learning_rate": 3.91681109185442e-05,
"loss": 5.8502,
"step": 1000
},
{
"epoch": 6.49,
"eval_distillation_accuracy_counterfactual": 0.6062083119548486,
"eval_distillation_accuracy_factual": 0.7834787070292457,
"eval_distillation_f1_counterfactual": 0.580469937457371,
"eval_distillation_f1_factual": 0.7727583593749504,
"eval_groundtruth_accuracy_counterfactual": 0.5487429451000513,
"eval_groundtruth_f1_counterfactual": 0.5097369423713092,
"eval_groundtruth_f1_factual": 0.5561243921701791,
"eval_icace_cosine": 0.44893112778663635,
"eval_icace_l2": 0.5155116319656372,
"eval_icace_normdiff": 0.3455946445465088,
"eval_loss": 8.187788963317871,
"eval_runtime": 4.1698,
"eval_samples_per_second": 934.828,
"eval_steps_per_second": 7.434,
"step": 1000
},
{
"epoch": 6.62,
"learning_rate": 3.895147313691508e-05,
"loss": 5.9917,
"step": 1020
},
{
"epoch": 6.75,
"learning_rate": 3.873483535528597e-05,
"loss": 6.1073,
"step": 1040
},
{
"epoch": 6.82,
"eval_distillation_accuracy_counterfactual": 0.607747562852745,
"eval_distillation_accuracy_factual": 0.7873268342739866,
"eval_distillation_f1_counterfactual": 0.5805601357737593,
"eval_distillation_f1_factual": 0.7743939373190072,
"eval_groundtruth_accuracy_counterfactual": 0.5407901487942535,
"eval_groundtruth_f1_counterfactual": 0.5018039142178429,
"eval_groundtruth_f1_factual": 0.5561166543910189,
"eval_icace_cosine": 0.4540616571903229,
"eval_icace_l2": 0.5179798007011414,
"eval_icace_normdiff": 0.34727445244789124,
"eval_loss": 8.15165901184082,
"eval_runtime": 4.1114,
"eval_samples_per_second": 948.103,
"eval_steps_per_second": 7.54,
"step": 1050
},
{
"epoch": 6.88,
"learning_rate": 3.851819757365685e-05,
"loss": 5.8656,
"step": 1060
},
{
"epoch": 7.01,
"learning_rate": 3.830155979202773e-05,
"loss": 5.8636,
"step": 1080
},
{
"epoch": 7.14,
"learning_rate": 3.8084922010398616e-05,
"loss": 5.7869,
"step": 1100
},
{
"epoch": 7.14,
"eval_distillation_accuracy_counterfactual": 0.6062083119548486,
"eval_distillation_accuracy_factual": 0.7811698306824012,
"eval_distillation_f1_counterfactual": 0.5821710152799346,
"eval_distillation_f1_factual": 0.7697086841325075,
"eval_groundtruth_accuracy_counterfactual": 0.5489994869163674,
"eval_groundtruth_f1_counterfactual": 0.5136511907225106,
"eval_groundtruth_f1_factual": 0.5620812168179984,
"eval_icace_cosine": 0.4501829743385315,
"eval_icace_l2": 0.5179650187492371,
"eval_icace_normdiff": 0.34533482789993286,
"eval_loss": 8.237076759338379,
"eval_runtime": 4.8254,
"eval_samples_per_second": 807.813,
"eval_steps_per_second": 6.424,
"step": 1100
},
{
"epoch": 7.27,
"learning_rate": 3.78682842287695e-05,
"loss": 5.7736,
"step": 1120
},
{
"epoch": 7.4,
"learning_rate": 3.765164644714038e-05,
"loss": 5.8242,
"step": 1140
},
{
"epoch": 7.47,
"eval_distillation_accuracy_counterfactual": 0.6080041046690611,
"eval_distillation_accuracy_factual": 0.7819394561313494,
"eval_distillation_f1_counterfactual": 0.5840769716039321,
"eval_distillation_f1_factual": 0.772426412144742,
"eval_groundtruth_accuracy_counterfactual": 0.5461775269368907,
"eval_groundtruth_f1_counterfactual": 0.5101523591778137,
"eval_groundtruth_f1_factual": 0.5610800146045478,
"eval_icace_cosine": 0.45291048288345337,
"eval_icace_l2": 0.5156511068344116,
"eval_icace_normdiff": 0.3455710709095001,
"eval_loss": 8.170882225036621,
"eval_runtime": 4.1782,
"eval_samples_per_second": 932.944,
"eval_steps_per_second": 7.42,
"step": 1150
},
{
"epoch": 7.53,
"learning_rate": 3.7435008665511264e-05,
"loss": 5.8566,
"step": 1160
},
{
"epoch": 7.66,
"learning_rate": 3.7218370883882154e-05,
"loss": 5.9544,
"step": 1180
},
{
"epoch": 7.79,
"learning_rate": 3.700173310225303e-05,
"loss": 5.6634,
"step": 1200
},
{
"epoch": 7.79,
"eval_distillation_accuracy_counterfactual": 0.603129810159056,
"eval_distillation_accuracy_factual": 0.7850179579271421,
"eval_distillation_f1_counterfactual": 0.5749303676931736,
"eval_distillation_f1_factual": 0.7744112177462537,
"eval_groundtruth_accuracy_counterfactual": 0.5464340687532068,
"eval_groundtruth_f1_counterfactual": 0.5066990026042995,
"eval_groundtruth_f1_factual": 0.5557605077495628,
"eval_icace_cosine": 0.4530617296695709,
"eval_icace_l2": 0.5165730714797974,
"eval_icace_normdiff": 0.3451988697052002,
"eval_loss": 8.165973663330078,
"eval_runtime": 4.1529,
"eval_samples_per_second": 938.624,
"eval_steps_per_second": 7.465,
"step": 1200
},
{
"epoch": 7.92,
"learning_rate": 3.678509532062392e-05,
"loss": 5.77,
"step": 1220
},
{
"epoch": 8.05,
"learning_rate": 3.65684575389948e-05,
"loss": 5.7482,
"step": 1240
},
{
"epoch": 8.12,
"eval_distillation_accuracy_counterfactual": 0.6126218573627501,
"eval_distillation_accuracy_factual": 0.7909184197024115,
"eval_distillation_f1_counterfactual": 0.5843182944697987,
"eval_distillation_f1_factual": 0.7795092000338903,
"eval_groundtruth_accuracy_counterfactual": 0.5543868650590046,
"eval_groundtruth_f1_counterfactual": 0.5137168302820662,
"eval_groundtruth_f1_factual": 0.5678129066363756,
"eval_icace_cosine": 0.4470921456813812,
"eval_icace_l2": 0.5108927488327026,
"eval_icace_normdiff": 0.3416588604450226,
"eval_loss": 8.150604248046875,
"eval_runtime": 4.251,
"eval_samples_per_second": 916.95,
"eval_steps_per_second": 7.292,
"step": 1250
},
{
"epoch": 8.18,
"learning_rate": 3.6351819757365686e-05,
"loss": 5.5205,
"step": 1260
},
{
"epoch": 8.31,
"learning_rate": 3.613518197573657e-05,
"loss": 5.8927,
"step": 1280
},
{
"epoch": 8.44,
"learning_rate": 3.591854419410746e-05,
"loss": 5.5395,
"step": 1300
},
{
"epoch": 8.44,
"eval_distillation_accuracy_counterfactual": 0.6103129810159056,
"eval_distillation_accuracy_factual": 0.7809132888660851,
"eval_distillation_f1_counterfactual": 0.5841396061759035,
"eval_distillation_f1_factual": 0.7714136537747058,
"eval_groundtruth_accuracy_counterfactual": 0.5497691123653156,
"eval_groundtruth_f1_counterfactual": 0.5118760188223795,
"eval_groundtruth_f1_factual": 0.5589055520339826,
"eval_icace_cosine": 0.45225635170936584,
"eval_icace_l2": 0.5154218077659607,
"eval_icace_normdiff": 0.344652384519577,
"eval_loss": 8.205676078796387,
"eval_runtime": 4.091,
"eval_samples_per_second": 952.826,
"eval_steps_per_second": 7.578,
"step": 1300
},
{
"epoch": 8.57,
"learning_rate": 3.5701906412478334e-05,
"loss": 5.8121,
"step": 1320
},
{
"epoch": 8.7,
"learning_rate": 3.5485268630849224e-05,
"loss": 5.441,
"step": 1340
},
{
"epoch": 8.77,
"eval_distillation_accuracy_counterfactual": 0.6087737301180093,
"eval_distillation_accuracy_factual": 0.7806567470497691,
"eval_distillation_f1_counterfactual": 0.5802259630201693,
"eval_distillation_f1_factual": 0.769666178731678,
"eval_groundtruth_accuracy_counterfactual": 0.5497691123653156,
"eval_groundtruth_f1_counterfactual": 0.5083817804397743,
"eval_groundtruth_f1_factual": 0.5560730633022766,
"eval_icace_cosine": 0.45125773549079895,
"eval_icace_l2": 0.5135722160339355,
"eval_icace_normdiff": 0.342280775308609,
"eval_loss": 8.19408893585205,
"eval_runtime": 4.2164,
"eval_samples_per_second": 924.475,
"eval_steps_per_second": 7.352,
"step": 1350
},
{
"epoch": 8.83,
"learning_rate": 3.526863084922011e-05,
"loss": 5.7596,
"step": 1360
},
{
"epoch": 8.96,
"learning_rate": 3.505199306759099e-05,
"loss": 5.4167,
"step": 1380
},
{
"epoch": 9.09,
"learning_rate": 3.483535528596187e-05,
"loss": 5.4932,
"step": 1400
},
{
"epoch": 9.09,
"eval_distillation_accuracy_counterfactual": 0.6074910210364289,
"eval_distillation_accuracy_factual": 0.7865572088250384,
"eval_distillation_f1_counterfactual": 0.5796303315597431,
"eval_distillation_f1_factual": 0.7750390231154594,
"eval_groundtruth_accuracy_counterfactual": 0.5477167778347871,
"eval_groundtruth_f1_counterfactual": 0.5087150951199725,
"eval_groundtruth_f1_factual": 0.5570574749661429,
"eval_icace_cosine": 0.4498434364795685,
"eval_icace_l2": 0.516267716884613,
"eval_icace_normdiff": 0.3403330147266388,
"eval_loss": 8.22058391571045,
"eval_runtime": 4.1848,
"eval_samples_per_second": 931.47,
"eval_steps_per_second": 7.408,
"step": 1400
},
{
"epoch": 9.22,
"learning_rate": 3.461871750433276e-05,
"loss": 5.5678,
"step": 1420
},
{
"epoch": 9.35,
"learning_rate": 3.440207972270364e-05,
"loss": 5.4716,
"step": 1440
},
{
"epoch": 9.42,
"eval_distillation_accuracy_counterfactual": 0.6108260646485377,
"eval_distillation_accuracy_factual": 0.780143663417137,
"eval_distillation_f1_counterfactual": 0.5842917139467018,
"eval_distillation_f1_factual": 0.7709900996479784,
"eval_groundtruth_accuracy_counterfactual": 0.5533606977937404,
"eval_groundtruth_f1_counterfactual": 0.5127818121914747,
"eval_groundtruth_f1_factual": 0.5573652255180004,
"eval_icace_cosine": 0.44960257411003113,
"eval_icace_l2": 0.5159686207771301,
"eval_icace_normdiff": 0.3424537181854248,
"eval_loss": 8.257763862609863,
"eval_runtime": 4.3143,
"eval_samples_per_second": 903.512,
"eval_steps_per_second": 7.185,
"step": 1450
},
{
"epoch": 9.48,
"learning_rate": 3.418544194107453e-05,
"loss": 5.4555,
"step": 1460
},
{
"epoch": 9.61,
"learning_rate": 3.396880415944541e-05,
"loss": 5.3255,
"step": 1480
},
{
"epoch": 9.74,
"learning_rate": 3.3752166377816294e-05,
"loss": 5.3938,
"step": 1500
},
{
"epoch": 9.74,
"eval_distillation_accuracy_counterfactual": 0.6090302719343252,
"eval_distillation_accuracy_factual": 0.7811698306824012,
"eval_distillation_f1_counterfactual": 0.5842265693397752,
"eval_distillation_f1_factual": 0.7722150004829093,
"eval_groundtruth_accuracy_counterfactual": 0.5477167778347871,
"eval_groundtruth_f1_counterfactual": 0.5107558580095164,
"eval_groundtruth_f1_factual": 0.5685387585098263,
"eval_icace_cosine": 0.4469769597053528,
"eval_icace_l2": 0.5149244666099548,
"eval_icace_normdiff": 0.3419099450111389,
"eval_loss": 8.219366073608398,
"eval_runtime": 4.196,
"eval_samples_per_second": 928.989,
"eval_steps_per_second": 7.388,
"step": 1500
},
{
"epoch": 9.87,
"learning_rate": 3.353552859618718e-05,
"loss": 5.5647,
"step": 1520
},
{
"epoch": 10.0,
"learning_rate": 3.331889081455806e-05,
"loss": 5.6336,
"step": 1540
},
{
"epoch": 10.06,
"eval_distillation_accuracy_counterfactual": 0.6074910210364289,
"eval_distillation_accuracy_factual": 0.7834787070292457,
"eval_distillation_f1_counterfactual": 0.5825764249392762,
"eval_distillation_f1_factual": 0.7747389963457406,
"eval_groundtruth_accuracy_counterfactual": 0.5456644433042586,
"eval_groundtruth_f1_counterfactual": 0.5091238986988211,
"eval_groundtruth_f1_factual": 0.5599697616151967,
"eval_icace_cosine": 0.4470330774784088,
"eval_icace_l2": 0.5163958668708801,
"eval_icace_normdiff": 0.3423549234867096,
"eval_loss": 8.21544361114502,
"eval_runtime": 4.0325,
"eval_samples_per_second": 966.654,
"eval_steps_per_second": 7.688,
"step": 1550
},
{
"epoch": 10.13,
"learning_rate": 3.310225303292894e-05,
"loss": 5.3259,
"step": 1560
},
{
"epoch": 10.26,
"learning_rate": 3.2885615251299825e-05,
"loss": 5.2595,
"step": 1580
},
{
"epoch": 10.39,
"learning_rate": 3.2668977469670715e-05,
"loss": 5.4352,
"step": 1600
},
{
"epoch": 10.39,
"eval_distillation_accuracy_counterfactual": 0.6087737301180093,
"eval_distillation_accuracy_factual": 0.7773217034376604,
"eval_distillation_f1_counterfactual": 0.5823531075369515,
"eval_distillation_f1_factual": 0.7693792326965714,
"eval_groundtruth_accuracy_counterfactual": 0.5477167778347871,
"eval_groundtruth_f1_counterfactual": 0.508966719485753,
"eval_groundtruth_f1_factual": 0.5640053248952209,
"eval_icace_cosine": 0.4500087797641754,
"eval_icace_l2": 0.5169069170951843,
"eval_icace_normdiff": 0.3419443964958191,
"eval_loss": 8.280415534973145,
"eval_runtime": 4.4038,
"eval_samples_per_second": 885.139,
"eval_steps_per_second": 7.039,
"step": 1600
},
{
"epoch": 10.52,
"learning_rate": 3.245233968804159e-05,
"loss": 5.5017,
"step": 1620
},
{
"epoch": 10.65,
"learning_rate": 3.223570190641248e-05,
"loss": 5.57,
"step": 1640
},
{
"epoch": 10.71,
"eval_distillation_accuracy_counterfactual": 0.6082606464853771,
"eval_distillation_accuracy_factual": 0.7857875833760903,
"eval_distillation_f1_counterfactual": 0.5800740243677368,
"eval_distillation_f1_factual": 0.7760634987859257,
"eval_groundtruth_accuracy_counterfactual": 0.5500256541816316,
"eval_groundtruth_f1_counterfactual": 0.508281184676912,
"eval_groundtruth_f1_factual": 0.5590992844943354,
"eval_icace_cosine": 0.44485729932785034,
"eval_icace_l2": 0.5137429237365723,
"eval_icace_normdiff": 0.33999693393707275,
"eval_loss": 8.240898132324219,
"eval_runtime": 4.1787,
"eval_samples_per_second": 932.837,
"eval_steps_per_second": 7.419,
"step": 1650
}
],
"max_steps": 4616,
"num_train_epochs": 30,
"total_flos": 249055307919360.0,
"trial_name": null,
"trial_params": null
}