{ "best_metric": 0.02460244856774807, "best_model_checkpoint": "/kaggle/working/output/checkpoint-73", "epoch": 26.0, "eval_steps": 500, "global_step": 91, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8571428571428571, "eval_LCC": -0.13996786173466005, "eval_SROCC": -0.1660984393757503, "eval_loss": 0.2684723138809204, "eval_runtime": 39.3373, "eval_samples_per_second": 1.271, "eval_steps_per_second": 0.051, "step": 3 }, { "epoch": 2.0, "eval_LCC": -0.13191836249511346, "eval_SROCC": -0.20710684273709484, "eval_loss": 0.06745556741952896, "eval_runtime": 37.0134, "eval_samples_per_second": 1.351, "eval_steps_per_second": 0.054, "step": 7 }, { "epoch": 2.857142857142857, "grad_norm": 5.974637508392334, "learning_rate": 1.3333333333333333e-05, "loss": 0.223, "step": 10 }, { "epoch": 2.857142857142857, "eval_LCC": -0.11444761651756143, "eval_SROCC": -0.19721488595438177, "eval_loss": 0.13801459968090057, "eval_runtime": 37.041, "eval_samples_per_second": 1.35, "eval_steps_per_second": 0.054, "step": 10 }, { "epoch": 4.0, "eval_LCC": -0.11619739343449043, "eval_SROCC": -0.23620648259303723, "eval_loss": 0.0638759583234787, "eval_runtime": 37.3454, "eval_samples_per_second": 1.339, "eval_steps_per_second": 0.054, "step": 14 }, { "epoch": 4.857142857142857, "eval_LCC": -0.1097281268596262, "eval_SROCC": -0.17599039615846337, "eval_loss": 0.06009223312139511, "eval_runtime": 39.2582, "eval_samples_per_second": 1.274, "eval_steps_per_second": 0.051, "step": 17 }, { "epoch": 5.714285714285714, "grad_norm": 1.5656846761703491, "learning_rate": 1.925925925925926e-05, "loss": 0.0607, "step": 20 }, { "epoch": 6.0, "eval_LCC": -0.08523254844178266, "eval_SROCC": -0.12902761104441776, "eval_loss": 0.06266126781702042, "eval_runtime": 37.4159, "eval_samples_per_second": 1.336, "eval_steps_per_second": 0.053, "step": 21 }, { "epoch": 6.857142857142857, "eval_LCC": -0.07908973191513438, "eval_SROCC": -0.10501800720288115, "eval_loss": 0.054282378405332565, "eval_runtime": 39.9947, "eval_samples_per_second": 1.25, "eval_steps_per_second": 0.05, "step": 24 }, { "epoch": 8.0, "eval_LCC": -0.07022943984845728, "eval_SROCC": -0.0683313325330132, "eval_loss": 0.04083505645394325, "eval_runtime": 39.4734, "eval_samples_per_second": 1.267, "eval_steps_per_second": 0.051, "step": 28 }, { "epoch": 8.571428571428571, "grad_norm": 0.6326273679733276, "learning_rate": 1.7777777777777777e-05, "loss": 0.0212, "step": 30 }, { "epoch": 8.857142857142858, "eval_LCC": -0.05666279490414187, "eval_SROCC": -0.06919567827130851, "eval_loss": 0.04194454103708267, "eval_runtime": 37.518, "eval_samples_per_second": 1.333, "eval_steps_per_second": 0.053, "step": 31 }, { "epoch": 10.0, "eval_LCC": -0.02743218726796948, "eval_SROCC": -0.037022809123649456, "eval_loss": 0.03434378281235695, "eval_runtime": 37.3074, "eval_samples_per_second": 1.34, "eval_steps_per_second": 0.054, "step": 35 }, { "epoch": 10.857142857142858, "eval_LCC": -0.0012650189550020947, "eval_SROCC": -0.033949579831932766, "eval_loss": 0.03074028715491295, "eval_runtime": 39.2094, "eval_samples_per_second": 1.275, "eval_steps_per_second": 0.051, "step": 38 }, { "epoch": 11.428571428571429, "grad_norm": 0.3264749348163605, "learning_rate": 1.6296296296296297e-05, "loss": 0.0168, "step": 40 }, { "epoch": 12.0, "eval_LCC": 0.02330881609272888, "eval_SROCC": -0.02809123649459784, "eval_loss": 0.029941115528345108, "eval_runtime": 39.1241, "eval_samples_per_second": 1.278, "eval_steps_per_second": 0.051, "step": 42 }, { "epoch": 12.857142857142858, "eval_LCC": 0.03261216335612809, "eval_SROCC": -0.042785114045618244, "eval_loss": 0.03004513680934906, "eval_runtime": 36.9998, "eval_samples_per_second": 1.351, "eval_steps_per_second": 0.054, "step": 45 }, { "epoch": 14.0, "eval_LCC": 0.051745647526359385, "eval_SROCC": -0.02376950780312125, "eval_loss": 0.028606927022337914, "eval_runtime": 36.9029, "eval_samples_per_second": 1.355, "eval_steps_per_second": 0.054, "step": 49 }, { "epoch": 14.285714285714286, "grad_norm": 0.6258419156074524, "learning_rate": 1.4814814814814815e-05, "loss": 0.0143, "step": 50 }, { "epoch": 14.857142857142858, "eval_LCC": 0.06012754354341758, "eval_SROCC": -0.018583433373349337, "eval_loss": 0.028338493779301643, "eval_runtime": 39.2004, "eval_samples_per_second": 1.275, "eval_steps_per_second": 0.051, "step": 52 }, { "epoch": 16.0, "eval_LCC": 0.08678963760193395, "eval_SROCC": -0.0024489795918367346, "eval_loss": 0.027331581339240074, "eval_runtime": 39.1787, "eval_samples_per_second": 1.276, "eval_steps_per_second": 0.051, "step": 56 }, { "epoch": 16.857142857142858, "eval_LCC": 0.11189936135943072, "eval_SROCC": 0.028283313325330132, "eval_loss": 0.02574434131383896, "eval_runtime": 37.6775, "eval_samples_per_second": 1.327, "eval_steps_per_second": 0.053, "step": 59 }, { "epoch": 17.142857142857142, "grad_norm": 0.2967870831489563, "learning_rate": 1.3333333333333333e-05, "loss": 0.013, "step": 60 }, { "epoch": 18.0, "eval_LCC": 0.140408573006196, "eval_SROCC": 0.05421368547418968, "eval_loss": 0.024704232811927795, "eval_runtime": 39.5048, "eval_samples_per_second": 1.266, "eval_steps_per_second": 0.051, "step": 63 }, { "epoch": 18.857142857142858, "eval_LCC": 0.15329553575685126, "eval_SROCC": 0.07025210084033613, "eval_loss": 0.0247227493673563, "eval_runtime": 37.5863, "eval_samples_per_second": 1.33, "eval_steps_per_second": 0.053, "step": 66 }, { "epoch": 20.0, "grad_norm": 0.35133129358291626, "learning_rate": 1.1851851851851852e-05, "loss": 0.0111, "step": 70 }, { "epoch": 20.0, "eval_LCC": 0.16704999475534, "eval_SROCC": 0.08004801920768306, "eval_loss": 0.02460792474448681, "eval_runtime": 39.6008, "eval_samples_per_second": 1.263, "eval_steps_per_second": 0.051, "step": 70 }, { "epoch": 20.857142857142858, "eval_LCC": 0.17734737426317984, "eval_SROCC": 0.08955582232893158, "eval_loss": 0.02460244856774807, "eval_runtime": 39.3838, "eval_samples_per_second": 1.27, "eval_steps_per_second": 0.051, "step": 73 }, { "epoch": 22.0, "eval_LCC": 0.183477067706457, "eval_SROCC": 0.09983193277310923, "eval_loss": 0.025651078671216965, "eval_runtime": 37.2011, "eval_samples_per_second": 1.344, "eval_steps_per_second": 0.054, "step": 77 }, { "epoch": 22.857142857142858, "grad_norm": 0.3938016891479492, "learning_rate": 1.037037037037037e-05, "loss": 0.0104, "step": 80 }, { "epoch": 22.857142857142858, "eval_LCC": 0.1943336496302965, "eval_SROCC": 0.10165666266506602, "eval_loss": 0.02549559995532036, "eval_runtime": 39.3404, "eval_samples_per_second": 1.271, "eval_steps_per_second": 0.051, "step": 80 }, { "epoch": 24.0, "eval_LCC": 0.20850983626278138, "eval_SROCC": 0.11490996398559422, "eval_loss": 0.02545199543237686, "eval_runtime": 39.5182, "eval_samples_per_second": 1.265, "eval_steps_per_second": 0.051, "step": 84 }, { "epoch": 24.857142857142858, "eval_LCC": 0.21549093905447098, "eval_SROCC": 0.12451380552220888, "eval_loss": 0.025542089715600014, "eval_runtime": 37.4091, "eval_samples_per_second": 1.337, "eval_steps_per_second": 0.053, "step": 87 }, { "epoch": 25.714285714285715, "grad_norm": 0.5060675740242004, "learning_rate": 8.888888888888888e-06, "loss": 0.0088, "step": 90 }, { "epoch": 26.0, "eval_LCC": 0.22575900814493188, "eval_SROCC": 0.13190876350540215, "eval_loss": 0.026240630075335503, "eval_runtime": 38.9035, "eval_samples_per_second": 1.285, "eval_steps_per_second": 0.051, "step": 91 }, { "epoch": 26.0, "step": 91, "total_flos": 2.2813585220126638e+18, "train_loss": 0.041763259517540646, "train_runtime": 5114.8643, "train_samples_per_second": 2.092, "train_steps_per_second": 0.029 } ], "logging_steps": 10, "max_steps": 150, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.2813585220126638e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }