{ "best_metric": 41.842087703189854, "best_model_checkpoint": "/root/turkic_qa/en_kaz_models/orig_kaz_roberta_base_model/checkpoint-6020", "epoch": 10.0, "eval_steps": 500, "global_step": 6020, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "step": 602, "train_exact_match": 4.595404595404595, "train_f1": 11.363671882650538, "train_runtime": 9.9586, "train_samples_per_second": 112.967, "train_steps_per_second": 4.117 }, { "epoch": 1.0, "grad_norm": 7.829487323760986, "learning_rate": 5e-06, "loss": 4.7559, "step": 602 }, { "epoch": 1.0, "eval_exact_match": 4.0625, "eval_f1": 10.65255335870508, "eval_runtime": 31.5322, "eval_samples_per_second": 113.281, "eval_steps_per_second": 4.059, "step": 602 }, { "epoch": 2.0, "step": 1204, "train_exact_match": 16.083916083916083, "train_f1": 24.66111862744578, "train_runtime": 10.0503, "train_samples_per_second": 113.131, "train_steps_per_second": 4.079 }, { "epoch": 2.0, "grad_norm": 9.797904014587402, "learning_rate": 1e-05, "loss": 3.6465, "step": 1204 }, { "epoch": 2.0, "eval_exact_match": 12.375, "eval_f1": 19.998235228782818, "eval_runtime": 31.6899, "eval_samples_per_second": 112.717, "eval_steps_per_second": 4.039, "step": 1204 }, { "epoch": 3.0, "step": 1806, "train_exact_match": 28.571428571428573, "train_f1": 39.799779936991705, "train_runtime": 9.9917, "train_samples_per_second": 112.694, "train_steps_per_second": 4.103 }, { "epoch": 3.0, "grad_norm": 15.731840133666992, "learning_rate": 8.750000000000001e-06, "loss": 3.0904, "step": 1806 }, { "epoch": 3.0, "eval_exact_match": 20.8125, "eval_f1": 31.83521011700413, "eval_runtime": 31.6833, "eval_samples_per_second": 112.741, "eval_steps_per_second": 4.04, "step": 1806 }, { "epoch": 4.0, "step": 2408, "train_exact_match": 37.76223776223776, "train_f1": 50.512835216096065, "train_runtime": 10.2559, "train_samples_per_second": 113.009, "train_steps_per_second": 4.095 }, { "epoch": 4.0, "grad_norm": 15.223044395446777, "learning_rate": 7.500000000000001e-06, "loss": 2.5695, "step": 2408 }, { "epoch": 4.0, "eval_exact_match": 26.34375, "eval_f1": 37.68196751270749, "eval_runtime": 31.6681, "eval_samples_per_second": 112.795, "eval_steps_per_second": 4.042, "step": 2408 }, { "epoch": 5.0, "step": 3010, "train_exact_match": 46.553446553446555, "train_f1": 59.64402432927812, "train_runtime": 9.9179, "train_samples_per_second": 112.827, "train_steps_per_second": 4.033 }, { "epoch": 5.0, "grad_norm": 19.780818939208984, "learning_rate": 6.25e-06, "loss": 2.2508, "step": 3010 }, { "epoch": 5.0, "eval_exact_match": 27.5625, "eval_f1": 39.44221624780652, "eval_runtime": 31.714, "eval_samples_per_second": 112.632, "eval_steps_per_second": 4.036, "step": 3010 }, { "epoch": 6.0, "step": 3612, "train_exact_match": 50.94905094905095, "train_f1": 63.29384394642245, "train_runtime": 9.8492, "train_samples_per_second": 111.278, "train_steps_per_second": 4.061 }, { "epoch": 6.0, "grad_norm": 19.713895797729492, "learning_rate": 5e-06, "loss": 2.0297, "step": 3612 }, { "epoch": 6.0, "eval_exact_match": 28.75, "eval_f1": 41.0194110809582, "eval_runtime": 31.7278, "eval_samples_per_second": 112.583, "eval_steps_per_second": 4.034, "step": 3612 }, { "epoch": 7.0, "step": 4214, "train_exact_match": 53.74625374625375, "train_f1": 65.89175767102272, "train_runtime": 9.8541, "train_samples_per_second": 113.658, "train_steps_per_second": 4.059 }, { "epoch": 7.0, "grad_norm": 18.399381637573242, "learning_rate": 3.7500000000000005e-06, "loss": 1.8692, "step": 4214 }, { "epoch": 7.0, "eval_exact_match": 29.0625, "eval_f1": 41.37431545961761, "eval_runtime": 31.718, "eval_samples_per_second": 112.617, "eval_steps_per_second": 4.036, "step": 4214 }, { "epoch": 8.0, "step": 4816, "train_exact_match": 57.04295704295704, "train_f1": 69.23809998725315, "train_runtime": 9.8266, "train_samples_per_second": 113.06, "train_steps_per_second": 4.071 }, { "epoch": 8.0, "grad_norm": 19.948137283325195, "learning_rate": 2.5e-06, "loss": 1.7415, "step": 4816 }, { "epoch": 8.0, "eval_exact_match": 29.09375, "eval_f1": 41.58572921874224, "eval_runtime": 31.6379, "eval_samples_per_second": 112.903, "eval_steps_per_second": 4.046, "step": 4816 }, { "epoch": 9.0, "step": 5418, "train_exact_match": 59.14085914085914, "train_f1": 70.72972336346831, "train_runtime": 10.1, "train_samples_per_second": 111.485, "train_steps_per_second": 4.059 }, { "epoch": 9.0, "grad_norm": 18.038774490356445, "learning_rate": 1.25e-06, "loss": 1.6377, "step": 5418 }, { "epoch": 9.0, "eval_exact_match": 29.21875, "eval_f1": 41.68346527288109, "eval_runtime": 31.8369, "eval_samples_per_second": 112.197, "eval_steps_per_second": 4.02, "step": 5418 }, { "epoch": 10.0, "step": 6020, "train_exact_match": 59.94005994005994, "train_f1": 72.84928684458019, "train_runtime": 9.7454, "train_samples_per_second": 112.976, "train_steps_per_second": 4.104 }, { "epoch": 10.0, "grad_norm": 21.71363639831543, "learning_rate": 0.0, "loss": 1.583, "step": 6020 }, { "epoch": 10.0, "eval_exact_match": 29.28125, "eval_f1": 41.842087703189854, "eval_runtime": 31.896, "eval_samples_per_second": 111.989, "eval_steps_per_second": 4.013, "step": 6020 }, { "epoch": 10.0, "step": 6020, "total_flos": 1.651520510954496e+16, "train_loss": 2.5174192206804142, "train_runtime": 2180.484, "train_samples_per_second": 77.295, "train_steps_per_second": 2.761 } ], "logging_steps": 500, "max_steps": 6020, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 1.651520510954496e+16, "train_batch_size": 28, "trial_name": null, "trial_params": null }