{ "best_metric": 0.8255903124809265, "best_model_checkpoint": "sbert-ru-huawei-sentiment-fine-additional/checkpoint-1217", "epoch": 1.0, "eval_steps": 500, "global_step": 1217, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.020542317173377157, "grad_norm": 5.6351447105407715, "learning_rate": 3.415300546448088e-06, "loss": 0.8092, "step": 25 }, { "epoch": 0.041084634346754315, "grad_norm": 4.872824668884277, "learning_rate": 6.830601092896176e-06, "loss": 0.7381, "step": 50 }, { "epoch": 0.06162695152013147, "grad_norm": 8.779582023620605, "learning_rate": 1.0109289617486339e-05, "loss": 0.756, "step": 75 }, { "epoch": 0.08216926869350863, "grad_norm": 7.241989612579346, "learning_rate": 1.3524590163934428e-05, "loss": 0.7791, "step": 100 }, { "epoch": 0.10271158586688578, "grad_norm": 4.666262149810791, "learning_rate": 1.6939890710382514e-05, "loss": 0.7416, "step": 125 }, { "epoch": 0.12325390304026294, "grad_norm": 3.8794476985931396, "learning_rate": 2.03551912568306e-05, "loss": 0.746, "step": 150 }, { "epoch": 0.1437962202136401, "grad_norm": 8.589350700378418, "learning_rate": 2.377049180327869e-05, "loss": 0.6762, "step": 175 }, { "epoch": 0.16433853738701726, "grad_norm": 6.607258319854736, "learning_rate": 2.718579234972678e-05, "loss": 0.8163, "step": 200 }, { "epoch": 0.1848808545603944, "grad_norm": 4.876005172729492, "learning_rate": 3.0601092896174864e-05, "loss": 0.7367, "step": 225 }, { "epoch": 0.20542317173377156, "grad_norm": 5.488433361053467, "learning_rate": 3.387978142076503e-05, "loss": 0.7825, "step": 250 }, { "epoch": 0.22596548890714874, "grad_norm": 5.337741851806641, "learning_rate": 3.729508196721312e-05, "loss": 0.7578, "step": 275 }, { "epoch": 0.2465078060805259, "grad_norm": 4.779869079589844, "learning_rate": 4.07103825136612e-05, "loss": 0.7796, "step": 300 }, { "epoch": 0.26705012325390304, "grad_norm": 10.677047729492188, "learning_rate": 4.412568306010929e-05, "loss": 0.78, "step": 325 }, { "epoch": 0.2875924404272802, "grad_norm": 8.01126766204834, "learning_rate": 4.754098360655738e-05, "loss": 0.8134, "step": 350 }, { "epoch": 0.30813475760065734, "grad_norm": 4.4165263175964355, "learning_rate": 4.9893455098934556e-05, "loss": 0.7755, "step": 375 }, { "epoch": 0.3286770747740345, "grad_norm": 6.150760650634766, "learning_rate": 4.951293759512938e-05, "loss": 0.7532, "step": 400 }, { "epoch": 0.3492193919474117, "grad_norm": 5.5157341957092285, "learning_rate": 4.9132420091324204e-05, "loss": 0.8006, "step": 425 }, { "epoch": 0.3697617091207888, "grad_norm": 5.472987174987793, "learning_rate": 4.875190258751903e-05, "loss": 0.7316, "step": 450 }, { "epoch": 0.390304026294166, "grad_norm": 5.641602039337158, "learning_rate": 4.837138508371385e-05, "loss": 0.7612, "step": 475 }, { "epoch": 0.4108463434675431, "grad_norm": 8.425959587097168, "learning_rate": 4.799086757990868e-05, "loss": 0.7982, "step": 500 }, { "epoch": 0.4313886606409203, "grad_norm": 4.366255283355713, "learning_rate": 4.76103500761035e-05, "loss": 0.7686, "step": 525 }, { "epoch": 0.4519309778142975, "grad_norm": 5.278375148773193, "learning_rate": 4.7229832572298326e-05, "loss": 0.8059, "step": 550 }, { "epoch": 0.4724732949876746, "grad_norm": 4.964376926422119, "learning_rate": 4.684931506849316e-05, "loss": 0.7684, "step": 575 }, { "epoch": 0.4930156121610518, "grad_norm": 6.820468902587891, "learning_rate": 4.6468797564687975e-05, "loss": 0.7901, "step": 600 }, { "epoch": 0.5135579293344289, "grad_norm": 7.251264572143555, "learning_rate": 4.6088280060882806e-05, "loss": 0.7791, "step": 625 }, { "epoch": 0.5341002465078061, "grad_norm": 7.436245441436768, "learning_rate": 4.5707762557077624e-05, "loss": 0.7577, "step": 650 }, { "epoch": 0.5546425636811833, "grad_norm": 13.443830490112305, "learning_rate": 4.5327245053272455e-05, "loss": 0.7666, "step": 675 }, { "epoch": 0.5751848808545604, "grad_norm": 5.962008476257324, "learning_rate": 4.494672754946728e-05, "loss": 0.7564, "step": 700 }, { "epoch": 0.5957271980279375, "grad_norm": 3.0770370960235596, "learning_rate": 4.4566210045662104e-05, "loss": 0.7902, "step": 725 }, { "epoch": 0.6162695152013147, "grad_norm": 8.193988800048828, "learning_rate": 4.418569254185693e-05, "loss": 0.7324, "step": 750 }, { "epoch": 0.6368118323746919, "grad_norm": 8.510249137878418, "learning_rate": 4.380517503805175e-05, "loss": 0.7407, "step": 775 }, { "epoch": 0.657354149548069, "grad_norm": 6.3070220947265625, "learning_rate": 4.342465753424658e-05, "loss": 0.7819, "step": 800 }, { "epoch": 0.6778964667214462, "grad_norm": 6.479777812957764, "learning_rate": 4.30441400304414e-05, "loss": 0.7746, "step": 825 }, { "epoch": 0.6984387838948234, "grad_norm": 6.115039348602295, "learning_rate": 4.2663622526636226e-05, "loss": 0.7146, "step": 850 }, { "epoch": 0.7189811010682005, "grad_norm": 9.666036605834961, "learning_rate": 4.228310502283106e-05, "loss": 0.8091, "step": 875 }, { "epoch": 0.7395234182415776, "grad_norm": 5.502596378326416, "learning_rate": 4.1902587519025875e-05, "loss": 0.7606, "step": 900 }, { "epoch": 0.7600657354149548, "grad_norm": 3.2978105545043945, "learning_rate": 4.1522070015220706e-05, "loss": 0.787, "step": 925 }, { "epoch": 0.780608052588332, "grad_norm": 4.256566524505615, "learning_rate": 4.114155251141552e-05, "loss": 0.7761, "step": 950 }, { "epoch": 0.8011503697617092, "grad_norm": 8.732786178588867, "learning_rate": 4.0761035007610354e-05, "loss": 0.7634, "step": 975 }, { "epoch": 0.8216926869350862, "grad_norm": 7.156558036804199, "learning_rate": 4.038051750380518e-05, "loss": 0.6841, "step": 1000 }, { "epoch": 0.8422350041084634, "grad_norm": 5.490506172180176, "learning_rate": 4e-05, "loss": 0.7983, "step": 1025 }, { "epoch": 0.8627773212818406, "grad_norm": 5.703302383422852, "learning_rate": 3.961948249619483e-05, "loss": 0.7687, "step": 1050 }, { "epoch": 0.8833196384552178, "grad_norm": 4.977391242980957, "learning_rate": 3.923896499238965e-05, "loss": 0.7807, "step": 1075 }, { "epoch": 0.903861955628595, "grad_norm": 5.148677349090576, "learning_rate": 3.8858447488584476e-05, "loss": 0.7592, "step": 1100 }, { "epoch": 0.924404272801972, "grad_norm": 3.742412805557251, "learning_rate": 3.84779299847793e-05, "loss": 0.7896, "step": 1125 }, { "epoch": 0.9449465899753492, "grad_norm": 7.102028846740723, "learning_rate": 3.8097412480974125e-05, "loss": 0.7355, "step": 1150 }, { "epoch": 0.9654889071487264, "grad_norm": 8.146830558776855, "learning_rate": 3.7716894977168956e-05, "loss": 0.7436, "step": 1175 }, { "epoch": 0.9860312243221035, "grad_norm": 6.869138240814209, "learning_rate": 3.7336377473363774e-05, "loss": 0.8159, "step": 1200 }, { "epoch": 1.0, "eval_accuracy": 0.6762560361656221, "eval_f1_macro": 0.4664785690298885, "eval_f1_micro": 0.6762560361656221, "eval_f1_weighted": 0.6508483272042555, "eval_loss": 0.8255903124809265, "eval_precision_macro": 0.5211296463695433, "eval_precision_micro": 0.6762560361656221, "eval_precision_weighted": 0.6496572452074998, "eval_recall_macro": 0.4850420718640266, "eval_recall_micro": 0.6762560361656221, "eval_recall_weighted": 0.6762560361656221, "eval_runtime": 61.9536, "eval_samples_per_second": 157.101, "eval_steps_per_second": 2.47, "step": 1217 } ], "logging_steps": 25, "max_steps": 3651, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.8146751147933696e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }