{
  "best_metric": 0.8255903124809265,
  "best_model_checkpoint": "sbert-ru-huawei-sentiment-fine-additional/checkpoint-1217",
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 1217,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.020542317173377157,
      "grad_norm": 5.6351447105407715,
      "learning_rate": 3.415300546448088e-06,
      "loss": 0.8092,
      "step": 25
    },
    {
      "epoch": 0.041084634346754315,
      "grad_norm": 4.872824668884277,
      "learning_rate": 6.830601092896176e-06,
      "loss": 0.7381,
      "step": 50
    },
    {
      "epoch": 0.06162695152013147,
      "grad_norm": 8.779582023620605,
      "learning_rate": 1.0109289617486339e-05,
      "loss": 0.756,
      "step": 75
    },
    {
      "epoch": 0.08216926869350863,
      "grad_norm": 7.241989612579346,
      "learning_rate": 1.3524590163934428e-05,
      "loss": 0.7791,
      "step": 100
    },
    {
      "epoch": 0.10271158586688578,
      "grad_norm": 4.666262149810791,
      "learning_rate": 1.6939890710382514e-05,
      "loss": 0.7416,
      "step": 125
    },
    {
      "epoch": 0.12325390304026294,
      "grad_norm": 3.8794476985931396,
      "learning_rate": 2.03551912568306e-05,
      "loss": 0.746,
      "step": 150
    },
    {
      "epoch": 0.1437962202136401,
      "grad_norm": 8.589350700378418,
      "learning_rate": 2.377049180327869e-05,
      "loss": 0.6762,
      "step": 175
    },
    {
      "epoch": 0.16433853738701726,
      "grad_norm": 6.607258319854736,
      "learning_rate": 2.718579234972678e-05,
      "loss": 0.8163,
      "step": 200
    },
    {
      "epoch": 0.1848808545603944,
      "grad_norm": 4.876005172729492,
      "learning_rate": 3.0601092896174864e-05,
      "loss": 0.7367,
      "step": 225
    },
    {
      "epoch": 0.20542317173377156,
      "grad_norm": 5.488433361053467,
      "learning_rate": 3.387978142076503e-05,
      "loss": 0.7825,
      "step": 250
    },
    {
      "epoch": 0.22596548890714874,
      "grad_norm": 5.337741851806641,
      "learning_rate": 3.729508196721312e-05,
      "loss": 0.7578,
      "step": 275
    },
    {
      "epoch": 0.2465078060805259,
      "grad_norm": 4.779869079589844,
      "learning_rate": 4.07103825136612e-05,
      "loss": 0.7796,
      "step": 300
    },
    {
      "epoch": 0.26705012325390304,
      "grad_norm": 10.677047729492188,
      "learning_rate": 4.412568306010929e-05,
      "loss": 0.78,
      "step": 325
    },
    {
      "epoch": 0.2875924404272802,
      "grad_norm": 8.01126766204834,
      "learning_rate": 4.754098360655738e-05,
      "loss": 0.8134,
      "step": 350
    },
    {
      "epoch": 0.30813475760065734,
      "grad_norm": 4.4165263175964355,
      "learning_rate": 4.9893455098934556e-05,
      "loss": 0.7755,
      "step": 375
    },
    {
      "epoch": 0.3286770747740345,
      "grad_norm": 6.150760650634766,
      "learning_rate": 4.951293759512938e-05,
      "loss": 0.7532,
      "step": 400
    },
    {
      "epoch": 0.3492193919474117,
      "grad_norm": 5.5157341957092285,
      "learning_rate": 4.9132420091324204e-05,
      "loss": 0.8006,
      "step": 425
    },
    {
      "epoch": 0.3697617091207888,
      "grad_norm": 5.472987174987793,
      "learning_rate": 4.875190258751903e-05,
      "loss": 0.7316,
      "step": 450
    },
    {
      "epoch": 0.390304026294166,
      "grad_norm": 5.641602039337158,
      "learning_rate": 4.837138508371385e-05,
      "loss": 0.7612,
      "step": 475
    },
    {
      "epoch": 0.4108463434675431,
      "grad_norm": 8.425959587097168,
      "learning_rate": 4.799086757990868e-05,
      "loss": 0.7982,
      "step": 500
    },
    {
      "epoch": 0.4313886606409203,
      "grad_norm": 4.366255283355713,
      "learning_rate": 4.76103500761035e-05,
      "loss": 0.7686,
      "step": 525
    },
    {
      "epoch": 0.4519309778142975,
      "grad_norm": 5.278375148773193,
      "learning_rate": 4.7229832572298326e-05,
      "loss": 0.8059,
      "step": 550
    },
    {
      "epoch": 0.4724732949876746,
      "grad_norm": 4.964376926422119,
      "learning_rate": 4.684931506849316e-05,
      "loss": 0.7684,
      "step": 575
    },
    {
      "epoch": 0.4930156121610518,
      "grad_norm": 6.820468902587891,
      "learning_rate": 4.6468797564687975e-05,
      "loss": 0.7901,
      "step": 600
    },
    {
      "epoch": 0.5135579293344289,
      "grad_norm": 7.251264572143555,
      "learning_rate": 4.6088280060882806e-05,
      "loss": 0.7791,
      "step": 625
    },
    {
      "epoch": 0.5341002465078061,
      "grad_norm": 7.436245441436768,
      "learning_rate": 4.5707762557077624e-05,
      "loss": 0.7577,
      "step": 650
    },
    {
      "epoch": 0.5546425636811833,
      "grad_norm": 13.443830490112305,
      "learning_rate": 4.5327245053272455e-05,
      "loss": 0.7666,
      "step": 675
    },
    {
      "epoch": 0.5751848808545604,
      "grad_norm": 5.962008476257324,
      "learning_rate": 4.494672754946728e-05,
      "loss": 0.7564,
      "step": 700
    },
    {
      "epoch": 0.5957271980279375,
      "grad_norm": 3.0770370960235596,
      "learning_rate": 4.4566210045662104e-05,
      "loss": 0.7902,
      "step": 725
    },
    {
      "epoch": 0.6162695152013147,
      "grad_norm": 8.193988800048828,
      "learning_rate": 4.418569254185693e-05,
      "loss": 0.7324,
      "step": 750
    },
    {
      "epoch": 0.6368118323746919,
      "grad_norm": 8.510249137878418,
      "learning_rate": 4.380517503805175e-05,
      "loss": 0.7407,
      "step": 775
    },
    {
      "epoch": 0.657354149548069,
      "grad_norm": 6.3070220947265625,
      "learning_rate": 4.342465753424658e-05,
      "loss": 0.7819,
      "step": 800
    },
    {
      "epoch": 0.6778964667214462,
      "grad_norm": 6.479777812957764,
      "learning_rate": 4.30441400304414e-05,
      "loss": 0.7746,
      "step": 825
    },
    {
      "epoch": 0.6984387838948234,
      "grad_norm": 6.115039348602295,
      "learning_rate": 4.2663622526636226e-05,
      "loss": 0.7146,
      "step": 850
    },
    {
      "epoch": 0.7189811010682005,
      "grad_norm": 9.666036605834961,
      "learning_rate": 4.228310502283106e-05,
      "loss": 0.8091,
      "step": 875
    },
    {
      "epoch": 0.7395234182415776,
      "grad_norm": 5.502596378326416,
      "learning_rate": 4.1902587519025875e-05,
      "loss": 0.7606,
      "step": 900
    },
    {
      "epoch": 0.7600657354149548,
      "grad_norm": 3.2978105545043945,
      "learning_rate": 4.1522070015220706e-05,
      "loss": 0.787,
      "step": 925
    },
    {
      "epoch": 0.780608052588332,
      "grad_norm": 4.256566524505615,
      "learning_rate": 4.114155251141552e-05,
      "loss": 0.7761,
      "step": 950
    },
    {
      "epoch": 0.8011503697617092,
      "grad_norm": 8.732786178588867,
      "learning_rate": 4.0761035007610354e-05,
      "loss": 0.7634,
      "step": 975
    },
    {
      "epoch": 0.8216926869350862,
      "grad_norm": 7.156558036804199,
      "learning_rate": 4.038051750380518e-05,
      "loss": 0.6841,
      "step": 1000
    },
    {
      "epoch": 0.8422350041084634,
      "grad_norm": 5.490506172180176,
      "learning_rate": 4e-05,
      "loss": 0.7983,
      "step": 1025
    },
    {
      "epoch": 0.8627773212818406,
      "grad_norm": 5.703302383422852,
      "learning_rate": 3.961948249619483e-05,
      "loss": 0.7687,
      "step": 1050
    },
    {
      "epoch": 0.8833196384552178,
      "grad_norm": 4.977391242980957,
      "learning_rate": 3.923896499238965e-05,
      "loss": 0.7807,
      "step": 1075
    },
    {
      "epoch": 0.903861955628595,
      "grad_norm": 5.148677349090576,
      "learning_rate": 3.8858447488584476e-05,
      "loss": 0.7592,
      "step": 1100
    },
    {
      "epoch": 0.924404272801972,
      "grad_norm": 3.742412805557251,
      "learning_rate": 3.84779299847793e-05,
      "loss": 0.7896,
      "step": 1125
    },
    {
      "epoch": 0.9449465899753492,
      "grad_norm": 7.102028846740723,
      "learning_rate": 3.8097412480974125e-05,
      "loss": 0.7355,
      "step": 1150
    },
    {
      "epoch": 0.9654889071487264,
      "grad_norm": 8.146830558776855,
      "learning_rate": 3.7716894977168956e-05,
      "loss": 0.7436,
      "step": 1175
    },
    {
      "epoch": 0.9860312243221035,
      "grad_norm": 6.869138240814209,
      "learning_rate": 3.7336377473363774e-05,
      "loss": 0.8159,
      "step": 1200
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.6762560361656221,
      "eval_f1_macro": 0.4664785690298885,
      "eval_f1_micro": 0.6762560361656221,
      "eval_f1_weighted": 0.6508483272042555,
      "eval_loss": 0.8255903124809265,
      "eval_precision_macro": 0.5211296463695433,
      "eval_precision_micro": 0.6762560361656221,
      "eval_precision_weighted": 0.6496572452074998,
      "eval_recall_macro": 0.4850420718640266,
      "eval_recall_micro": 0.6762560361656221,
      "eval_recall_weighted": 0.6762560361656221,
      "eval_runtime": 61.9536,
      "eval_samples_per_second": 157.101,
      "eval_steps_per_second": 2.47,
      "step": 1217
    }
  ],
  "logging_steps": 25,
  "max_steps": 3651,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.8146751147933696e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}