{ "best_metric": 0.513288052386553, "best_model_checkpoint": "bkai-fine-tuned-legal/checkpoint-240", "epoch": 4.0, "eval_steps": 500, "global_step": 240, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 3051630.25, "learning_rate": 2.9970400926424075e-05, "loss": 0.9663, "step": 60 }, { "epoch": 1.0, "eval_dim_128_cosine_accuracy@1": 0.2080602426439827, "eval_dim_128_cosine_accuracy@10": 0.6729884256031237, "eval_dim_128_cosine_accuracy@3": 0.4049644401059824, "eval_dim_128_cosine_accuracy@5": 0.49504950495049505, "eval_dim_128_cosine_map@100": 0.34660918730461937, "eval_dim_128_cosine_mrr@10": 0.3358910891089097, "eval_dim_128_cosine_ndcg@10": 0.41500734332231626, "eval_dim_128_cosine_precision@1": 0.2080602426439827, "eval_dim_128_cosine_precision@10": 0.06729884256031238, "eval_dim_128_cosine_precision@3": 0.13498814670199413, "eval_dim_128_cosine_precision@5": 0.09900990099009901, "eval_dim_128_cosine_recall@1": 0.2080602426439827, "eval_dim_128_cosine_recall@10": 0.6729884256031237, "eval_dim_128_cosine_recall@3": 0.4049644401059824, "eval_dim_128_cosine_recall@5": 0.49504950495049505, "eval_dim_256_cosine_accuracy@1": 0.21321991354064984, "eval_dim_256_cosine_accuracy@10": 0.6862362292567284, "eval_dim_256_cosine_accuracy@3": 0.41598103472319065, "eval_dim_256_cosine_accuracy@5": 0.5063450006972529, "eval_dim_256_cosine_map@100": 0.3546268888431031, "eval_dim_256_cosine_mrr@10": 0.34420854057236694, "eval_dim_256_cosine_ndcg@10": 0.424598027833536, "eval_dim_256_cosine_precision@1": 0.21321991354064984, "eval_dim_256_cosine_precision@10": 0.06862362292567285, "eval_dim_256_cosine_precision@3": 0.1386603449077302, "eval_dim_256_cosine_precision@5": 0.10126900013945056, "eval_dim_256_cosine_recall@1": 0.21321991354064984, "eval_dim_256_cosine_recall@10": 0.6862362292567284, "eval_dim_256_cosine_recall@3": 0.41598103472319065, "eval_dim_256_cosine_recall@5": 0.5063450006972529, "eval_dim_512_cosine_accuracy@1": 0.21433551805884815, "eval_dim_512_cosine_accuracy@10": 0.6919537024124948, "eval_dim_512_cosine_accuracy@3": 0.4211407056198578, "eval_dim_512_cosine_accuracy@5": 0.508855110863199, "eval_dim_512_cosine_map@100": 0.3564257538175301, "eval_dim_512_cosine_mrr@10": 0.34621192501543907, "eval_dim_512_cosine_ndcg@10": 0.42739973393443165, "eval_dim_512_cosine_precision@1": 0.21433551805884815, "eval_dim_512_cosine_precision@10": 0.06919537024124948, "eval_dim_512_cosine_precision@3": 0.14038023520661924, "eval_dim_512_cosine_precision@5": 0.1017710221726398, "eval_dim_512_cosine_recall@1": 0.21433551805884815, "eval_dim_512_cosine_recall@10": 0.6919537024124948, "eval_dim_512_cosine_recall@3": 0.4211407056198578, "eval_dim_512_cosine_recall@5": 0.508855110863199, "eval_dim_64_cosine_accuracy@1": 0.19704364802677451, "eval_dim_64_cosine_accuracy@10": 0.6503974341096082, "eval_dim_64_cosine_accuracy@3": 0.3846046576488635, "eval_dim_64_cosine_accuracy@5": 0.48096499790824154, "eval_dim_64_cosine_map@100": 0.3332997375022508, "eval_dim_64_cosine_mrr@10": 0.3223062799237654, "eval_dim_64_cosine_ndcg@10": 0.399385228043632, "eval_dim_64_cosine_precision@1": 0.19704364802677451, "eval_dim_64_cosine_precision@10": 0.06503974341096082, "eval_dim_64_cosine_precision@3": 0.12820155254962115, "eval_dim_64_cosine_precision@5": 0.0961929995816483, "eval_dim_64_cosine_recall@1": 0.19704364802677451, "eval_dim_64_cosine_recall@10": 0.6503974341096082, "eval_dim_64_cosine_recall@3": 0.3846046576488635, "eval_dim_64_cosine_recall@5": 0.48096499790824154, "eval_dim_768_cosine_accuracy@1": 0.22116859573281272, "eval_dim_768_cosine_accuracy@10": 0.6947427137079906, "eval_dim_768_cosine_accuracy@3": 0.4196067494073351, "eval_dim_768_cosine_accuracy@5": 0.5128991772416678, "eval_dim_768_cosine_map@100": 0.3609325312259912, "eval_dim_768_cosine_mrr@10": 0.3508835189353944, "eval_dim_768_cosine_ndcg@10": 0.4315716703139609, "eval_dim_768_cosine_precision@1": 0.22116859573281272, "eval_dim_768_cosine_precision@10": 0.06947427137079905, "eval_dim_768_cosine_precision@3": 0.13986891646911168, "eval_dim_768_cosine_precision@5": 0.10257983544833356, "eval_dim_768_cosine_recall@1": 0.22116859573281272, "eval_dim_768_cosine_recall@10": 0.6947427137079906, "eval_dim_768_cosine_recall@3": 0.4196067494073351, "eval_dim_768_cosine_recall@5": 0.5128991772416678, "eval_loss": 0.30127617716789246, "eval_runtime": 4760.4689, "eval_samples_per_second": 1.507, "eval_sequential_score": 0.399385228043632, "eval_steps_per_second": 0.019, "step": 60 }, { "epoch": 2.0, "grad_norm": 1582530.375, "learning_rate": 2.655769864163684e-05, "loss": 0.2399, "step": 120 }, { "epoch": 2.0, "eval_dim_128_cosine_accuracy@1": 0.3225491563240831, "eval_dim_128_cosine_accuracy@10": 0.7071538139729466, "eval_dim_128_cosine_accuracy@3": 0.46102356714544696, "eval_dim_128_cosine_accuracy@5": 0.53772137777158, "eval_dim_128_cosine_map@100": 0.43211454127792864, "eval_dim_128_cosine_mrr@10": 0.42227551225947657, "eval_dim_128_cosine_ndcg@10": 0.488553211966058, "eval_dim_128_cosine_precision@1": 0.3225491563240831, "eval_dim_128_cosine_precision@10": 0.07071538139729466, "eval_dim_128_cosine_precision@3": 0.15367452238181564, "eval_dim_128_cosine_precision@5": 0.107544275554316, "eval_dim_128_cosine_recall@1": 0.3225491563240831, "eval_dim_128_cosine_recall@10": 0.7071538139729466, "eval_dim_128_cosine_recall@3": 0.46102356714544696, "eval_dim_128_cosine_recall@5": 0.53772137777158, "eval_dim_256_cosine_accuracy@1": 0.32854553060939895, "eval_dim_256_cosine_accuracy@10": 0.7131501882582625, "eval_dim_256_cosine_accuracy@3": 0.464928182959141, "eval_dim_256_cosine_accuracy@5": 0.5421837958443732, "eval_dim_256_cosine_map@100": 0.4374891918057777, "eval_dim_256_cosine_mrr@10": 0.42779830799981505, "eval_dim_256_cosine_ndcg@10": 0.49410729071963905, "eval_dim_256_cosine_precision@1": 0.32854553060939895, "eval_dim_256_cosine_precision@10": 0.07131501882582625, "eval_dim_256_cosine_precision@3": 0.1549760609863803, "eval_dim_256_cosine_precision@5": 0.10843675916887462, "eval_dim_256_cosine_recall@1": 0.32854553060939895, "eval_dim_256_cosine_recall@10": 0.7131501882582625, "eval_dim_256_cosine_recall@3": 0.464928182959141, "eval_dim_256_cosine_recall@5": 0.5421837958443732, "eval_dim_512_cosine_accuracy@1": 0.33286849811741737, "eval_dim_512_cosine_accuracy@10": 0.7142657927764607, "eval_dim_512_cosine_accuracy@3": 0.46966950216148373, "eval_dim_512_cosine_accuracy@5": 0.5445544554455446, "eval_dim_512_cosine_map@100": 0.4410220731771345, "eval_dim_512_cosine_mrr@10": 0.4314777000838923, "eval_dim_512_cosine_ndcg@10": 0.4972638536904392, "eval_dim_512_cosine_precision@1": 0.33286849811741737, "eval_dim_512_cosine_precision@10": 0.07142657927764608, "eval_dim_512_cosine_precision@3": 0.15655650072049457, "eval_dim_512_cosine_precision@5": 0.10891089108910891, "eval_dim_512_cosine_recall@1": 0.33286849811741737, "eval_dim_512_cosine_recall@10": 0.7142657927764607, "eval_dim_512_cosine_recall@3": 0.46966950216148373, "eval_dim_512_cosine_recall@5": 0.5445544554455446, "eval_dim_64_cosine_accuracy@1": 0.306233440245433, "eval_dim_64_cosine_accuracy@10": 0.6906986473295217, "eval_dim_64_cosine_accuracy@3": 0.44205829033607585, "eval_dim_64_cosine_accuracy@5": 0.5208478594338307, "eval_dim_64_cosine_map@100": 0.416048690892794, "eval_dim_64_cosine_mrr@10": 0.4057003738603239, "eval_dim_64_cosine_ndcg@10": 0.47195343698452114, "eval_dim_64_cosine_precision@1": 0.306233440245433, "eval_dim_64_cosine_precision@10": 0.06906986473295217, "eval_dim_64_cosine_precision@3": 0.1473527634453586, "eval_dim_64_cosine_precision@5": 0.10416957188676614, "eval_dim_64_cosine_recall@1": 0.306233440245433, "eval_dim_64_cosine_recall@10": 0.6906986473295217, "eval_dim_64_cosine_recall@3": 0.44205829033607585, "eval_dim_64_cosine_recall@5": 0.5208478594338307, "eval_dim_768_cosine_accuracy@1": 0.33412355320039044, "eval_dim_768_cosine_accuracy@10": 0.7152419467298843, "eval_dim_768_cosine_accuracy@3": 0.47106400780923163, "eval_dim_768_cosine_accuracy@5": 0.5448333565750941, "eval_dim_768_cosine_map@100": 0.44207831994901653, "eval_dim_768_cosine_mrr@10": 0.4325288363846451, "eval_dim_768_cosine_ndcg@10": 0.49828039957821635, "eval_dim_768_cosine_precision@1": 0.33412355320039044, "eval_dim_768_cosine_precision@10": 0.07152419467298843, "eval_dim_768_cosine_precision@3": 0.15702133593641054, "eval_dim_768_cosine_precision@5": 0.10896667131501885, "eval_dim_768_cosine_recall@1": 0.33412355320039044, "eval_dim_768_cosine_recall@10": 0.7152419467298843, "eval_dim_768_cosine_recall@3": 0.47106400780923163, "eval_dim_768_cosine_recall@5": 0.5448333565750941, "eval_loss": 0.18898583948612213, "eval_runtime": 4757.4, "eval_samples_per_second": 1.508, "eval_sequential_score": 0.47195343698452114, "eval_steps_per_second": 0.019, "step": 120 }, { "epoch": 3.0, "grad_norm": 792357.3125, "learning_rate": 1.8730348307472828e-05, "loss": 0.1032, "step": 180 }, { "epoch": 3.0, "eval_dim_128_cosine_accuracy@1": 0.3468135545948961, "eval_dim_128_cosine_accuracy@10": 0.7173337052015061, "eval_dim_128_cosine_accuracy@3": 0.46520708408869055, "eval_dim_128_cosine_accuracy@5": 0.5407892901966252, "eval_dim_128_cosine_map@100": 0.447677926706174, "eval_dim_128_cosine_mrr@10": 0.43798207285517343, "eval_dim_128_cosine_ndcg@10": 0.5026334575249692, "eval_dim_128_cosine_precision@1": 0.3468135545948961, "eval_dim_128_cosine_precision@10": 0.07173337052015061, "eval_dim_128_cosine_precision@3": 0.15506902802956352, "eval_dim_128_cosine_precision@5": 0.10815785803932505, "eval_dim_128_cosine_recall@1": 0.3468135545948961, "eval_dim_128_cosine_recall@10": 0.7173337052015061, "eval_dim_128_cosine_recall@3": 0.46520708408869055, "eval_dim_128_cosine_recall@5": 0.5407892901966252, "eval_dim_256_cosine_accuracy@1": 0.35043926927904057, "eval_dim_256_cosine_accuracy@10": 0.7156602984242086, "eval_dim_256_cosine_accuracy@3": 0.4700878538558081, "eval_dim_256_cosine_accuracy@5": 0.544972807139869, "eval_dim_256_cosine_map@100": 0.4507378853512356, "eval_dim_256_cosine_mrr@10": 0.44098468699988747, "eval_dim_256_cosine_ndcg@10": 0.5046872072161374, "eval_dim_256_cosine_precision@1": 0.35043926927904057, "eval_dim_256_cosine_precision@10": 0.07156602984242087, "eval_dim_256_cosine_precision@3": 0.15669595128526936, "eval_dim_256_cosine_precision@5": 0.1089945614279738, "eval_dim_256_cosine_recall@1": 0.35043926927904057, "eval_dim_256_cosine_recall@10": 0.7156602984242086, "eval_dim_256_cosine_recall@3": 0.4700878538558081, "eval_dim_256_cosine_recall@5": 0.544972807139869, "eval_dim_512_cosine_accuracy@1": 0.35169432436201364, "eval_dim_512_cosine_accuracy@10": 0.7231906289220471, "eval_dim_512_cosine_accuracy@3": 0.4700878538558081, "eval_dim_512_cosine_accuracy@5": 0.5448333565750941, "eval_dim_512_cosine_map@100": 0.45212818188900633, "eval_dim_512_cosine_mrr@10": 0.44279272997722396, "eval_dim_512_cosine_ndcg@10": 0.5076751608428753, "eval_dim_512_cosine_precision@1": 0.35169432436201364, "eval_dim_512_cosine_precision@10": 0.07231906289220472, "eval_dim_512_cosine_precision@3": 0.15669595128526936, "eval_dim_512_cosine_precision@5": 0.10896667131501885, "eval_dim_512_cosine_recall@1": 0.35169432436201364, "eval_dim_512_cosine_recall@10": 0.7231906289220471, "eval_dim_512_cosine_recall@3": 0.4700878538558081, "eval_dim_512_cosine_recall@5": 0.5448333565750941, "eval_dim_64_cosine_accuracy@1": 0.3356575094129131, "eval_dim_64_cosine_accuracy@10": 0.6976711755682611, "eval_dim_64_cosine_accuracy@3": 0.4523776321294101, "eval_dim_64_cosine_accuracy@5": 0.5307488495328406, "eval_dim_64_cosine_map@100": 0.43559811808588345, "eval_dim_64_cosine_mrr@10": 0.42538852476797007, "eval_dim_64_cosine_ndcg@10": 0.48852868771634256, "eval_dim_64_cosine_precision@1": 0.3356575094129131, "eval_dim_64_cosine_precision@10": 0.0697671175568261, "eval_dim_64_cosine_precision@3": 0.1507925440431367, "eval_dim_64_cosine_precision@5": 0.10614976990656813, "eval_dim_64_cosine_recall@1": 0.3356575094129131, "eval_dim_64_cosine_recall@10": 0.6976711755682611, "eval_dim_64_cosine_recall@3": 0.4523776321294101, "eval_dim_64_cosine_recall@5": 0.5307488495328406, "eval_dim_768_cosine_accuracy@1": 0.3568539952586808, "eval_dim_768_cosine_accuracy@10": 0.7237484311811463, "eval_dim_768_cosine_accuracy@3": 0.473434667410403, "eval_dim_768_cosine_accuracy@5": 0.5530609398968066, "eval_dim_768_cosine_map@100": 0.456489701012935, "eval_dim_768_cosine_mrr@10": 0.44698465822437466, "eval_dim_768_cosine_ndcg@10": 0.5110884733352402, "eval_dim_768_cosine_precision@1": 0.3568539952586808, "eval_dim_768_cosine_precision@10": 0.07237484311811462, "eval_dim_768_cosine_precision@3": 0.15781155580346765, "eval_dim_768_cosine_precision@5": 0.11061218797936133, "eval_dim_768_cosine_recall@1": 0.3568539952586808, "eval_dim_768_cosine_recall@10": 0.7237484311811463, "eval_dim_768_cosine_recall@3": 0.473434667410403, "eval_dim_768_cosine_recall@5": 0.5530609398968066, "eval_loss": 0.15947215259075165, "eval_runtime": 4772.3881, "eval_samples_per_second": 1.503, "eval_sequential_score": 0.48852868771634256, "eval_steps_per_second": 0.019, "step": 180 }, { "epoch": 4.0, "grad_norm": 528732.6875, "learning_rate": 9.478131709729831e-06, "loss": 0.0562, "step": 240 }, { "epoch": 4.0, "eval_dim_128_cosine_accuracy@1": 0.35016036814949103, "eval_dim_128_cosine_accuracy@10": 0.71803095802538, "eval_dim_128_cosine_accuracy@3": 0.466880490865988, "eval_dim_128_cosine_accuracy@5": 0.5453911588341933, "eval_dim_128_cosine_map@100": 0.45032612042753656, "eval_dim_128_cosine_mrr@10": 0.44050070500007754, "eval_dim_128_cosine_ndcg@10": 0.5047593926261945, "eval_dim_128_cosine_precision@1": 0.35016036814949103, "eval_dim_128_cosine_precision@10": 0.07180309580253802, "eval_dim_128_cosine_precision@3": 0.15562683028866267, "eval_dim_128_cosine_precision@5": 0.10907823176683866, "eval_dim_128_cosine_recall@1": 0.35016036814949103, "eval_dim_128_cosine_recall@10": 0.71803095802538, "eval_dim_128_cosine_recall@3": 0.466880490865988, "eval_dim_128_cosine_recall@5": 0.5453911588341933, "eval_dim_256_cosine_accuracy@1": 0.34932366476084226, "eval_dim_256_cosine_accuracy@10": 0.7227722772277227, "eval_dim_256_cosine_accuracy@3": 0.46660158973643845, "eval_dim_256_cosine_accuracy@5": 0.5445544554455446, "eval_dim_256_cosine_map@100": 0.4503116262429447, "eval_dim_256_cosine_mrr@10": 0.44069499505282583, "eval_dim_256_cosine_ndcg@10": 0.5059436181895386, "eval_dim_256_cosine_precision@1": 0.34932366476084226, "eval_dim_256_cosine_precision@10": 0.07227722772277227, "eval_dim_256_cosine_precision@3": 0.15553386324547946, "eval_dim_256_cosine_precision@5": 0.10891089108910891, "eval_dim_256_cosine_recall@1": 0.34932366476084226, "eval_dim_256_cosine_recall@10": 0.7227722772277227, "eval_dim_256_cosine_recall@3": 0.46660158973643845, "eval_dim_256_cosine_recall@5": 0.5445544554455446, "eval_dim_512_cosine_accuracy@1": 0.3550411379166086, "eval_dim_512_cosine_accuracy@10": 0.7234695300515968, "eval_dim_512_cosine_accuracy@3": 0.4693906010319342, "eval_dim_512_cosine_accuracy@5": 0.5484590712592387, "eval_dim_512_cosine_map@100": 0.45428241375668854, "eval_dim_512_cosine_mrr@10": 0.4447460339595335, "eval_dim_512_cosine_ndcg@10": 0.5091968606671677, "eval_dim_512_cosine_precision@1": 0.3550411379166086, "eval_dim_512_cosine_precision@10": 0.07234695300515968, "eval_dim_512_cosine_precision@3": 0.1564635336773114, "eval_dim_512_cosine_precision@5": 0.10969181425184772, "eval_dim_512_cosine_recall@1": 0.3550411379166086, "eval_dim_512_cosine_recall@10": 0.7234695300515968, "eval_dim_512_cosine_recall@3": 0.4693906010319342, "eval_dim_512_cosine_recall@5": 0.5484590712592387, "eval_dim_64_cosine_accuracy@1": 0.33760981731976014, "eval_dim_64_cosine_accuracy@10": 0.7082694184911449, "eval_dim_64_cosine_accuracy@3": 0.45349323664760843, "eval_dim_64_cosine_accuracy@5": 0.5308883000976153, "eval_dim_64_cosine_map@100": 0.4383018870756752, "eval_dim_64_cosine_mrr@10": 0.4285745828103948, "eval_dim_64_cosine_ndcg@10": 0.49330694449266793, "eval_dim_64_cosine_precision@1": 0.33760981731976014, "eval_dim_64_cosine_precision@10": 0.07082694184911449, "eval_dim_64_cosine_precision@3": 0.1511644122158695, "eval_dim_64_cosine_precision@5": 0.10617766001952307, "eval_dim_64_cosine_recall@1": 0.33760981731976014, "eval_dim_64_cosine_recall@10": 0.7082694184911449, "eval_dim_64_cosine_recall@3": 0.45349323664760843, "eval_dim_64_cosine_recall@5": 0.5308883000976153, "eval_dim_768_cosine_accuracy@1": 0.36061916050760007, "eval_dim_768_cosine_accuracy@10": 0.7238878817459211, "eval_dim_768_cosine_accuracy@3": 0.478036536047971, "eval_dim_768_cosine_accuracy@5": 0.5529214893320318, "eval_dim_768_cosine_map@100": 0.45954079808905995, "eval_dim_768_cosine_mrr@10": 0.44983946583793183, "eval_dim_768_cosine_ndcg@10": 0.513288052386553, "eval_dim_768_cosine_precision@1": 0.36061916050760007, "eval_dim_768_cosine_precision@10": 0.07238878817459211, "eval_dim_768_cosine_precision@3": 0.15934551201599031, "eval_dim_768_cosine_precision@5": 0.11058429786640636, "eval_dim_768_cosine_recall@1": 0.36061916050760007, "eval_dim_768_cosine_recall@10": 0.7238878817459211, "eval_dim_768_cosine_recall@3": 0.478036536047971, "eval_dim_768_cosine_recall@5": 0.5529214893320318, "eval_loss": 0.14603222906589508, "eval_runtime": 4765.1388, "eval_samples_per_second": 1.505, "eval_sequential_score": 0.49330694449266793, "eval_steps_per_second": 0.019, "step": 240 } ], "logging_steps": 500, "max_steps": 354, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.001 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 80, "trial_name": null, "trial_params": null }