|
{ |
|
"best_metric": 1.0848859548568726, |
|
"best_model_checkpoint": "output/Baichuan-13B-Chat_lora_wqs/checkpoint-1900", |
|
"epoch": 1.998465865507543, |
|
"global_step": 1954, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9996768893414955e-05, |
|
"loss": 1.5804, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.998707640886381e-05, |
|
"loss": 1.36, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.99709250517426e-05, |
|
"loss": 1.2505, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.994831899699185e-05, |
|
"loss": 1.2495, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.9919264088017345e-05, |
|
"loss": 1.2084, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.98837678351797e-05, |
|
"loss": 1.1923, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.984183941385301e-05, |
|
"loss": 1.1929, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.979348966205315e-05, |
|
"loss": 1.1736, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.9738731077636225e-05, |
|
"loss": 1.1626, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.9677577815068056e-05, |
|
"loss": 1.1779, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 1.1614866256713867, |
|
"eval_runtime": 26.6319, |
|
"eval_samples_per_second": 23.768, |
|
"eval_steps_per_second": 3.004, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.9610045681765385e-05, |
|
"loss": 1.1527, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.953615213400987e-05, |
|
"loss": 1.1638, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.945591627243581e-05, |
|
"loss": 1.155, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9369358837092853e-05, |
|
"loss": 1.1672, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.927650220208495e-05, |
|
"loss": 1.1674, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.917737036978689e-05, |
|
"loss": 1.1511, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.907198896463996e-05, |
|
"loss": 1.1497, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.8960385226528335e-05, |
|
"loss": 1.1271, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8842588003737854e-05, |
|
"loss": 1.1888, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.87186277454991e-05, |
|
"loss": 1.1494, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 1.1362522840499878, |
|
"eval_runtime": 23.9991, |
|
"eval_samples_per_second": 26.376, |
|
"eval_steps_per_second": 3.333, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.858853649411662e-05, |
|
"loss": 1.1425, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.845234787668632e-05, |
|
"loss": 1.1591, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.831009709640329e-05, |
|
"loss": 1.118, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.8161820923462165e-05, |
|
"loss": 1.1469, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.800755768555244e-05, |
|
"loss": 1.1128, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.784734725795123e-05, |
|
"loss": 1.152, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.768123105321596e-05, |
|
"loss": 1.125, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.7509252010479645e-05, |
|
"loss": 1.123, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.7331454584351686e-05, |
|
"loss": 1.1294, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.714788473342685e-05, |
|
"loss": 1.1299, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 1.1234092712402344, |
|
"eval_runtime": 23.8841, |
|
"eval_samples_per_second": 26.503, |
|
"eval_steps_per_second": 3.35, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.695858990840544e-05, |
|
"loss": 1.1195, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.6763619039827936e-05, |
|
"loss": 1.1233, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.6563022525426905e-05, |
|
"loss": 1.1281, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.6356852217099856e-05, |
|
"loss": 1.0921, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.614516140750604e-05, |
|
"loss": 1.1106, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.592800481629097e-05, |
|
"loss": 1.1103, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.570543857594201e-05, |
|
"loss": 1.1162, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.547752021727873e-05, |
|
"loss": 1.1341, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.52443086545819e-05, |
|
"loss": 1.1098, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.5005864170364784e-05, |
|
"loss": 1.1109, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 1.115441083908081, |
|
"eval_runtime": 23.9395, |
|
"eval_samples_per_second": 26.442, |
|
"eval_steps_per_second": 3.342, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.476224839979084e-05, |
|
"loss": 1.1114, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.4513524314741714e-05, |
|
"loss": 1.132, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.425975620753973e-05, |
|
"loss": 1.1282, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.4001009674329054e-05, |
|
"loss": 1.1135, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.373735159811988e-05, |
|
"loss": 1.1366, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.3468850131499917e-05, |
|
"loss": 1.1052, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.31955746790177e-05, |
|
"loss": 1.1148, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.291759587924237e-05, |
|
"loss": 1.1329, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.263498558650434e-05, |
|
"loss": 1.1064, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.234781685232187e-05, |
|
"loss": 1.12, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.108825922012329, |
|
"eval_runtime": 23.967, |
|
"eval_samples_per_second": 26.411, |
|
"eval_steps_per_second": 3.338, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.205616390651796e-05, |
|
"loss": 1.0966, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.1760102138032956e-05, |
|
"loss": 1.1145, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.145970807543721e-05, |
|
"loss": 1.1018, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.115505936714943e-05, |
|
"loss": 1.1063, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.084623476136541e-05, |
|
"loss": 1.1037, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.053331408570254e-05, |
|
"loss": 1.0981, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.021637822656529e-05, |
|
"loss": 1.1018, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.9895509108236956e-05, |
|
"loss": 1.1154, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.957078967170325e-05, |
|
"loss": 1.1498, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.9242303853212944e-05, |
|
"loss": 1.1071, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 1.103700041770935, |
|
"eval_runtime": 23.9888, |
|
"eval_samples_per_second": 26.387, |
|
"eval_steps_per_second": 3.335, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.891013656258133e-05, |
|
"loss": 1.1107, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.857437366124202e-05, |
|
"loss": 1.0919, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.823510194005273e-05, |
|
"loss": 1.1019, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.789240909686087e-05, |
|
"loss": 1.1217, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.754638371383461e-05, |
|
"loss": 1.1171, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.719711523456545e-05, |
|
"loss": 1.0918, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.684469394094805e-05, |
|
"loss": 1.1195, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.648921092984342e-05, |
|
"loss": 1.1083, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.6130758089531404e-05, |
|
"loss": 1.0735, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.576942807595861e-05, |
|
"loss": 1.1161, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 1.1002016067504883, |
|
"eval_runtime": 24.0418, |
|
"eval_samples_per_second": 26.329, |
|
"eval_steps_per_second": 3.328, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.540531428878795e-05, |
|
"loss": 1.0867, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.5038510847255846e-05, |
|
"loss": 1.105, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.466911256584355e-05, |
|
"loss": 1.1156, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.42972149297686e-05, |
|
"loss": 1.099, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.3922914070303076e-05, |
|
"loss": 1.0994, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.354630673992473e-05, |
|
"loss": 1.1177, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.316749028730757e-05, |
|
"loss": 1.1095, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.278656263215836e-05, |
|
"loss": 1.1083, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.24036222399055e-05, |
|
"loss": 1.0836, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.2018768096246834e-05, |
|
"loss": 1.0982, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_loss": 1.0972065925598145, |
|
"eval_runtime": 24.0261, |
|
"eval_samples_per_second": 26.346, |
|
"eval_steps_per_second": 3.33, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.1632099681562996e-05, |
|
"loss": 1.1041, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.1243716945202864e-05, |
|
"loss": 1.0974, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.085372027964777e-05, |
|
"loss": 1.0945, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.0462210494561283e-05, |
|
"loss": 1.1045, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.0069288790730966e-05, |
|
"loss": 1.0842, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.9675056733909196e-05, |
|
"loss": 1.097, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.9279616228559542e-05, |
|
"loss": 1.1034, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.8883069491515696e-05, |
|
"loss": 1.1033, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.8485519025559503e-05, |
|
"loss": 1.0894, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.8087067592925252e-05, |
|
"loss": 1.1062, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_loss": 1.0946753025054932, |
|
"eval_runtime": 23.9793, |
|
"eval_samples_per_second": 26.398, |
|
"eval_steps_per_second": 3.336, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.7687818188736757e-05, |
|
"loss": 1.1243, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.7287874014384346e-05, |
|
"loss": 1.0975, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.6887338450848448e-05, |
|
"loss": 1.0885, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.64863150319768e-05, |
|
"loss": 1.0938, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.6084907417722176e-05, |
|
"loss": 1.1014, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.5683219367347432e-05, |
|
"loss": 1.105, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.528135471260493e-05, |
|
"loss": 1.0993, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.4879417330897267e-05, |
|
"loss": 1.0867, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.4477511118426093e-05, |
|
"loss": 1.059, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.4075739963336182e-05, |
|
"loss": 1.0744, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 1.0925548076629639, |
|
"eval_runtime": 23.9765, |
|
"eval_samples_per_second": 26.401, |
|
"eval_steps_per_second": 3.337, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.367420771886154e-05, |
|
"loss": 1.096, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.3273018176480492e-05, |
|
"loss": 1.0873, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.2872275039086823e-05, |
|
"loss": 1.097, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.2472081894183744e-05, |
|
"loss": 1.0801, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.207254218710772e-05, |
|
"loss": 1.1006, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.1673759194289033e-05, |
|
"loss": 1.0811, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.1275835996556e-05, |
|
"loss": 1.0969, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.08788754524898e-05, |
|
"loss": 1.0888, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.0482980171836648e-05, |
|
"loss": 1.09, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.008825248898443e-05, |
|
"loss": 1.0867, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_loss": 1.0904080867767334, |
|
"eval_runtime": 23.9806, |
|
"eval_samples_per_second": 26.396, |
|
"eval_steps_per_second": 3.336, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.969479443651036e-05, |
|
"loss": 1.0925, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.930270771880679e-05, |
|
"loss": 1.0827, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.8912093685791748e-05, |
|
"loss": 1.0976, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.8523053306711203e-05, |
|
"loss": 1.0746, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.813568714403957e-05, |
|
"loss": 1.0922, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.7750095327485605e-05, |
|
"loss": 1.121, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.7366377528109895e-05, |
|
"loss": 1.0915, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.6984632932561124e-05, |
|
"loss": 1.0866, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.6604960217437398e-05, |
|
"loss": 1.0822, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.6227457523779532e-05, |
|
"loss": 1.0809, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_loss": 1.088823914527893, |
|
"eval_runtime": 23.9277, |
|
"eval_samples_per_second": 26.455, |
|
"eval_steps_per_second": 3.343, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.5852222431702658e-05, |
|
"loss": 1.0903, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.547935193517295e-05, |
|
"loss": 1.088, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.510894241693573e-05, |
|
"loss": 1.0988, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.4741089623601678e-05, |
|
"loss": 1.0935, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.4375888640897438e-05, |
|
"loss": 1.0812, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.4013433869087006e-05, |
|
"loss": 1.1036, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.3653818998570378e-05, |
|
"loss": 1.0917, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.329713698566567e-05, |
|
"loss": 1.0697, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.2943480028580917e-05, |
|
"loss": 1.092, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.2592939543581947e-05, |
|
"loss": 1.0994, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_loss": 1.0873268842697144, |
|
"eval_runtime": 23.9478, |
|
"eval_samples_per_second": 26.433, |
|
"eval_steps_per_second": 3.341, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.2245606141362318e-05, |
|
"loss": 1.0737, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.1901569603621487e-05, |
|
"loss": 1.0718, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.1560918859857247e-05, |
|
"loss": 1.0992, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.1223741964378518e-05, |
|
"loss": 1.0757, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.0890126073544276e-05, |
|
"loss": 1.0916, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.0560157423234681e-05, |
|
"loss": 1.1017, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.0233921306560029e-05, |
|
"loss": 1.0897, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 9.911502051813499e-06, |
|
"loss": 1.1047, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 9.592983000673272e-06, |
|
"loss": 1.0877, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 9.278446486659642e-06, |
|
"loss": 1.0622, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_loss": 1.0861936807632446, |
|
"eval_runtime": 23.9135, |
|
"eval_samples_per_second": 26.47, |
|
"eval_steps_per_second": 3.345, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 8.967973813852743e-06, |
|
"loss": 1.0791, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 8.66164523587637e-06, |
|
"loss": 1.0952, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 8.359539935153368e-06, |
|
"loss": 1.0998, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 8.061736002437862e-06, |
|
"loss": 1.0714, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 7.768310416629703e-06, |
|
"loss": 1.0934, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 7.479339024876283e-06, |
|
"loss": 1.1074, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 7.1948965229669425e-06, |
|
"loss": 1.0993, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 6.915056436024964e-06, |
|
"loss": 1.0962, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 6.63989109950216e-06, |
|
"loss": 1.077, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 6.3694716404810065e-06, |
|
"loss": 1.1079, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_loss": 1.0856590270996094, |
|
"eval_runtime": 23.9592, |
|
"eval_samples_per_second": 26.42, |
|
"eval_steps_per_second": 3.339, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 6.10386795928907e-06, |
|
"loss": 1.0622, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 5.843148711430621e-06, |
|
"loss": 1.1085, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 5.5873812898399546e-06, |
|
"loss": 1.0871, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 5.336631807461076e-06, |
|
"loss": 1.1106, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 5.090965080158278e-06, |
|
"loss": 1.0841, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.850444609961988e-06, |
|
"loss": 1.1075, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 4.615132568654215e-06, |
|
"loss": 1.0983, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.385089781697863e-06, |
|
"loss": 1.0859, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.160375712514019e-06, |
|
"loss": 1.0759, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.941048447111387e-06, |
|
"loss": 1.0612, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_loss": 1.085319995880127, |
|
"eval_runtime": 23.9411, |
|
"eval_samples_per_second": 26.44, |
|
"eval_steps_per_second": 3.342, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.727164679071696e-06, |
|
"loss": 1.0806, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.5187796948950384e-06, |
|
"loss": 1.0861, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.315947359708993e-06, |
|
"loss": 1.0712, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.118720103345063e-06, |
|
"loss": 1.086, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.9271489067861953e-06, |
|
"loss": 1.1087, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.7412832889887664e-06, |
|
"loss": 1.1036, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.5611712940825065e-06, |
|
"loss": 1.0712, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.3868594789516336e-06, |
|
"loss": 1.0695, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.2183929012004527e-06, |
|
"loss": 1.0858, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.055815107506451e-06, |
|
"loss": 1.0839, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_loss": 1.0849649906158447, |
|
"eval_runtime": 23.9223, |
|
"eval_samples_per_second": 26.461, |
|
"eval_steps_per_second": 3.344, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 1.899168122364023e-06, |
|
"loss": 1.0925, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.7484924372215744e-06, |
|
"loss": 1.0888, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 1.6038270000149903e-06, |
|
"loss": 1.0667, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.4652092051000155e-06, |
|
"loss": 1.0893, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 1.3326748835862463e-06, |
|
"loss": 1.061, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.2062582940751965e-06, |
|
"loss": 1.1043, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.085992113804854e-06, |
|
"loss": 1.0908, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 9.719074302029813e-07, |
|
"loss": 1.1128, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 8.640337328513743e-07, |
|
"loss": 1.082, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 7.623989058631459e-07, |
|
"loss": 1.0702, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_loss": 1.0849045515060425, |
|
"eval_runtime": 23.9602, |
|
"eval_samples_per_second": 26.419, |
|
"eval_steps_per_second": 3.339, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 6.67029220674989e-07, |
|
"loss": 1.1089, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 5.779493292563304e-07, |
|
"loss": 1.0929, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 4.951822577370785e-07, |
|
"loss": 1.0648, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 4.1874940045561194e-07, |
|
"loss": 1.0654, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.4867051442860896e-07, |
|
"loss": 1.0822, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.8496371424407975e-07, |
|
"loss": 1.0873, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.2764546737895076e-07, |
|
"loss": 1.0725, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.7673058994241432e-07, |
|
"loss": 1.0772, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.3223224284613366e-07, |
|
"loss": 1.07, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 9.416192840228932e-08, |
|
"loss": 1.0886, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_loss": 1.0848859548568726, |
|
"eval_runtime": 23.9384, |
|
"eval_samples_per_second": 26.443, |
|
"eval_steps_per_second": 3.342, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 6.252948735037678e-08, |
|
"loss": 1.0917, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.734309631348854e-08, |
|
"loss": 1.1042, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.8609265684738086e-08, |
|
"loss": 1.0908, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 6.332837944400538e-09, |
|
"loss": 1.0769, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 5.16986408199971e-10, |
|
"loss": 1.078, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 1954, |
|
"total_flos": 4.405931905703215e+18, |
|
"train_loss": 1.1088850573849165, |
|
"train_runtime": 12573.3474, |
|
"train_samples_per_second": 9.954, |
|
"train_steps_per_second": 0.155 |
|
} |
|
], |
|
"max_steps": 1954, |
|
"num_train_epochs": 2, |
|
"total_flos": 4.405931905703215e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|