sft_LIMA / trainer_log.jsonl
Xinging's picture
Upload trainer_log.jsonl with huggingface_hub
2e5e887 verified
raw
history blame
19 kB
{"current_steps": 1, "total_steps": 99, "loss": 1.8566, "lr": 2.0000000000000003e-06, "epoch": 0.030303030303030304, "percentage": 1.01, "elapsed_time": "0:00:08", "remaining_time": "0:13:20"}
{"current_steps": 2, "total_steps": 99, "loss": 1.8976, "lr": 4.000000000000001e-06, "epoch": 0.06060606060606061, "percentage": 2.02, "elapsed_time": "0:00:22", "remaining_time": "0:18:30"}
{"current_steps": 3, "total_steps": 99, "loss": 1.7259, "lr": 6e-06, "epoch": 0.09090909090909091, "percentage": 3.03, "elapsed_time": "0:00:32", "remaining_time": "0:17:34"}
{"current_steps": 4, "total_steps": 99, "loss": 1.814, "lr": 8.000000000000001e-06, "epoch": 0.12121212121212122, "percentage": 4.04, "elapsed_time": "0:00:42", "remaining_time": "0:17:00"}
{"current_steps": 5, "total_steps": 99, "loss": 1.5887, "lr": 1e-05, "epoch": 0.15151515151515152, "percentage": 5.05, "elapsed_time": "0:00:51", "remaining_time": "0:16:01"}
{"current_steps": 6, "total_steps": 99, "loss": 1.692, "lr": 1.2e-05, "epoch": 0.18181818181818182, "percentage": 6.06, "elapsed_time": "0:01:00", "remaining_time": "0:15:30"}
{"current_steps": 7, "total_steps": 99, "loss": 1.6883, "lr": 1.4e-05, "epoch": 0.21212121212121213, "percentage": 7.07, "elapsed_time": "0:01:07", "remaining_time": "0:14:47"}
{"current_steps": 8, "total_steps": 99, "loss": 1.6722, "lr": 1.6000000000000003e-05, "epoch": 0.24242424242424243, "percentage": 8.08, "elapsed_time": "0:01:16", "remaining_time": "0:14:26"}
{"current_steps": 9, "total_steps": 99, "loss": 1.6187, "lr": 1.8e-05, "epoch": 0.2727272727272727, "percentage": 9.09, "elapsed_time": "0:01:24", "remaining_time": "0:14:01"}
{"current_steps": 10, "total_steps": 99, "loss": 1.5615, "lr": 2e-05, "epoch": 0.30303030303030304, "percentage": 10.1, "elapsed_time": "0:01:35", "remaining_time": "0:14:12"}
{"current_steps": 11, "total_steps": 99, "loss": 1.6768, "lr": 1.9993770622619784e-05, "epoch": 0.3333333333333333, "percentage": 11.11, "elapsed_time": "0:01:44", "remaining_time": "0:13:59"}
{"current_steps": 12, "total_steps": 99, "loss": 1.7377, "lr": 1.9975090251507637e-05, "epoch": 0.36363636363636365, "percentage": 12.12, "elapsed_time": "0:01:55", "remaining_time": "0:13:53"}
{"current_steps": 13, "total_steps": 99, "loss": 1.7039, "lr": 1.9943982160079823e-05, "epoch": 0.3939393939393939, "percentage": 13.13, "elapsed_time": "0:02:06", "remaining_time": "0:13:57"}
{"current_steps": 14, "total_steps": 99, "loss": 1.8094, "lr": 1.9900485105144544e-05, "epoch": 0.42424242424242425, "percentage": 14.14, "elapsed_time": "0:02:19", "remaining_time": "0:14:09"}
{"current_steps": 15, "total_steps": 99, "loss": 1.7952, "lr": 1.9844653278615836e-05, "epoch": 0.45454545454545453, "percentage": 15.15, "elapsed_time": "0:02:26", "remaining_time": "0:13:40"}
{"current_steps": 16, "total_steps": 99, "loss": 1.7614, "lr": 1.9776556239997146e-05, "epoch": 0.48484848484848486, "percentage": 16.16, "elapsed_time": "0:02:40", "remaining_time": "0:13:52"}
{"current_steps": 17, "total_steps": 99, "loss": 1.8214, "lr": 1.9696278829718882e-05, "epoch": 0.5151515151515151, "percentage": 17.17, "elapsed_time": "0:02:51", "remaining_time": "0:13:47"}
{"current_steps": 18, "total_steps": 99, "loss": 1.688, "lr": 1.9603921063437795e-05, "epoch": 0.5454545454545454, "percentage": 18.18, "elapsed_time": "0:03:01", "remaining_time": "0:13:37"}
{"current_steps": 19, "total_steps": 99, "loss": 1.7641, "lr": 1.949959800742991e-05, "epoch": 0.5757575757575758, "percentage": 19.19, "elapsed_time": "0:03:08", "remaining_time": "0:13:12"}
{"current_steps": 20, "total_steps": 99, "loss": 1.7237, "lr": 1.9383439635232296e-05, "epoch": 0.6060606060606061, "percentage": 20.2, "elapsed_time": "0:03:23", "remaining_time": "0:13:23"}
{"current_steps": 21, "total_steps": 99, "loss": 1.7502, "lr": 1.9255590665712214e-05, "epoch": 0.6363636363636364, "percentage": 21.21, "elapsed_time": "0:03:31", "remaining_time": "0:13:05"}
{"current_steps": 22, "total_steps": 99, "loss": 1.4906, "lr": 1.911621038276542e-05, "epoch": 0.6666666666666666, "percentage": 22.22, "elapsed_time": "0:03:36", "remaining_time": "0:12:36"}
{"current_steps": 23, "total_steps": 99, "loss": 1.5359, "lr": 1.8965472436868288e-05, "epoch": 0.696969696969697, "percentage": 23.23, "elapsed_time": "0:03:41", "remaining_time": "0:12:12"}
{"current_steps": 24, "total_steps": 99, "loss": 1.5969, "lr": 1.8803564628730916e-05, "epoch": 0.7272727272727273, "percentage": 24.24, "elapsed_time": "0:03:54", "remaining_time": "0:12:14"}
{"current_steps": 25, "total_steps": 99, "loss": 1.7511, "lr": 1.8630688675320844e-05, "epoch": 0.7575757575757576, "percentage": 25.25, "elapsed_time": "0:04:04", "remaining_time": "0:12:02"}
{"current_steps": 26, "total_steps": 99, "loss": 1.6303, "lr": 1.8447059958548822e-05, "epoch": 0.7878787878787878, "percentage": 26.26, "elapsed_time": "0:04:14", "remaining_time": "0:11:55"}
{"current_steps": 27, "total_steps": 99, "loss": 1.6744, "lr": 1.8252907256929777e-05, "epoch": 0.8181818181818182, "percentage": 27.27, "elapsed_time": "0:04:25", "remaining_time": "0:11:48"}
{"current_steps": 28, "total_steps": 99, "loss": 1.7386, "lr": 1.804847246055326e-05, "epoch": 0.8484848484848485, "percentage": 28.28, "elapsed_time": "0:04:33", "remaining_time": "0:11:32"}
{"current_steps": 29, "total_steps": 99, "loss": 1.8378, "lr": 1.7834010269718526e-05, "epoch": 0.8787878787878788, "percentage": 29.29, "elapsed_time": "0:04:46", "remaining_time": "0:11:30"}
{"current_steps": 30, "total_steps": 99, "loss": 1.6901, "lr": 1.7609787877609678e-05, "epoch": 0.9090909090909091, "percentage": 30.3, "elapsed_time": "0:04:56", "remaining_time": "0:11:22"}
{"current_steps": 31, "total_steps": 99, "loss": 1.7371, "lr": 1.7376084637406222e-05, "epoch": 0.9393939393939394, "percentage": 31.31, "elapsed_time": "0:05:05", "remaining_time": "0:11:10"}
{"current_steps": 32, "total_steps": 99, "loss": 1.5087, "lr": 1.7133191714243805e-05, "epoch": 0.9696969696969697, "percentage": 32.32, "elapsed_time": "0:05:16", "remaining_time": "0:11:03"}
{"current_steps": 33, "total_steps": 99, "loss": 1.4644, "lr": 1.6881411722458688e-05, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "0:05:23", "remaining_time": "0:10:47"}
{"current_steps": 34, "total_steps": 99, "loss": 1.2771, "lr": 1.6621058348568008e-05, "epoch": 1.0303030303030303, "percentage": 34.34, "elapsed_time": "0:05:29", "remaining_time": "0:10:29"}
{"current_steps": 35, "total_steps": 99, "loss": 1.324, "lr": 1.6352455960455385e-05, "epoch": 1.0606060606060606, "percentage": 35.35, "elapsed_time": "0:05:38", "remaining_time": "0:10:18"}
{"current_steps": 36, "total_steps": 99, "loss": 1.2817, "lr": 1.607593920324899e-05, "epoch": 1.0909090909090908, "percentage": 36.36, "elapsed_time": "0:05:48", "remaining_time": "0:10:09"}
{"current_steps": 37, "total_steps": 99, "loss": 1.1927, "lr": 1.5791852582395334e-05, "epoch": 1.121212121212121, "percentage": 37.37, "elapsed_time": "0:05:59", "remaining_time": "0:10:01"}
{"current_steps": 38, "total_steps": 99, "loss": 1.2133, "lr": 1.5500550034448415e-05, "epoch": 1.1515151515151516, "percentage": 38.38, "elapsed_time": "0:06:08", "remaining_time": "0:09:51"}
{"current_steps": 39, "total_steps": 99, "loss": 1.1531, "lr": 1.5202394486108823e-05, "epoch": 1.1818181818181819, "percentage": 39.39, "elapsed_time": "0:06:13", "remaining_time": "0:09:34"}
{"current_steps": 40, "total_steps": 99, "loss": 1.2731, "lr": 1.4897757402062285e-05, "epoch": 1.2121212121212122, "percentage": 40.4, "elapsed_time": "0:06:24", "remaining_time": "0:09:26"}
{"current_steps": 41, "total_steps": 99, "loss": 1.2247, "lr": 1.4587018322180906e-05, "epoch": 1.2424242424242424, "percentage": 41.41, "elapsed_time": "0:06:35", "remaining_time": "0:09:20"}
{"current_steps": 42, "total_steps": 99, "loss": 1.0763, "lr": 1.4270564388663761e-05, "epoch": 1.2727272727272727, "percentage": 42.42, "elapsed_time": "0:06:46", "remaining_time": "0:09:11"}
{"current_steps": 43, "total_steps": 99, "loss": 1.1027, "lr": 1.3948789863705914e-05, "epoch": 1.303030303030303, "percentage": 43.43, "elapsed_time": "0:06:56", "remaining_time": "0:09:02"}
{"current_steps": 44, "total_steps": 99, "loss": 1.1478, "lr": 1.3622095638296827e-05, "epoch": 1.3333333333333333, "percentage": 44.44, "elapsed_time": "0:07:06", "remaining_time": "0:08:53"}
{"current_steps": 45, "total_steps": 99, "loss": 1.1792, "lr": 1.32908887327601e-05, "epoch": 1.3636363636363638, "percentage": 45.45, "elapsed_time": "0:07:13", "remaining_time": "0:08:40"}
{"current_steps": 46, "total_steps": 99, "loss": 1.0412, "lr": 1.2955581789656844e-05, "epoch": 1.393939393939394, "percentage": 46.46, "elapsed_time": "0:07:20", "remaining_time": "0:08:27"}
{"current_steps": 47, "total_steps": 99, "loss": 1.1198, "lr": 1.2616592559684408e-05, "epoch": 1.4242424242424243, "percentage": 47.47, "elapsed_time": "0:07:34", "remaining_time": "0:08:23"}
{"current_steps": 48, "total_steps": 99, "loss": 1.0971, "lr": 1.2274343381211067e-05, "epoch": 1.4545454545454546, "percentage": 48.48, "elapsed_time": "0:07:45", "remaining_time": "0:08:14"}
{"current_steps": 49, "total_steps": 99, "loss": 1.0331, "lr": 1.192926065409497e-05, "epoch": 1.4848484848484849, "percentage": 49.49, "elapsed_time": "0:07:57", "remaining_time": "0:08:06"}
{"current_steps": 50, "total_steps": 99, "loss": 1.1258, "lr": 1.1581774308443042e-05, "epoch": 1.5151515151515151, "percentage": 50.51, "elapsed_time": "0:08:10", "remaining_time": "0:08:00"}
{"current_steps": 51, "total_steps": 99, "loss": 1.211, "lr": 1.1232317268971586e-05, "epoch": 1.5454545454545454, "percentage": 51.52, "elapsed_time": "0:08:24", "remaining_time": "0:07:55"}
{"current_steps": 52, "total_steps": 99, "loss": 1.0778, "lr": 1.088132491563602e-05, "epoch": 1.5757575757575757, "percentage": 52.53, "elapsed_time": "0:08:32", "remaining_time": "0:07:43"}
{"current_steps": 53, "total_steps": 99, "loss": 1.1482, "lr": 1.0529234541201631e-05, "epoch": 1.606060606060606, "percentage": 53.54, "elapsed_time": "0:08:46", "remaining_time": "0:07:36"}
{"current_steps": 54, "total_steps": 99, "loss": 1.0857, "lr": 1.0176484806431288e-05, "epoch": 1.6363636363636362, "percentage": 54.55, "elapsed_time": "0:08:54", "remaining_time": "0:07:25"}
{"current_steps": 55, "total_steps": 99, "loss": 1.1458, "lr": 9.823515193568715e-06, "epoch": 1.6666666666666665, "percentage": 55.56, "elapsed_time": "0:09:08", "remaining_time": "0:07:18"}
{"current_steps": 56, "total_steps": 99, "loss": 0.9578, "lr": 9.470765458798369e-06, "epoch": 1.696969696969697, "percentage": 56.57, "elapsed_time": "0:09:17", "remaining_time": "0:07:08"}
{"current_steps": 57, "total_steps": 99, "loss": 1.1004, "lr": 9.118675084363986e-06, "epoch": 1.7272727272727273, "percentage": 57.58, "elapsed_time": "0:09:32", "remaining_time": "0:07:02"}
{"current_steps": 58, "total_steps": 99, "loss": 1.0838, "lr": 8.767682731028415e-06, "epoch": 1.7575757575757576, "percentage": 58.59, "elapsed_time": "0:09:40", "remaining_time": "0:06:50"}
{"current_steps": 59, "total_steps": 99, "loss": 0.9716, "lr": 8.418225691556962e-06, "epoch": 1.7878787878787878, "percentage": 59.6, "elapsed_time": "0:09:52", "remaining_time": "0:06:41"}
{"current_steps": 60, "total_steps": 99, "loss": 1.2118, "lr": 8.070739345905032e-06, "epoch": 1.8181818181818183, "percentage": 60.61, "elapsed_time": "0:10:01", "remaining_time": "0:06:31"}
{"current_steps": 61, "total_steps": 99, "loss": 1.0933, "lr": 7.725656618788938e-06, "epoch": 1.8484848484848486, "percentage": 61.62, "elapsed_time": "0:10:09", "remaining_time": "0:06:19"}
{"current_steps": 62, "total_steps": 99, "loss": 1.054, "lr": 7.383407440315595e-06, "epoch": 1.878787878787879, "percentage": 62.63, "elapsed_time": "0:10:20", "remaining_time": "0:06:10"}
{"current_steps": 63, "total_steps": 99, "loss": 1.2112, "lr": 7.044418210343161e-06, "epoch": 1.9090909090909092, "percentage": 63.64, "elapsed_time": "0:10:32", "remaining_time": "0:06:01"}
{"current_steps": 64, "total_steps": 99, "loss": 1.06, "lr": 6.7091112672399e-06, "epoch": 1.9393939393939394, "percentage": 64.65, "elapsed_time": "0:10:40", "remaining_time": "0:05:50"}
{"current_steps": 65, "total_steps": 99, "loss": 1.1465, "lr": 6.3779043617031775e-06, "epoch": 1.9696969696969697, "percentage": 65.66, "elapsed_time": "0:10:51", "remaining_time": "0:05:40"}
{"current_steps": 66, "total_steps": 99, "loss": 0.8968, "lr": 6.051210136294089e-06, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "0:10:56", "remaining_time": "0:05:28"}
{"current_steps": 67, "total_steps": 99, "loss": 0.8801, "lr": 5.729435611336239e-06, "epoch": 2.0303030303030303, "percentage": 67.68, "elapsed_time": "0:11:07", "remaining_time": "0:05:18"}
{"current_steps": 68, "total_steps": 99, "loss": 0.8784, "lr": 5.412981677819094e-06, "epoch": 2.0606060606060606, "percentage": 68.69, "elapsed_time": "0:11:14", "remaining_time": "0:05:07"}
{"current_steps": 69, "total_steps": 99, "loss": 0.7264, "lr": 5.1022425979377174e-06, "epoch": 2.090909090909091, "percentage": 69.7, "elapsed_time": "0:11:22", "remaining_time": "0:04:56"}
{"current_steps": 70, "total_steps": 99, "loss": 0.758, "lr": 4.797605513891179e-06, "epoch": 2.121212121212121, "percentage": 70.71, "elapsed_time": "0:11:30", "remaining_time": "0:04:45"}
{"current_steps": 71, "total_steps": 99, "loss": 0.8087, "lr": 4.4994499655515865e-06, "epoch": 2.1515151515151514, "percentage": 71.72, "elapsed_time": "0:11:37", "remaining_time": "0:04:35"}
{"current_steps": 72, "total_steps": 99, "loss": 0.7311, "lr": 4.208147417604665e-06, "epoch": 2.1818181818181817, "percentage": 72.73, "elapsed_time": "0:11:52", "remaining_time": "0:04:27"}
{"current_steps": 73, "total_steps": 99, "loss": 0.7755, "lr": 3.924060796751012e-06, "epoch": 2.212121212121212, "percentage": 73.74, "elapsed_time": "0:12:05", "remaining_time": "0:04:18"}
{"current_steps": 74, "total_steps": 99, "loss": 0.7668, "lr": 3.647544039544615e-06, "epoch": 2.242424242424242, "percentage": 74.75, "elapsed_time": "0:12:14", "remaining_time": "0:04:08"}
{"current_steps": 75, "total_steps": 99, "loss": 0.7701, "lr": 3.378941651431996e-06, "epoch": 2.2727272727272725, "percentage": 75.76, "elapsed_time": "0:12:22", "remaining_time": "0:03:57"}
{"current_steps": 76, "total_steps": 99, "loss": 0.889, "lr": 3.1185882775413123e-06, "epoch": 2.303030303030303, "percentage": 76.77, "elapsed_time": "0:12:32", "remaining_time": "0:03:47"}
{"current_steps": 77, "total_steps": 99, "loss": 0.7411, "lr": 2.8668082857562006e-06, "epoch": 2.3333333333333335, "percentage": 77.78, "elapsed_time": "0:12:42", "remaining_time": "0:03:37"}
{"current_steps": 78, "total_steps": 99, "loss": 0.7564, "lr": 2.6239153625937786e-06, "epoch": 2.3636363636363638, "percentage": 78.79, "elapsed_time": "0:12:51", "remaining_time": "0:03:27"}
{"current_steps": 79, "total_steps": 99, "loss": 0.7695, "lr": 2.390212122390323e-06, "epoch": 2.393939393939394, "percentage": 79.8, "elapsed_time": "0:13:04", "remaining_time": "0:03:18"}
{"current_steps": 80, "total_steps": 99, "loss": 0.8017, "lr": 2.165989730281475e-06, "epoch": 2.4242424242424243, "percentage": 80.81, "elapsed_time": "0:13:15", "remaining_time": "0:03:08"}
{"current_steps": 81, "total_steps": 99, "loss": 0.7561, "lr": 1.9515275394467446e-06, "epoch": 2.4545454545454546, "percentage": 81.82, "elapsed_time": "0:13:26", "remaining_time": "0:02:59"}
{"current_steps": 82, "total_steps": 99, "loss": 0.6155, "lr": 1.7470927430702277e-06, "epoch": 2.484848484848485, "percentage": 82.83, "elapsed_time": "0:13:33", "remaining_time": "0:02:48"}
{"current_steps": 83, "total_steps": 99, "loss": 0.6894, "lr": 1.5529400414511809e-06, "epoch": 2.515151515151515, "percentage": 83.84, "elapsed_time": "0:13:48", "remaining_time": "0:02:39"}
{"current_steps": 84, "total_steps": 99, "loss": 0.8904, "lr": 1.369311324679159e-06, "epoch": 2.5454545454545454, "percentage": 84.85, "elapsed_time": "0:14:01", "remaining_time": "0:02:30"}
{"current_steps": 85, "total_steps": 99, "loss": 0.8289, "lr": 1.196435371269089e-06, "epoch": 2.5757575757575757, "percentage": 85.86, "elapsed_time": "0:14:09", "remaining_time": "0:02:19"}
{"current_steps": 86, "total_steps": 99, "loss": 0.7425, "lr": 1.0345275631317165e-06, "epoch": 2.606060606060606, "percentage": 86.87, "elapsed_time": "0:14:17", "remaining_time": "0:02:09"}
{"current_steps": 87, "total_steps": 99, "loss": 0.7578, "lr": 8.837896172345827e-07, "epoch": 2.6363636363636362, "percentage": 87.88, "elapsed_time": "0:14:28", "remaining_time": "0:01:59"}
{"current_steps": 88, "total_steps": 99, "loss": 0.6974, "lr": 7.4440933428779e-07, "epoch": 2.6666666666666665, "percentage": 88.89, "elapsed_time": "0:14:37", "remaining_time": "0:01:49"}
{"current_steps": 89, "total_steps": 99, "loss": 0.7797, "lr": 6.165603647677054e-07, "epoch": 2.6969696969696972, "percentage": 89.9, "elapsed_time": "0:14:46", "remaining_time": "0:01:39"}
{"current_steps": 90, "total_steps": 99, "loss": 0.7053, "lr": 5.004019925700921e-07, "epoch": 2.7272727272727275, "percentage": 90.91, "elapsed_time": "0:14:57", "remaining_time": "0:01:29"}
{"current_steps": 91, "total_steps": 99, "loss": 0.7264, "lr": 3.960789365622075e-07, "epoch": 2.757575757575758, "percentage": 91.92, "elapsed_time": "0:15:07", "remaining_time": "0:01:19"}
{"current_steps": 92, "total_steps": 99, "loss": 0.7089, "lr": 3.0372117028111825e-07, "epoch": 2.787878787878788, "percentage": 92.93, "elapsed_time": "0:15:21", "remaining_time": "0:01:10"}
{"current_steps": 93, "total_steps": 99, "loss": 0.6721, "lr": 2.2344376000285606e-07, "epoch": 2.8181818181818183, "percentage": 93.94, "elapsed_time": "0:15:30", "remaining_time": "0:01:00"}
{"current_steps": 94, "total_steps": 99, "loss": 0.788, "lr": 1.553467213841664e-07, "epoch": 2.8484848484848486, "percentage": 94.95, "elapsed_time": "0:15:37", "remaining_time": "0:00:49"}
{"current_steps": 95, "total_steps": 99, "loss": 0.6617, "lr": 9.951489485545696e-08, "epoch": 2.878787878787879, "percentage": 95.96, "elapsed_time": "0:15:46", "remaining_time": "0:00:39"}
{"current_steps": 96, "total_steps": 99, "loss": 0.7603, "lr": 5.6017839920180506e-08, "epoch": 2.909090909090909, "percentage": 96.97, "elapsed_time": "0:15:58", "remaining_time": "0:00:29"}
{"current_steps": 97, "total_steps": 99, "loss": 0.7336, "lr": 2.4909748492362162e-08, "epoch": 2.9393939393939394, "percentage": 97.98, "elapsed_time": "0:16:07", "remaining_time": "0:00:19"}
{"current_steps": 98, "total_steps": 99, "loss": 0.7566, "lr": 6.229377380218005e-09, "epoch": 2.9696969696969697, "percentage": 98.99, "elapsed_time": "0:16:17", "remaining_time": "0:00:09"}
{"current_steps": 99, "total_steps": 99, "loss": 0.6936, "lr": 0.0, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "0:16:29", "remaining_time": "0:00:00"}
{"current_steps": 99, "total_steps": 99, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "0:17:28", "remaining_time": "0:00:00"}