{ "best_metric": 0.5939880609512329, "best_model_checkpoint": "/mnt/data1/sheshuaijie/Output/CoT/Trained/vicuna-13b_english-cot+auto-cot_0.0002/lora/checkpoint-1036", "epoch": 6.835463917525773, "global_step": 1036, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "eval_loss": 1.7208857536315918, "eval_runtime": 39.046, "eval_samples_per_second": 76.832, "eval_steps_per_second": 2.407, "step": 7 }, { "epoch": 0.09, "eval_loss": 1.3302656412124634, "eval_runtime": 39.1446, "eval_samples_per_second": 76.639, "eval_steps_per_second": 2.401, "step": 14 }, { "epoch": 0.13, "learning_rate": 0.00019933110367892977, "loss": 1.607, "step": 20 }, { "epoch": 0.14, "eval_loss": 1.0993696451187134, "eval_runtime": 39.2624, "eval_samples_per_second": 76.409, "eval_steps_per_second": 2.394, "step": 21 }, { "epoch": 0.18, "eval_loss": 0.9883869886398315, "eval_runtime": 39.2607, "eval_samples_per_second": 76.412, "eval_steps_per_second": 2.394, "step": 28 }, { "epoch": 0.23, "eval_loss": 0.9121341109275818, "eval_runtime": 39.2818, "eval_samples_per_second": 76.371, "eval_steps_per_second": 2.393, "step": 35 }, { "epoch": 0.26, "learning_rate": 0.00019665551839464883, "loss": 1.0077, "step": 40 }, { "epoch": 0.28, "eval_loss": 0.8665392398834229, "eval_runtime": 39.261, "eval_samples_per_second": 76.412, "eval_steps_per_second": 2.394, "step": 42 }, { "epoch": 0.32, "eval_loss": 0.8299428820610046, "eval_runtime": 39.2723, "eval_samples_per_second": 76.39, "eval_steps_per_second": 2.394, "step": 49 }, { "epoch": 0.37, "eval_loss": 0.7965301275253296, "eval_runtime": 39.2718, "eval_samples_per_second": 76.391, "eval_steps_per_second": 2.394, "step": 56 }, { "epoch": 0.4, "learning_rate": 0.0001939799331103679, "loss": 0.8626, "step": 60 }, { "epoch": 0.42, "eval_loss": 0.7661889791488647, "eval_runtime": 39.2752, "eval_samples_per_second": 76.384, "eval_steps_per_second": 2.393, "step": 63 }, { "epoch": 0.46, "eval_loss": 0.744417130947113, "eval_runtime": 39.2899, "eval_samples_per_second": 76.355, "eval_steps_per_second": 2.392, "step": 70 }, { "epoch": 0.51, "eval_loss": 0.728394627571106, "eval_runtime": 39.298, "eval_samples_per_second": 76.34, "eval_steps_per_second": 2.392, "step": 77 }, { "epoch": 0.53, "learning_rate": 0.00019130434782608697, "loss": 0.7683, "step": 80 }, { "epoch": 0.55, "eval_loss": 0.7151542901992798, "eval_runtime": 39.272, "eval_samples_per_second": 76.39, "eval_steps_per_second": 2.394, "step": 84 }, { "epoch": 0.6, "eval_loss": 0.7049417495727539, "eval_runtime": 39.2657, "eval_samples_per_second": 76.403, "eval_steps_per_second": 2.394, "step": 91 }, { "epoch": 0.65, "eval_loss": 0.6961150765419006, "eval_runtime": 39.2274, "eval_samples_per_second": 76.477, "eval_steps_per_second": 2.396, "step": 98 }, { "epoch": 0.66, "learning_rate": 0.00018862876254180605, "loss": 0.7346, "step": 100 }, { "epoch": 0.69, "eval_loss": 0.6891586780548096, "eval_runtime": 39.2698, "eval_samples_per_second": 76.395, "eval_steps_per_second": 2.394, "step": 105 }, { "epoch": 0.74, "eval_loss": 0.6833620667457581, "eval_runtime": 39.2474, "eval_samples_per_second": 76.438, "eval_steps_per_second": 2.395, "step": 112 }, { "epoch": 0.79, "eval_loss": 0.678981602191925, "eval_runtime": 39.2363, "eval_samples_per_second": 76.46, "eval_steps_per_second": 2.396, "step": 119 }, { "epoch": 0.79, "learning_rate": 0.0001859531772575251, "loss": 0.7095, "step": 120 }, { "epoch": 0.83, "eval_loss": 0.6739740967750549, "eval_runtime": 39.2467, "eval_samples_per_second": 76.439, "eval_steps_per_second": 2.395, "step": 126 }, { "epoch": 0.88, "eval_loss": 0.6704814434051514, "eval_runtime": 39.2828, "eval_samples_per_second": 76.369, "eval_steps_per_second": 2.393, "step": 133 }, { "epoch": 0.92, "learning_rate": 0.00018327759197324413, "loss": 0.6989, "step": 140 }, { "epoch": 0.92, "eval_loss": 0.6668062210083008, "eval_runtime": 39.1861, "eval_samples_per_second": 76.558, "eval_steps_per_second": 2.399, "step": 140 }, { "epoch": 0.97, "eval_loss": 0.6635003089904785, "eval_runtime": 39.2627, "eval_samples_per_second": 76.408, "eval_steps_per_second": 2.394, "step": 147 }, { "epoch": 1.02, "eval_loss": 0.6594184637069702, "eval_runtime": 39.2634, "eval_samples_per_second": 76.407, "eval_steps_per_second": 2.394, "step": 154 }, { "epoch": 1.06, "learning_rate": 0.00018060200668896322, "loss": 0.6753, "step": 160 }, { "epoch": 1.06, "eval_loss": 0.656818151473999, "eval_runtime": 39.2093, "eval_samples_per_second": 76.513, "eval_steps_per_second": 2.397, "step": 161 }, { "epoch": 1.11, "eval_loss": 0.6542237401008606, "eval_runtime": 39.2619, "eval_samples_per_second": 76.41, "eval_steps_per_second": 2.394, "step": 168 }, { "epoch": 1.15, "eval_loss": 0.6509793400764465, "eval_runtime": 39.2795, "eval_samples_per_second": 76.376, "eval_steps_per_second": 2.393, "step": 175 }, { "epoch": 1.19, "learning_rate": 0.00017792642140468227, "loss": 0.6742, "step": 180 }, { "epoch": 1.2, "eval_loss": 0.6501123905181885, "eval_runtime": 39.2889, "eval_samples_per_second": 76.357, "eval_steps_per_second": 2.393, "step": 182 }, { "epoch": 1.25, "eval_loss": 0.6488311290740967, "eval_runtime": 39.2821, "eval_samples_per_second": 76.371, "eval_steps_per_second": 2.393, "step": 189 }, { "epoch": 1.29, "eval_loss": 0.6458473205566406, "eval_runtime": 39.2749, "eval_samples_per_second": 76.385, "eval_steps_per_second": 2.393, "step": 196 }, { "epoch": 1.32, "learning_rate": 0.00017525083612040135, "loss": 0.6727, "step": 200 }, { "epoch": 1.34, "eval_loss": 0.6445983648300171, "eval_runtime": 39.2655, "eval_samples_per_second": 76.403, "eval_steps_per_second": 2.394, "step": 203 }, { "epoch": 1.39, "eval_loss": 0.6414983868598938, "eval_runtime": 39.2575, "eval_samples_per_second": 76.418, "eval_steps_per_second": 2.394, "step": 210 }, { "epoch": 1.43, "eval_loss": 0.6403743624687195, "eval_runtime": 39.2601, "eval_samples_per_second": 76.413, "eval_steps_per_second": 2.394, "step": 217 }, { "epoch": 1.45, "learning_rate": 0.0001725752508361204, "loss": 0.6651, "step": 220 }, { "epoch": 1.48, "eval_loss": 0.6375772953033447, "eval_runtime": 39.2616, "eval_samples_per_second": 76.411, "eval_steps_per_second": 2.394, "step": 224 }, { "epoch": 1.52, "eval_loss": 0.6363030076026917, "eval_runtime": 39.2685, "eval_samples_per_second": 76.397, "eval_steps_per_second": 2.394, "step": 231 }, { "epoch": 1.57, "eval_loss": 0.6365154981613159, "eval_runtime": 39.2615, "eval_samples_per_second": 76.411, "eval_steps_per_second": 2.394, "step": 238 }, { "epoch": 1.58, "learning_rate": 0.00016989966555183946, "loss": 0.6569, "step": 240 }, { "epoch": 1.62, "eval_loss": 0.6351213455200195, "eval_runtime": 39.2374, "eval_samples_per_second": 76.458, "eval_steps_per_second": 2.396, "step": 245 }, { "epoch": 1.66, "eval_loss": 0.633696436882019, "eval_runtime": 39.2576, "eval_samples_per_second": 76.418, "eval_steps_per_second": 2.394, "step": 252 }, { "epoch": 1.71, "eval_loss": 0.6320024132728577, "eval_runtime": 39.2456, "eval_samples_per_second": 76.442, "eval_steps_per_second": 2.395, "step": 259 }, { "epoch": 1.72, "learning_rate": 0.00016722408026755855, "loss": 0.6535, "step": 260 }, { "epoch": 1.76, "eval_loss": 0.6302981972694397, "eval_runtime": 39.2723, "eval_samples_per_second": 76.39, "eval_steps_per_second": 2.394, "step": 266 }, { "epoch": 1.8, "eval_loss": 0.6285908818244934, "eval_runtime": 39.2745, "eval_samples_per_second": 76.385, "eval_steps_per_second": 2.393, "step": 273 }, { "epoch": 1.85, "learning_rate": 0.0001645484949832776, "loss": 0.6504, "step": 280 }, { "epoch": 1.85, "eval_loss": 0.6279519200325012, "eval_runtime": 39.1978, "eval_samples_per_second": 76.535, "eval_steps_per_second": 2.398, "step": 280 }, { "epoch": 1.89, "eval_loss": 0.6275761723518372, "eval_runtime": 39.2574, "eval_samples_per_second": 76.419, "eval_steps_per_second": 2.394, "step": 287 }, { "epoch": 1.94, "eval_loss": 0.6262693405151367, "eval_runtime": 39.2587, "eval_samples_per_second": 76.416, "eval_steps_per_second": 2.394, "step": 294 }, { "epoch": 1.98, "learning_rate": 0.00016187290969899666, "loss": 0.6447, "step": 300 }, { "epoch": 1.99, "eval_loss": 0.6255723237991333, "eval_runtime": 39.2593, "eval_samples_per_second": 76.415, "eval_steps_per_second": 2.394, "step": 301 }, { "epoch": 2.03, "eval_loss": 0.624893307685852, "eval_runtime": 39.2732, "eval_samples_per_second": 76.388, "eval_steps_per_second": 2.393, "step": 308 }, { "epoch": 2.08, "eval_loss": 0.6238787174224854, "eval_runtime": 39.2648, "eval_samples_per_second": 76.404, "eval_steps_per_second": 2.394, "step": 315 }, { "epoch": 2.11, "learning_rate": 0.00015919732441471574, "loss": 0.6418, "step": 320 }, { "epoch": 2.12, "eval_loss": 0.6227560043334961, "eval_runtime": 39.2517, "eval_samples_per_second": 76.43, "eval_steps_per_second": 2.395, "step": 322 }, { "epoch": 2.17, "eval_loss": 0.621408998966217, "eval_runtime": 39.2673, "eval_samples_per_second": 76.4, "eval_steps_per_second": 2.394, "step": 329 }, { "epoch": 2.22, "eval_loss": 0.6207154989242554, "eval_runtime": 39.2707, "eval_samples_per_second": 76.393, "eval_steps_per_second": 2.394, "step": 336 }, { "epoch": 2.24, "learning_rate": 0.0001565217391304348, "loss": 0.6294, "step": 340 }, { "epoch": 2.26, "eval_loss": 0.6207785606384277, "eval_runtime": 39.2687, "eval_samples_per_second": 76.397, "eval_steps_per_second": 2.394, "step": 343 }, { "epoch": 2.31, "eval_loss": 0.619699239730835, "eval_runtime": 39.2528, "eval_samples_per_second": 76.428, "eval_steps_per_second": 2.395, "step": 350 }, { "epoch": 2.36, "eval_loss": 0.6189907789230347, "eval_runtime": 39.2383, "eval_samples_per_second": 76.456, "eval_steps_per_second": 2.396, "step": 357 }, { "epoch": 2.38, "learning_rate": 0.00015384615384615385, "loss": 0.6323, "step": 360 }, { "epoch": 2.4, "eval_loss": 0.6188793182373047, "eval_runtime": 39.2507, "eval_samples_per_second": 76.432, "eval_steps_per_second": 2.395, "step": 364 }, { "epoch": 2.45, "eval_loss": 0.6180170774459839, "eval_runtime": 39.2506, "eval_samples_per_second": 76.432, "eval_steps_per_second": 2.395, "step": 371 }, { "epoch": 2.49, "eval_loss": 0.6175986528396606, "eval_runtime": 39.2493, "eval_samples_per_second": 76.434, "eval_steps_per_second": 2.395, "step": 378 }, { "epoch": 2.51, "learning_rate": 0.00015117056856187293, "loss": 0.6194, "step": 380 }, { "epoch": 2.54, "eval_loss": 0.6155608296394348, "eval_runtime": 39.2549, "eval_samples_per_second": 76.424, "eval_steps_per_second": 2.395, "step": 385 }, { "epoch": 2.59, "eval_loss": 0.6149768829345703, "eval_runtime": 39.2507, "eval_samples_per_second": 76.432, "eval_steps_per_second": 2.395, "step": 392 }, { "epoch": 2.63, "eval_loss": 0.614321768283844, "eval_runtime": 39.2607, "eval_samples_per_second": 76.412, "eval_steps_per_second": 2.394, "step": 399 }, { "epoch": 2.64, "learning_rate": 0.00014849498327759196, "loss": 0.6165, "step": 400 }, { "epoch": 2.68, "eval_loss": 0.6136913299560547, "eval_runtime": 39.256, "eval_samples_per_second": 76.422, "eval_steps_per_second": 2.395, "step": 406 }, { "epoch": 2.72, "eval_loss": 0.6127980351448059, "eval_runtime": 39.2695, "eval_samples_per_second": 76.395, "eval_steps_per_second": 2.394, "step": 413 }, { "epoch": 2.77, "learning_rate": 0.00014581939799331104, "loss": 0.6202, "step": 420 }, { "epoch": 2.77, "eval_loss": 0.6126558780670166, "eval_runtime": 39.2344, "eval_samples_per_second": 76.464, "eval_steps_per_second": 2.396, "step": 420 }, { "epoch": 2.82, "eval_loss": 0.6126319766044617, "eval_runtime": 39.2692, "eval_samples_per_second": 76.396, "eval_steps_per_second": 2.394, "step": 427 }, { "epoch": 2.86, "eval_loss": 0.6124591827392578, "eval_runtime": 39.2771, "eval_samples_per_second": 76.38, "eval_steps_per_second": 2.393, "step": 434 }, { "epoch": 2.9, "learning_rate": 0.0001431438127090301, "loss": 0.6186, "step": 440 }, { "epoch": 2.91, "eval_loss": 0.6117784976959229, "eval_runtime": 39.2494, "eval_samples_per_second": 76.434, "eval_steps_per_second": 2.395, "step": 441 }, { "epoch": 2.96, "eval_loss": 0.6105948090553284, "eval_runtime": 39.2716, "eval_samples_per_second": 76.391, "eval_steps_per_second": 2.394, "step": 448 }, { "epoch": 3.0, "eval_loss": 0.6107361912727356, "eval_runtime": 39.2828, "eval_samples_per_second": 76.369, "eval_steps_per_second": 2.393, "step": 455 }, { "epoch": 3.04, "learning_rate": 0.00014046822742474916, "loss": 0.6165, "step": 460 }, { "epoch": 3.05, "eval_loss": 0.6106633543968201, "eval_runtime": 39.2701, "eval_samples_per_second": 76.394, "eval_steps_per_second": 2.394, "step": 462 }, { "epoch": 3.09, "eval_loss": 0.6104211807250977, "eval_runtime": 39.2794, "eval_samples_per_second": 76.376, "eval_steps_per_second": 2.393, "step": 469 }, { "epoch": 3.14, "eval_loss": 0.611173152923584, "eval_runtime": 39.2596, "eval_samples_per_second": 76.415, "eval_steps_per_second": 2.394, "step": 476 }, { "epoch": 3.17, "learning_rate": 0.00013779264214046824, "loss": 0.6021, "step": 480 }, { "epoch": 3.19, "eval_loss": 0.6094884276390076, "eval_runtime": 39.2429, "eval_samples_per_second": 76.447, "eval_steps_per_second": 2.395, "step": 483 }, { "epoch": 3.23, "eval_loss": 0.6093204617500305, "eval_runtime": 39.278, "eval_samples_per_second": 76.379, "eval_steps_per_second": 2.393, "step": 490 }, { "epoch": 3.28, "eval_loss": 0.60869961977005, "eval_runtime": 39.269, "eval_samples_per_second": 76.396, "eval_steps_per_second": 2.394, "step": 497 }, { "epoch": 3.3, "learning_rate": 0.0001351170568561873, "loss": 0.6057, "step": 500 }, { "epoch": 3.33, "eval_loss": 0.6093556880950928, "eval_runtime": 39.2597, "eval_samples_per_second": 76.414, "eval_steps_per_second": 2.394, "step": 504 }, { "epoch": 3.37, "eval_loss": 0.6078519821166992, "eval_runtime": 39.2561, "eval_samples_per_second": 76.421, "eval_steps_per_second": 2.395, "step": 511 }, { "epoch": 3.42, "eval_loss": 0.6079010367393494, "eval_runtime": 39.2536, "eval_samples_per_second": 76.426, "eval_steps_per_second": 2.395, "step": 518 }, { "epoch": 3.43, "learning_rate": 0.00013244147157190635, "loss": 0.598, "step": 520 }, { "epoch": 3.46, "eval_loss": 0.6074483394622803, "eval_runtime": 39.2832, "eval_samples_per_second": 76.369, "eval_steps_per_second": 2.393, "step": 525 }, { "epoch": 3.51, "eval_loss": 0.6073596477508545, "eval_runtime": 39.2701, "eval_samples_per_second": 76.394, "eval_steps_per_second": 2.394, "step": 532 }, { "epoch": 3.56, "eval_loss": 0.606430172920227, "eval_runtime": 39.2546, "eval_samples_per_second": 76.424, "eval_steps_per_second": 2.395, "step": 539 }, { "epoch": 3.56, "learning_rate": 0.00012976588628762543, "loss": 0.5948, "step": 540 }, { "epoch": 3.6, "eval_loss": 0.6060574650764465, "eval_runtime": 39.2503, "eval_samples_per_second": 76.433, "eval_steps_per_second": 2.395, "step": 546 }, { "epoch": 3.65, "eval_loss": 0.6067923307418823, "eval_runtime": 39.2654, "eval_samples_per_second": 76.403, "eval_steps_per_second": 2.394, "step": 553 }, { "epoch": 3.69, "learning_rate": 0.0001270903010033445, "loss": 0.5962, "step": 560 }, { "epoch": 3.69, "eval_loss": 0.6042212843894958, "eval_runtime": 39.2032, "eval_samples_per_second": 76.524, "eval_steps_per_second": 2.398, "step": 560 }, { "epoch": 3.74, "eval_loss": 0.6041299700737, "eval_runtime": 39.2396, "eval_samples_per_second": 76.453, "eval_steps_per_second": 2.396, "step": 567 }, { "epoch": 3.79, "eval_loss": 0.6047356128692627, "eval_runtime": 39.274, "eval_samples_per_second": 76.386, "eval_steps_per_second": 2.393, "step": 574 }, { "epoch": 3.83, "learning_rate": 0.00012441471571906357, "loss": 0.5977, "step": 580 }, { "epoch": 3.83, "eval_loss": 0.6040154099464417, "eval_runtime": 39.2677, "eval_samples_per_second": 76.399, "eval_steps_per_second": 2.394, "step": 581 }, { "epoch": 3.88, "eval_loss": 0.603416383266449, "eval_runtime": 39.2621, "eval_samples_per_second": 76.41, "eval_steps_per_second": 2.394, "step": 588 }, { "epoch": 3.93, "eval_loss": 0.6036480069160461, "eval_runtime": 39.2609, "eval_samples_per_second": 76.412, "eval_steps_per_second": 2.394, "step": 595 }, { "epoch": 3.96, "learning_rate": 0.00012173913043478263, "loss": 0.5903, "step": 600 }, { "epoch": 3.97, "eval_loss": 0.6035267114639282, "eval_runtime": 39.2828, "eval_samples_per_second": 76.369, "eval_steps_per_second": 2.393, "step": 602 }, { "epoch": 4.02, "eval_loss": 0.6025964617729187, "eval_runtime": 39.2634, "eval_samples_per_second": 76.407, "eval_steps_per_second": 2.394, "step": 609 }, { "epoch": 4.06, "eval_loss": 0.6028868556022644, "eval_runtime": 39.2591, "eval_samples_per_second": 76.415, "eval_steps_per_second": 2.394, "step": 616 }, { "epoch": 4.09, "learning_rate": 0.0001190635451505017, "loss": 0.5927, "step": 620 }, { "epoch": 4.11, "eval_loss": 0.6027114391326904, "eval_runtime": 39.2648, "eval_samples_per_second": 76.404, "eval_steps_per_second": 2.394, "step": 623 }, { "epoch": 4.16, "eval_loss": 0.6030986905097961, "eval_runtime": 39.2746, "eval_samples_per_second": 76.385, "eval_steps_per_second": 2.393, "step": 630 }, { "epoch": 4.2, "eval_loss": 0.6026434898376465, "eval_runtime": 39.2646, "eval_samples_per_second": 76.405, "eval_steps_per_second": 2.394, "step": 637 }, { "epoch": 4.22, "learning_rate": 0.00011638795986622074, "loss": 0.581, "step": 640 }, { "epoch": 4.25, "eval_loss": 0.6008206009864807, "eval_runtime": 39.2718, "eval_samples_per_second": 76.391, "eval_steps_per_second": 2.394, "step": 644 }, { "epoch": 4.3, "eval_loss": 0.6018855571746826, "eval_runtime": 39.2587, "eval_samples_per_second": 76.416, "eval_steps_per_second": 2.394, "step": 651 }, { "epoch": 4.34, "eval_loss": 0.6018174886703491, "eval_runtime": 39.2445, "eval_samples_per_second": 76.444, "eval_steps_per_second": 2.395, "step": 658 }, { "epoch": 4.35, "learning_rate": 0.00011371237458193979, "loss": 0.5965, "step": 660 }, { "epoch": 4.39, "eval_loss": 0.6006762981414795, "eval_runtime": 39.2498, "eval_samples_per_second": 76.433, "eval_steps_per_second": 2.395, "step": 665 }, { "epoch": 4.43, "eval_loss": 0.6006374359130859, "eval_runtime": 39.2758, "eval_samples_per_second": 76.383, "eval_steps_per_second": 2.393, "step": 672 }, { "epoch": 4.48, "eval_loss": 0.5997828245162964, "eval_runtime": 39.2794, "eval_samples_per_second": 76.376, "eval_steps_per_second": 2.393, "step": 679 }, { "epoch": 4.49, "learning_rate": 0.00011103678929765886, "loss": 0.5896, "step": 680 }, { "epoch": 4.53, "eval_loss": 0.6000981330871582, "eval_runtime": 39.2629, "eval_samples_per_second": 76.408, "eval_steps_per_second": 2.394, "step": 686 }, { "epoch": 4.57, "eval_loss": 0.5991115570068359, "eval_runtime": 39.2774, "eval_samples_per_second": 76.38, "eval_steps_per_second": 2.393, "step": 693 }, { "epoch": 4.62, "learning_rate": 0.00010836120401337793, "loss": 0.5854, "step": 700 }, { "epoch": 4.62, "eval_loss": 0.6001954674720764, "eval_runtime": 39.2333, "eval_samples_per_second": 76.466, "eval_steps_per_second": 2.396, "step": 700 }, { "epoch": 4.66, "eval_loss": 0.6007575988769531, "eval_runtime": 39.2801, "eval_samples_per_second": 76.374, "eval_steps_per_second": 2.393, "step": 707 }, { "epoch": 4.71, "eval_loss": 0.5983864068984985, "eval_runtime": 39.2469, "eval_samples_per_second": 76.439, "eval_steps_per_second": 2.395, "step": 714 }, { "epoch": 4.75, "learning_rate": 0.00010568561872909698, "loss": 0.5844, "step": 720 }, { "epoch": 4.76, "eval_loss": 0.5985506772994995, "eval_runtime": 39.2426, "eval_samples_per_second": 76.448, "eval_steps_per_second": 2.395, "step": 721 }, { "epoch": 4.8, "eval_loss": 0.5978309512138367, "eval_runtime": 39.2604, "eval_samples_per_second": 76.413, "eval_steps_per_second": 2.394, "step": 728 }, { "epoch": 4.85, "eval_loss": 0.5981310606002808, "eval_runtime": 39.2686, "eval_samples_per_second": 76.397, "eval_steps_per_second": 2.394, "step": 735 }, { "epoch": 4.88, "learning_rate": 0.00010301003344481605, "loss": 0.5784, "step": 740 }, { "epoch": 4.9, "eval_loss": 0.5985335111618042, "eval_runtime": 39.2557, "eval_samples_per_second": 76.422, "eval_steps_per_second": 2.395, "step": 742 }, { "epoch": 4.94, "eval_loss": 0.5975944995880127, "eval_runtime": 39.2644, "eval_samples_per_second": 76.405, "eval_steps_per_second": 2.394, "step": 749 }, { "epoch": 4.99, "eval_loss": 0.596754252910614, "eval_runtime": 39.2365, "eval_samples_per_second": 76.459, "eval_steps_per_second": 2.396, "step": 756 }, { "epoch": 5.01, "learning_rate": 0.00010033444816053512, "loss": 0.5825, "step": 760 }, { "epoch": 5.03, "eval_loss": 0.5977214574813843, "eval_runtime": 39.235, "eval_samples_per_second": 76.462, "eval_steps_per_second": 2.396, "step": 763 }, { "epoch": 5.08, "eval_loss": 0.5982287526130676, "eval_runtime": 39.2483, "eval_samples_per_second": 76.436, "eval_steps_per_second": 2.395, "step": 770 }, { "epoch": 5.13, "eval_loss": 0.5973477959632874, "eval_runtime": 39.2692, "eval_samples_per_second": 76.396, "eval_steps_per_second": 2.394, "step": 777 }, { "epoch": 5.15, "learning_rate": 9.765886287625419e-05, "loss": 0.5724, "step": 780 }, { "epoch": 5.17, "eval_loss": 0.598833441734314, "eval_runtime": 39.2608, "eval_samples_per_second": 76.412, "eval_steps_per_second": 2.394, "step": 784 }, { "epoch": 5.22, "eval_loss": 0.5973609089851379, "eval_runtime": 39.2557, "eval_samples_per_second": 76.422, "eval_steps_per_second": 2.395, "step": 791 }, { "epoch": 5.27, "eval_loss": 0.5983055233955383, "eval_runtime": 39.2613, "eval_samples_per_second": 76.411, "eval_steps_per_second": 2.394, "step": 798 }, { "epoch": 5.28, "learning_rate": 9.498327759197325e-05, "loss": 0.5765, "step": 800 }, { "epoch": 5.31, "eval_loss": 0.597219705581665, "eval_runtime": 39.2532, "eval_samples_per_second": 76.427, "eval_steps_per_second": 2.395, "step": 805 }, { "epoch": 5.36, "eval_loss": 0.5974920392036438, "eval_runtime": 39.2428, "eval_samples_per_second": 76.447, "eval_steps_per_second": 2.395, "step": 812 }, { "epoch": 5.4, "eval_loss": 0.5970295667648315, "eval_runtime": 39.2255, "eval_samples_per_second": 76.481, "eval_steps_per_second": 2.396, "step": 819 }, { "epoch": 5.41, "learning_rate": 9.230769230769232e-05, "loss": 0.5662, "step": 820 }, { "epoch": 5.45, "eval_loss": 0.5995200872421265, "eval_runtime": 39.2763, "eval_samples_per_second": 76.382, "eval_steps_per_second": 2.393, "step": 826 }, { "epoch": 5.5, "eval_loss": 0.5961365699768066, "eval_runtime": 39.2442, "eval_samples_per_second": 76.444, "eval_steps_per_second": 2.395, "step": 833 }, { "epoch": 5.54, "learning_rate": 8.963210702341137e-05, "loss": 0.5594, "step": 840 }, { "epoch": 5.54, "eval_loss": 0.5958811640739441, "eval_runtime": 39.224, "eval_samples_per_second": 76.484, "eval_steps_per_second": 2.396, "step": 840 }, { "epoch": 5.59, "eval_loss": 0.5974062085151672, "eval_runtime": 39.2479, "eval_samples_per_second": 76.437, "eval_steps_per_second": 2.395, "step": 847 }, { "epoch": 5.63, "eval_loss": 0.5959305167198181, "eval_runtime": 39.1122, "eval_samples_per_second": 76.702, "eval_steps_per_second": 2.403, "step": 854 }, { "epoch": 5.67, "learning_rate": 8.695652173913044e-05, "loss": 0.5569, "step": 860 }, { "epoch": 5.68, "eval_loss": 0.597082257270813, "eval_runtime": 39.2419, "eval_samples_per_second": 76.449, "eval_steps_per_second": 2.395, "step": 861 }, { "epoch": 5.73, "eval_loss": 0.5964935421943665, "eval_runtime": 39.2482, "eval_samples_per_second": 76.437, "eval_steps_per_second": 2.395, "step": 868 }, { "epoch": 5.77, "eval_loss": 0.596628725528717, "eval_runtime": 39.2684, "eval_samples_per_second": 76.397, "eval_steps_per_second": 2.394, "step": 875 }, { "epoch": 5.81, "learning_rate": 8.42809364548495e-05, "loss": 0.5711, "step": 880 }, { "epoch": 5.82, "eval_loss": 0.596688449382782, "eval_runtime": 39.262, "eval_samples_per_second": 76.41, "eval_steps_per_second": 2.394, "step": 882 }, { "epoch": 5.87, "eval_loss": 0.5974501967430115, "eval_runtime": 39.2621, "eval_samples_per_second": 76.409, "eval_steps_per_second": 2.394, "step": 889 }, { "epoch": 5.91, "eval_loss": 0.5951861143112183, "eval_runtime": 39.2622, "eval_samples_per_second": 76.409, "eval_steps_per_second": 2.394, "step": 896 }, { "epoch": 5.94, "learning_rate": 8.160535117056857e-05, "loss": 0.5703, "step": 900 }, { "epoch": 5.96, "eval_loss": 0.5963322520256042, "eval_runtime": 39.2656, "eval_samples_per_second": 76.403, "eval_steps_per_second": 2.394, "step": 903 }, { "epoch": 6.0, "eval_loss": 0.5958115458488464, "eval_runtime": 39.2804, "eval_samples_per_second": 76.374, "eval_steps_per_second": 2.393, "step": 910 }, { "epoch": 6.05, "eval_loss": 0.5968443155288696, "eval_runtime": 39.2618, "eval_samples_per_second": 76.41, "eval_steps_per_second": 2.394, "step": 917 }, { "epoch": 6.07, "learning_rate": 7.892976588628763e-05, "loss": 0.5551, "step": 920 }, { "epoch": 6.1, "eval_loss": 0.5958288311958313, "eval_runtime": 39.2648, "eval_samples_per_second": 76.404, "eval_steps_per_second": 2.394, "step": 924 }, { "epoch": 6.14, "eval_loss": 0.5968209505081177, "eval_runtime": 39.2563, "eval_samples_per_second": 76.421, "eval_steps_per_second": 2.395, "step": 931 }, { "epoch": 6.19, "eval_loss": 0.5957658886909485, "eval_runtime": 39.2499, "eval_samples_per_second": 76.433, "eval_steps_per_second": 2.395, "step": 938 }, { "epoch": 6.2, "learning_rate": 7.62541806020067e-05, "loss": 0.5636, "step": 940 }, { "epoch": 6.24, "eval_loss": 0.5955784916877747, "eval_runtime": 39.279, "eval_samples_per_second": 76.377, "eval_steps_per_second": 2.393, "step": 945 }, { "epoch": 6.28, "eval_loss": 0.5963084101676941, "eval_runtime": 39.2656, "eval_samples_per_second": 76.403, "eval_steps_per_second": 2.394, "step": 952 }, { "epoch": 6.33, "eval_loss": 0.595792829990387, "eval_runtime": 39.2577, "eval_samples_per_second": 76.418, "eval_steps_per_second": 2.394, "step": 959 }, { "epoch": 6.33, "learning_rate": 7.357859531772575e-05, "loss": 0.5676, "step": 960 }, { "epoch": 6.37, "eval_loss": 0.5953949093818665, "eval_runtime": 39.2554, "eval_samples_per_second": 76.423, "eval_steps_per_second": 2.395, "step": 966 }, { "epoch": 6.42, "eval_loss": 0.595146894454956, "eval_runtime": 39.2386, "eval_samples_per_second": 76.455, "eval_steps_per_second": 2.396, "step": 973 }, { "epoch": 6.47, "learning_rate": 7.090301003344481e-05, "loss": 0.5551, "step": 980 }, { "epoch": 6.47, "eval_loss": 0.5957517027854919, "eval_runtime": 39.197, "eval_samples_per_second": 76.536, "eval_steps_per_second": 2.398, "step": 980 }, { "epoch": 6.51, "eval_loss": 0.596603512763977, "eval_runtime": 39.2315, "eval_samples_per_second": 76.469, "eval_steps_per_second": 2.396, "step": 987 }, { "epoch": 6.56, "eval_loss": 0.5952173471450806, "eval_runtime": 39.2393, "eval_samples_per_second": 76.454, "eval_steps_per_second": 2.396, "step": 994 }, { "epoch": 6.6, "learning_rate": 6.822742474916388e-05, "loss": 0.5539, "step": 1000 }, { "epoch": 6.6, "eval_loss": 0.5954132676124573, "eval_runtime": 39.2213, "eval_samples_per_second": 76.489, "eval_steps_per_second": 2.397, "step": 1001 }, { "epoch": 6.65, "eval_loss": 0.5956953167915344, "eval_runtime": 39.2503, "eval_samples_per_second": 76.432, "eval_steps_per_second": 2.395, "step": 1008 }, { "epoch": 6.7, "eval_loss": 0.5959665775299072, "eval_runtime": 39.2657, "eval_samples_per_second": 76.403, "eval_steps_per_second": 2.394, "step": 1015 }, { "epoch": 6.73, "learning_rate": 6.555183946488295e-05, "loss": 0.5607, "step": 1020 }, { "epoch": 6.74, "eval_loss": 0.5952425003051758, "eval_runtime": 39.2705, "eval_samples_per_second": 76.393, "eval_steps_per_second": 2.394, "step": 1022 }, { "epoch": 6.79, "eval_loss": 0.5953785181045532, "eval_runtime": 39.2403, "eval_samples_per_second": 76.452, "eval_steps_per_second": 2.395, "step": 1029 }, { "epoch": 6.84, "eval_loss": 0.5939880609512329, "eval_runtime": 39.2586, "eval_samples_per_second": 76.416, "eval_steps_per_second": 2.394, "step": 1036 } ], "max_steps": 1510, "num_train_epochs": 10, "total_flos": 9.026762224110141e+18, "trial_name": null, "trial_params": null }