{
  "best_metric": 0.5939880609512329,
  "best_model_checkpoint": "/mnt/data1/sheshuaijie/Output/CoT/Trained/vicuna-13b_english-cot+auto-cot_0.0002/lora/checkpoint-1036",
  "epoch": 6.835463917525773,
  "global_step": 1036,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05,
      "eval_loss": 1.7208857536315918,
      "eval_runtime": 39.046,
      "eval_samples_per_second": 76.832,
      "eval_steps_per_second": 2.407,
      "step": 7
    },
    {
      "epoch": 0.09,
      "eval_loss": 1.3302656412124634,
      "eval_runtime": 39.1446,
      "eval_samples_per_second": 76.639,
      "eval_steps_per_second": 2.401,
      "step": 14
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00019933110367892977,
      "loss": 1.607,
      "step": 20
    },
    {
      "epoch": 0.14,
      "eval_loss": 1.0993696451187134,
      "eval_runtime": 39.2624,
      "eval_samples_per_second": 76.409,
      "eval_steps_per_second": 2.394,
      "step": 21
    },
    {
      "epoch": 0.18,
      "eval_loss": 0.9883869886398315,
      "eval_runtime": 39.2607,
      "eval_samples_per_second": 76.412,
      "eval_steps_per_second": 2.394,
      "step": 28
    },
    {
      "epoch": 0.23,
      "eval_loss": 0.9121341109275818,
      "eval_runtime": 39.2818,
      "eval_samples_per_second": 76.371,
      "eval_steps_per_second": 2.393,
      "step": 35
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00019665551839464883,
      "loss": 1.0077,
      "step": 40
    },
    {
      "epoch": 0.28,
      "eval_loss": 0.8665392398834229,
      "eval_runtime": 39.261,
      "eval_samples_per_second": 76.412,
      "eval_steps_per_second": 2.394,
      "step": 42
    },
    {
      "epoch": 0.32,
      "eval_loss": 0.8299428820610046,
      "eval_runtime": 39.2723,
      "eval_samples_per_second": 76.39,
      "eval_steps_per_second": 2.394,
      "step": 49
    },
    {
      "epoch": 0.37,
      "eval_loss": 0.7965301275253296,
      "eval_runtime": 39.2718,
      "eval_samples_per_second": 76.391,
      "eval_steps_per_second": 2.394,
      "step": 56
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.0001939799331103679,
      "loss": 0.8626,
      "step": 60
    },
    {
      "epoch": 0.42,
      "eval_loss": 0.7661889791488647,
      "eval_runtime": 39.2752,
      "eval_samples_per_second": 76.384,
      "eval_steps_per_second": 2.393,
      "step": 63
    },
    {
      "epoch": 0.46,
      "eval_loss": 0.744417130947113,
      "eval_runtime": 39.2899,
      "eval_samples_per_second": 76.355,
      "eval_steps_per_second": 2.392,
      "step": 70
    },
    {
      "epoch": 0.51,
      "eval_loss": 0.728394627571106,
      "eval_runtime": 39.298,
      "eval_samples_per_second": 76.34,
      "eval_steps_per_second": 2.392,
      "step": 77
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00019130434782608697,
      "loss": 0.7683,
      "step": 80
    },
    {
      "epoch": 0.55,
      "eval_loss": 0.7151542901992798,
      "eval_runtime": 39.272,
      "eval_samples_per_second": 76.39,
      "eval_steps_per_second": 2.394,
      "step": 84
    },
    {
      "epoch": 0.6,
      "eval_loss": 0.7049417495727539,
      "eval_runtime": 39.2657,
      "eval_samples_per_second": 76.403,
      "eval_steps_per_second": 2.394,
      "step": 91
    },
    {
      "epoch": 0.65,
      "eval_loss": 0.6961150765419006,
      "eval_runtime": 39.2274,
      "eval_samples_per_second": 76.477,
      "eval_steps_per_second": 2.396,
      "step": 98
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.00018862876254180605,
      "loss": 0.7346,
      "step": 100
    },
    {
      "epoch": 0.69,
      "eval_loss": 0.6891586780548096,
      "eval_runtime": 39.2698,
      "eval_samples_per_second": 76.395,
      "eval_steps_per_second": 2.394,
      "step": 105
    },
    {
      "epoch": 0.74,
      "eval_loss": 0.6833620667457581,
      "eval_runtime": 39.2474,
      "eval_samples_per_second": 76.438,
      "eval_steps_per_second": 2.395,
      "step": 112
    },
    {
      "epoch": 0.79,
      "eval_loss": 0.678981602191925,
      "eval_runtime": 39.2363,
      "eval_samples_per_second": 76.46,
      "eval_steps_per_second": 2.396,
      "step": 119
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.0001859531772575251,
      "loss": 0.7095,
      "step": 120
    },
    {
      "epoch": 0.83,
      "eval_loss": 0.6739740967750549,
      "eval_runtime": 39.2467,
      "eval_samples_per_second": 76.439,
      "eval_steps_per_second": 2.395,
      "step": 126
    },
    {
      "epoch": 0.88,
      "eval_loss": 0.6704814434051514,
      "eval_runtime": 39.2828,
      "eval_samples_per_second": 76.369,
      "eval_steps_per_second": 2.393,
      "step": 133
    },
    {
      "epoch": 0.92,
      "learning_rate": 0.00018327759197324413,
      "loss": 0.6989,
      "step": 140
    },
    {
      "epoch": 0.92,
      "eval_loss": 0.6668062210083008,
      "eval_runtime": 39.1861,
      "eval_samples_per_second": 76.558,
      "eval_steps_per_second": 2.399,
      "step": 140
    },
    {
      "epoch": 0.97,
      "eval_loss": 0.6635003089904785,
      "eval_runtime": 39.2627,
      "eval_samples_per_second": 76.408,
      "eval_steps_per_second": 2.394,
      "step": 147
    },
    {
      "epoch": 1.02,
      "eval_loss": 0.6594184637069702,
      "eval_runtime": 39.2634,
      "eval_samples_per_second": 76.407,
      "eval_steps_per_second": 2.394,
      "step": 154
    },
    {
      "epoch": 1.06,
      "learning_rate": 0.00018060200668896322,
      "loss": 0.6753,
      "step": 160
    },
    {
      "epoch": 1.06,
      "eval_loss": 0.656818151473999,
      "eval_runtime": 39.2093,
      "eval_samples_per_second": 76.513,
      "eval_steps_per_second": 2.397,
      "step": 161
    },
    {
      "epoch": 1.11,
      "eval_loss": 0.6542237401008606,
      "eval_runtime": 39.2619,
      "eval_samples_per_second": 76.41,
      "eval_steps_per_second": 2.394,
      "step": 168
    },
    {
      "epoch": 1.15,
      "eval_loss": 0.6509793400764465,
      "eval_runtime": 39.2795,
      "eval_samples_per_second": 76.376,
      "eval_steps_per_second": 2.393,
      "step": 175
    },
    {
      "epoch": 1.19,
      "learning_rate": 0.00017792642140468227,
      "loss": 0.6742,
      "step": 180
    },
    {
      "epoch": 1.2,
      "eval_loss": 0.6501123905181885,
      "eval_runtime": 39.2889,
      "eval_samples_per_second": 76.357,
      "eval_steps_per_second": 2.393,
      "step": 182
    },
    {
      "epoch": 1.25,
      "eval_loss": 0.6488311290740967,
      "eval_runtime": 39.2821,
      "eval_samples_per_second": 76.371,
      "eval_steps_per_second": 2.393,
      "step": 189
    },
    {
      "epoch": 1.29,
      "eval_loss": 0.6458473205566406,
      "eval_runtime": 39.2749,
      "eval_samples_per_second": 76.385,
      "eval_steps_per_second": 2.393,
      "step": 196
    },
    {
      "epoch": 1.32,
      "learning_rate": 0.00017525083612040135,
      "loss": 0.6727,
      "step": 200
    },
    {
      "epoch": 1.34,
      "eval_loss": 0.6445983648300171,
      "eval_runtime": 39.2655,
      "eval_samples_per_second": 76.403,
      "eval_steps_per_second": 2.394,
      "step": 203
    },
    {
      "epoch": 1.39,
      "eval_loss": 0.6414983868598938,
      "eval_runtime": 39.2575,
      "eval_samples_per_second": 76.418,
      "eval_steps_per_second": 2.394,
      "step": 210
    },
    {
      "epoch": 1.43,
      "eval_loss": 0.6403743624687195,
      "eval_runtime": 39.2601,
      "eval_samples_per_second": 76.413,
      "eval_steps_per_second": 2.394,
      "step": 217
    },
    {
      "epoch": 1.45,
      "learning_rate": 0.0001725752508361204,
      "loss": 0.6651,
      "step": 220
    },
    {
      "epoch": 1.48,
      "eval_loss": 0.6375772953033447,
      "eval_runtime": 39.2616,
      "eval_samples_per_second": 76.411,
      "eval_steps_per_second": 2.394,
      "step": 224
    },
    {
      "epoch": 1.52,
      "eval_loss": 0.6363030076026917,
      "eval_runtime": 39.2685,
      "eval_samples_per_second": 76.397,
      "eval_steps_per_second": 2.394,
      "step": 231
    },
    {
      "epoch": 1.57,
      "eval_loss": 0.6365154981613159,
      "eval_runtime": 39.2615,
      "eval_samples_per_second": 76.411,
      "eval_steps_per_second": 2.394,
      "step": 238
    },
    {
      "epoch": 1.58,
      "learning_rate": 0.00016989966555183946,
      "loss": 0.6569,
      "step": 240
    },
    {
      "epoch": 1.62,
      "eval_loss": 0.6351213455200195,
      "eval_runtime": 39.2374,
      "eval_samples_per_second": 76.458,
      "eval_steps_per_second": 2.396,
      "step": 245
    },
    {
      "epoch": 1.66,
      "eval_loss": 0.633696436882019,
      "eval_runtime": 39.2576,
      "eval_samples_per_second": 76.418,
      "eval_steps_per_second": 2.394,
      "step": 252
    },
    {
      "epoch": 1.71,
      "eval_loss": 0.6320024132728577,
      "eval_runtime": 39.2456,
      "eval_samples_per_second": 76.442,
      "eval_steps_per_second": 2.395,
      "step": 259
    },
    {
      "epoch": 1.72,
      "learning_rate": 0.00016722408026755855,
      "loss": 0.6535,
      "step": 260
    },
    {
      "epoch": 1.76,
      "eval_loss": 0.6302981972694397,
      "eval_runtime": 39.2723,
      "eval_samples_per_second": 76.39,
      "eval_steps_per_second": 2.394,
      "step": 266
    },
    {
      "epoch": 1.8,
      "eval_loss": 0.6285908818244934,
      "eval_runtime": 39.2745,
      "eval_samples_per_second": 76.385,
      "eval_steps_per_second": 2.393,
      "step": 273
    },
    {
      "epoch": 1.85,
      "learning_rate": 0.0001645484949832776,
      "loss": 0.6504,
      "step": 280
    },
    {
      "epoch": 1.85,
      "eval_loss": 0.6279519200325012,
      "eval_runtime": 39.1978,
      "eval_samples_per_second": 76.535,
      "eval_steps_per_second": 2.398,
      "step": 280
    },
    {
      "epoch": 1.89,
      "eval_loss": 0.6275761723518372,
      "eval_runtime": 39.2574,
      "eval_samples_per_second": 76.419,
      "eval_steps_per_second": 2.394,
      "step": 287
    },
    {
      "epoch": 1.94,
      "eval_loss": 0.6262693405151367,
      "eval_runtime": 39.2587,
      "eval_samples_per_second": 76.416,
      "eval_steps_per_second": 2.394,
      "step": 294
    },
    {
      "epoch": 1.98,
      "learning_rate": 0.00016187290969899666,
      "loss": 0.6447,
      "step": 300
    },
    {
      "epoch": 1.99,
      "eval_loss": 0.6255723237991333,
      "eval_runtime": 39.2593,
      "eval_samples_per_second": 76.415,
      "eval_steps_per_second": 2.394,
      "step": 301
    },
    {
      "epoch": 2.03,
      "eval_loss": 0.624893307685852,
      "eval_runtime": 39.2732,
      "eval_samples_per_second": 76.388,
      "eval_steps_per_second": 2.393,
      "step": 308
    },
    {
      "epoch": 2.08,
      "eval_loss": 0.6238787174224854,
      "eval_runtime": 39.2648,
      "eval_samples_per_second": 76.404,
      "eval_steps_per_second": 2.394,
      "step": 315
    },
    {
      "epoch": 2.11,
      "learning_rate": 0.00015919732441471574,
      "loss": 0.6418,
      "step": 320
    },
    {
      "epoch": 2.12,
      "eval_loss": 0.6227560043334961,
      "eval_runtime": 39.2517,
      "eval_samples_per_second": 76.43,
      "eval_steps_per_second": 2.395,
      "step": 322
    },
    {
      "epoch": 2.17,
      "eval_loss": 0.621408998966217,
      "eval_runtime": 39.2673,
      "eval_samples_per_second": 76.4,
      "eval_steps_per_second": 2.394,
      "step": 329
    },
    {
      "epoch": 2.22,
      "eval_loss": 0.6207154989242554,
      "eval_runtime": 39.2707,
      "eval_samples_per_second": 76.393,
      "eval_steps_per_second": 2.394,
      "step": 336
    },
    {
      "epoch": 2.24,
      "learning_rate": 0.0001565217391304348,
      "loss": 0.6294,
      "step": 340
    },
    {
      "epoch": 2.26,
      "eval_loss": 0.6207785606384277,
      "eval_runtime": 39.2687,
      "eval_samples_per_second": 76.397,
      "eval_steps_per_second": 2.394,
      "step": 343
    },
    {
      "epoch": 2.31,
      "eval_loss": 0.619699239730835,
      "eval_runtime": 39.2528,
      "eval_samples_per_second": 76.428,
      "eval_steps_per_second": 2.395,
      "step": 350
    },
    {
      "epoch": 2.36,
      "eval_loss": 0.6189907789230347,
      "eval_runtime": 39.2383,
      "eval_samples_per_second": 76.456,
      "eval_steps_per_second": 2.396,
      "step": 357
    },
    {
      "epoch": 2.38,
      "learning_rate": 0.00015384615384615385,
      "loss": 0.6323,
      "step": 360
    },
    {
      "epoch": 2.4,
      "eval_loss": 0.6188793182373047,
      "eval_runtime": 39.2507,
      "eval_samples_per_second": 76.432,
      "eval_steps_per_second": 2.395,
      "step": 364
    },
    {
      "epoch": 2.45,
      "eval_loss": 0.6180170774459839,
      "eval_runtime": 39.2506,
      "eval_samples_per_second": 76.432,
      "eval_steps_per_second": 2.395,
      "step": 371
    },
    {
      "epoch": 2.49,
      "eval_loss": 0.6175986528396606,
      "eval_runtime": 39.2493,
      "eval_samples_per_second": 76.434,
      "eval_steps_per_second": 2.395,
      "step": 378
    },
    {
      "epoch": 2.51,
      "learning_rate": 0.00015117056856187293,
      "loss": 0.6194,
      "step": 380
    },
    {
      "epoch": 2.54,
      "eval_loss": 0.6155608296394348,
      "eval_runtime": 39.2549,
      "eval_samples_per_second": 76.424,
      "eval_steps_per_second": 2.395,
      "step": 385
    },
    {
      "epoch": 2.59,
      "eval_loss": 0.6149768829345703,
      "eval_runtime": 39.2507,
      "eval_samples_per_second": 76.432,
      "eval_steps_per_second": 2.395,
      "step": 392
    },
    {
      "epoch": 2.63,
      "eval_loss": 0.614321768283844,
      "eval_runtime": 39.2607,
      "eval_samples_per_second": 76.412,
      "eval_steps_per_second": 2.394,
      "step": 399
    },
    {
      "epoch": 2.64,
      "learning_rate": 0.00014849498327759196,
      "loss": 0.6165,
      "step": 400
    },
    {
      "epoch": 2.68,
      "eval_loss": 0.6136913299560547,
      "eval_runtime": 39.256,
      "eval_samples_per_second": 76.422,
      "eval_steps_per_second": 2.395,
      "step": 406
    },
    {
      "epoch": 2.72,
      "eval_loss": 0.6127980351448059,
      "eval_runtime": 39.2695,
      "eval_samples_per_second": 76.395,
      "eval_steps_per_second": 2.394,
      "step": 413
    },
    {
      "epoch": 2.77,
      "learning_rate": 0.00014581939799331104,
      "loss": 0.6202,
      "step": 420
    },
    {
      "epoch": 2.77,
      "eval_loss": 0.6126558780670166,
      "eval_runtime": 39.2344,
      "eval_samples_per_second": 76.464,
      "eval_steps_per_second": 2.396,
      "step": 420
    },
    {
      "epoch": 2.82,
      "eval_loss": 0.6126319766044617,
      "eval_runtime": 39.2692,
      "eval_samples_per_second": 76.396,
      "eval_steps_per_second": 2.394,
      "step": 427
    },
    {
      "epoch": 2.86,
      "eval_loss": 0.6124591827392578,
      "eval_runtime": 39.2771,
      "eval_samples_per_second": 76.38,
      "eval_steps_per_second": 2.393,
      "step": 434
    },
    {
      "epoch": 2.9,
      "learning_rate": 0.0001431438127090301,
      "loss": 0.6186,
      "step": 440
    },
    {
      "epoch": 2.91,
      "eval_loss": 0.6117784976959229,
      "eval_runtime": 39.2494,
      "eval_samples_per_second": 76.434,
      "eval_steps_per_second": 2.395,
      "step": 441
    },
    {
      "epoch": 2.96,
      "eval_loss": 0.6105948090553284,
      "eval_runtime": 39.2716,
      "eval_samples_per_second": 76.391,
      "eval_steps_per_second": 2.394,
      "step": 448
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.6107361912727356,
      "eval_runtime": 39.2828,
      "eval_samples_per_second": 76.369,
      "eval_steps_per_second": 2.393,
      "step": 455
    },
    {
      "epoch": 3.04,
      "learning_rate": 0.00014046822742474916,
      "loss": 0.6165,
      "step": 460
    },
    {
      "epoch": 3.05,
      "eval_loss": 0.6106633543968201,
      "eval_runtime": 39.2701,
      "eval_samples_per_second": 76.394,
      "eval_steps_per_second": 2.394,
      "step": 462
    },
    {
      "epoch": 3.09,
      "eval_loss": 0.6104211807250977,
      "eval_runtime": 39.2794,
      "eval_samples_per_second": 76.376,
      "eval_steps_per_second": 2.393,
      "step": 469
    },
    {
      "epoch": 3.14,
      "eval_loss": 0.611173152923584,
      "eval_runtime": 39.2596,
      "eval_samples_per_second": 76.415,
      "eval_steps_per_second": 2.394,
      "step": 476
    },
    {
      "epoch": 3.17,
      "learning_rate": 0.00013779264214046824,
      "loss": 0.6021,
      "step": 480
    },
    {
      "epoch": 3.19,
      "eval_loss": 0.6094884276390076,
      "eval_runtime": 39.2429,
      "eval_samples_per_second": 76.447,
      "eval_steps_per_second": 2.395,
      "step": 483
    },
    {
      "epoch": 3.23,
      "eval_loss": 0.6093204617500305,
      "eval_runtime": 39.278,
      "eval_samples_per_second": 76.379,
      "eval_steps_per_second": 2.393,
      "step": 490
    },
    {
      "epoch": 3.28,
      "eval_loss": 0.60869961977005,
      "eval_runtime": 39.269,
      "eval_samples_per_second": 76.396,
      "eval_steps_per_second": 2.394,
      "step": 497
    },
    {
      "epoch": 3.3,
      "learning_rate": 0.0001351170568561873,
      "loss": 0.6057,
      "step": 500
    },
    {
      "epoch": 3.33,
      "eval_loss": 0.6093556880950928,
      "eval_runtime": 39.2597,
      "eval_samples_per_second": 76.414,
      "eval_steps_per_second": 2.394,
      "step": 504
    },
    {
      "epoch": 3.37,
      "eval_loss": 0.6078519821166992,
      "eval_runtime": 39.2561,
      "eval_samples_per_second": 76.421,
      "eval_steps_per_second": 2.395,
      "step": 511
    },
    {
      "epoch": 3.42,
      "eval_loss": 0.6079010367393494,
      "eval_runtime": 39.2536,
      "eval_samples_per_second": 76.426,
      "eval_steps_per_second": 2.395,
      "step": 518
    },
    {
      "epoch": 3.43,
      "learning_rate": 0.00013244147157190635,
      "loss": 0.598,
      "step": 520
    },
    {
      "epoch": 3.46,
      "eval_loss": 0.6074483394622803,
      "eval_runtime": 39.2832,
      "eval_samples_per_second": 76.369,
      "eval_steps_per_second": 2.393,
      "step": 525
    },
    {
      "epoch": 3.51,
      "eval_loss": 0.6073596477508545,
      "eval_runtime": 39.2701,
      "eval_samples_per_second": 76.394,
      "eval_steps_per_second": 2.394,
      "step": 532
    },
    {
      "epoch": 3.56,
      "eval_loss": 0.606430172920227,
      "eval_runtime": 39.2546,
      "eval_samples_per_second": 76.424,
      "eval_steps_per_second": 2.395,
      "step": 539
    },
    {
      "epoch": 3.56,
      "learning_rate": 0.00012976588628762543,
      "loss": 0.5948,
      "step": 540
    },
    {
      "epoch": 3.6,
      "eval_loss": 0.6060574650764465,
      "eval_runtime": 39.2503,
      "eval_samples_per_second": 76.433,
      "eval_steps_per_second": 2.395,
      "step": 546
    },
    {
      "epoch": 3.65,
      "eval_loss": 0.6067923307418823,
      "eval_runtime": 39.2654,
      "eval_samples_per_second": 76.403,
      "eval_steps_per_second": 2.394,
      "step": 553
    },
    {
      "epoch": 3.69,
      "learning_rate": 0.0001270903010033445,
      "loss": 0.5962,
      "step": 560
    },
    {
      "epoch": 3.69,
      "eval_loss": 0.6042212843894958,
      "eval_runtime": 39.2032,
      "eval_samples_per_second": 76.524,
      "eval_steps_per_second": 2.398,
      "step": 560
    },
    {
      "epoch": 3.74,
      "eval_loss": 0.6041299700737,
      "eval_runtime": 39.2396,
      "eval_samples_per_second": 76.453,
      "eval_steps_per_second": 2.396,
      "step": 567
    },
    {
      "epoch": 3.79,
      "eval_loss": 0.6047356128692627,
      "eval_runtime": 39.274,
      "eval_samples_per_second": 76.386,
      "eval_steps_per_second": 2.393,
      "step": 574
    },
    {
      "epoch": 3.83,
      "learning_rate": 0.00012441471571906357,
      "loss": 0.5977,
      "step": 580
    },
    {
      "epoch": 3.83,
      "eval_loss": 0.6040154099464417,
      "eval_runtime": 39.2677,
      "eval_samples_per_second": 76.399,
      "eval_steps_per_second": 2.394,
      "step": 581
    },
    {
      "epoch": 3.88,
      "eval_loss": 0.603416383266449,
      "eval_runtime": 39.2621,
      "eval_samples_per_second": 76.41,
      "eval_steps_per_second": 2.394,
      "step": 588
    },
    {
      "epoch": 3.93,
      "eval_loss": 0.6036480069160461,
      "eval_runtime": 39.2609,
      "eval_samples_per_second": 76.412,
      "eval_steps_per_second": 2.394,
      "step": 595
    },
    {
      "epoch": 3.96,
      "learning_rate": 0.00012173913043478263,
      "loss": 0.5903,
      "step": 600
    },
    {
      "epoch": 3.97,
      "eval_loss": 0.6035267114639282,
      "eval_runtime": 39.2828,
      "eval_samples_per_second": 76.369,
      "eval_steps_per_second": 2.393,
      "step": 602
    },
    {
      "epoch": 4.02,
      "eval_loss": 0.6025964617729187,
      "eval_runtime": 39.2634,
      "eval_samples_per_second": 76.407,
      "eval_steps_per_second": 2.394,
      "step": 609
    },
    {
      "epoch": 4.06,
      "eval_loss": 0.6028868556022644,
      "eval_runtime": 39.2591,
      "eval_samples_per_second": 76.415,
      "eval_steps_per_second": 2.394,
      "step": 616
    },
    {
      "epoch": 4.09,
      "learning_rate": 0.0001190635451505017,
      "loss": 0.5927,
      "step": 620
    },
    {
      "epoch": 4.11,
      "eval_loss": 0.6027114391326904,
      "eval_runtime": 39.2648,
      "eval_samples_per_second": 76.404,
      "eval_steps_per_second": 2.394,
      "step": 623
    },
    {
      "epoch": 4.16,
      "eval_loss": 0.6030986905097961,
      "eval_runtime": 39.2746,
      "eval_samples_per_second": 76.385,
      "eval_steps_per_second": 2.393,
      "step": 630
    },
    {
      "epoch": 4.2,
      "eval_loss": 0.6026434898376465,
      "eval_runtime": 39.2646,
      "eval_samples_per_second": 76.405,
      "eval_steps_per_second": 2.394,
      "step": 637
    },
    {
      "epoch": 4.22,
      "learning_rate": 0.00011638795986622074,
      "loss": 0.581,
      "step": 640
    },
    {
      "epoch": 4.25,
      "eval_loss": 0.6008206009864807,
      "eval_runtime": 39.2718,
      "eval_samples_per_second": 76.391,
      "eval_steps_per_second": 2.394,
      "step": 644
    },
    {
      "epoch": 4.3,
      "eval_loss": 0.6018855571746826,
      "eval_runtime": 39.2587,
      "eval_samples_per_second": 76.416,
      "eval_steps_per_second": 2.394,
      "step": 651
    },
    {
      "epoch": 4.34,
      "eval_loss": 0.6018174886703491,
      "eval_runtime": 39.2445,
      "eval_samples_per_second": 76.444,
      "eval_steps_per_second": 2.395,
      "step": 658
    },
    {
      "epoch": 4.35,
      "learning_rate": 0.00011371237458193979,
      "loss": 0.5965,
      "step": 660
    },
    {
      "epoch": 4.39,
      "eval_loss": 0.6006762981414795,
      "eval_runtime": 39.2498,
      "eval_samples_per_second": 76.433,
      "eval_steps_per_second": 2.395,
      "step": 665
    },
    {
      "epoch": 4.43,
      "eval_loss": 0.6006374359130859,
      "eval_runtime": 39.2758,
      "eval_samples_per_second": 76.383,
      "eval_steps_per_second": 2.393,
      "step": 672
    },
    {
      "epoch": 4.48,
      "eval_loss": 0.5997828245162964,
      "eval_runtime": 39.2794,
      "eval_samples_per_second": 76.376,
      "eval_steps_per_second": 2.393,
      "step": 679
    },
    {
      "epoch": 4.49,
      "learning_rate": 0.00011103678929765886,
      "loss": 0.5896,
      "step": 680
    },
    {
      "epoch": 4.53,
      "eval_loss": 0.6000981330871582,
      "eval_runtime": 39.2629,
      "eval_samples_per_second": 76.408,
      "eval_steps_per_second": 2.394,
      "step": 686
    },
    {
      "epoch": 4.57,
      "eval_loss": 0.5991115570068359,
      "eval_runtime": 39.2774,
      "eval_samples_per_second": 76.38,
      "eval_steps_per_second": 2.393,
      "step": 693
    },
    {
      "epoch": 4.62,
      "learning_rate": 0.00010836120401337793,
      "loss": 0.5854,
      "step": 700
    },
    {
      "epoch": 4.62,
      "eval_loss": 0.6001954674720764,
      "eval_runtime": 39.2333,
      "eval_samples_per_second": 76.466,
      "eval_steps_per_second": 2.396,
      "step": 700
    },
    {
      "epoch": 4.66,
      "eval_loss": 0.6007575988769531,
      "eval_runtime": 39.2801,
      "eval_samples_per_second": 76.374,
      "eval_steps_per_second": 2.393,
      "step": 707
    },
    {
      "epoch": 4.71,
      "eval_loss": 0.5983864068984985,
      "eval_runtime": 39.2469,
      "eval_samples_per_second": 76.439,
      "eval_steps_per_second": 2.395,
      "step": 714
    },
    {
      "epoch": 4.75,
      "learning_rate": 0.00010568561872909698,
      "loss": 0.5844,
      "step": 720
    },
    {
      "epoch": 4.76,
      "eval_loss": 0.5985506772994995,
      "eval_runtime": 39.2426,
      "eval_samples_per_second": 76.448,
      "eval_steps_per_second": 2.395,
      "step": 721
    },
    {
      "epoch": 4.8,
      "eval_loss": 0.5978309512138367,
      "eval_runtime": 39.2604,
      "eval_samples_per_second": 76.413,
      "eval_steps_per_second": 2.394,
      "step": 728
    },
    {
      "epoch": 4.85,
      "eval_loss": 0.5981310606002808,
      "eval_runtime": 39.2686,
      "eval_samples_per_second": 76.397,
      "eval_steps_per_second": 2.394,
      "step": 735
    },
    {
      "epoch": 4.88,
      "learning_rate": 0.00010301003344481605,
      "loss": 0.5784,
      "step": 740
    },
    {
      "epoch": 4.9,
      "eval_loss": 0.5985335111618042,
      "eval_runtime": 39.2557,
      "eval_samples_per_second": 76.422,
      "eval_steps_per_second": 2.395,
      "step": 742
    },
    {
      "epoch": 4.94,
      "eval_loss": 0.5975944995880127,
      "eval_runtime": 39.2644,
      "eval_samples_per_second": 76.405,
      "eval_steps_per_second": 2.394,
      "step": 749
    },
    {
      "epoch": 4.99,
      "eval_loss": 0.596754252910614,
      "eval_runtime": 39.2365,
      "eval_samples_per_second": 76.459,
      "eval_steps_per_second": 2.396,
      "step": 756
    },
    {
      "epoch": 5.01,
      "learning_rate": 0.00010033444816053512,
      "loss": 0.5825,
      "step": 760
    },
    {
      "epoch": 5.03,
      "eval_loss": 0.5977214574813843,
      "eval_runtime": 39.235,
      "eval_samples_per_second": 76.462,
      "eval_steps_per_second": 2.396,
      "step": 763
    },
    {
      "epoch": 5.08,
      "eval_loss": 0.5982287526130676,
      "eval_runtime": 39.2483,
      "eval_samples_per_second": 76.436,
      "eval_steps_per_second": 2.395,
      "step": 770
    },
    {
      "epoch": 5.13,
      "eval_loss": 0.5973477959632874,
      "eval_runtime": 39.2692,
      "eval_samples_per_second": 76.396,
      "eval_steps_per_second": 2.394,
      "step": 777
    },
    {
      "epoch": 5.15,
      "learning_rate": 9.765886287625419e-05,
      "loss": 0.5724,
      "step": 780
    },
    {
      "epoch": 5.17,
      "eval_loss": 0.598833441734314,
      "eval_runtime": 39.2608,
      "eval_samples_per_second": 76.412,
      "eval_steps_per_second": 2.394,
      "step": 784
    },
    {
      "epoch": 5.22,
      "eval_loss": 0.5973609089851379,
      "eval_runtime": 39.2557,
      "eval_samples_per_second": 76.422,
      "eval_steps_per_second": 2.395,
      "step": 791
    },
    {
      "epoch": 5.27,
      "eval_loss": 0.5983055233955383,
      "eval_runtime": 39.2613,
      "eval_samples_per_second": 76.411,
      "eval_steps_per_second": 2.394,
      "step": 798
    },
    {
      "epoch": 5.28,
      "learning_rate": 9.498327759197325e-05,
      "loss": 0.5765,
      "step": 800
    },
    {
      "epoch": 5.31,
      "eval_loss": 0.597219705581665,
      "eval_runtime": 39.2532,
      "eval_samples_per_second": 76.427,
      "eval_steps_per_second": 2.395,
      "step": 805
    },
    {
      "epoch": 5.36,
      "eval_loss": 0.5974920392036438,
      "eval_runtime": 39.2428,
      "eval_samples_per_second": 76.447,
      "eval_steps_per_second": 2.395,
      "step": 812
    },
    {
      "epoch": 5.4,
      "eval_loss": 0.5970295667648315,
      "eval_runtime": 39.2255,
      "eval_samples_per_second": 76.481,
      "eval_steps_per_second": 2.396,
      "step": 819
    },
    {
      "epoch": 5.41,
      "learning_rate": 9.230769230769232e-05,
      "loss": 0.5662,
      "step": 820
    },
    {
      "epoch": 5.45,
      "eval_loss": 0.5995200872421265,
      "eval_runtime": 39.2763,
      "eval_samples_per_second": 76.382,
      "eval_steps_per_second": 2.393,
      "step": 826
    },
    {
      "epoch": 5.5,
      "eval_loss": 0.5961365699768066,
      "eval_runtime": 39.2442,
      "eval_samples_per_second": 76.444,
      "eval_steps_per_second": 2.395,
      "step": 833
    },
    {
      "epoch": 5.54,
      "learning_rate": 8.963210702341137e-05,
      "loss": 0.5594,
      "step": 840
    },
    {
      "epoch": 5.54,
      "eval_loss": 0.5958811640739441,
      "eval_runtime": 39.224,
      "eval_samples_per_second": 76.484,
      "eval_steps_per_second": 2.396,
      "step": 840
    },
    {
      "epoch": 5.59,
      "eval_loss": 0.5974062085151672,
      "eval_runtime": 39.2479,
      "eval_samples_per_second": 76.437,
      "eval_steps_per_second": 2.395,
      "step": 847
    },
    {
      "epoch": 5.63,
      "eval_loss": 0.5959305167198181,
      "eval_runtime": 39.1122,
      "eval_samples_per_second": 76.702,
      "eval_steps_per_second": 2.403,
      "step": 854
    },
    {
      "epoch": 5.67,
      "learning_rate": 8.695652173913044e-05,
      "loss": 0.5569,
      "step": 860
    },
    {
      "epoch": 5.68,
      "eval_loss": 0.597082257270813,
      "eval_runtime": 39.2419,
      "eval_samples_per_second": 76.449,
      "eval_steps_per_second": 2.395,
      "step": 861
    },
    {
      "epoch": 5.73,
      "eval_loss": 0.5964935421943665,
      "eval_runtime": 39.2482,
      "eval_samples_per_second": 76.437,
      "eval_steps_per_second": 2.395,
      "step": 868
    },
    {
      "epoch": 5.77,
      "eval_loss": 0.596628725528717,
      "eval_runtime": 39.2684,
      "eval_samples_per_second": 76.397,
      "eval_steps_per_second": 2.394,
      "step": 875
    },
    {
      "epoch": 5.81,
      "learning_rate": 8.42809364548495e-05,
      "loss": 0.5711,
      "step": 880
    },
    {
      "epoch": 5.82,
      "eval_loss": 0.596688449382782,
      "eval_runtime": 39.262,
      "eval_samples_per_second": 76.41,
      "eval_steps_per_second": 2.394,
      "step": 882
    },
    {
      "epoch": 5.87,
      "eval_loss": 0.5974501967430115,
      "eval_runtime": 39.2621,
      "eval_samples_per_second": 76.409,
      "eval_steps_per_second": 2.394,
      "step": 889
    },
    {
      "epoch": 5.91,
      "eval_loss": 0.5951861143112183,
      "eval_runtime": 39.2622,
      "eval_samples_per_second": 76.409,
      "eval_steps_per_second": 2.394,
      "step": 896
    },
    {
      "epoch": 5.94,
      "learning_rate": 8.160535117056857e-05,
      "loss": 0.5703,
      "step": 900
    },
    {
      "epoch": 5.96,
      "eval_loss": 0.5963322520256042,
      "eval_runtime": 39.2656,
      "eval_samples_per_second": 76.403,
      "eval_steps_per_second": 2.394,
      "step": 903
    },
    {
      "epoch": 6.0,
      "eval_loss": 0.5958115458488464,
      "eval_runtime": 39.2804,
      "eval_samples_per_second": 76.374,
      "eval_steps_per_second": 2.393,
      "step": 910
    },
    {
      "epoch": 6.05,
      "eval_loss": 0.5968443155288696,
      "eval_runtime": 39.2618,
      "eval_samples_per_second": 76.41,
      "eval_steps_per_second": 2.394,
      "step": 917
    },
    {
      "epoch": 6.07,
      "learning_rate": 7.892976588628763e-05,
      "loss": 0.5551,
      "step": 920
    },
    {
      "epoch": 6.1,
      "eval_loss": 0.5958288311958313,
      "eval_runtime": 39.2648,
      "eval_samples_per_second": 76.404,
      "eval_steps_per_second": 2.394,
      "step": 924
    },
    {
      "epoch": 6.14,
      "eval_loss": 0.5968209505081177,
      "eval_runtime": 39.2563,
      "eval_samples_per_second": 76.421,
      "eval_steps_per_second": 2.395,
      "step": 931
    },
    {
      "epoch": 6.19,
      "eval_loss": 0.5957658886909485,
      "eval_runtime": 39.2499,
      "eval_samples_per_second": 76.433,
      "eval_steps_per_second": 2.395,
      "step": 938
    },
    {
      "epoch": 6.2,
      "learning_rate": 7.62541806020067e-05,
      "loss": 0.5636,
      "step": 940
    },
    {
      "epoch": 6.24,
      "eval_loss": 0.5955784916877747,
      "eval_runtime": 39.279,
      "eval_samples_per_second": 76.377,
      "eval_steps_per_second": 2.393,
      "step": 945
    },
    {
      "epoch": 6.28,
      "eval_loss": 0.5963084101676941,
      "eval_runtime": 39.2656,
      "eval_samples_per_second": 76.403,
      "eval_steps_per_second": 2.394,
      "step": 952
    },
    {
      "epoch": 6.33,
      "eval_loss": 0.595792829990387,
      "eval_runtime": 39.2577,
      "eval_samples_per_second": 76.418,
      "eval_steps_per_second": 2.394,
      "step": 959
    },
    {
      "epoch": 6.33,
      "learning_rate": 7.357859531772575e-05,
      "loss": 0.5676,
      "step": 960
    },
    {
      "epoch": 6.37,
      "eval_loss": 0.5953949093818665,
      "eval_runtime": 39.2554,
      "eval_samples_per_second": 76.423,
      "eval_steps_per_second": 2.395,
      "step": 966
    },
    {
      "epoch": 6.42,
      "eval_loss": 0.595146894454956,
      "eval_runtime": 39.2386,
      "eval_samples_per_second": 76.455,
      "eval_steps_per_second": 2.396,
      "step": 973
    },
    {
      "epoch": 6.47,
      "learning_rate": 7.090301003344481e-05,
      "loss": 0.5551,
      "step": 980
    },
    {
      "epoch": 6.47,
      "eval_loss": 0.5957517027854919,
      "eval_runtime": 39.197,
      "eval_samples_per_second": 76.536,
      "eval_steps_per_second": 2.398,
      "step": 980
    },
    {
      "epoch": 6.51,
      "eval_loss": 0.596603512763977,
      "eval_runtime": 39.2315,
      "eval_samples_per_second": 76.469,
      "eval_steps_per_second": 2.396,
      "step": 987
    },
    {
      "epoch": 6.56,
      "eval_loss": 0.5952173471450806,
      "eval_runtime": 39.2393,
      "eval_samples_per_second": 76.454,
      "eval_steps_per_second": 2.396,
      "step": 994
    },
    {
      "epoch": 6.6,
      "learning_rate": 6.822742474916388e-05,
      "loss": 0.5539,
      "step": 1000
    },
    {
      "epoch": 6.6,
      "eval_loss": 0.5954132676124573,
      "eval_runtime": 39.2213,
      "eval_samples_per_second": 76.489,
      "eval_steps_per_second": 2.397,
      "step": 1001
    },
    {
      "epoch": 6.65,
      "eval_loss": 0.5956953167915344,
      "eval_runtime": 39.2503,
      "eval_samples_per_second": 76.432,
      "eval_steps_per_second": 2.395,
      "step": 1008
    },
    {
      "epoch": 6.7,
      "eval_loss": 0.5959665775299072,
      "eval_runtime": 39.2657,
      "eval_samples_per_second": 76.403,
      "eval_steps_per_second": 2.394,
      "step": 1015
    },
    {
      "epoch": 6.73,
      "learning_rate": 6.555183946488295e-05,
      "loss": 0.5607,
      "step": 1020
    },
    {
      "epoch": 6.74,
      "eval_loss": 0.5952425003051758,
      "eval_runtime": 39.2705,
      "eval_samples_per_second": 76.393,
      "eval_steps_per_second": 2.394,
      "step": 1022
    },
    {
      "epoch": 6.79,
      "eval_loss": 0.5953785181045532,
      "eval_runtime": 39.2403,
      "eval_samples_per_second": 76.452,
      "eval_steps_per_second": 2.395,
      "step": 1029
    },
    {
      "epoch": 6.84,
      "eval_loss": 0.5939880609512329,
      "eval_runtime": 39.2586,
      "eval_samples_per_second": 76.416,
      "eval_steps_per_second": 2.394,
      "step": 1036
    }
  ],
  "max_steps": 1510,
  "num_train_epochs": 10,
  "total_flos": 9.026762224110141e+18,
  "trial_name": null,
  "trial_params": null
}