{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.974948758824869,
  "global_step": 10950,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.23,
      "learning_rate": 4.553734061930783e-05,
      "loss": 1.3816,
      "step": 250
    },
    {
      "epoch": 0.23,
      "eval_loss": 0.1994573473930359,
      "eval_runtime": 49.3939,
      "eval_samples_per_second": 50.553,
      "eval_steps_per_second": 6.337,
      "step": 250
    },
    {
      "epoch": 0.46,
      "learning_rate": 9.107468123861566e-05,
      "loss": 0.1818,
      "step": 500
    },
    {
      "epoch": 0.46,
      "eval_loss": 0.18140748143196106,
      "eval_runtime": 49.3506,
      "eval_samples_per_second": 50.597,
      "eval_steps_per_second": 6.342,
      "step": 500
    },
    {
      "epoch": 0.68,
      "learning_rate": 9.807120237981e-05,
      "loss": 0.1716,
      "step": 750
    },
    {
      "epoch": 0.68,
      "eval_loss": 0.1712454855442047,
      "eval_runtime": 49.3274,
      "eval_samples_per_second": 50.621,
      "eval_steps_per_second": 6.345,
      "step": 750
    },
    {
      "epoch": 0.91,
      "learning_rate": 9.567220036464831e-05,
      "loss": 0.1501,
      "step": 1000
    },
    {
      "epoch": 0.91,
      "eval_loss": 0.1643093228340149,
      "eval_runtime": 49.3431,
      "eval_samples_per_second": 50.605,
      "eval_steps_per_second": 6.343,
      "step": 1000
    },
    {
      "epoch": 1.14,
      "learning_rate": 9.327319834948663e-05,
      "loss": 0.122,
      "step": 1250
    },
    {
      "epoch": 1.14,
      "eval_loss": 0.16473376750946045,
      "eval_runtime": 49.4257,
      "eval_samples_per_second": 50.52,
      "eval_steps_per_second": 6.333,
      "step": 1250
    },
    {
      "epoch": 1.37,
      "learning_rate": 9.087419633432492e-05,
      "loss": 0.1143,
      "step": 1500
    },
    {
      "epoch": 1.37,
      "eval_loss": 0.15297986567020416,
      "eval_runtime": 49.4063,
      "eval_samples_per_second": 50.54,
      "eval_steps_per_second": 6.335,
      "step": 1500
    },
    {
      "epoch": 1.59,
      "learning_rate": 8.847519431916324e-05,
      "loss": 0.1121,
      "step": 1750
    },
    {
      "epoch": 1.59,
      "eval_loss": 0.16007670760154724,
      "eval_runtime": 49.3369,
      "eval_samples_per_second": 50.611,
      "eval_steps_per_second": 6.344,
      "step": 1750
    },
    {
      "epoch": 1.82,
      "learning_rate": 8.607619230400153e-05,
      "loss": 0.1078,
      "step": 2000
    },
    {
      "epoch": 1.82,
      "eval_loss": 0.14469072222709656,
      "eval_runtime": 49.3439,
      "eval_samples_per_second": 50.604,
      "eval_steps_per_second": 6.343,
      "step": 2000
    },
    {
      "epoch": 2.05,
      "learning_rate": 8.367719028883985e-05,
      "loss": 0.0954,
      "step": 2250
    },
    {
      "epoch": 2.05,
      "eval_loss": 0.14710724353790283,
      "eval_runtime": 49.3536,
      "eval_samples_per_second": 50.594,
      "eval_steps_per_second": 6.342,
      "step": 2250
    },
    {
      "epoch": 2.28,
      "learning_rate": 8.127818827367816e-05,
      "loss": 0.0804,
      "step": 2500
    },
    {
      "epoch": 2.28,
      "eval_loss": 0.13685058057308197,
      "eval_runtime": 49.355,
      "eval_samples_per_second": 50.593,
      "eval_steps_per_second": 6.342,
      "step": 2500
    },
    {
      "epoch": 2.51,
      "learning_rate": 7.887918625851645e-05,
      "loss": 0.08,
      "step": 2750
    },
    {
      "epoch": 2.51,
      "eval_loss": 0.13740722835063934,
      "eval_runtime": 49.3607,
      "eval_samples_per_second": 50.587,
      "eval_steps_per_second": 6.341,
      "step": 2750
    },
    {
      "epoch": 2.73,
      "learning_rate": 7.648018424335477e-05,
      "loss": 0.0769,
      "step": 3000
    },
    {
      "epoch": 2.73,
      "eval_loss": 0.1370161473751068,
      "eval_runtime": 49.3701,
      "eval_samples_per_second": 50.577,
      "eval_steps_per_second": 6.34,
      "step": 3000
    },
    {
      "epoch": 2.96,
      "learning_rate": 7.408118222819308e-05,
      "loss": 0.0782,
      "step": 3250
    },
    {
      "epoch": 2.96,
      "eval_loss": 0.13374929130077362,
      "eval_runtime": 49.4074,
      "eval_samples_per_second": 50.539,
      "eval_steps_per_second": 6.335,
      "step": 3250
    },
    {
      "epoch": 3.19,
      "learning_rate": 7.168218021303138e-05,
      "loss": 0.0591,
      "step": 3500
    },
    {
      "epoch": 3.19,
      "eval_loss": 0.13687731325626373,
      "eval_runtime": 49.344,
      "eval_samples_per_second": 50.604,
      "eval_steps_per_second": 6.343,
      "step": 3500
    },
    {
      "epoch": 3.42,
      "learning_rate": 6.928317819786969e-05,
      "loss": 0.0575,
      "step": 3750
    },
    {
      "epoch": 3.42,
      "eval_loss": 0.13442662358283997,
      "eval_runtime": 49.347,
      "eval_samples_per_second": 50.601,
      "eval_steps_per_second": 6.343,
      "step": 3750
    },
    {
      "epoch": 3.64,
      "learning_rate": 6.6884176182708e-05,
      "loss": 0.0579,
      "step": 4000
    },
    {
      "epoch": 3.64,
      "eval_loss": 0.13463211059570312,
      "eval_runtime": 49.3532,
      "eval_samples_per_second": 50.595,
      "eval_steps_per_second": 6.342,
      "step": 4000
    },
    {
      "epoch": 3.87,
      "learning_rate": 6.44851741675463e-05,
      "loss": 0.0541,
      "step": 4250
    },
    {
      "epoch": 3.87,
      "eval_loss": 0.12762367725372314,
      "eval_runtime": 49.3131,
      "eval_samples_per_second": 50.636,
      "eval_steps_per_second": 6.347,
      "step": 4250
    },
    {
      "epoch": 4.1,
      "learning_rate": 6.208617215238462e-05,
      "loss": 0.0469,
      "step": 4500
    },
    {
      "epoch": 4.1,
      "eval_loss": 0.1321054846048355,
      "eval_runtime": 49.3382,
      "eval_samples_per_second": 50.61,
      "eval_steps_per_second": 6.344,
      "step": 4500
    },
    {
      "epoch": 4.33,
      "learning_rate": 5.968717013722291e-05,
      "loss": 0.0367,
      "step": 4750
    },
    {
      "epoch": 4.33,
      "eval_loss": 0.13329076766967773,
      "eval_runtime": 49.3411,
      "eval_samples_per_second": 50.607,
      "eval_steps_per_second": 6.344,
      "step": 4750
    },
    {
      "epoch": 4.55,
      "learning_rate": 5.7288168122061226e-05,
      "loss": 0.0409,
      "step": 5000
    },
    {
      "epoch": 4.55,
      "eval_loss": 0.13458645343780518,
      "eval_runtime": 49.3393,
      "eval_samples_per_second": 50.609,
      "eval_steps_per_second": 6.344,
      "step": 5000
    },
    {
      "epoch": 4.78,
      "learning_rate": 5.488916610689954e-05,
      "loss": 0.0402,
      "step": 5250
    },
    {
      "epoch": 4.78,
      "eval_loss": 0.12923233211040497,
      "eval_runtime": 49.4868,
      "eval_samples_per_second": 50.458,
      "eval_steps_per_second": 6.325,
      "step": 5250
    },
    {
      "epoch": 5.01,
      "learning_rate": 5.249016409173784e-05,
      "loss": 0.0378,
      "step": 5500
    },
    {
      "epoch": 5.01,
      "eval_loss": 0.12460647523403168,
      "eval_runtime": 49.3719,
      "eval_samples_per_second": 50.575,
      "eval_steps_per_second": 6.34,
      "step": 5500
    },
    {
      "epoch": 5.24,
      "learning_rate": 5.009116207657615e-05,
      "loss": 0.0258,
      "step": 5750
    },
    {
      "epoch": 5.24,
      "eval_loss": 0.1305789053440094,
      "eval_runtime": 49.3929,
      "eval_samples_per_second": 50.554,
      "eval_steps_per_second": 6.337,
      "step": 5750
    },
    {
      "epoch": 5.47,
      "learning_rate": 4.769216006141446e-05,
      "loss": 0.0252,
      "step": 6000
    },
    {
      "epoch": 5.47,
      "eval_loss": 0.13075487315654755,
      "eval_runtime": 49.3793,
      "eval_samples_per_second": 50.568,
      "eval_steps_per_second": 6.339,
      "step": 6000
    },
    {
      "epoch": 5.69,
      "learning_rate": 4.5293158046252756e-05,
      "loss": 0.0266,
      "step": 6250
    },
    {
      "epoch": 5.69,
      "eval_loss": 0.13152019679546356,
      "eval_runtime": 49.3918,
      "eval_samples_per_second": 50.555,
      "eval_steps_per_second": 6.337,
      "step": 6250
    },
    {
      "epoch": 5.92,
      "learning_rate": 4.289415603109107e-05,
      "loss": 0.0264,
      "step": 6500
    },
    {
      "epoch": 5.92,
      "eval_loss": 0.12978705763816833,
      "eval_runtime": 49.4158,
      "eval_samples_per_second": 50.53,
      "eval_steps_per_second": 6.334,
      "step": 6500
    },
    {
      "epoch": 6.15,
      "learning_rate": 4.0495154015929375e-05,
      "loss": 0.0204,
      "step": 6750
    },
    {
      "epoch": 6.15,
      "eval_loss": 0.1330789029598236,
      "eval_runtime": 49.4096,
      "eval_samples_per_second": 50.537,
      "eval_steps_per_second": 6.335,
      "step": 6750
    },
    {
      "epoch": 6.38,
      "learning_rate": 3.809615200076768e-05,
      "loss": 0.0176,
      "step": 7000
    },
    {
      "epoch": 6.38,
      "eval_loss": 0.13327623903751373,
      "eval_runtime": 49.4281,
      "eval_samples_per_second": 50.518,
      "eval_steps_per_second": 6.332,
      "step": 7000
    },
    {
      "epoch": 6.6,
      "learning_rate": 3.569714998560599e-05,
      "loss": 0.0177,
      "step": 7250
    },
    {
      "epoch": 6.6,
      "eval_loss": 0.13123974204063416,
      "eval_runtime": 49.4644,
      "eval_samples_per_second": 50.481,
      "eval_steps_per_second": 6.328,
      "step": 7250
    },
    {
      "epoch": 6.83,
      "learning_rate": 3.32981479704443e-05,
      "loss": 0.0161,
      "step": 7500
    },
    {
      "epoch": 6.83,
      "eval_loss": 0.1328614354133606,
      "eval_runtime": 49.4499,
      "eval_samples_per_second": 50.496,
      "eval_steps_per_second": 6.33,
      "step": 7500
    },
    {
      "epoch": 7.06,
      "learning_rate": 3.0899145955282606e-05,
      "loss": 0.016,
      "step": 7750
    },
    {
      "epoch": 7.06,
      "eval_loss": 0.13026753067970276,
      "eval_runtime": 49.4661,
      "eval_samples_per_second": 50.479,
      "eval_steps_per_second": 6.328,
      "step": 7750
    },
    {
      "epoch": 7.29,
      "learning_rate": 2.850014394012091e-05,
      "loss": 0.0104,
      "step": 8000
    },
    {
      "epoch": 7.29,
      "eval_loss": 0.13250969350337982,
      "eval_runtime": 49.5109,
      "eval_samples_per_second": 50.433,
      "eval_steps_per_second": 6.322,
      "step": 8000
    },
    {
      "epoch": 7.52,
      "learning_rate": 2.6101141924959215e-05,
      "loss": 0.0104,
      "step": 8250
    },
    {
      "epoch": 7.52,
      "eval_loss": 0.1344473958015442,
      "eval_runtime": 49.4545,
      "eval_samples_per_second": 50.491,
      "eval_steps_per_second": 6.329,
      "step": 8250
    },
    {
      "epoch": 7.74,
      "learning_rate": 2.3702139909797524e-05,
      "loss": 0.0107,
      "step": 8500
    },
    {
      "epoch": 7.74,
      "eval_loss": 0.13361412286758423,
      "eval_runtime": 49.4611,
      "eval_samples_per_second": 50.484,
      "eval_steps_per_second": 6.328,
      "step": 8500
    },
    {
      "epoch": 7.97,
      "learning_rate": 2.1303137894635834e-05,
      "loss": 0.0105,
      "step": 8750
    },
    {
      "epoch": 7.97,
      "eval_loss": 0.1311049610376358,
      "eval_runtime": 49.3899,
      "eval_samples_per_second": 50.557,
      "eval_steps_per_second": 6.337,
      "step": 8750
    },
    {
      "epoch": 8.2,
      "learning_rate": 1.890413587947414e-05,
      "loss": 0.0072,
      "step": 9000
    },
    {
      "epoch": 8.2,
      "eval_loss": 0.1345677375793457,
      "eval_runtime": 49.4945,
      "eval_samples_per_second": 50.45,
      "eval_steps_per_second": 6.324,
      "step": 9000
    },
    {
      "epoch": 8.43,
      "learning_rate": 1.6505133864312446e-05,
      "loss": 0.0065,
      "step": 9250
    },
    {
      "epoch": 8.43,
      "eval_loss": 0.13423801958560944,
      "eval_runtime": 49.4363,
      "eval_samples_per_second": 50.509,
      "eval_steps_per_second": 6.331,
      "step": 9250
    },
    {
      "epoch": 8.65,
      "learning_rate": 1.4106131849150753e-05,
      "loss": 0.0062,
      "step": 9500
    },
    {
      "epoch": 8.65,
      "eval_loss": 0.13279776275157928,
      "eval_runtime": 49.5198,
      "eval_samples_per_second": 50.424,
      "eval_steps_per_second": 6.321,
      "step": 9500
    },
    {
      "epoch": 8.88,
      "learning_rate": 1.1707129833989061e-05,
      "loss": 0.006,
      "step": 9750
    },
    {
      "epoch": 8.88,
      "eval_loss": 0.13258913159370422,
      "eval_runtime": 49.511,
      "eval_samples_per_second": 50.433,
      "eval_steps_per_second": 6.322,
      "step": 9750
    },
    {
      "epoch": 9.11,
      "learning_rate": 9.308127818827369e-06,
      "loss": 0.0052,
      "step": 10000
    },
    {
      "epoch": 9.11,
      "eval_loss": 0.13228829205036163,
      "eval_runtime": 49.4456,
      "eval_samples_per_second": 50.5,
      "eval_steps_per_second": 6.33,
      "step": 10000
    },
    {
      "epoch": 9.34,
      "learning_rate": 6.909125803665675e-06,
      "loss": 0.0039,
      "step": 10250
    },
    {
      "epoch": 9.34,
      "eval_loss": 0.13294672966003418,
      "eval_runtime": 49.4619,
      "eval_samples_per_second": 50.483,
      "eval_steps_per_second": 6.328,
      "step": 10250
    },
    {
      "epoch": 9.57,
      "learning_rate": 4.510123788503983e-06,
      "loss": 0.0039,
      "step": 10500
    },
    {
      "epoch": 9.57,
      "eval_loss": 0.13272705674171448,
      "eval_runtime": 49.4568,
      "eval_samples_per_second": 50.489,
      "eval_steps_per_second": 6.329,
      "step": 10500
    },
    {
      "epoch": 9.79,
      "learning_rate": 2.11112177334229e-06,
      "loss": 0.004,
      "step": 10750
    },
    {
      "epoch": 9.79,
      "eval_loss": 0.13220719993114471,
      "eval_runtime": 49.465,
      "eval_samples_per_second": 50.48,
      "eval_steps_per_second": 6.328,
      "step": 10750
    }
  ],
  "max_steps": 10970,
  "num_train_epochs": 10,
  "total_flos": 8.710540889772442e+16,
  "trial_name": null,
  "trial_params": null
}