{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 15.0,
  "eval_steps": 500,
  "global_step": 39375,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.19,
      "learning_rate": 0.0009873015873015875,
      "loss": 0.4962,
      "step": 500
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.0009746031746031746,
      "loss": 0.5629,
      "step": 1000
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.0009619047619047619,
      "loss": 0.6018,
      "step": 1500
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.0009492063492063491,
      "loss": 0.6211,
      "step": 2000
    },
    {
      "epoch": 0.95,
      "learning_rate": 0.0009365079365079366,
      "loss": 0.6306,
      "step": 2500
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.6565389538864577,
      "eval_loss": 1.9168750047683716,
      "eval_runtime": 5346.8612,
      "eval_samples_per_second": 31.412,
      "eval_steps_per_second": 0.123,
      "step": 2625
    },
    {
      "epoch": 1.14,
      "learning_rate": 0.0009238095238095239,
      "loss": 0.5492,
      "step": 3000
    },
    {
      "epoch": 1.33,
      "learning_rate": 0.0009111111111111111,
      "loss": 0.5403,
      "step": 3500
    },
    {
      "epoch": 1.52,
      "learning_rate": 0.0008984126984126985,
      "loss": 0.5678,
      "step": 4000
    },
    {
      "epoch": 1.71,
      "learning_rate": 0.0008857142857142857,
      "loss": 0.58,
      "step": 4500
    },
    {
      "epoch": 1.9,
      "learning_rate": 0.000873015873015873,
      "loss": 0.5977,
      "step": 5000
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.6613973981125897,
      "eval_loss": 1.911996841430664,
      "eval_runtime": 2172.477,
      "eval_samples_per_second": 77.31,
      "eval_steps_per_second": 0.302,
      "step": 5250
    },
    {
      "epoch": 2.1,
      "learning_rate": 0.0008603174603174603,
      "loss": 0.5428,
      "step": 5500
    },
    {
      "epoch": 2.29,
      "learning_rate": 0.0008476190476190476,
      "loss": 0.5113,
      "step": 6000
    },
    {
      "epoch": 2.48,
      "learning_rate": 0.000834920634920635,
      "loss": 0.5199,
      "step": 6500
    },
    {
      "epoch": 2.67,
      "learning_rate": 0.0008222222222222222,
      "loss": 0.5418,
      "step": 7000
    },
    {
      "epoch": 2.86,
      "learning_rate": 0.0008095238095238096,
      "loss": 0.5472,
      "step": 7500
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.6635348754130571,
      "eval_loss": 1.9409977197647095,
      "eval_runtime": 2183.1517,
      "eval_samples_per_second": 76.932,
      "eval_steps_per_second": 0.301,
      "step": 7875
    },
    {
      "epoch": 3.05,
      "learning_rate": 0.0007968253968253968,
      "loss": 0.5319,
      "step": 8000
    },
    {
      "epoch": 3.24,
      "learning_rate": 0.0007841269841269841,
      "loss": 0.4719,
      "step": 8500
    },
    {
      "epoch": 3.43,
      "learning_rate": 0.0007714285714285715,
      "loss": 0.4803,
      "step": 9000
    },
    {
      "epoch": 3.62,
      "learning_rate": 0.0007587301587301587,
      "loss": 0.4889,
      "step": 9500
    },
    {
      "epoch": 3.81,
      "learning_rate": 0.000746031746031746,
      "loss": 0.5079,
      "step": 10000
    },
    {
      "epoch": 4.0,
      "learning_rate": 0.0007333333333333333,
      "loss": 0.513,
      "step": 10500
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.6678634157958977,
      "eval_loss": 1.9818364381790161,
      "eval_runtime": 2205.4104,
      "eval_samples_per_second": 76.156,
      "eval_steps_per_second": 0.298,
      "step": 10500
    },
    {
      "epoch": 4.19,
      "learning_rate": 0.0007206349206349207,
      "loss": 0.4391,
      "step": 11000
    },
    {
      "epoch": 4.38,
      "learning_rate": 0.000707936507936508,
      "loss": 0.452,
      "step": 11500
    },
    {
      "epoch": 4.57,
      "learning_rate": 0.0006952380952380952,
      "loss": 0.4703,
      "step": 12000
    },
    {
      "epoch": 4.76,
      "learning_rate": 0.0006825396825396826,
      "loss": 0.4698,
      "step": 12500
    },
    {
      "epoch": 4.95,
      "learning_rate": 0.0006698412698412698,
      "loss": 0.4714,
      "step": 13000
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.6711261945163883,
      "eval_loss": 1.9427947998046875,
      "eval_runtime": 5170.4,
      "eval_samples_per_second": 32.484,
      "eval_steps_per_second": 0.127,
      "step": 13125
    },
    {
      "epoch": 5.14,
      "learning_rate": 0.0006571428571428571,
      "loss": 0.4337,
      "step": 13500
    },
    {
      "epoch": 5.33,
      "learning_rate": 0.0006444444444444444,
      "loss": 0.4168,
      "step": 14000
    },
    {
      "epoch": 5.52,
      "learning_rate": 0.0006317460317460318,
      "loss": 0.4274,
      "step": 14500
    },
    {
      "epoch": 5.71,
      "learning_rate": 0.0006190476190476191,
      "loss": 0.437,
      "step": 15000
    },
    {
      "epoch": 5.9,
      "learning_rate": 0.0006063492063492063,
      "loss": 0.4378,
      "step": 15500
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.6727397219493316,
      "eval_loss": 2.0051467418670654,
      "eval_runtime": 2183.8633,
      "eval_samples_per_second": 76.907,
      "eval_steps_per_second": 0.301,
      "step": 15750
    },
    {
      "epoch": 6.1,
      "learning_rate": 0.0005936507936507937,
      "loss": 0.4139,
      "step": 16000
    },
    {
      "epoch": 6.29,
      "learning_rate": 0.000580952380952381,
      "loss": 0.3956,
      "step": 16500
    },
    {
      "epoch": 6.48,
      "learning_rate": 0.0005682539682539682,
      "loss": 0.3989,
      "step": 17000
    },
    {
      "epoch": 6.67,
      "learning_rate": 0.0005555555555555556,
      "loss": 0.405,
      "step": 17500
    },
    {
      "epoch": 6.86,
      "learning_rate": 0.0005428571428571428,
      "loss": 0.4105,
      "step": 18000
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.677752969545414,
      "eval_loss": 1.9916749000549316,
      "eval_runtime": 2143.2651,
      "eval_samples_per_second": 78.364,
      "eval_steps_per_second": 0.307,
      "step": 18375
    },
    {
      "epoch": 7.05,
      "learning_rate": 0.0005301587301587302,
      "loss": 0.405,
      "step": 18500
    },
    {
      "epoch": 7.24,
      "learning_rate": 0.0005174603174603176,
      "loss": 0.3653,
      "step": 19000
    },
    {
      "epoch": 7.43,
      "learning_rate": 0.0005047619047619048,
      "loss": 0.3694,
      "step": 19500
    },
    {
      "epoch": 7.62,
      "learning_rate": 0.000492063492063492,
      "loss": 0.3766,
      "step": 20000
    },
    {
      "epoch": 7.81,
      "learning_rate": 0.0004793650793650794,
      "loss": 0.3774,
      "step": 20500
    },
    {
      "epoch": 8.0,
      "learning_rate": 0.00046666666666666666,
      "loss": 0.3837,
      "step": 21000
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.679211693608407,
      "eval_loss": 2.041311025619507,
      "eval_runtime": 2155.99,
      "eval_samples_per_second": 77.902,
      "eval_steps_per_second": 0.305,
      "step": 21000
    },
    {
      "epoch": 8.19,
      "learning_rate": 0.000453968253968254,
      "loss": 0.3406,
      "step": 21500
    },
    {
      "epoch": 8.38,
      "learning_rate": 0.00044126984126984127,
      "loss": 0.3469,
      "step": 22000
    },
    {
      "epoch": 8.57,
      "learning_rate": 0.00042857142857142855,
      "loss": 0.3523,
      "step": 22500
    },
    {
      "epoch": 8.76,
      "learning_rate": 0.0004158730158730159,
      "loss": 0.3559,
      "step": 23000
    },
    {
      "epoch": 8.95,
      "learning_rate": 0.0004031746031746032,
      "loss": 0.3602,
      "step": 23500
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.6870054478878271,
      "eval_loss": 2.0737974643707275,
      "eval_runtime": 2167.8038,
      "eval_samples_per_second": 77.477,
      "eval_steps_per_second": 0.303,
      "step": 23625
    },
    {
      "epoch": 9.14,
      "learning_rate": 0.0003904761904761905,
      "loss": 0.3172,
      "step": 24000
    },
    {
      "epoch": 9.33,
      "learning_rate": 0.00037777777777777777,
      "loss": 0.325,
      "step": 24500
    },
    {
      "epoch": 9.52,
      "learning_rate": 0.00036507936507936505,
      "loss": 0.3255,
      "step": 25000
    },
    {
      "epoch": 9.71,
      "learning_rate": 0.00035238095238095243,
      "loss": 0.3277,
      "step": 25500
    },
    {
      "epoch": 9.9,
      "learning_rate": 0.0003396825396825397,
      "loss": 0.336,
      "step": 26000
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.6875770295614897,
      "eval_loss": 2.087229013442993,
      "eval_runtime": 4594.6085,
      "eval_samples_per_second": 36.555,
      "eval_steps_per_second": 0.143,
      "step": 26250
    },
    {
      "epoch": 10.1,
      "learning_rate": 0.000326984126984127,
      "loss": 0.3184,
      "step": 26500
    },
    {
      "epoch": 10.29,
      "learning_rate": 0.00031428571428571427,
      "loss": 0.3072,
      "step": 27000
    },
    {
      "epoch": 10.48,
      "learning_rate": 0.0003015873015873016,
      "loss": 0.3024,
      "step": 27500
    },
    {
      "epoch": 10.67,
      "learning_rate": 0.0002888888888888889,
      "loss": 0.3112,
      "step": 28000
    },
    {
      "epoch": 10.86,
      "learning_rate": 0.0002761904761904762,
      "loss": 0.3057,
      "step": 28500
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.6893870381947546,
      "eval_loss": 2.1162524223327637,
      "eval_runtime": 1647.7962,
      "eval_samples_per_second": 101.927,
      "eval_steps_per_second": 0.399,
      "step": 28875
    },
    {
      "epoch": 11.05,
      "learning_rate": 0.0002634920634920635,
      "loss": 0.2991,
      "step": 29000
    },
    {
      "epoch": 11.24,
      "learning_rate": 0.0002507936507936508,
      "loss": 0.2861,
      "step": 29500
    },
    {
      "epoch": 11.43,
      "learning_rate": 0.0002380952380952381,
      "loss": 0.2849,
      "step": 30000
    },
    {
      "epoch": 11.62,
      "learning_rate": 0.0002253968253968254,
      "loss": 0.2897,
      "step": 30500
    },
    {
      "epoch": 11.81,
      "learning_rate": 0.0002126984126984127,
      "loss": 0.2864,
      "step": 31000
    },
    {
      "epoch": 12.0,
      "learning_rate": 0.0002,
      "loss": 0.2856,
      "step": 31500
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.6936441308683874,
      "eval_loss": 2.1104633808135986,
      "eval_runtime": 1643.6927,
      "eval_samples_per_second": 102.182,
      "eval_steps_per_second": 0.4,
      "step": 31500
    },
    {
      "epoch": 12.19,
      "learning_rate": 0.00018730158730158731,
      "loss": 0.2704,
      "step": 32000
    },
    {
      "epoch": 12.38,
      "learning_rate": 0.0001746031746031746,
      "loss": 0.2688,
      "step": 32500
    },
    {
      "epoch": 12.57,
      "learning_rate": 0.00016190476190476192,
      "loss": 0.2634,
      "step": 33000
    },
    {
      "epoch": 12.76,
      "learning_rate": 0.0001492063492063492,
      "loss": 0.2713,
      "step": 33500
    },
    {
      "epoch": 12.95,
      "learning_rate": 0.0001365079365079365,
      "loss": 0.2704,
      "step": 34000
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.6964782233336311,
      "eval_loss": 2.168531894683838,
      "eval_runtime": 1642.8004,
      "eval_samples_per_second": 102.237,
      "eval_steps_per_second": 0.4,
      "step": 34125
    },
    {
      "epoch": 13.14,
      "learning_rate": 0.0001238095238095238,
      "loss": 0.2586,
      "step": 34500
    },
    {
      "epoch": 13.33,
      "learning_rate": 0.0001111111111111111,
      "loss": 0.2544,
      "step": 35000
    },
    {
      "epoch": 13.52,
      "learning_rate": 9.841269841269841e-05,
      "loss": 0.2483,
      "step": 35500
    },
    {
      "epoch": 13.71,
      "learning_rate": 8.571428571428571e-05,
      "loss": 0.2612,
      "step": 36000
    },
    {
      "epoch": 13.9,
      "learning_rate": 7.301587301587302e-05,
      "loss": 0.2503,
      "step": 36500
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.699431395314221,
      "eval_loss": 2.162724733352661,
      "eval_runtime": 1648.5219,
      "eval_samples_per_second": 101.882,
      "eval_steps_per_second": 0.399,
      "step": 36750
    },
    {
      "epoch": 14.1,
      "learning_rate": 6.031746031746032e-05,
      "loss": 0.2471,
      "step": 37000
    },
    {
      "epoch": 14.29,
      "learning_rate": 4.761904761904762e-05,
      "loss": 0.2452,
      "step": 37500
    },
    {
      "epoch": 14.48,
      "learning_rate": 3.4920634920634925e-05,
      "loss": 0.2433,
      "step": 38000
    },
    {
      "epoch": 14.67,
      "learning_rate": 2.2222222222222223e-05,
      "loss": 0.2392,
      "step": 38500
    },
    {
      "epoch": 14.86,
      "learning_rate": 9.523809523809525e-06,
      "loss": 0.2362,
      "step": 39000
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.7000208389151856,
      "eval_loss": 2.1793291568756104,
      "eval_runtime": 1654.6492,
      "eval_samples_per_second": 101.505,
      "eval_steps_per_second": 0.397,
      "step": 39375
    },
    {
      "epoch": 15.0,
      "step": 39375,
      "total_flos": 6.872831806674565e+20,
      "train_loss": 0.1121770017593626,
      "train_runtime": 63366.2415,
      "train_samples_per_second": 159.032,
      "train_steps_per_second": 0.621
    }
  ],
  "logging_steps": 500,
  "max_steps": 39375,
  "num_train_epochs": 15,
  "save_steps": 500,
  "total_flos": 6.872831806674565e+20,
  "trial_name": null,
  "trial_params": null
}