|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 16.0, |
|
"eval_steps": 500, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.3812153339385986, |
|
"learning_rate": 5.7005e-05, |
|
"loss": 0.3305, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy_no_text": 0.958563454164822, |
|
"eval_accuracy_text": NaN, |
|
"eval_iou_no_text": 0.958563454164822, |
|
"eval_iou_text": 0.0, |
|
"eval_loss": 0.1845872551202774, |
|
"eval_mean_accuracy": 0.958563454164822, |
|
"eval_mean_iou": 0.479281727082411, |
|
"eval_overall_accuracy": 0.958563454164822, |
|
"eval_runtime": 9.574, |
|
"eval_samples_per_second": 26.53, |
|
"eval_steps_per_second": 3.342, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 2.7767746448516846, |
|
"learning_rate": 5.401e-05, |
|
"loss": 0.2037, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy_no_text": 0.9706028320744683, |
|
"eval_accuracy_text": NaN, |
|
"eval_iou_no_text": 0.9706028320744683, |
|
"eval_iou_text": 0.0, |
|
"eval_loss": 0.13224510848522186, |
|
"eval_mean_accuracy": 0.9706028320744683, |
|
"eval_mean_iou": 0.48530141603723415, |
|
"eval_overall_accuracy": 0.9706028320744683, |
|
"eval_runtime": 10.1041, |
|
"eval_samples_per_second": 25.138, |
|
"eval_steps_per_second": 3.167, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 1.592877984046936, |
|
"learning_rate": 5.1015e-05, |
|
"loss": 0.1534, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy_no_text": 0.9784367089170211, |
|
"eval_accuracy_text": NaN, |
|
"eval_iou_no_text": 0.9784367089170211, |
|
"eval_iou_text": 0.0, |
|
"eval_loss": 0.10737518221139908, |
|
"eval_mean_accuracy": 0.9784367089170211, |
|
"eval_mean_iou": 0.48921835445851053, |
|
"eval_overall_accuracy": 0.9784367089170211, |
|
"eval_runtime": 9.6164, |
|
"eval_samples_per_second": 26.413, |
|
"eval_steps_per_second": 3.328, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 0.9847292304039001, |
|
"learning_rate": 4.8020000000000004e-05, |
|
"loss": 0.1451, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.3838343918323517, |
|
"learning_rate": 4.5025000000000003e-05, |
|
"loss": 0.1313, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy_no_text": 0.9839048439461286, |
|
"eval_accuracy_text": NaN, |
|
"eval_iou_no_text": 0.9839048439461286, |
|
"eval_iou_text": 0.0, |
|
"eval_loss": 0.09760396927595139, |
|
"eval_mean_accuracy": 0.9839048439461286, |
|
"eval_mean_iou": 0.4919524219730643, |
|
"eval_overall_accuracy": 0.9839048439461286, |
|
"eval_runtime": 10.3548, |
|
"eval_samples_per_second": 24.53, |
|
"eval_steps_per_second": 3.09, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"grad_norm": 1.1984161138534546, |
|
"learning_rate": 4.203e-05, |
|
"loss": 0.1156, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy_no_text": 0.9799307811869516, |
|
"eval_accuracy_text": NaN, |
|
"eval_iou_no_text": 0.9799307811869516, |
|
"eval_iou_text": 0.0, |
|
"eval_loss": 0.1000521183013916, |
|
"eval_mean_accuracy": 0.9799307811869516, |
|
"eval_mean_iou": 0.4899653905934758, |
|
"eval_overall_accuracy": 0.9799307811869516, |
|
"eval_runtime": 9.868, |
|
"eval_samples_per_second": 25.74, |
|
"eval_steps_per_second": 3.243, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"grad_norm": 2.07499361038208, |
|
"learning_rate": 3.9035e-05, |
|
"loss": 0.1123, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy_no_text": 0.986626541167033, |
|
"eval_accuracy_text": NaN, |
|
"eval_iou_no_text": 0.986626541167033, |
|
"eval_iou_text": 0.0, |
|
"eval_loss": 0.09204956144094467, |
|
"eval_mean_accuracy": 0.986626541167033, |
|
"eval_mean_iou": 0.4933132705835165, |
|
"eval_overall_accuracy": 0.986626541167033, |
|
"eval_runtime": 10.6441, |
|
"eval_samples_per_second": 23.863, |
|
"eval_steps_per_second": 3.006, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"grad_norm": 1.8263524770736694, |
|
"learning_rate": 3.604e-05, |
|
"loss": 0.108, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy_no_text": 0.9815042758929395, |
|
"eval_accuracy_text": NaN, |
|
"eval_iou_no_text": 0.9815042758929395, |
|
"eval_iou_text": 0.0, |
|
"eval_loss": 0.09463128447532654, |
|
"eval_mean_accuracy": 0.9815042758929395, |
|
"eval_mean_iou": 0.49075213794646977, |
|
"eval_overall_accuracy": 0.9815042758929395, |
|
"eval_runtime": 9.8625, |
|
"eval_samples_per_second": 25.754, |
|
"eval_steps_per_second": 3.245, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"grad_norm": 0.35387736558914185, |
|
"learning_rate": 3.3045000000000006e-05, |
|
"loss": 0.1015, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.5748061537742615, |
|
"learning_rate": 3.0050000000000002e-05, |
|
"loss": 0.1017, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy_no_text": 0.980515017943572, |
|
"eval_accuracy_text": NaN, |
|
"eval_iou_no_text": 0.980515017943572, |
|
"eval_iou_text": 0.0, |
|
"eval_loss": 0.09426650404930115, |
|
"eval_mean_accuracy": 0.980515017943572, |
|
"eval_mean_iou": 0.490257508971786, |
|
"eval_overall_accuracy": 0.980515017943572, |
|
"eval_runtime": 9.8323, |
|
"eval_samples_per_second": 25.833, |
|
"eval_steps_per_second": 3.255, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"grad_norm": 3.100071668624878, |
|
"learning_rate": 2.7054999999999998e-05, |
|
"loss": 0.0994, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy_no_text": 0.9807879619513361, |
|
"eval_accuracy_text": NaN, |
|
"eval_iou_no_text": 0.9807879619513361, |
|
"eval_iou_text": 0.0, |
|
"eval_loss": 0.0927177369594574, |
|
"eval_mean_accuracy": 0.9807879619513361, |
|
"eval_mean_iou": 0.49039398097566805, |
|
"eval_overall_accuracy": 0.9807879619513361, |
|
"eval_runtime": 9.5796, |
|
"eval_samples_per_second": 26.515, |
|
"eval_steps_per_second": 3.34, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"grad_norm": 0.8802123665809631, |
|
"learning_rate": 2.406e-05, |
|
"loss": 0.0926, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy_no_text": 0.9782629100297493, |
|
"eval_accuracy_text": NaN, |
|
"eval_iou_no_text": 0.9782629100297493, |
|
"eval_iou_text": 0.0, |
|
"eval_loss": 0.0956711545586586, |
|
"eval_mean_accuracy": 0.9782629100297493, |
|
"eval_mean_iou": 0.48913145501487465, |
|
"eval_overall_accuracy": 0.9782629100297493, |
|
"eval_runtime": 10.2441, |
|
"eval_samples_per_second": 24.795, |
|
"eval_steps_per_second": 3.124, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"grad_norm": 0.4630286693572998, |
|
"learning_rate": 2.1065e-05, |
|
"loss": 0.0907, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy_no_text": 0.9829907049995675, |
|
"eval_accuracy_text": NaN, |
|
"eval_iou_no_text": 0.9829907049995675, |
|
"eval_iou_text": 0.0, |
|
"eval_loss": 0.09134823828935623, |
|
"eval_mean_accuracy": 0.9829907049995675, |
|
"eval_mean_iou": 0.49149535249978377, |
|
"eval_overall_accuracy": 0.9829907049995675, |
|
"eval_runtime": 9.7246, |
|
"eval_samples_per_second": 26.119, |
|
"eval_steps_per_second": 3.291, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"grad_norm": 57.36943435668945, |
|
"learning_rate": 1.807e-05, |
|
"loss": 0.0964, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 1.3222465515136719, |
|
"learning_rate": 1.5075000000000002e-05, |
|
"loss": 0.0893, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy_no_text": 0.9838469054451806, |
|
"eval_accuracy_text": NaN, |
|
"eval_iou_no_text": 0.9838469054451806, |
|
"eval_iou_text": 0.0, |
|
"eval_loss": 0.08928046375513077, |
|
"eval_mean_accuracy": 0.9838469054451806, |
|
"eval_mean_iou": 0.4919234527225903, |
|
"eval_overall_accuracy": 0.9838469054451806, |
|
"eval_runtime": 9.6536, |
|
"eval_samples_per_second": 26.311, |
|
"eval_steps_per_second": 3.315, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"grad_norm": 1.2742468118667603, |
|
"learning_rate": 1.2079999999999998e-05, |
|
"loss": 0.0853, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy_no_text": 0.9803520856406479, |
|
"eval_accuracy_text": NaN, |
|
"eval_iou_no_text": 0.9803520856406479, |
|
"eval_iou_text": 0.0, |
|
"eval_loss": 0.09133532643318176, |
|
"eval_mean_accuracy": 0.9803520856406479, |
|
"eval_mean_iou": 0.49017604282032395, |
|
"eval_overall_accuracy": 0.9803520856406479, |
|
"eval_runtime": 10.3368, |
|
"eval_samples_per_second": 24.572, |
|
"eval_steps_per_second": 3.096, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"grad_norm": 5.432977199554443, |
|
"learning_rate": 9.085000000000002e-06, |
|
"loss": 0.0834, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy_no_text": 0.98199298972385, |
|
"eval_accuracy_text": NaN, |
|
"eval_iou_no_text": 0.98199298972385, |
|
"eval_iou_text": 0.0, |
|
"eval_loss": 0.08987393975257874, |
|
"eval_mean_accuracy": 0.98199298972385, |
|
"eval_mean_iou": 0.490996494861925, |
|
"eval_overall_accuracy": 0.98199298972385, |
|
"eval_runtime": 9.5985, |
|
"eval_samples_per_second": 26.463, |
|
"eval_steps_per_second": 3.334, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"grad_norm": 1.3897353410720825, |
|
"learning_rate": 6.0899999999999984e-06, |
|
"loss": 0.0861, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy_no_text": 0.9814577689058006, |
|
"eval_accuracy_text": NaN, |
|
"eval_iou_no_text": 0.9814577689058006, |
|
"eval_iou_text": 0.0, |
|
"eval_loss": 0.0901678055524826, |
|
"eval_mean_accuracy": 0.9814577689058006, |
|
"eval_mean_iou": 0.4907288844529003, |
|
"eval_overall_accuracy": 0.9814577689058006, |
|
"eval_runtime": 9.8323, |
|
"eval_samples_per_second": 25.833, |
|
"eval_steps_per_second": 3.255, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"grad_norm": 0.7514830231666565, |
|
"learning_rate": 3.0950000000000026e-06, |
|
"loss": 0.0882, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.9411144852638245, |
|
"learning_rate": 1e-07, |
|
"loss": 0.0803, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy_no_text": 0.9793073981417643, |
|
"eval_accuracy_text": NaN, |
|
"eval_iou_no_text": 0.9793073981417643, |
|
"eval_iou_text": 0.0, |
|
"eval_loss": 0.09290074557065964, |
|
"eval_mean_accuracy": 0.9793073981417643, |
|
"eval_mean_iou": 0.48965369907088213, |
|
"eval_overall_accuracy": 0.9793073981417643, |
|
"eval_runtime": 9.6102, |
|
"eval_samples_per_second": 26.43, |
|
"eval_steps_per_second": 3.33, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"step": 2000, |
|
"total_flos": 1.032636453617664e+18, |
|
"train_loss": 0.0, |
|
"train_runtime": 0.0014, |
|
"train_samples_per_second": 11544617.925, |
|
"train_steps_per_second": 1443077.241 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 2000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 16, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.032636453617664e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|