{
  "best_metric": 0.3198860287666321,
  "best_model_checkpoint": "finetuned-fake-food/checkpoint-1800",
  "epoch": 2.5284450063211126,
  "eval_steps": 100,
  "global_step": 2000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1264222503160556,
      "grad_norm": 5.020864009857178,
      "learning_rate": 0.00019,
      "loss": 0.5416,
      "step": 100
    },
    {
      "epoch": 0.1264222503160556,
      "eval_accuracy": 0.7081468218442256,
      "eval_loss": 0.5592844486236572,
      "eval_runtime": 75.3793,
      "eval_samples_per_second": 14.818,
      "eval_steps_per_second": 1.857,
      "step": 100
    },
    {
      "epoch": 0.2528445006321112,
      "grad_norm": 3.241377115249634,
      "learning_rate": 0.00018,
      "loss": 0.5299,
      "step": 200
    },
    {
      "epoch": 0.2528445006321112,
      "eval_accuracy": 0.7421665174574754,
      "eval_loss": 0.5342246294021606,
      "eval_runtime": 75.6464,
      "eval_samples_per_second": 14.766,
      "eval_steps_per_second": 1.851,
      "step": 200
    },
    {
      "epoch": 0.37926675094816686,
      "grad_norm": 3.0553033351898193,
      "learning_rate": 0.00017,
      "loss": 0.5503,
      "step": 300
    },
    {
      "epoch": 0.37926675094816686,
      "eval_accuracy": 0.7717099373321397,
      "eval_loss": 0.48751309514045715,
      "eval_runtime": 75.98,
      "eval_samples_per_second": 14.701,
      "eval_steps_per_second": 1.843,
      "step": 300
    },
    {
      "epoch": 0.5056890012642224,
      "grad_norm": 2.0104384422302246,
      "learning_rate": 0.00016,
      "loss": 0.5561,
      "step": 400
    },
    {
      "epoch": 0.5056890012642224,
      "eval_accuracy": 0.7940913160250671,
      "eval_loss": 0.4621775448322296,
      "eval_runtime": 76.6055,
      "eval_samples_per_second": 14.581,
      "eval_steps_per_second": 1.828,
      "step": 400
    },
    {
      "epoch": 0.6321112515802781,
      "grad_norm": 3.4203433990478516,
      "learning_rate": 0.00015000000000000001,
      "loss": 0.5581,
      "step": 500
    },
    {
      "epoch": 0.6321112515802781,
      "eval_accuracy": 0.7457475380483438,
      "eval_loss": 0.5501323342323303,
      "eval_runtime": 75.3664,
      "eval_samples_per_second": 14.821,
      "eval_steps_per_second": 1.858,
      "step": 500
    },
    {
      "epoch": 0.7585335018963337,
      "grad_norm": 1.4952611923217773,
      "learning_rate": 0.00014,
      "loss": 0.5845,
      "step": 600
    },
    {
      "epoch": 0.7585335018963337,
      "eval_accuracy": 0.747538048343778,
      "eval_loss": 0.5088097453117371,
      "eval_runtime": 76.5056,
      "eval_samples_per_second": 14.6,
      "eval_steps_per_second": 1.83,
      "step": 600
    },
    {
      "epoch": 0.8849557522123894,
      "grad_norm": 1.8074195384979248,
      "learning_rate": 0.00013000000000000002,
      "loss": 0.5695,
      "step": 700
    },
    {
      "epoch": 0.8849557522123894,
      "eval_accuracy": 0.7860340196956133,
      "eval_loss": 0.4740249812602997,
      "eval_runtime": 76.9715,
      "eval_samples_per_second": 14.512,
      "eval_steps_per_second": 1.819,
      "step": 700
    },
    {
      "epoch": 1.011378002528445,
      "grad_norm": 1.2785142660140991,
      "learning_rate": 0.00012,
      "loss": 0.5406,
      "step": 800
    },
    {
      "epoch": 1.011378002528445,
      "eval_accuracy": 0.7815577439570277,
      "eval_loss": 0.4855746030807495,
      "eval_runtime": 76.7685,
      "eval_samples_per_second": 14.55,
      "eval_steps_per_second": 1.824,
      "step": 800
    },
    {
      "epoch": 1.1378002528445006,
      "grad_norm": 1.3373093605041504,
      "learning_rate": 0.00011000000000000002,
      "loss": 0.5353,
      "step": 900
    },
    {
      "epoch": 1.1378002528445006,
      "eval_accuracy": 0.8155774395702775,
      "eval_loss": 0.4251798987388611,
      "eval_runtime": 75.415,
      "eval_samples_per_second": 14.811,
      "eval_steps_per_second": 1.856,
      "step": 900
    },
    {
      "epoch": 1.2642225031605563,
      "grad_norm": 2.4060959815979004,
      "learning_rate": 0.0001,
      "loss": 0.5345,
      "step": 1000
    },
    {
      "epoch": 1.2642225031605563,
      "eval_accuracy": 0.7761862130707251,
      "eval_loss": 0.50136399269104,
      "eval_runtime": 75.9241,
      "eval_samples_per_second": 14.712,
      "eval_steps_per_second": 1.844,
      "step": 1000
    },
    {
      "epoch": 1.3906447534766118,
      "grad_norm": 1.6286314725875854,
      "learning_rate": 9e-05,
      "loss": 0.5105,
      "step": 1100
    },
    {
      "epoch": 1.3906447534766118,
      "eval_accuracy": 0.7860340196956133,
      "eval_loss": 0.48000478744506836,
      "eval_runtime": 75.3515,
      "eval_samples_per_second": 14.824,
      "eval_steps_per_second": 1.858,
      "step": 1100
    },
    {
      "epoch": 1.5170670037926675,
      "grad_norm": 2.462752103805542,
      "learning_rate": 8e-05,
      "loss": 0.5266,
      "step": 1200
    },
    {
      "epoch": 1.5170670037926675,
      "eval_accuracy": 0.7958818263205013,
      "eval_loss": 0.4617547392845154,
      "eval_runtime": 75.1188,
      "eval_samples_per_second": 14.87,
      "eval_steps_per_second": 1.864,
      "step": 1200
    },
    {
      "epoch": 1.6434892541087232,
      "grad_norm": 2.6984634399414062,
      "learning_rate": 7e-05,
      "loss": 0.4709,
      "step": 1300
    },
    {
      "epoch": 1.6434892541087232,
      "eval_accuracy": 0.8281110116383169,
      "eval_loss": 0.39056020975112915,
      "eval_runtime": 74.801,
      "eval_samples_per_second": 14.933,
      "eval_steps_per_second": 1.872,
      "step": 1300
    },
    {
      "epoch": 1.7699115044247788,
      "grad_norm": 2.939568281173706,
      "learning_rate": 6e-05,
      "loss": 0.4624,
      "step": 1400
    },
    {
      "epoch": 1.7699115044247788,
      "eval_accuracy": 0.8128916741271263,
      "eval_loss": 0.4208226203918457,
      "eval_runtime": 77.4109,
      "eval_samples_per_second": 14.429,
      "eval_steps_per_second": 1.809,
      "step": 1400
    },
    {
      "epoch": 1.8963337547408345,
      "grad_norm": 1.791272759437561,
      "learning_rate": 5e-05,
      "loss": 0.4677,
      "step": 1500
    },
    {
      "epoch": 1.8963337547408345,
      "eval_accuracy": 0.8173679498657117,
      "eval_loss": 0.4207296073436737,
      "eval_runtime": 76.4178,
      "eval_samples_per_second": 14.617,
      "eval_steps_per_second": 1.832,
      "step": 1500
    },
    {
      "epoch": 2.02275600505689,
      "grad_norm": 1.7240327596664429,
      "learning_rate": 4e-05,
      "loss": 0.4478,
      "step": 1600
    },
    {
      "epoch": 2.02275600505689,
      "eval_accuracy": 0.8478066248880931,
      "eval_loss": 0.35574597120285034,
      "eval_runtime": 75.4802,
      "eval_samples_per_second": 14.799,
      "eval_steps_per_second": 1.855,
      "step": 1600
    },
    {
      "epoch": 2.1491782553729455,
      "grad_norm": 3.029090642929077,
      "learning_rate": 3e-05,
      "loss": 0.4451,
      "step": 1700
    },
    {
      "epoch": 2.1491782553729455,
      "eval_accuracy": 0.8442256042972247,
      "eval_loss": 0.3545984923839569,
      "eval_runtime": 75.7957,
      "eval_samples_per_second": 14.737,
      "eval_steps_per_second": 1.847,
      "step": 1700
    },
    {
      "epoch": 2.275600505689001,
      "grad_norm": 2.259213447570801,
      "learning_rate": 2e-05,
      "loss": 0.3796,
      "step": 1800
    },
    {
      "epoch": 2.275600505689001,
      "eval_accuracy": 0.8719785138764548,
      "eval_loss": 0.3198860287666321,
      "eval_runtime": 74.5384,
      "eval_samples_per_second": 14.986,
      "eval_steps_per_second": 1.878,
      "step": 1800
    },
    {
      "epoch": 2.402022756005057,
      "grad_norm": 2.9328560829162598,
      "learning_rate": 1e-05,
      "loss": 0.4358,
      "step": 1900
    },
    {
      "epoch": 2.402022756005057,
      "eval_accuracy": 0.8603401969561325,
      "eval_loss": 0.33084791898727417,
      "eval_runtime": 76.0815,
      "eval_samples_per_second": 14.682,
      "eval_steps_per_second": 1.84,
      "step": 1900
    },
    {
      "epoch": 2.5284450063211126,
      "grad_norm": 1.4755433797836304,
      "learning_rate": 0.0,
      "loss": 0.3373,
      "step": 2000
    },
    {
      "epoch": 2.5284450063211126,
      "eval_accuracy": 0.8540734109221128,
      "eval_loss": 0.34551766514778137,
      "eval_runtime": 75.3964,
      "eval_samples_per_second": 14.815,
      "eval_steps_per_second": 1.857,
      "step": 2000
    },
    {
      "epoch": 2.5284450063211126,
      "step": 2000,
      "total_flos": 1.2397168498542428e+18,
      "train_loss": 0.49920871353149415,
      "train_runtime": 3192.0436,
      "train_samples_per_second": 5.012,
      "train_steps_per_second": 0.627
    }
  ],
  "logging_steps": 100,
  "max_steps": 2000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.2397168498542428e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}