finetuned-fake-food / trainer_state.json
itsLeen's picture
🍻 cheers
1450f88 verified
raw
history blame
9.44 kB
{
"best_metric": 0.3198860287666321,
"best_model_checkpoint": "finetuned-fake-food/checkpoint-1800",
"epoch": 2.5284450063211126,
"eval_steps": 100,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1264222503160556,
"grad_norm": 5.020864009857178,
"learning_rate": 0.00019,
"loss": 0.5416,
"step": 100
},
{
"epoch": 0.1264222503160556,
"eval_accuracy": 0.7081468218442256,
"eval_loss": 0.5592844486236572,
"eval_runtime": 75.3793,
"eval_samples_per_second": 14.818,
"eval_steps_per_second": 1.857,
"step": 100
},
{
"epoch": 0.2528445006321112,
"grad_norm": 3.241377115249634,
"learning_rate": 0.00018,
"loss": 0.5299,
"step": 200
},
{
"epoch": 0.2528445006321112,
"eval_accuracy": 0.7421665174574754,
"eval_loss": 0.5342246294021606,
"eval_runtime": 75.6464,
"eval_samples_per_second": 14.766,
"eval_steps_per_second": 1.851,
"step": 200
},
{
"epoch": 0.37926675094816686,
"grad_norm": 3.0553033351898193,
"learning_rate": 0.00017,
"loss": 0.5503,
"step": 300
},
{
"epoch": 0.37926675094816686,
"eval_accuracy": 0.7717099373321397,
"eval_loss": 0.48751309514045715,
"eval_runtime": 75.98,
"eval_samples_per_second": 14.701,
"eval_steps_per_second": 1.843,
"step": 300
},
{
"epoch": 0.5056890012642224,
"grad_norm": 2.0104384422302246,
"learning_rate": 0.00016,
"loss": 0.5561,
"step": 400
},
{
"epoch": 0.5056890012642224,
"eval_accuracy": 0.7940913160250671,
"eval_loss": 0.4621775448322296,
"eval_runtime": 76.6055,
"eval_samples_per_second": 14.581,
"eval_steps_per_second": 1.828,
"step": 400
},
{
"epoch": 0.6321112515802781,
"grad_norm": 3.4203433990478516,
"learning_rate": 0.00015000000000000001,
"loss": 0.5581,
"step": 500
},
{
"epoch": 0.6321112515802781,
"eval_accuracy": 0.7457475380483438,
"eval_loss": 0.5501323342323303,
"eval_runtime": 75.3664,
"eval_samples_per_second": 14.821,
"eval_steps_per_second": 1.858,
"step": 500
},
{
"epoch": 0.7585335018963337,
"grad_norm": 1.4952611923217773,
"learning_rate": 0.00014,
"loss": 0.5845,
"step": 600
},
{
"epoch": 0.7585335018963337,
"eval_accuracy": 0.747538048343778,
"eval_loss": 0.5088097453117371,
"eval_runtime": 76.5056,
"eval_samples_per_second": 14.6,
"eval_steps_per_second": 1.83,
"step": 600
},
{
"epoch": 0.8849557522123894,
"grad_norm": 1.8074195384979248,
"learning_rate": 0.00013000000000000002,
"loss": 0.5695,
"step": 700
},
{
"epoch": 0.8849557522123894,
"eval_accuracy": 0.7860340196956133,
"eval_loss": 0.4740249812602997,
"eval_runtime": 76.9715,
"eval_samples_per_second": 14.512,
"eval_steps_per_second": 1.819,
"step": 700
},
{
"epoch": 1.011378002528445,
"grad_norm": 1.2785142660140991,
"learning_rate": 0.00012,
"loss": 0.5406,
"step": 800
},
{
"epoch": 1.011378002528445,
"eval_accuracy": 0.7815577439570277,
"eval_loss": 0.4855746030807495,
"eval_runtime": 76.7685,
"eval_samples_per_second": 14.55,
"eval_steps_per_second": 1.824,
"step": 800
},
{
"epoch": 1.1378002528445006,
"grad_norm": 1.3373093605041504,
"learning_rate": 0.00011000000000000002,
"loss": 0.5353,
"step": 900
},
{
"epoch": 1.1378002528445006,
"eval_accuracy": 0.8155774395702775,
"eval_loss": 0.4251798987388611,
"eval_runtime": 75.415,
"eval_samples_per_second": 14.811,
"eval_steps_per_second": 1.856,
"step": 900
},
{
"epoch": 1.2642225031605563,
"grad_norm": 2.4060959815979004,
"learning_rate": 0.0001,
"loss": 0.5345,
"step": 1000
},
{
"epoch": 1.2642225031605563,
"eval_accuracy": 0.7761862130707251,
"eval_loss": 0.50136399269104,
"eval_runtime": 75.9241,
"eval_samples_per_second": 14.712,
"eval_steps_per_second": 1.844,
"step": 1000
},
{
"epoch": 1.3906447534766118,
"grad_norm": 1.6286314725875854,
"learning_rate": 9e-05,
"loss": 0.5105,
"step": 1100
},
{
"epoch": 1.3906447534766118,
"eval_accuracy": 0.7860340196956133,
"eval_loss": 0.48000478744506836,
"eval_runtime": 75.3515,
"eval_samples_per_second": 14.824,
"eval_steps_per_second": 1.858,
"step": 1100
},
{
"epoch": 1.5170670037926675,
"grad_norm": 2.462752103805542,
"learning_rate": 8e-05,
"loss": 0.5266,
"step": 1200
},
{
"epoch": 1.5170670037926675,
"eval_accuracy": 0.7958818263205013,
"eval_loss": 0.4617547392845154,
"eval_runtime": 75.1188,
"eval_samples_per_second": 14.87,
"eval_steps_per_second": 1.864,
"step": 1200
},
{
"epoch": 1.6434892541087232,
"grad_norm": 2.6984634399414062,
"learning_rate": 7e-05,
"loss": 0.4709,
"step": 1300
},
{
"epoch": 1.6434892541087232,
"eval_accuracy": 0.8281110116383169,
"eval_loss": 0.39056020975112915,
"eval_runtime": 74.801,
"eval_samples_per_second": 14.933,
"eval_steps_per_second": 1.872,
"step": 1300
},
{
"epoch": 1.7699115044247788,
"grad_norm": 2.939568281173706,
"learning_rate": 6e-05,
"loss": 0.4624,
"step": 1400
},
{
"epoch": 1.7699115044247788,
"eval_accuracy": 0.8128916741271263,
"eval_loss": 0.4208226203918457,
"eval_runtime": 77.4109,
"eval_samples_per_second": 14.429,
"eval_steps_per_second": 1.809,
"step": 1400
},
{
"epoch": 1.8963337547408345,
"grad_norm": 1.791272759437561,
"learning_rate": 5e-05,
"loss": 0.4677,
"step": 1500
},
{
"epoch": 1.8963337547408345,
"eval_accuracy": 0.8173679498657117,
"eval_loss": 0.4207296073436737,
"eval_runtime": 76.4178,
"eval_samples_per_second": 14.617,
"eval_steps_per_second": 1.832,
"step": 1500
},
{
"epoch": 2.02275600505689,
"grad_norm": 1.7240327596664429,
"learning_rate": 4e-05,
"loss": 0.4478,
"step": 1600
},
{
"epoch": 2.02275600505689,
"eval_accuracy": 0.8478066248880931,
"eval_loss": 0.35574597120285034,
"eval_runtime": 75.4802,
"eval_samples_per_second": 14.799,
"eval_steps_per_second": 1.855,
"step": 1600
},
{
"epoch": 2.1491782553729455,
"grad_norm": 3.029090642929077,
"learning_rate": 3e-05,
"loss": 0.4451,
"step": 1700
},
{
"epoch": 2.1491782553729455,
"eval_accuracy": 0.8442256042972247,
"eval_loss": 0.3545984923839569,
"eval_runtime": 75.7957,
"eval_samples_per_second": 14.737,
"eval_steps_per_second": 1.847,
"step": 1700
},
{
"epoch": 2.275600505689001,
"grad_norm": 2.259213447570801,
"learning_rate": 2e-05,
"loss": 0.3796,
"step": 1800
},
{
"epoch": 2.275600505689001,
"eval_accuracy": 0.8719785138764548,
"eval_loss": 0.3198860287666321,
"eval_runtime": 74.5384,
"eval_samples_per_second": 14.986,
"eval_steps_per_second": 1.878,
"step": 1800
},
{
"epoch": 2.402022756005057,
"grad_norm": 2.9328560829162598,
"learning_rate": 1e-05,
"loss": 0.4358,
"step": 1900
},
{
"epoch": 2.402022756005057,
"eval_accuracy": 0.8603401969561325,
"eval_loss": 0.33084791898727417,
"eval_runtime": 76.0815,
"eval_samples_per_second": 14.682,
"eval_steps_per_second": 1.84,
"step": 1900
},
{
"epoch": 2.5284450063211126,
"grad_norm": 1.4755433797836304,
"learning_rate": 0.0,
"loss": 0.3373,
"step": 2000
},
{
"epoch": 2.5284450063211126,
"eval_accuracy": 0.8540734109221128,
"eval_loss": 0.34551766514778137,
"eval_runtime": 75.3964,
"eval_samples_per_second": 14.815,
"eval_steps_per_second": 1.857,
"step": 2000
},
{
"epoch": 2.5284450063211126,
"step": 2000,
"total_flos": 1.2397168498542428e+18,
"train_loss": 0.49920871353149415,
"train_runtime": 3192.0436,
"train_samples_per_second": 5.012,
"train_steps_per_second": 0.627
}
],
"logging_steps": 100,
"max_steps": 2000,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.2397168498542428e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}