FT_3 / checkpoint-200 /trainer_state.json
sheepy928's picture
Training in progress, step 200, checkpoint
8232258
raw
history blame
6.96 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0638297872340425,
"eval_steps": 20,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"learning_rate": 0.0004995563442768412,
"loss": 1.2913,
"step": 10
},
{
"epoch": 0.11,
"learning_rate": 0.0004986690328305235,
"loss": 1.1578,
"step": 20
},
{
"epoch": 0.11,
"eval_accuracy": 0.7386666666666667,
"eval_combined_score": 0.6626504648943422,
"eval_f1": 0.6276400817995911,
"eval_loss": 0.7813256978988647,
"eval_precision": 0.5456284444444445,
"eval_recall": 0.7386666666666667,
"eval_runtime": 5.7155,
"eval_samples_per_second": 262.444,
"eval_steps_per_second": 8.223,
"step": 20
},
{
"epoch": 0.16,
"learning_rate": 0.0004977817213842058,
"loss": 0.8742,
"step": 30
},
{
"epoch": 0.21,
"learning_rate": 0.0004968944099378882,
"loss": 0.7537,
"step": 40
},
{
"epoch": 0.21,
"eval_accuracy": 0.7386666666666667,
"eval_combined_score": 0.6626504648943422,
"eval_f1": 0.6276400817995911,
"eval_loss": 0.792127251625061,
"eval_precision": 0.5456284444444445,
"eval_recall": 0.7386666666666667,
"eval_runtime": 5.6846,
"eval_samples_per_second": 263.869,
"eval_steps_per_second": 8.268,
"step": 40
},
{
"epoch": 0.27,
"learning_rate": 0.0004960070984915705,
"loss": 0.8076,
"step": 50
},
{
"epoch": 0.32,
"learning_rate": 0.0004951197870452529,
"loss": 0.7436,
"step": 60
},
{
"epoch": 0.32,
"eval_accuracy": 0.7386666666666667,
"eval_combined_score": 0.6626504648943422,
"eval_f1": 0.6276400817995911,
"eval_loss": 0.7419535517692566,
"eval_precision": 0.5456284444444445,
"eval_recall": 0.7386666666666667,
"eval_runtime": 5.7977,
"eval_samples_per_second": 258.725,
"eval_steps_per_second": 8.107,
"step": 60
},
{
"epoch": 0.37,
"learning_rate": 0.0004942324755989353,
"loss": 0.7465,
"step": 70
},
{
"epoch": 0.43,
"learning_rate": 0.0004933451641526176,
"loss": 0.6516,
"step": 80
},
{
"epoch": 0.43,
"eval_accuracy": 0.7386666666666667,
"eval_combined_score": 0.6626504648943422,
"eval_f1": 0.6276400817995911,
"eval_loss": 0.7484750747680664,
"eval_precision": 0.5456284444444445,
"eval_recall": 0.7386666666666667,
"eval_runtime": 5.7051,
"eval_samples_per_second": 262.923,
"eval_steps_per_second": 8.238,
"step": 80
},
{
"epoch": 0.48,
"learning_rate": 0.0004924578527063,
"loss": 0.9634,
"step": 90
},
{
"epoch": 0.53,
"learning_rate": 0.0004915705412599822,
"loss": 0.8011,
"step": 100
},
{
"epoch": 0.53,
"eval_accuracy": 0.7386666666666667,
"eval_combined_score": 0.6626504648943422,
"eval_f1": 0.6276400817995911,
"eval_loss": 0.7428026795387268,
"eval_precision": 0.5456284444444445,
"eval_recall": 0.7386666666666667,
"eval_runtime": 5.7042,
"eval_samples_per_second": 262.962,
"eval_steps_per_second": 8.239,
"step": 100
},
{
"epoch": 0.59,
"learning_rate": 0.0004906832298136646,
"loss": 0.8691,
"step": 110
},
{
"epoch": 0.64,
"learning_rate": 0.0004897959183673469,
"loss": 0.8761,
"step": 120
},
{
"epoch": 0.64,
"eval_accuracy": 0.7386666666666667,
"eval_combined_score": 0.6626504648943422,
"eval_f1": 0.6276400817995911,
"eval_loss": 0.7458600997924805,
"eval_precision": 0.5456284444444445,
"eval_recall": 0.7386666666666667,
"eval_runtime": 5.6685,
"eval_samples_per_second": 264.622,
"eval_steps_per_second": 8.292,
"step": 120
},
{
"epoch": 0.69,
"learning_rate": 0.0004889086069210293,
"loss": 0.828,
"step": 130
},
{
"epoch": 0.74,
"learning_rate": 0.00048802129547471164,
"loss": 0.8708,
"step": 140
},
{
"epoch": 0.74,
"eval_accuracy": 0.7386666666666667,
"eval_combined_score": 0.6626504648943422,
"eval_f1": 0.6276400817995911,
"eval_loss": 0.7820696830749512,
"eval_precision": 0.5456284444444445,
"eval_recall": 0.7386666666666667,
"eval_runtime": 5.6616,
"eval_samples_per_second": 264.945,
"eval_steps_per_second": 8.302,
"step": 140
},
{
"epoch": 0.8,
"learning_rate": 0.000487133984028394,
"loss": 0.7489,
"step": 150
},
{
"epoch": 0.85,
"learning_rate": 0.0004862466725820763,
"loss": 0.9504,
"step": 160
},
{
"epoch": 0.85,
"eval_accuracy": 0.7386666666666667,
"eval_combined_score": 0.6626504648943422,
"eval_f1": 0.6276400817995911,
"eval_loss": 0.7716627717018127,
"eval_precision": 0.5456284444444445,
"eval_recall": 0.7386666666666667,
"eval_runtime": 5.6993,
"eval_samples_per_second": 263.189,
"eval_steps_per_second": 8.247,
"step": 160
},
{
"epoch": 0.9,
"learning_rate": 0.0004853593611357587,
"loss": 0.5649,
"step": 170
},
{
"epoch": 0.96,
"learning_rate": 0.00048447204968944104,
"loss": 1.1222,
"step": 180
},
{
"epoch": 0.96,
"eval_accuracy": 0.7386666666666667,
"eval_combined_score": 0.6626504648943422,
"eval_f1": 0.6276400817995911,
"eval_loss": 0.9907371401786804,
"eval_precision": 0.5456284444444445,
"eval_recall": 0.7386666666666667,
"eval_runtime": 5.6565,
"eval_samples_per_second": 265.181,
"eval_steps_per_second": 8.309,
"step": 180
},
{
"epoch": 1.01,
"learning_rate": 0.00048358473824312333,
"loss": 0.9109,
"step": 190
},
{
"epoch": 1.06,
"learning_rate": 0.00048269742679680566,
"loss": 0.7528,
"step": 200
},
{
"epoch": 1.06,
"eval_accuracy": 0.7386666666666667,
"eval_combined_score": 0.6626504648943422,
"eval_f1": 0.6276400817995911,
"eval_loss": 0.7575691938400269,
"eval_precision": 0.5456284444444445,
"eval_recall": 0.7386666666666667,
"eval_runtime": 5.6695,
"eval_samples_per_second": 264.575,
"eval_steps_per_second": 8.29,
"step": 200
}
],
"logging_steps": 10,
"max_steps": 5640,
"num_train_epochs": 30,
"save_steps": 100,
"total_flos": 419928999985152.0,
"trial_name": null,
"trial_params": null
}