Tukan-1.1B-Chat-v0.1 / trainer_state.json
alexredna's picture
Model save
be86fd3 verified
raw
history blame
6.16 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9996412770536889,
"eval_steps": 80,
"global_step": 209,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 3.9997740569453936e-05,
"loss": 1.3377,
"step": 1
},
{
"epoch": 0.02,
"learning_rate": 3.9943539757443494e-05,
"loss": 1.1879,
"step": 5
},
{
"epoch": 0.05,
"learning_rate": 3.97744778056729e-05,
"loss": 1.1238,
"step": 10
},
{
"epoch": 0.07,
"learning_rate": 3.949376867256863e-05,
"loss": 1.0839,
"step": 15
},
{
"epoch": 0.1,
"learning_rate": 3.9102997248704994e-05,
"loss": 1.0787,
"step": 20
},
{
"epoch": 0.12,
"learning_rate": 3.8604369839019515e-05,
"loss": 1.0363,
"step": 25
},
{
"epoch": 0.14,
"learning_rate": 3.800070170596182e-05,
"loss": 1.0079,
"step": 30
},
{
"epoch": 0.17,
"learning_rate": 3.729540117445352e-05,
"loss": 1.0173,
"step": 35
},
{
"epoch": 0.19,
"learning_rate": 3.6492450388403034e-05,
"loss": 1.0162,
"step": 40
},
{
"epoch": 0.22,
"learning_rate": 3.559638282742449e-05,
"loss": 1.0247,
"step": 45
},
{
"epoch": 0.24,
"learning_rate": 3.461225771070188e-05,
"loss": 1.0066,
"step": 50
},
{
"epoch": 0.26,
"learning_rate": 3.354563143251483e-05,
"loss": 0.9969,
"step": 55
},
{
"epoch": 0.29,
"learning_rate": 3.2402526190701667e-05,
"loss": 1.0205,
"step": 60
},
{
"epoch": 0.31,
"learning_rate": 3.1189395985184464e-05,
"loss": 0.9882,
"step": 65
},
{
"epoch": 0.33,
"learning_rate": 2.9913090178528815e-05,
"loss": 0.9749,
"step": 70
},
{
"epoch": 0.36,
"learning_rate": 2.858081482427673e-05,
"loss": 0.9849,
"step": 75
},
{
"epoch": 0.38,
"learning_rate": 2.7200091981393524e-05,
"loss": 0.9699,
"step": 80
},
{
"epoch": 0.38,
"eval_loss": 1.043211817741394,
"eval_runtime": 26.2196,
"eval_samples_per_second": 4.767,
"eval_steps_per_second": 1.602,
"step": 80
},
{
"epoch": 0.41,
"learning_rate": 2.577871724454045e-05,
"loss": 0.9874,
"step": 85
},
{
"epoch": 0.43,
"learning_rate": 2.4324715729958146e-05,
"loss": 0.9723,
"step": 90
},
{
"epoch": 0.45,
"learning_rate": 2.2846296765465708e-05,
"loss": 0.9884,
"step": 95
},
{
"epoch": 0.48,
"learning_rate": 2.1351807540396666e-05,
"loss": 0.9596,
"step": 100
},
{
"epoch": 0.5,
"learning_rate": 1.9849685977165566e-05,
"loss": 0.9784,
"step": 105
},
{
"epoch": 0.53,
"learning_rate": 1.8348413090553356e-05,
"loss": 0.9715,
"step": 110
},
{
"epoch": 0.55,
"learning_rate": 1.6856465103692203e-05,
"loss": 0.9627,
"step": 115
},
{
"epoch": 0.57,
"learning_rate": 1.5382265591104088e-05,
"loss": 0.96,
"step": 120
},
{
"epoch": 0.6,
"learning_rate": 1.3934137918994753e-05,
"loss": 0.9743,
"step": 125
},
{
"epoch": 0.62,
"learning_rate": 1.2520258251326212e-05,
"loss": 0.9661,
"step": 130
},
{
"epoch": 0.65,
"learning_rate": 1.1148609386996692e-05,
"loss": 0.9676,
"step": 135
},
{
"epoch": 0.67,
"learning_rate": 9.826935688764434e-06,
"loss": 0.9842,
"step": 140
},
{
"epoch": 0.69,
"learning_rate": 8.562699358387723e-06,
"loss": 0.9628,
"step": 145
},
{
"epoch": 0.72,
"learning_rate": 7.3630383048527255e-06,
"loss": 0.9603,
"step": 150
},
{
"epoch": 0.74,
"learning_rate": 6.234725843566269e-06,
"loss": 0.9626,
"step": 155
},
{
"epoch": 0.77,
"learning_rate": 5.184132454052731e-06,
"loss": 0.9576,
"step": 160
},
{
"epoch": 0.77,
"eval_loss": 1.0250179767608643,
"eval_runtime": 26.1866,
"eval_samples_per_second": 4.773,
"eval_steps_per_second": 1.604,
"step": 160
},
{
"epoch": 0.79,
"learning_rate": 4.217189812072131e-06,
"loss": 0.9659,
"step": 165
},
{
"epoch": 0.81,
"learning_rate": 3.3393572992349156e-06,
"loss": 0.9655,
"step": 170
},
{
"epoch": 0.84,
"learning_rate": 2.5555911792009624e-06,
"loss": 0.9501,
"step": 175
},
{
"epoch": 0.86,
"learning_rate": 1.8703166144947427e-06,
"loss": 0.9754,
"step": 180
},
{
"epoch": 0.88,
"learning_rate": 1.2874026819303698e-06,
"loss": 0.9497,
"step": 185
},
{
"epoch": 0.91,
"learning_rate": 8.101405277100549e-07,
"loss": 0.9477,
"step": 190
},
{
"epoch": 0.93,
"learning_rate": 4.412247855328322e-07,
"loss": 0.9624,
"step": 195
},
{
"epoch": 0.96,
"learning_rate": 1.8273836262732824e-07,
"loss": 0.959,
"step": 200
},
{
"epoch": 0.98,
"learning_rate": 3.614067960701961e-08,
"loss": 0.9523,
"step": 205
},
{
"epoch": 1.0,
"step": 209,
"total_flos": 3.512657938470666e+17,
"train_loss": 0.5025384334856243,
"train_runtime": 8965.5847,
"train_samples_per_second": 2.798,
"train_steps_per_second": 0.023
}
],
"logging_steps": 5,
"max_steps": 209,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"total_flos": 3.512657938470666e+17,
"train_batch_size": 3,
"trial_name": null,
"trial_params": null
}