File size: 2,165 Bytes
b8d192d b2d35fb b8d192d b2d35fb b8d192d b2d35fb b8d192d b2d35fb b8d192d b2d35fb b8d192d b2d35fb b8d192d b2d35fb b8d192d b2d35fb b8d192d b2d35fb b8d192d b2d35fb b8d192d b2d35fb b8d192d b2d35fb b8d192d b2d35fb b8d192d b2d35fb b8d192d b2d35fb b8d192d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9891196834817013,
"eval_steps": 20,
"global_step": 40,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 1.9969173337331283e-05,
"loss": 1.6723,
"step": 1
},
{
"epoch": 0.12,
"learning_rate": 1.9238795325112867e-05,
"loss": 1.4829,
"step": 5
},
{
"epoch": 0.25,
"learning_rate": 1.7071067811865477e-05,
"loss": 1.3734,
"step": 10
},
{
"epoch": 0.37,
"learning_rate": 1.3826834323650899e-05,
"loss": 1.3486,
"step": 15
},
{
"epoch": 0.49,
"learning_rate": 1e-05,
"loss": 1.3274,
"step": 20
},
{
"epoch": 0.49,
"eval_loss": 1.258691668510437,
"eval_runtime": 2.1716,
"eval_samples_per_second": 4.605,
"eval_steps_per_second": 2.302,
"step": 20
},
{
"epoch": 0.62,
"learning_rate": 6.173165676349103e-06,
"loss": 1.2978,
"step": 25
},
{
"epoch": 0.74,
"learning_rate": 2.9289321881345257e-06,
"loss": 1.3259,
"step": 30
},
{
"epoch": 0.87,
"learning_rate": 7.612046748871327e-07,
"loss": 1.2801,
"step": 35
},
{
"epoch": 0.99,
"learning_rate": 0.0,
"loss": 1.3066,
"step": 40
},
{
"epoch": 0.99,
"eval_loss": 1.2477926015853882,
"eval_runtime": 2.1661,
"eval_samples_per_second": 4.617,
"eval_steps_per_second": 2.308,
"step": 40
},
{
"epoch": 0.99,
"step": 40,
"total_flos": 2.6717900760940544e+16,
"train_loss": 1.3475643575191498,
"train_runtime": 1594.7957,
"train_samples_per_second": 1.268,
"train_steps_per_second": 0.025
}
],
"logging_steps": 5,
"max_steps": 40,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"total_flos": 2.6717900760940544e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
|