File size: 2,271 Bytes
b8d192d b530618 b8d192d b530618 b8d192d b530618 b2d35fb b8d192d b530618 b2d35fb b8d192d b530618 be86fd3 b530618 be86fd3 b530618 be86fd3 b530618 be86fd3 b530618 be86fd3 b530618 be86fd3 b530618 be86fd3 b530618 b8d192d be86fd3 b530618 b8d192d b2d35fb b530618 b8d192d b2d35fb b530618 b8d192d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9961568024596464,
"eval_steps": 10,
"global_step": 36,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 3.9923893961834914e-05,
"loss": 1.3268,
"step": 1
},
{
"epoch": 0.14,
"learning_rate": 3.812615574073301e-05,
"loss": 1.2548,
"step": 5
},
{
"epoch": 0.28,
"learning_rate": 3.285575219373079e-05,
"loss": 1.1912,
"step": 10
},
{
"epoch": 0.28,
"eval_loss": 1.109934687614441,
"eval_runtime": 9.2946,
"eval_samples_per_second": 5.057,
"eval_steps_per_second": 1.291,
"step": 10
},
{
"epoch": 0.42,
"learning_rate": 2.5176380902050418e-05,
"loss": 1.1433,
"step": 15
},
{
"epoch": 0.55,
"learning_rate": 1.6527036446661396e-05,
"loss": 1.1238,
"step": 20
},
{
"epoch": 0.55,
"eval_loss": 1.065536379814148,
"eval_runtime": 9.2887,
"eval_samples_per_second": 5.06,
"eval_steps_per_second": 1.292,
"step": 20
},
{
"epoch": 0.69,
"learning_rate": 8.528471272979083e-06,
"loss": 1.1102,
"step": 25
},
{
"epoch": 0.83,
"learning_rate": 2.679491924311226e-06,
"loss": 1.1258,
"step": 30
},
{
"epoch": 0.83,
"eval_loss": 1.055001974105835,
"eval_runtime": 9.2909,
"eval_samples_per_second": 5.059,
"eval_steps_per_second": 1.292,
"step": 30
},
{
"epoch": 0.97,
"learning_rate": 7.61060381650891e-08,
"loss": 1.1272,
"step": 35
},
{
"epoch": 1.0,
"step": 36,
"total_flos": 1.0010669722946765e+17,
"train_loss": 1.1544433269235823,
"train_runtime": 5078.5964,
"train_samples_per_second": 1.537,
"train_steps_per_second": 0.007
}
],
"logging_steps": 5,
"max_steps": 36,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 20,
"total_flos": 1.0010669722946765e+17,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}
|