File size: 3,435 Bytes
1e5d4b0 da6644b ecb490e da6644b 1e5d4b0 68e2142 1e5d4b0 ecb490e 68e2142 ecb490e 5663cfb 4590fb1 da6644b 1e5d4b0 ecb490e 1e5d4b0 ecb490e 1e5d4b0 da6644b 1e5d4b0 da6644b 1e5d4b0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.14450867052023122,
"eval_steps": 10,
"global_step": 50,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.002890173410404624,
"eval_loss": NaN,
"eval_runtime": 21.3252,
"eval_samples_per_second": 6.846,
"eval_steps_per_second": 3.423,
"step": 1
},
{
"epoch": 0.014450867052023121,
"grad_norm": NaN,
"learning_rate": 5e-05,
"loss": 0.0,
"step": 5
},
{
"epoch": 0.028901734104046242,
"grad_norm": NaN,
"learning_rate": 0.0001,
"loss": 0.0,
"step": 10
},
{
"epoch": 0.028901734104046242,
"eval_loss": NaN,
"eval_runtime": 19.2525,
"eval_samples_per_second": 7.583,
"eval_steps_per_second": 3.792,
"step": 10
},
{
"epoch": 0.04335260115606936,
"grad_norm": NaN,
"learning_rate": 9.619397662556435e-05,
"loss": 0.0,
"step": 15
},
{
"epoch": 0.057803468208092484,
"grad_norm": NaN,
"learning_rate": 8.535533905932738e-05,
"loss": 0.0,
"step": 20
},
{
"epoch": 0.057803468208092484,
"eval_loss": NaN,
"eval_runtime": 25.0682,
"eval_samples_per_second": 5.824,
"eval_steps_per_second": 2.912,
"step": 20
},
{
"epoch": 0.07225433526011561,
"grad_norm": NaN,
"learning_rate": 6.91341716182545e-05,
"loss": 0.0,
"step": 25
},
{
"epoch": 0.08670520231213873,
"grad_norm": NaN,
"learning_rate": 5e-05,
"loss": 0.0,
"step": 30
},
{
"epoch": 0.08670520231213873,
"eval_loss": NaN,
"eval_runtime": 19.2346,
"eval_samples_per_second": 7.59,
"eval_steps_per_second": 3.795,
"step": 30
},
{
"epoch": 0.10115606936416185,
"grad_norm": NaN,
"learning_rate": 3.086582838174551e-05,
"loss": 0.0,
"step": 35
},
{
"epoch": 0.11560693641618497,
"grad_norm": NaN,
"learning_rate": 1.4644660940672627e-05,
"loss": 0.0,
"step": 40
},
{
"epoch": 0.11560693641618497,
"eval_loss": NaN,
"eval_runtime": 19.2386,
"eval_samples_per_second": 7.589,
"eval_steps_per_second": 3.794,
"step": 40
},
{
"epoch": 0.13005780346820808,
"grad_norm": NaN,
"learning_rate": 3.8060233744356633e-06,
"loss": 0.0,
"step": 45
},
{
"epoch": 0.14450867052023122,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 50
},
{
"epoch": 0.14450867052023122,
"eval_loss": NaN,
"eval_runtime": 19.3451,
"eval_samples_per_second": 7.547,
"eval_steps_per_second": 3.774,
"step": 50
}
],
"logging_steps": 5,
"max_steps": 50,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 13,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3234875128676352.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
|