{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.00353534907152895,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.767674535764475e-05,
      "eval_loss": 10.380178451538086,
      "eval_runtime": 67.8401,
      "eval_samples_per_second": 351.12,
      "eval_steps_per_second": 175.56,
      "step": 1
    },
    {
      "epoch": 0.00017676745357644752,
      "grad_norm": 0.03153292462229729,
      "learning_rate": 0.00019967573081342103,
      "loss": 10.3799,
      "step": 10
    },
    {
      "epoch": 0.00035353490715289503,
      "grad_norm": 0.025041894987225533,
      "learning_rate": 0.0001970941817426052,
      "loss": 10.3802,
      "step": 20
    },
    {
      "epoch": 0.0005303023607293425,
      "grad_norm": 0.019651275128126144,
      "learning_rate": 0.00019199794436588243,
      "loss": 10.379,
      "step": 30
    },
    {
      "epoch": 0.0007070698143057901,
      "grad_norm": 0.04715586081147194,
      "learning_rate": 0.0001845190085543795,
      "loss": 10.376,
      "step": 40
    },
    {
      "epoch": 0.0008838372678822375,
      "grad_norm": 0.04065093770623207,
      "learning_rate": 0.00017485107481711012,
      "loss": 10.3746,
      "step": 50
    },
    {
      "epoch": 0.0008838372678822375,
      "eval_loss": 10.375716209411621,
      "eval_runtime": 68.057,
      "eval_samples_per_second": 350.001,
      "eval_steps_per_second": 175.0,
      "step": 50
    },
    {
      "epoch": 0.001060604721458685,
      "grad_norm": 0.06875745952129364,
      "learning_rate": 0.00016324453755953773,
      "loss": 10.3741,
      "step": 60
    },
    {
      "epoch": 0.0012373721750351325,
      "grad_norm": 0.08086761832237244,
      "learning_rate": 0.00015000000000000001,
      "loss": 10.375,
      "step": 70
    },
    {
      "epoch": 0.0014141396286115801,
      "grad_norm": 0.06105079501867294,
      "learning_rate": 0.00013546048870425356,
      "loss": 10.3691,
      "step": 80
    },
    {
      "epoch": 0.0015909070821880276,
      "grad_norm": 0.088962621986866,
      "learning_rate": 0.00012000256937760445,
      "loss": 10.3672,
      "step": 90
    },
    {
      "epoch": 0.001767674535764475,
      "grad_norm": 0.1441858559846878,
      "learning_rate": 0.00010402659401094152,
      "loss": 10.3632,
      "step": 100
    },
    {
      "epoch": 0.001767674535764475,
      "eval_loss": 10.361382484436035,
      "eval_runtime": 68.1568,
      "eval_samples_per_second": 349.488,
      "eval_steps_per_second": 174.744,
      "step": 100
    },
    {
      "epoch": 0.0019444419893409226,
      "grad_norm": 0.09688922762870789,
      "learning_rate": 8.79463319744677e-05,
      "loss": 10.3623,
      "step": 110
    },
    {
      "epoch": 0.00212120944291737,
      "grad_norm": 0.10141890496015549,
      "learning_rate": 7.217825360835473e-05,
      "loss": 10.3596,
      "step": 120
    },
    {
      "epoch": 0.0022979768964938177,
      "grad_norm": 0.11141142249107361,
      "learning_rate": 5.713074385969457e-05,
      "loss": 10.3546,
      "step": 130
    },
    {
      "epoch": 0.002474744350070265,
      "grad_norm": 0.10640386492013931,
      "learning_rate": 4.3193525326884435e-05,
      "loss": 10.3534,
      "step": 140
    },
    {
      "epoch": 0.0026515118036467126,
      "grad_norm": 0.06903336197137833,
      "learning_rate": 3.072756464904006e-05,
      "loss": 10.3528,
      "step": 150
    },
    {
      "epoch": 0.0026515118036467126,
      "eval_loss": 10.354546546936035,
      "eval_runtime": 68.7013,
      "eval_samples_per_second": 346.718,
      "eval_steps_per_second": 173.359,
      "step": 150
    },
    {
      "epoch": 0.0028282792572231603,
      "grad_norm": 0.09616001695394516,
      "learning_rate": 2.0055723659649904e-05,
      "loss": 10.3559,
      "step": 160
    },
    {
      "epoch": 0.0030050467107996075,
      "grad_norm": 0.11327233165502548,
      "learning_rate": 1.1454397434679021e-05,
      "loss": 10.3541,
      "step": 170
    },
    {
      "epoch": 0.003181814164376055,
      "grad_norm": 0.06384103745222092,
      "learning_rate": 5.146355805285452e-06,
      "loss": 10.3542,
      "step": 180
    },
    {
      "epoch": 0.0033585816179525028,
      "grad_norm": 0.08369371294975281,
      "learning_rate": 1.2949737362087156e-06,
      "loss": 10.3538,
      "step": 190
    },
    {
      "epoch": 0.00353534907152895,
      "grad_norm": 0.1022939532995224,
      "learning_rate": 0.0,
      "loss": 10.354,
      "step": 200
    },
    {
      "epoch": 0.00353534907152895,
      "eval_loss": 10.35390567779541,
      "eval_runtime": 68.4752,
      "eval_samples_per_second": 347.863,
      "eval_steps_per_second": 173.932,
      "step": 200
    }
  ],
  "logging_steps": 10,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3937461534720.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}