|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997490589711417, |
|
"eval_steps": 500, |
|
"global_step": 996, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.050188205771643665, |
|
"grad_norm": 2.3202156020973708, |
|
"learning_rate": 9.989427142584392e-06, |
|
"loss": 1.5003, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.10037641154328733, |
|
"grad_norm": 2.314291595101664, |
|
"learning_rate": 9.870995413367397e-06, |
|
"loss": 1.3883, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15056461731493098, |
|
"grad_norm": 2.314931158603531, |
|
"learning_rate": 9.624050979896533e-06, |
|
"loss": 1.3754, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.20075282308657466, |
|
"grad_norm": 2.152003211136021, |
|
"learning_rate": 9.255109039631998e-06, |
|
"loss": 1.3628, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.25094102885821834, |
|
"grad_norm": 2.1047973694759157, |
|
"learning_rate": 8.773903481118611e-06, |
|
"loss": 1.3543, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.30112923462986196, |
|
"grad_norm": 2.1025984809300318, |
|
"learning_rate": 8.193130072341872e-06, |
|
"loss": 1.3515, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.35131744040150564, |
|
"grad_norm": 2.2036136599303076, |
|
"learning_rate": 7.528111505069428e-06, |
|
"loss": 1.3419, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.4015056461731493, |
|
"grad_norm": 2.1361038466937563, |
|
"learning_rate": 6.796393132397829e-06, |
|
"loss": 1.3361, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.451693851944793, |
|
"grad_norm": 2.1253080705715175, |
|
"learning_rate": 6.0172800652631706e-06, |
|
"loss": 1.3336, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.5018820577164367, |
|
"grad_norm": 2.094902290851612, |
|
"learning_rate": 5.211327840815459e-06, |
|
"loss": 1.321, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5520702634880803, |
|
"grad_norm": 2.086008706466924, |
|
"learning_rate": 4.399800100481858e-06, |
|
"loss": 1.3173, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.6022584692597239, |
|
"grad_norm": 2.084376290248031, |
|
"learning_rate": 3.6041075859356383e-06, |
|
"loss": 1.3044, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6524466750313677, |
|
"grad_norm": 2.1873617946017525, |
|
"learning_rate": 2.845243254082134e-06, |
|
"loss": 1.3029, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.7026348808030113, |
|
"grad_norm": 2.1539300794808733, |
|
"learning_rate": 2.1432284145659104e-06, |
|
"loss": 1.2977, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7528230865746549, |
|
"grad_norm": 2.100942527337154, |
|
"learning_rate": 1.5165845024934366e-06, |
|
"loss": 1.3072, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.8030112923462986, |
|
"grad_norm": 2.2004509246977477, |
|
"learning_rate": 9.81844422725109e-07, |
|
"loss": 1.3014, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.8531994981179423, |
|
"grad_norm": 2.148051612184209, |
|
"learning_rate": 5.531163580638483e-07, |
|
"loss": 1.2948, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.903387703889586, |
|
"grad_norm": 2.0857628452747248, |
|
"learning_rate": 2.417115494991107e-07, |
|
"loss": 1.2925, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.9535759096612296, |
|
"grad_norm": 2.1477939821041936, |
|
"learning_rate": 5.584586887435739e-08, |
|
"loss": 1.2951, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.9997490589711417, |
|
"step": 996, |
|
"total_flos": 3245135674474496.0, |
|
"train_loss": 1.3331998112690018, |
|
"train_runtime": 34717.8895, |
|
"train_samples_per_second": 7.345, |
|
"train_steps_per_second": 0.029 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 996, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 420, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3245135674474496.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|