|
{ |
|
"best_metric": 0.8177282214164734, |
|
"best_model_checkpoint": "saves/BLOOM-7B/lora/train_1/checkpoint-140", |
|
"epoch": 0.20782061799289034, |
|
"eval_steps": 10, |
|
"global_step": 190, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.010937927262783703, |
|
"grad_norm": 0.988913357257843, |
|
"learning_rate": 0.00029999015487222375, |
|
"loss": 1.7338, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.010937927262783703, |
|
"eval_loss": 0.9125174283981323, |
|
"eval_runtime": 2020.5977, |
|
"eval_samples_per_second": 23.32, |
|
"eval_steps_per_second": 0.729, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.021875854525567406, |
|
"grad_norm": 0.40076708793640137, |
|
"learning_rate": 0.00029996062078124905, |
|
"loss": 0.6341, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.021875854525567406, |
|
"eval_loss": 0.8568164706230164, |
|
"eval_runtime": 2020.8868, |
|
"eval_samples_per_second": 23.316, |
|
"eval_steps_per_second": 0.729, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03281378178835111, |
|
"grad_norm": 0.2821263074874878, |
|
"learning_rate": 0.0002999114016039678, |
|
"loss": 0.5526, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03281378178835111, |
|
"eval_loss": 0.899090051651001, |
|
"eval_runtime": 2020.647, |
|
"eval_samples_per_second": 23.319, |
|
"eval_steps_per_second": 0.729, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04375170905113481, |
|
"grad_norm": 0.34677910804748535, |
|
"learning_rate": 0.00029984250380130117, |
|
"loss": 0.5651, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04375170905113481, |
|
"eval_loss": 0.894440770149231, |
|
"eval_runtime": 2020.4948, |
|
"eval_samples_per_second": 23.321, |
|
"eval_steps_per_second": 0.729, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05468963631391851, |
|
"grad_norm": 0.36296069622039795, |
|
"learning_rate": 0.0002997539364173515, |
|
"loss": 0.5392, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05468963631391851, |
|
"eval_loss": 0.8795871138572693, |
|
"eval_runtime": 2020.3744, |
|
"eval_samples_per_second": 23.322, |
|
"eval_steps_per_second": 0.729, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06562756357670221, |
|
"grad_norm": 0.3470546305179596, |
|
"learning_rate": 0.00029964571107821494, |
|
"loss": 0.5038, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.06562756357670221, |
|
"eval_loss": 0.8612168431282043, |
|
"eval_runtime": 2020.1194, |
|
"eval_samples_per_second": 23.325, |
|
"eval_steps_per_second": 0.729, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07656549083948591, |
|
"grad_norm": 0.4996536374092102, |
|
"learning_rate": 0.00029951784199045534, |
|
"loss": 0.4904, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.07656549083948591, |
|
"eval_loss": 0.8335446715354919, |
|
"eval_runtime": 2020.0047, |
|
"eval_samples_per_second": 23.327, |
|
"eval_steps_per_second": 0.729, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08750341810226962, |
|
"grad_norm": 0.4522307515144348, |
|
"learning_rate": 0.0002993703459392396, |
|
"loss": 0.476, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.08750341810226962, |
|
"eval_loss": 0.8787203431129456, |
|
"eval_runtime": 2021.176, |
|
"eval_samples_per_second": 23.313, |
|
"eval_steps_per_second": 0.729, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09844134536505332, |
|
"grad_norm": 0.4299957752227783, |
|
"learning_rate": 0.00029920324228613376, |
|
"loss": 0.4819, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.09844134536505332, |
|
"eval_loss": 0.8441948294639587, |
|
"eval_runtime": 2020.7328, |
|
"eval_samples_per_second": 23.318, |
|
"eval_steps_per_second": 0.729, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.10937927262783702, |
|
"grad_norm": 0.49023598432540894, |
|
"learning_rate": 0.0002990165529665622, |
|
"loss": 0.4376, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.10937927262783702, |
|
"eval_loss": 0.8534411787986755, |
|
"eval_runtime": 2019.4689, |
|
"eval_samples_per_second": 23.333, |
|
"eval_steps_per_second": 0.729, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12031719989062073, |
|
"grad_norm": 0.4622706174850464, |
|
"learning_rate": 0.0002988103024869277, |
|
"loss": 0.4443, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12031719989062073, |
|
"eval_loss": 0.8330591917037964, |
|
"eval_runtime": 2020.301, |
|
"eval_samples_per_second": 23.323, |
|
"eval_steps_per_second": 0.729, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.13125512715340443, |
|
"grad_norm": 0.4706326425075531, |
|
"learning_rate": 0.00029858451792139453, |
|
"loss": 0.44, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.13125512715340443, |
|
"eval_loss": 0.8530427813529968, |
|
"eval_runtime": 2020.3749, |
|
"eval_samples_per_second": 23.322, |
|
"eval_steps_per_second": 0.729, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.14219305441618812, |
|
"grad_norm": 0.3258361220359802, |
|
"learning_rate": 0.0002983392289083346, |
|
"loss": 0.4362, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14219305441618812, |
|
"eval_loss": 0.8594123125076294, |
|
"eval_runtime": 2019.0126, |
|
"eval_samples_per_second": 23.338, |
|
"eval_steps_per_second": 0.73, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.15313098167897182, |
|
"grad_norm": 0.2880217134952545, |
|
"learning_rate": 0.0002980744676464371, |
|
"loss": 0.4273, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15313098167897182, |
|
"eval_loss": 0.8177282214164734, |
|
"eval_runtime": 2019.0397, |
|
"eval_samples_per_second": 23.338, |
|
"eval_steps_per_second": 0.73, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.16406890894175555, |
|
"grad_norm": 0.357930451631546, |
|
"learning_rate": 0.0002977902688904813, |
|
"loss": 0.438, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.16406890894175555, |
|
"eval_loss": 0.8449589610099792, |
|
"eval_runtime": 2020.3435, |
|
"eval_samples_per_second": 23.323, |
|
"eval_steps_per_second": 0.729, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17500683620453925, |
|
"grad_norm": 0.3165152072906494, |
|
"learning_rate": 0.00029748666994677467, |
|
"loss": 0.4234, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.17500683620453925, |
|
"eval_loss": 0.8483649492263794, |
|
"eval_runtime": 2020.2184, |
|
"eval_samples_per_second": 23.324, |
|
"eval_steps_per_second": 0.729, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18594476346732294, |
|
"grad_norm": 0.31138718128204346, |
|
"learning_rate": 0.00029716371066825593, |
|
"loss": 0.4254, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.18594476346732294, |
|
"eval_loss": 0.826312243938446, |
|
"eval_runtime": 2020.0047, |
|
"eval_samples_per_second": 23.327, |
|
"eval_steps_per_second": 0.729, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19688269073010664, |
|
"grad_norm": 0.36661282181739807, |
|
"learning_rate": 0.0002968214334492632, |
|
"loss": 0.4074, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.19688269073010664, |
|
"eval_loss": 0.8609428405761719, |
|
"eval_runtime": 2020.6157, |
|
"eval_samples_per_second": 23.32, |
|
"eval_steps_per_second": 0.729, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.20782061799289034, |
|
"grad_norm": 0.44822070002555847, |
|
"learning_rate": 0.00029645988321996917, |
|
"loss": 0.4209, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.20782061799289034, |
|
"eval_loss": 0.8370873332023621, |
|
"eval_runtime": 2020.0421, |
|
"eval_samples_per_second": 23.326, |
|
"eval_steps_per_second": 0.729, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.20782061799289034, |
|
"step": 190, |
|
"total_flos": 2.7046982162212454e+17, |
|
"train_loss": 0.5409229328757839, |
|
"train_runtime": 46907.2892, |
|
"train_samples_per_second": 14.968, |
|
"train_steps_per_second": 0.058 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2742, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.7046982162212454e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|