{ "best_metric": 0.8177282214164734, "best_model_checkpoint": "saves/BLOOM-7B/lora/train_1/checkpoint-140", "epoch": 0.20782061799289034, "eval_steps": 10, "global_step": 190, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010937927262783703, "grad_norm": 0.988913357257843, "learning_rate": 0.00029999015487222375, "loss": 1.7338, "step": 10 }, { "epoch": 0.010937927262783703, "eval_loss": 0.9125174283981323, "eval_runtime": 2020.5977, "eval_samples_per_second": 23.32, "eval_steps_per_second": 0.729, "step": 10 }, { "epoch": 0.021875854525567406, "grad_norm": 0.40076708793640137, "learning_rate": 0.00029996062078124905, "loss": 0.6341, "step": 20 }, { "epoch": 0.021875854525567406, "eval_loss": 0.8568164706230164, "eval_runtime": 2020.8868, "eval_samples_per_second": 23.316, "eval_steps_per_second": 0.729, "step": 20 }, { "epoch": 0.03281378178835111, "grad_norm": 0.2821263074874878, "learning_rate": 0.0002999114016039678, "loss": 0.5526, "step": 30 }, { "epoch": 0.03281378178835111, "eval_loss": 0.899090051651001, "eval_runtime": 2020.647, "eval_samples_per_second": 23.319, "eval_steps_per_second": 0.729, "step": 30 }, { "epoch": 0.04375170905113481, "grad_norm": 0.34677910804748535, "learning_rate": 0.00029984250380130117, "loss": 0.5651, "step": 40 }, { "epoch": 0.04375170905113481, "eval_loss": 0.894440770149231, "eval_runtime": 2020.4948, "eval_samples_per_second": 23.321, "eval_steps_per_second": 0.729, "step": 40 }, { "epoch": 0.05468963631391851, "grad_norm": 0.36296069622039795, "learning_rate": 0.0002997539364173515, "loss": 0.5392, "step": 50 }, { "epoch": 0.05468963631391851, "eval_loss": 0.8795871138572693, "eval_runtime": 2020.3744, "eval_samples_per_second": 23.322, "eval_steps_per_second": 0.729, "step": 50 }, { "epoch": 0.06562756357670221, "grad_norm": 0.3470546305179596, "learning_rate": 0.00029964571107821494, "loss": 0.5038, "step": 60 }, { "epoch": 0.06562756357670221, "eval_loss": 0.8612168431282043, "eval_runtime": 2020.1194, "eval_samples_per_second": 23.325, "eval_steps_per_second": 0.729, "step": 60 }, { "epoch": 0.07656549083948591, "grad_norm": 0.4996536374092102, "learning_rate": 0.00029951784199045534, "loss": 0.4904, "step": 70 }, { "epoch": 0.07656549083948591, "eval_loss": 0.8335446715354919, "eval_runtime": 2020.0047, "eval_samples_per_second": 23.327, "eval_steps_per_second": 0.729, "step": 70 }, { "epoch": 0.08750341810226962, "grad_norm": 0.4522307515144348, "learning_rate": 0.0002993703459392396, "loss": 0.476, "step": 80 }, { "epoch": 0.08750341810226962, "eval_loss": 0.8787203431129456, "eval_runtime": 2021.176, "eval_samples_per_second": 23.313, "eval_steps_per_second": 0.729, "step": 80 }, { "epoch": 0.09844134536505332, "grad_norm": 0.4299957752227783, "learning_rate": 0.00029920324228613376, "loss": 0.4819, "step": 90 }, { "epoch": 0.09844134536505332, "eval_loss": 0.8441948294639587, "eval_runtime": 2020.7328, "eval_samples_per_second": 23.318, "eval_steps_per_second": 0.729, "step": 90 }, { "epoch": 0.10937927262783702, "grad_norm": 0.49023598432540894, "learning_rate": 0.0002990165529665622, "loss": 0.4376, "step": 100 }, { "epoch": 0.10937927262783702, "eval_loss": 0.8534411787986755, "eval_runtime": 2019.4689, "eval_samples_per_second": 23.333, "eval_steps_per_second": 0.729, "step": 100 }, { "epoch": 0.12031719989062073, "grad_norm": 0.4622706174850464, "learning_rate": 0.0002988103024869277, "loss": 0.4443, "step": 110 }, { "epoch": 0.12031719989062073, "eval_loss": 0.8330591917037964, "eval_runtime": 2020.301, "eval_samples_per_second": 23.323, "eval_steps_per_second": 0.729, "step": 110 }, { "epoch": 0.13125512715340443, "grad_norm": 0.4706326425075531, "learning_rate": 0.00029858451792139453, "loss": 0.44, "step": 120 }, { "epoch": 0.13125512715340443, "eval_loss": 0.8530427813529968, "eval_runtime": 2020.3749, "eval_samples_per_second": 23.322, "eval_steps_per_second": 0.729, "step": 120 }, { "epoch": 0.14219305441618812, "grad_norm": 0.3258361220359802, "learning_rate": 0.0002983392289083346, "loss": 0.4362, "step": 130 }, { "epoch": 0.14219305441618812, "eval_loss": 0.8594123125076294, "eval_runtime": 2019.0126, "eval_samples_per_second": 23.338, "eval_steps_per_second": 0.73, "step": 130 }, { "epoch": 0.15313098167897182, "grad_norm": 0.2880217134952545, "learning_rate": 0.0002980744676464371, "loss": 0.4273, "step": 140 }, { "epoch": 0.15313098167897182, "eval_loss": 0.8177282214164734, "eval_runtime": 2019.0397, "eval_samples_per_second": 23.338, "eval_steps_per_second": 0.73, "step": 140 }, { "epoch": 0.16406890894175555, "grad_norm": 0.357930451631546, "learning_rate": 0.0002977902688904813, "loss": 0.438, "step": 150 }, { "epoch": 0.16406890894175555, "eval_loss": 0.8449589610099792, "eval_runtime": 2020.3435, "eval_samples_per_second": 23.323, "eval_steps_per_second": 0.729, "step": 150 }, { "epoch": 0.17500683620453925, "grad_norm": 0.3165152072906494, "learning_rate": 0.00029748666994677467, "loss": 0.4234, "step": 160 }, { "epoch": 0.17500683620453925, "eval_loss": 0.8483649492263794, "eval_runtime": 2020.2184, "eval_samples_per_second": 23.324, "eval_steps_per_second": 0.729, "step": 160 }, { "epoch": 0.18594476346732294, "grad_norm": 0.31138718128204346, "learning_rate": 0.00029716371066825593, "loss": 0.4254, "step": 170 }, { "epoch": 0.18594476346732294, "eval_loss": 0.826312243938446, "eval_runtime": 2020.0047, "eval_samples_per_second": 23.327, "eval_steps_per_second": 0.729, "step": 170 }, { "epoch": 0.19688269073010664, "grad_norm": 0.36661282181739807, "learning_rate": 0.0002968214334492632, "loss": 0.4074, "step": 180 }, { "epoch": 0.19688269073010664, "eval_loss": 0.8609428405761719, "eval_runtime": 2020.6157, "eval_samples_per_second": 23.32, "eval_steps_per_second": 0.729, "step": 180 }, { "epoch": 0.20782061799289034, "grad_norm": 0.44822070002555847, "learning_rate": 0.00029645988321996917, "loss": 0.4209, "step": 190 }, { "epoch": 0.20782061799289034, "eval_loss": 0.8370873332023621, "eval_runtime": 2020.0421, "eval_samples_per_second": 23.326, "eval_steps_per_second": 0.729, "step": 190 }, { "epoch": 0.20782061799289034, "step": 190, "total_flos": 2.7046982162212454e+17, "train_loss": 0.5409229328757839, "train_runtime": 46907.2892, "train_samples_per_second": 14.968, "train_steps_per_second": 0.058 } ], "logging_steps": 10, "max_steps": 2742, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.7046982162212454e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }