{ "best_metric": 3.3192038536071777, "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_de_sft_100000samples_-1vocab_original-frozen/checkpoint-25000", "epoch": 2.758772897815052, "global_step": 25000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.28, "l1_reg_loss": 0.0, "learning_rate": 9e-05, "loss": 3.8882, "step": 2500 }, { "epoch": 0.55, "l1_reg_loss": 0.0, "learning_rate": 8e-05, "loss": 3.6037, "step": 5000 }, { "epoch": 0.55, "eval_loss": 3.5761282444000244, "eval_runtime": 397.3988, "eval_samples_per_second": 9.592, "eval_steps_per_second": 4.796, "step": 5000 }, { "epoch": 0.83, "l1_reg_loss": 0.0001, "learning_rate": 7e-05, "loss": 3.4802, "step": 7500 }, { "epoch": 1.1, "l1_reg_loss": 0.0001, "learning_rate": 6e-05, "loss": 3.4086, "step": 10000 }, { "epoch": 1.1, "eval_loss": 3.4271042346954346, "eval_runtime": 398.9002, "eval_samples_per_second": 9.556, "eval_steps_per_second": 4.778, "step": 10000 }, { "epoch": 1.38, "l1_reg_loss": 0.0001, "learning_rate": 5e-05, "loss": 3.343, "step": 12500 }, { "epoch": 1.66, "l1_reg_loss": 0.0001, "learning_rate": 4e-05, "loss": 3.309, "step": 15000 }, { "epoch": 1.66, "eval_loss": 3.3605945110321045, "eval_runtime": 398.3343, "eval_samples_per_second": 9.57, "eval_steps_per_second": 4.785, "step": 15000 }, { "epoch": 1.93, "l1_reg_loss": 0.0001, "learning_rate": 3e-05, "loss": 3.2949, "step": 17500 }, { "epoch": 2.21, "l1_reg_loss": 0.0001, "learning_rate": 2e-05, "loss": 3.2684, "step": 20000 }, { "epoch": 2.21, "eval_loss": 3.3289120197296143, "eval_runtime": 398.231, "eval_samples_per_second": 9.572, "eval_steps_per_second": 4.786, "step": 20000 }, { "epoch": 2.48, "l1_reg_loss": 0.0001, "learning_rate": 1e-05, "loss": 3.2569, "step": 22500 }, { "epoch": 2.76, "l1_reg_loss": 0.0001, "learning_rate": 0.0, "loss": 3.2477, "step": 25000 }, { "epoch": 2.76, "eval_loss": 3.3192038536071777, "eval_runtime": 397.9798, "eval_samples_per_second": 9.578, "eval_steps_per_second": 4.789, "step": 25000 }, { "epoch": 2.76, "step": 25000, "total_flos": 3.7147907956565606e+17, "train_loss": 3.41004751953125, "train_runtime": 60598.6579, "train_samples_per_second": 3.3, "train_steps_per_second": 0.413 } ], "max_steps": 25000, "num_train_epochs": 3, "total_flos": 3.7147907956565606e+17, "trial_name": null, "trial_params": null }