{ "best_metric": 0.9420929551124573, "best_model_checkpoint": "./outputs/checkpoint-1900", "epoch": 2.533333333333333, "eval_steps": 100, "global_step": 1900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13, "learning_rate": 0.0002, "loss": 1.001, "step": 100 }, { "epoch": 0.13, "eval_loss": 1.0910990238189697, "eval_runtime": 728.6528, "eval_samples_per_second": 2.649, "eval_steps_per_second": 0.332, "step": 100 }, { "epoch": 0.27, "learning_rate": 0.0002, "loss": 0.8971, "step": 200 }, { "epoch": 0.27, "eval_loss": 1.0677164793014526, "eval_runtime": 727.2702, "eval_samples_per_second": 2.654, "eval_steps_per_second": 0.333, "step": 200 }, { "epoch": 0.4, "learning_rate": 0.0002, "loss": 0.8815, "step": 300 }, { "epoch": 0.4, "eval_loss": 1.052659273147583, "eval_runtime": 730.164, "eval_samples_per_second": 2.643, "eval_steps_per_second": 0.331, "step": 300 }, { "epoch": 0.53, "learning_rate": 0.0002, "loss": 0.8695, "step": 400 }, { "epoch": 0.53, "eval_loss": 1.0382879972457886, "eval_runtime": 732.8796, "eval_samples_per_second": 2.633, "eval_steps_per_second": 0.33, "step": 400 }, { "epoch": 0.67, "learning_rate": 0.0002, "loss": 0.8548, "step": 500 }, { "epoch": 0.67, "eval_loss": 1.0260452032089233, "eval_runtime": 730.1971, "eval_samples_per_second": 2.643, "eval_steps_per_second": 0.331, "step": 500 }, { "epoch": 0.8, "learning_rate": 0.0002, "loss": 0.8496, "step": 600 }, { "epoch": 0.8, "eval_loss": 1.0152662992477417, "eval_runtime": 732.4802, "eval_samples_per_second": 2.635, "eval_steps_per_second": 0.33, "step": 600 }, { "epoch": 0.93, "learning_rate": 0.0002, "loss": 0.8445, "step": 700 }, { "epoch": 0.93, "eval_loss": 1.0105178356170654, "eval_runtime": 802.0691, "eval_samples_per_second": 2.406, "eval_steps_per_second": 0.302, "step": 700 }, { "epoch": 1.07, "learning_rate": 0.0002, "loss": 0.8346, "step": 800 }, { "epoch": 1.07, "eval_loss": 1.0066964626312256, "eval_runtime": 789.7964, "eval_samples_per_second": 2.444, "eval_steps_per_second": 0.306, "step": 800 }, { "epoch": 1.2, "learning_rate": 0.0002, "loss": 0.8199, "step": 900 }, { "epoch": 1.2, "eval_loss": 0.9989904165267944, "eval_runtime": 786.9058, "eval_samples_per_second": 2.453, "eval_steps_per_second": 0.308, "step": 900 }, { "epoch": 1.33, "learning_rate": 0.0002, "loss": 0.8133, "step": 1000 }, { "epoch": 1.33, "eval_loss": 0.9895688891410828, "eval_runtime": 782.3169, "eval_samples_per_second": 2.467, "eval_steps_per_second": 0.309, "step": 1000 }, { "epoch": 1.47, "learning_rate": 0.0002, "loss": 0.8193, "step": 1100 }, { "epoch": 1.47, "eval_loss": 0.9853964447975159, "eval_runtime": 777.984, "eval_samples_per_second": 2.481, "eval_steps_per_second": 0.311, "step": 1100 }, { "epoch": 1.6, "learning_rate": 0.0002, "loss": 0.8091, "step": 1200 }, { "epoch": 1.6, "eval_loss": 0.9784607887268066, "eval_runtime": 780.7522, "eval_samples_per_second": 2.472, "eval_steps_per_second": 0.31, "step": 1200 }, { "epoch": 1.73, "learning_rate": 0.0002, "loss": 0.7983, "step": 1300 }, { "epoch": 1.73, "eval_loss": 0.9710213541984558, "eval_runtime": 787.7479, "eval_samples_per_second": 2.45, "eval_steps_per_second": 0.307, "step": 1300 }, { "epoch": 1.87, "learning_rate": 0.0002, "loss": 0.7971, "step": 1400 }, { "epoch": 1.87, "eval_loss": 0.9653750658035278, "eval_runtime": 777.1907, "eval_samples_per_second": 2.483, "eval_steps_per_second": 0.311, "step": 1400 }, { "epoch": 2.0, "learning_rate": 0.0002, "loss": 0.7899, "step": 1500 }, { "epoch": 2.0, "eval_loss": 0.9598689675331116, "eval_runtime": 788.3937, "eval_samples_per_second": 2.448, "eval_steps_per_second": 0.307, "step": 1500 }, { "epoch": 2.13, "learning_rate": 0.0002, "loss": 0.7689, "step": 1600 }, { "epoch": 2.13, "eval_loss": 0.9570510983467102, "eval_runtime": 782.2868, "eval_samples_per_second": 2.467, "eval_steps_per_second": 0.309, "step": 1600 }, { "epoch": 2.27, "learning_rate": 0.0002, "loss": 0.7676, "step": 1700 }, { "epoch": 2.27, "eval_loss": 0.9513885378837585, "eval_runtime": 768.2363, "eval_samples_per_second": 2.512, "eval_steps_per_second": 0.315, "step": 1700 }, { "epoch": 2.4, "learning_rate": 0.0002, "loss": 0.766, "step": 1800 }, { "epoch": 2.4, "eval_loss": 0.9479925036430359, "eval_runtime": 792.9294, "eval_samples_per_second": 2.434, "eval_steps_per_second": 0.305, "step": 1800 }, { "epoch": 2.53, "learning_rate": 0.0002, "loss": 0.7587, "step": 1900 }, { "epoch": 2.53, "eval_loss": 0.9420929551124573, "eval_runtime": 787.6767, "eval_samples_per_second": 2.45, "eval_steps_per_second": 0.307, "step": 1900 } ], "logging_steps": 100, "max_steps": 3750, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 100, "total_flos": 4.9198271845933056e+17, "trial_name": null, "trial_params": null }