{ "best_metric": 1.9596437215805054, "best_model_checkpoint": "./outputs/checkpoint-4100", "epoch": 2.987249544626594, "eval_steps": 100, "global_step": 4100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 0.0002, "loss": 2.7205, "step": 100 }, { "epoch": 0.07, "eval_loss": 2.6170873641967773, "eval_runtime": 228.505, "eval_samples_per_second": 27.457, "eval_steps_per_second": 3.435, "step": 100 }, { "epoch": 0.15, "learning_rate": 0.0002, "loss": 2.5807, "step": 200 }, { "epoch": 0.15, "eval_loss": 2.5663836002349854, "eval_runtime": 210.5353, "eval_samples_per_second": 29.8, "eval_steps_per_second": 3.729, "step": 200 }, { "epoch": 0.22, "learning_rate": 0.0002, "loss": 2.5398, "step": 300 }, { "epoch": 0.22, "eval_loss": 2.531459331512451, "eval_runtime": 245.5646, "eval_samples_per_second": 25.549, "eval_steps_per_second": 3.197, "step": 300 }, { "epoch": 0.29, "learning_rate": 0.0002, "loss": 2.5133, "step": 400 }, { "epoch": 0.29, "eval_loss": 2.497565746307373, "eval_runtime": 244.6469, "eval_samples_per_second": 25.645, "eval_steps_per_second": 3.209, "step": 400 }, { "epoch": 0.36, "learning_rate": 0.0002, "loss": 2.4697, "step": 500 }, { "epoch": 0.36, "eval_loss": 2.4726672172546387, "eval_runtime": 244.9375, "eval_samples_per_second": 25.615, "eval_steps_per_second": 3.205, "step": 500 }, { "epoch": 0.44, "learning_rate": 0.0002, "loss": 2.4435, "step": 600 }, { "epoch": 0.44, "eval_loss": 2.4451191425323486, "eval_runtime": 216.439, "eval_samples_per_second": 28.987, "eval_steps_per_second": 3.627, "step": 600 }, { "epoch": 0.51, "learning_rate": 0.0002, "loss": 2.426, "step": 700 }, { "epoch": 0.51, "eval_loss": 2.4199516773223877, "eval_runtime": 206.0859, "eval_samples_per_second": 30.444, "eval_steps_per_second": 3.809, "step": 700 }, { "epoch": 0.58, "learning_rate": 0.0002, "loss": 2.4041, "step": 800 }, { "epoch": 0.58, "eval_loss": 2.399696111679077, "eval_runtime": 205.7858, "eval_samples_per_second": 30.488, "eval_steps_per_second": 3.815, "step": 800 }, { "epoch": 0.66, "learning_rate": 0.0002, "loss": 2.369, "step": 900 }, { "epoch": 0.66, "eval_loss": 2.380272150039673, "eval_runtime": 206.0924, "eval_samples_per_second": 30.443, "eval_steps_per_second": 3.809, "step": 900 }, { "epoch": 0.73, "learning_rate": 0.0002, "loss": 2.3663, "step": 1000 }, { "epoch": 0.73, "eval_loss": 2.361511707305908, "eval_runtime": 206.0953, "eval_samples_per_second": 30.442, "eval_steps_per_second": 3.809, "step": 1000 }, { "epoch": 0.8, "learning_rate": 0.0002, "loss": 2.3596, "step": 1100 }, { "epoch": 0.8, "eval_loss": 2.337871551513672, "eval_runtime": 206.2542, "eval_samples_per_second": 30.419, "eval_steps_per_second": 3.806, "step": 1100 }, { "epoch": 0.87, "learning_rate": 0.0002, "loss": 2.3156, "step": 1200 }, { "epoch": 0.87, "eval_loss": 2.318875789642334, "eval_runtime": 205.5733, "eval_samples_per_second": 30.52, "eval_steps_per_second": 3.819, "step": 1200 }, { "epoch": 0.95, "learning_rate": 0.0002, "loss": 2.3267, "step": 1300 }, { "epoch": 0.95, "eval_loss": 2.3044281005859375, "eval_runtime": 206.2612, "eval_samples_per_second": 30.418, "eval_steps_per_second": 3.806, "step": 1300 }, { "epoch": 1.02, "learning_rate": 0.0002, "loss": 2.2788, "step": 1400 }, { "epoch": 1.02, "eval_loss": 2.2812631130218506, "eval_runtime": 205.7511, "eval_samples_per_second": 30.493, "eval_steps_per_second": 3.815, "step": 1400 }, { "epoch": 1.09, "learning_rate": 0.0002, "loss": 2.2397, "step": 1500 }, { "epoch": 1.09, "eval_loss": 2.2644264698028564, "eval_runtime": 206.0922, "eval_samples_per_second": 30.443, "eval_steps_per_second": 3.809, "step": 1500 }, { "epoch": 1.17, "learning_rate": 0.0002, "loss": 2.232, "step": 1600 }, { "epoch": 1.17, "eval_loss": 2.2463176250457764, "eval_runtime": 205.9251, "eval_samples_per_second": 30.467, "eval_steps_per_second": 3.812, "step": 1600 }, { "epoch": 1.24, "learning_rate": 0.0002, "loss": 2.231, "step": 1700 }, { "epoch": 1.24, "eval_loss": 2.2307546138763428, "eval_runtime": 206.164, "eval_samples_per_second": 30.432, "eval_steps_per_second": 3.808, "step": 1700 }, { "epoch": 1.31, "learning_rate": 0.0002, "loss": 2.1938, "step": 1800 }, { "epoch": 1.31, "eval_loss": 2.214383125305176, "eval_runtime": 206.1484, "eval_samples_per_second": 30.434, "eval_steps_per_second": 3.808, "step": 1800 }, { "epoch": 1.38, "learning_rate": 0.0002, "loss": 2.1772, "step": 1900 }, { "epoch": 1.38, "eval_loss": 2.2003238201141357, "eval_runtime": 207.0672, "eval_samples_per_second": 30.299, "eval_steps_per_second": 3.791, "step": 1900 }, { "epoch": 1.46, "learning_rate": 0.0002, "loss": 2.1719, "step": 2000 }, { "epoch": 1.46, "eval_loss": 2.1876213550567627, "eval_runtime": 206.1329, "eval_samples_per_second": 30.437, "eval_steps_per_second": 3.808, "step": 2000 }, { "epoch": 1.53, "learning_rate": 0.0002, "loss": 2.1558, "step": 2100 }, { "epoch": 1.53, "eval_loss": 2.173450469970703, "eval_runtime": 208.4992, "eval_samples_per_second": 30.091, "eval_steps_per_second": 3.765, "step": 2100 }, { "epoch": 1.6, "learning_rate": 0.0002, "loss": 2.1384, "step": 2200 }, { "epoch": 1.6, "eval_loss": 2.157057762145996, "eval_runtime": 206.6067, "eval_samples_per_second": 30.367, "eval_steps_per_second": 3.799, "step": 2200 }, { "epoch": 1.68, "learning_rate": 0.0002, "loss": 2.1292, "step": 2300 }, { "epoch": 1.68, "eval_loss": 2.145142078399658, "eval_runtime": 311.6945, "eval_samples_per_second": 20.129, "eval_steps_per_second": 2.518, "step": 2300 }, { "epoch": 1.75, "learning_rate": 0.0002, "loss": 2.135, "step": 2400 }, { "epoch": 1.75, "eval_loss": 2.132389545440674, "eval_runtime": 446.4033, "eval_samples_per_second": 14.055, "eval_steps_per_second": 1.758, "step": 2400 }, { "epoch": 1.82, "learning_rate": 0.0002, "loss": 2.1029, "step": 2500 }, { "epoch": 1.82, "eval_loss": 2.1192965507507324, "eval_runtime": 563.307, "eval_samples_per_second": 11.138, "eval_steps_per_second": 1.394, "step": 2500 }, { "epoch": 1.89, "learning_rate": 0.0002, "loss": 2.1027, "step": 2600 }, { "epoch": 1.89, "eval_loss": 2.107308864593506, "eval_runtime": 353.5605, "eval_samples_per_second": 17.745, "eval_steps_per_second": 2.22, "step": 2600 }, { "epoch": 1.97, "learning_rate": 0.0002, "loss": 2.0882, "step": 2700 }, { "epoch": 1.97, "eval_loss": 2.094940423965454, "eval_runtime": 353.902, "eval_samples_per_second": 17.728, "eval_steps_per_second": 2.218, "step": 2700 }, { "epoch": 2.04, "learning_rate": 0.0002, "loss": 2.0454, "step": 2800 }, { "epoch": 2.04, "eval_loss": 2.0859835147857666, "eval_runtime": 352.6105, "eval_samples_per_second": 17.793, "eval_steps_per_second": 2.226, "step": 2800 }, { "epoch": 2.11, "learning_rate": 0.0002, "loss": 2.0166, "step": 2900 }, { "epoch": 2.11, "eval_loss": 2.0744717121124268, "eval_runtime": 879.3837, "eval_samples_per_second": 7.135, "eval_steps_per_second": 0.893, "step": 2900 }, { "epoch": 2.19, "learning_rate": 0.0002, "loss": 2.0391, "step": 3000 }, { "epoch": 2.19, "eval_loss": 2.0644783973693848, "eval_runtime": 353.8886, "eval_samples_per_second": 17.729, "eval_steps_per_second": 2.218, "step": 3000 }, { "epoch": 2.26, "learning_rate": 0.0002, "loss": 2.0069, "step": 3100 }, { "epoch": 2.26, "eval_loss": 2.053786277770996, "eval_runtime": 353.3743, "eval_samples_per_second": 17.755, "eval_steps_per_second": 2.221, "step": 3100 }, { "epoch": 2.33, "learning_rate": 0.0002, "loss": 2.0226, "step": 3200 }, { "epoch": 2.33, "eval_loss": 2.043891668319702, "eval_runtime": 414.5134, "eval_samples_per_second": 15.136, "eval_steps_per_second": 1.894, "step": 3200 }, { "epoch": 2.4, "learning_rate": 0.0002, "loss": 1.989, "step": 3300 }, { "epoch": 2.4, "eval_loss": 2.0311737060546875, "eval_runtime": 574.7464, "eval_samples_per_second": 10.916, "eval_steps_per_second": 1.366, "step": 3300 }, { "epoch": 2.48, "learning_rate": 0.0002, "loss": 1.9862, "step": 3400 }, { "epoch": 2.48, "eval_loss": 2.0267181396484375, "eval_runtime": 538.138, "eval_samples_per_second": 11.659, "eval_steps_per_second": 1.459, "step": 3400 }, { "epoch": 2.55, "learning_rate": 0.0002, "loss": 1.9811, "step": 3500 }, { "epoch": 2.55, "eval_loss": 2.013442039489746, "eval_runtime": 547.6149, "eval_samples_per_second": 11.457, "eval_steps_per_second": 1.433, "step": 3500 }, { "epoch": 2.62, "learning_rate": 0.0002, "loss": 1.9769, "step": 3600 }, { "epoch": 2.62, "eval_loss": 2.0076935291290283, "eval_runtime": 542.0683, "eval_samples_per_second": 11.574, "eval_steps_per_second": 1.448, "step": 3600 }, { "epoch": 2.7, "learning_rate": 0.0002, "loss": 1.9561, "step": 3700 }, { "epoch": 2.7, "eval_loss": 1.9962486028671265, "eval_runtime": 539.172, "eval_samples_per_second": 11.636, "eval_steps_per_second": 1.456, "step": 3700 }, { "epoch": 2.77, "learning_rate": 0.0002, "loss": 1.9502, "step": 3800 }, { "epoch": 2.77, "eval_loss": 1.9892688989639282, "eval_runtime": 540.3139, "eval_samples_per_second": 11.612, "eval_steps_per_second": 1.453, "step": 3800 }, { "epoch": 2.84, "learning_rate": 0.0002, "loss": 1.9355, "step": 3900 }, { "epoch": 2.84, "eval_loss": 1.9744699001312256, "eval_runtime": 543.3503, "eval_samples_per_second": 11.547, "eval_steps_per_second": 1.445, "step": 3900 }, { "epoch": 2.91, "learning_rate": 0.0002, "loss": 1.9386, "step": 4000 }, { "epoch": 2.91, "eval_loss": 1.9696241617202759, "eval_runtime": 441.7861, "eval_samples_per_second": 14.201, "eval_steps_per_second": 1.777, "step": 4000 }, { "epoch": 2.99, "learning_rate": 0.0002, "loss": 1.9403, "step": 4100 }, { "epoch": 2.99, "eval_loss": 1.9596437215805054, "eval_runtime": 353.6394, "eval_samples_per_second": 17.741, "eval_steps_per_second": 2.22, "step": 4100 } ], "logging_steps": 100, "max_steps": 4116, "num_train_epochs": 3, "save_steps": 100, "total_flos": 1.2037527679500288e+17, "trial_name": null, "trial_params": null }