{ "best_metric": 1.1207506656646729, "best_model_checkpoint": "./outputs/checkpoint-4100", "epoch": 2.987249544626594, "eval_steps": 100, "global_step": 4100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 0.0002, "loss": 2.2669, "step": 100 }, { "epoch": 0.07, "eval_loss": 2.180582046508789, "eval_runtime": 550.6875, "eval_samples_per_second": 11.393, "eval_steps_per_second": 1.425, "step": 100 }, { "epoch": 0.15, "learning_rate": 0.0002, "loss": 2.1546, "step": 200 }, { "epoch": 0.15, "eval_loss": 2.131606101989746, "eval_runtime": 550.6761, "eval_samples_per_second": 11.393, "eval_steps_per_second": 1.426, "step": 200 }, { "epoch": 0.22, "learning_rate": 0.0002, "loss": 2.1084, "step": 300 }, { "epoch": 0.22, "eval_loss": 2.089244842529297, "eval_runtime": 551.3766, "eval_samples_per_second": 11.379, "eval_steps_per_second": 1.424, "step": 300 }, { "epoch": 0.29, "learning_rate": 0.0002, "loss": 2.068, "step": 400 }, { "epoch": 0.29, "eval_loss": 2.043504476547241, "eval_runtime": 551.782, "eval_samples_per_second": 11.37, "eval_steps_per_second": 1.423, "step": 400 }, { "epoch": 0.36, "learning_rate": 0.0002, "loss": 2.0238, "step": 500 }, { "epoch": 0.36, "eval_loss": 2.0026352405548096, "eval_runtime": 552.6008, "eval_samples_per_second": 11.354, "eval_steps_per_second": 1.421, "step": 500 }, { "epoch": 0.44, "learning_rate": 0.0002, "loss": 1.9746, "step": 600 }, { "epoch": 0.44, "eval_loss": 1.9627352952957153, "eval_runtime": 553.4908, "eval_samples_per_second": 11.335, "eval_steps_per_second": 1.418, "step": 600 }, { "epoch": 0.51, "learning_rate": 0.0002, "loss": 1.9436, "step": 700 }, { "epoch": 0.51, "eval_loss": 1.9262831211090088, "eval_runtime": 553.1509, "eval_samples_per_second": 11.342, "eval_steps_per_second": 1.419, "step": 700 }, { "epoch": 0.58, "learning_rate": 0.0002, "loss": 1.9026, "step": 800 }, { "epoch": 0.58, "eval_loss": 1.8915894031524658, "eval_runtime": 553.4095, "eval_samples_per_second": 11.337, "eval_steps_per_second": 1.418, "step": 800 }, { "epoch": 0.66, "learning_rate": 0.0002, "loss": 1.8633, "step": 900 }, { "epoch": 0.66, "eval_loss": 1.8577481508255005, "eval_runtime": 553.9449, "eval_samples_per_second": 11.326, "eval_steps_per_second": 1.417, "step": 900 }, { "epoch": 0.73, "learning_rate": 0.0002, "loss": 1.8404, "step": 1000 }, { "epoch": 0.73, "eval_loss": 1.8280558586120605, "eval_runtime": 554.6062, "eval_samples_per_second": 11.313, "eval_steps_per_second": 1.415, "step": 1000 }, { "epoch": 0.8, "learning_rate": 0.0002, "loss": 1.8207, "step": 1100 }, { "epoch": 0.8, "eval_loss": 1.7947672605514526, "eval_runtime": 555.1319, "eval_samples_per_second": 11.302, "eval_steps_per_second": 1.414, "step": 1100 }, { "epoch": 0.87, "learning_rate": 0.0002, "loss": 1.769, "step": 1200 }, { "epoch": 0.87, "eval_loss": 1.761672019958496, "eval_runtime": 555.3259, "eval_samples_per_second": 11.298, "eval_steps_per_second": 1.414, "step": 1200 }, { "epoch": 0.95, "learning_rate": 0.0002, "loss": 1.7687, "step": 1300 }, { "epoch": 0.95, "eval_loss": 1.7306458950042725, "eval_runtime": 554.8996, "eval_samples_per_second": 11.307, "eval_steps_per_second": 1.415, "step": 1300 }, { "epoch": 1.02, "learning_rate": 0.0002, "loss": 1.6906, "step": 1400 }, { "epoch": 1.02, "eval_loss": 1.6910051107406616, "eval_runtime": 555.5658, "eval_samples_per_second": 11.293, "eval_steps_per_second": 1.413, "step": 1400 }, { "epoch": 1.09, "learning_rate": 0.0002, "loss": 1.6215, "step": 1500 }, { "epoch": 1.09, "eval_loss": 1.6626144647598267, "eval_runtime": 555.8884, "eval_samples_per_second": 11.286, "eval_steps_per_second": 1.412, "step": 1500 }, { "epoch": 1.17, "learning_rate": 0.0002, "loss": 1.601, "step": 1600 }, { "epoch": 1.17, "eval_loss": 1.6326826810836792, "eval_runtime": 555.8386, "eval_samples_per_second": 11.287, "eval_steps_per_second": 1.412, "step": 1600 }, { "epoch": 1.24, "learning_rate": 0.0002, "loss": 1.595, "step": 1700 }, { "epoch": 1.24, "eval_loss": 1.6032005548477173, "eval_runtime": 555.9827, "eval_samples_per_second": 11.285, "eval_steps_per_second": 1.412, "step": 1700 }, { "epoch": 1.31, "learning_rate": 0.0002, "loss": 1.5417, "step": 1800 }, { "epoch": 1.31, "eval_loss": 1.581026315689087, "eval_runtime": 556.3673, "eval_samples_per_second": 11.277, "eval_steps_per_second": 1.411, "step": 1800 }, { "epoch": 1.38, "learning_rate": 0.0002, "loss": 1.5177, "step": 1900 }, { "epoch": 1.38, "eval_loss": 1.5540947914123535, "eval_runtime": 556.1818, "eval_samples_per_second": 11.28, "eval_steps_per_second": 1.411, "step": 1900 }, { "epoch": 1.46, "learning_rate": 0.0002, "loss": 1.5071, "step": 2000 }, { "epoch": 1.46, "eval_loss": 1.529414415359497, "eval_runtime": 556.1478, "eval_samples_per_second": 11.281, "eval_steps_per_second": 1.411, "step": 2000 }, { "epoch": 1.53, "learning_rate": 0.0002, "loss": 1.4879, "step": 2100 }, { "epoch": 1.53, "eval_loss": 1.5049669742584229, "eval_runtime": 556.064, "eval_samples_per_second": 11.283, "eval_steps_per_second": 1.412, "step": 2100 }, { "epoch": 1.6, "learning_rate": 0.0002, "loss": 1.4477, "step": 2200 }, { "epoch": 1.6, "eval_loss": 1.47505521774292, "eval_runtime": 555.9558, "eval_samples_per_second": 11.285, "eval_steps_per_second": 1.412, "step": 2200 }, { "epoch": 1.68, "learning_rate": 0.0002, "loss": 1.4289, "step": 2300 }, { "epoch": 1.68, "eval_loss": 1.4537893533706665, "eval_runtime": 556.1711, "eval_samples_per_second": 11.281, "eval_steps_per_second": 1.411, "step": 2300 }, { "epoch": 1.75, "learning_rate": 0.0002, "loss": 1.4179, "step": 2400 }, { "epoch": 1.75, "eval_loss": 1.4315266609191895, "eval_runtime": 556.1848, "eval_samples_per_second": 11.28, "eval_steps_per_second": 1.411, "step": 2400 }, { "epoch": 1.82, "learning_rate": 0.0002, "loss": 1.3847, "step": 2500 }, { "epoch": 1.82, "eval_loss": 1.4086532592773438, "eval_runtime": 556.2903, "eval_samples_per_second": 11.278, "eval_steps_per_second": 1.411, "step": 2500 }, { "epoch": 1.89, "learning_rate": 0.0002, "loss": 1.3664, "step": 2600 }, { "epoch": 1.89, "eval_loss": 1.3867732286453247, "eval_runtime": 556.1989, "eval_samples_per_second": 11.28, "eval_steps_per_second": 1.411, "step": 2600 }, { "epoch": 1.97, "learning_rate": 0.0002, "loss": 1.3493, "step": 2700 }, { "epoch": 1.97, "eval_loss": 1.361178994178772, "eval_runtime": 556.384, "eval_samples_per_second": 11.276, "eval_steps_per_second": 1.411, "step": 2700 }, { "epoch": 2.04, "learning_rate": 0.0002, "loss": 1.2768, "step": 2800 }, { "epoch": 2.04, "eval_loss": 1.3487073183059692, "eval_runtime": 556.4859, "eval_samples_per_second": 11.274, "eval_steps_per_second": 1.411, "step": 2800 }, { "epoch": 2.11, "learning_rate": 0.0002, "loss": 1.2273, "step": 2900 }, { "epoch": 2.11, "eval_loss": 1.3275220394134521, "eval_runtime": 556.9562, "eval_samples_per_second": 11.265, "eval_steps_per_second": 1.409, "step": 2900 }, { "epoch": 2.19, "learning_rate": 0.0002, "loss": 1.2451, "step": 3000 }, { "epoch": 2.19, "eval_loss": 1.307883858680725, "eval_runtime": 555.8716, "eval_samples_per_second": 11.287, "eval_steps_per_second": 1.412, "step": 3000 }, { "epoch": 2.26, "learning_rate": 0.0002, "loss": 1.208, "step": 3100 }, { "epoch": 2.26, "eval_loss": 1.2866222858428955, "eval_runtime": 556.1597, "eval_samples_per_second": 11.281, "eval_steps_per_second": 1.411, "step": 3100 }, { "epoch": 2.33, "learning_rate": 0.0002, "loss": 1.2102, "step": 3200 }, { "epoch": 2.33, "eval_loss": 1.27306067943573, "eval_runtime": 765.8818, "eval_samples_per_second": 8.192, "eval_steps_per_second": 1.025, "step": 3200 }, { "epoch": 2.4, "learning_rate": 0.0002, "loss": 1.1778, "step": 3300 }, { "epoch": 2.4, "eval_loss": 1.2513889074325562, "eval_runtime": 556.1919, "eval_samples_per_second": 11.28, "eval_steps_per_second": 1.411, "step": 3300 }, { "epoch": 2.48, "learning_rate": 0.0002, "loss": 1.1666, "step": 3400 }, { "epoch": 2.48, "eval_loss": 1.2403820753097534, "eval_runtime": 555.8669, "eval_samples_per_second": 11.287, "eval_steps_per_second": 1.412, "step": 3400 }, { "epoch": 2.55, "learning_rate": 0.0002, "loss": 1.1521, "step": 3500 }, { "epoch": 2.55, "eval_loss": 1.2223913669586182, "eval_runtime": 556.0099, "eval_samples_per_second": 11.284, "eval_steps_per_second": 1.412, "step": 3500 }, { "epoch": 2.62, "learning_rate": 0.0002, "loss": 1.1431, "step": 3600 }, { "epoch": 2.62, "eval_loss": 1.2031002044677734, "eval_runtime": 556.2202, "eval_samples_per_second": 11.28, "eval_steps_per_second": 1.411, "step": 3600 }, { "epoch": 2.7, "learning_rate": 0.0002, "loss": 1.1138, "step": 3700 }, { "epoch": 2.7, "eval_loss": 1.1843565702438354, "eval_runtime": 556.3313, "eval_samples_per_second": 11.277, "eval_steps_per_second": 1.411, "step": 3700 }, { "epoch": 2.77, "learning_rate": 0.0002, "loss": 1.1002, "step": 3800 }, { "epoch": 2.77, "eval_loss": 1.1716701984405518, "eval_runtime": 556.4009, "eval_samples_per_second": 11.276, "eval_steps_per_second": 1.411, "step": 3800 }, { "epoch": 2.84, "learning_rate": 0.0002, "loss": 1.0883, "step": 3900 }, { "epoch": 2.84, "eval_loss": 1.1533775329589844, "eval_runtime": 556.2301, "eval_samples_per_second": 11.28, "eval_steps_per_second": 1.411, "step": 3900 }, { "epoch": 2.91, "learning_rate": 0.0002, "loss": 1.0899, "step": 4000 }, { "epoch": 2.91, "eval_loss": 1.139776587486267, "eval_runtime": 556.1925, "eval_samples_per_second": 11.28, "eval_steps_per_second": 1.411, "step": 4000 }, { "epoch": 2.99, "learning_rate": 0.0002, "loss": 1.0699, "step": 4100 }, { "epoch": 2.99, "eval_loss": 1.1207506656646729, "eval_runtime": 916.3506, "eval_samples_per_second": 6.847, "eval_steps_per_second": 0.857, "step": 4100 } ], "logging_steps": 100, "max_steps": 4116, "num_train_epochs": 3, "save_steps": 100, "total_flos": 1.0672136421373379e+18, "trial_name": null, "trial_params": null }