{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 134, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.007462686567164179, "grad_norm": 1.777404546737671, "learning_rate": 1.4285714285714285e-05, "loss": 1.2363, "step": 1 }, { "epoch": 0.03731343283582089, "grad_norm": 1.1628299951553345, "learning_rate": 7.142857142857143e-05, "loss": 1.1427, "step": 5 }, { "epoch": 0.07462686567164178, "grad_norm": 0.5306259989738464, "learning_rate": 0.00014285714285714287, "loss": 0.8518, "step": 10 }, { "epoch": 0.11194029850746269, "grad_norm": 0.36661896109580994, "learning_rate": 0.00019996573249755572, "loss": 0.6968, "step": 15 }, { "epoch": 0.14925373134328357, "grad_norm": 0.22499878704547882, "learning_rate": 0.00019876883405951377, "loss": 0.6287, "step": 20 }, { "epoch": 0.1865671641791045, "grad_norm": 0.23837368190288544, "learning_rate": 0.0001958819734868193, "loss": 0.5985, "step": 25 }, { "epoch": 0.22388059701492538, "grad_norm": 0.20461460947990417, "learning_rate": 0.0001913545457642601, "loss": 0.5683, "step": 30 }, { "epoch": 0.26119402985074625, "grad_norm": 0.1855892837047577, "learning_rate": 0.00018526401643540922, "loss": 0.555, "step": 35 }, { "epoch": 0.29850746268656714, "grad_norm": 0.22351227700710297, "learning_rate": 0.0001777145961456971, "loss": 0.5436, "step": 40 }, { "epoch": 0.3358208955223881, "grad_norm": 0.2045900523662567, "learning_rate": 0.0001688354575693754, "loss": 0.531, "step": 45 }, { "epoch": 0.373134328358209, "grad_norm": 0.20202215015888214, "learning_rate": 0.00015877852522924732, "loss": 0.5251, "step": 50 }, { "epoch": 0.41044776119402987, "grad_norm": 0.20512989163398743, "learning_rate": 0.00014771587602596084, "loss": 0.519, "step": 55 }, { "epoch": 0.44776119402985076, "grad_norm": 0.21831591427326202, "learning_rate": 0.00013583679495453, "loss": 0.5138, "step": 60 }, { "epoch": 0.48507462686567165, "grad_norm": 0.20354878902435303, "learning_rate": 0.00012334453638559057, "loss": 0.5104, "step": 65 }, { "epoch": 0.5223880597014925, "grad_norm": 0.1882062703371048, "learning_rate": 0.00011045284632676536, "loss": 0.5059, "step": 70 }, { "epoch": 0.5597014925373134, "grad_norm": 0.21688711643218994, "learning_rate": 9.73823051692127e-05, "loss": 0.4992, "step": 75 }, { "epoch": 0.5970149253731343, "grad_norm": 0.2021055668592453, "learning_rate": 8.435655349597689e-05, "loss": 0.4936, "step": 80 }, { "epoch": 0.6343283582089553, "grad_norm": 0.18059054017066956, "learning_rate": 7.159846552960774e-05, "loss": 0.5016, "step": 85 }, { "epoch": 0.6716417910447762, "grad_norm": 0.19665920734405518, "learning_rate": 5.9326335692419995e-05, "loss": 0.4983, "step": 90 }, { "epoch": 0.7089552238805971, "grad_norm": 0.20969931781291962, "learning_rate": 4.7750143528405126e-05, "loss": 0.4911, "step": 95 }, { "epoch": 0.746268656716418, "grad_norm": 0.19205212593078613, "learning_rate": 3.7067960895016275e-05, "loss": 0.4964, "step": 100 }, { "epoch": 0.7835820895522388, "grad_norm": 0.17200438678264618, "learning_rate": 2.746256289877126e-05, "loss": 0.4879, "step": 105 }, { "epoch": 0.8208955223880597, "grad_norm": 0.17229676246643066, "learning_rate": 1.9098300562505266e-05, "loss": 0.4915, "step": 110 }, { "epoch": 0.8582089552238806, "grad_norm": 0.17735055088996887, "learning_rate": 1.2118288733803473e-05, "loss": 0.4883, "step": 115 }, { "epoch": 0.8955223880597015, "grad_norm": 0.17195551097393036, "learning_rate": 6.6419573502798374e-06, "loss": 0.4912, "step": 120 }, { "epoch": 0.9328358208955224, "grad_norm": 0.17827412486076355, "learning_rate": 2.7630079602323442e-06, "loss": 0.4861, "step": 125 }, { "epoch": 0.9701492537313433, "grad_norm": 0.17053547501564026, "learning_rate": 5.478104631726711e-07, "loss": 0.4852, "step": 130 }, { "epoch": 1.0, "eval_loss": 0.1658700853586197, "eval_runtime": 9.0857, "eval_samples_per_second": 9.685, "eval_steps_per_second": 0.33, "step": 134 }, { "epoch": 1.0, "step": 134, "total_flos": 3.768774247149732e+17, "train_loss": 0.5603564016854585, "train_runtime": 1424.9095, "train_samples_per_second": 3.001, "train_steps_per_second": 0.094 } ], "logging_steps": 5, "max_steps": 134, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.768774247149732e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }