{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.010905601116733554, "eval_steps": 100, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0010905601116733554, "eval_accuracy": 0.5034676821874995, "eval_loss": 2.249495029449463, "eval_runtime": 250.8322, "eval_samples_per_second": 22.182, "eval_steps_per_second": 0.088, "step": 100 }, { "epoch": 0.002181120223346711, "eval_accuracy": 0.6792460132466641, "eval_loss": 1.3580471277236938, "eval_runtime": 250.935, "eval_samples_per_second": 22.173, "eval_steps_per_second": 0.088, "step": 200 }, { "epoch": 0.0032716803350200663, "eval_accuracy": 0.7194358306786471, "eval_loss": 1.1734950542449951, "eval_runtime": 249.8616, "eval_samples_per_second": 22.268, "eval_steps_per_second": 0.088, "step": 300 }, { "epoch": 0.004362240446693422, "eval_accuracy": 0.7406081066868597, "eval_loss": 1.0811182260513306, "eval_runtime": 250.2334, "eval_samples_per_second": 22.235, "eval_steps_per_second": 0.088, "step": 400 }, { "epoch": 0.005452800558366777, "grad_norm": 1.5124988555908203, "learning_rate": 4.990911999069389e-05, "loss": 1.8503, "step": 500 }, { "epoch": 0.005452800558366777, "eval_accuracy": 0.7539559460484646, "eval_loss": 1.0170289278030396, "eval_runtime": 250.2283, "eval_samples_per_second": 22.236, "eval_steps_per_second": 0.088, "step": 500 }, { "epoch": 0.0065433606700401326, "eval_accuracy": 0.7639956787495118, "eval_loss": 0.9728907346725464, "eval_runtime": 250.2409, "eval_samples_per_second": 22.235, "eval_steps_per_second": 0.088, "step": 600 }, { "epoch": 0.007633920781713488, "eval_accuracy": 0.7704801005740811, "eval_loss": 0.9435333013534546, "eval_runtime": 250.3167, "eval_samples_per_second": 22.228, "eval_steps_per_second": 0.088, "step": 700 }, { "epoch": 0.008724480893386843, "eval_accuracy": 0.7747122964825798, "eval_loss": 0.924051821231842, "eval_runtime": 250.2963, "eval_samples_per_second": 22.23, "eval_steps_per_second": 0.088, "step": 800 }, { "epoch": 0.009815041005060199, "eval_accuracy": 0.7814711185942613, "eval_loss": 0.8970186114311218, "eval_runtime": 250.1321, "eval_samples_per_second": 22.244, "eval_steps_per_second": 0.088, "step": 900 }, { "epoch": 0.010905601116733554, "grad_norm": 1.2721333503723145, "learning_rate": 4.981823998138778e-05, "loss": 0.9214, "step": 1000 }, { "epoch": 0.010905601116733554, "eval_accuracy": 0.7872277846758073, "eval_loss": 0.8725214004516602, "eval_runtime": 249.8439, "eval_samples_per_second": 22.27, "eval_steps_per_second": 0.088, "step": 1000 }, { "epoch": 0.010905601116733554, "step": 1000, "total_flos": 1.3069163715939533e+18, "train_loss": 1.3858390502929687, "train_runtime": 13072.5982, "train_samples_per_second": 1346.745, "train_steps_per_second": 21.043 } ], "logging_steps": 500, "max_steps": 275088, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3069163715939533e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }