{ "best_metric": 2.7641873359680176, "best_model_checkpoint": "miner_id_24/checkpoint-30", "epoch": 0.001203091946301996, "eval_steps": 5, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.01030648767332e-05, "eval_loss": 2.840841054916382, "eval_runtime": 170.0839, "eval_samples_per_second": 61.734, "eval_steps_per_second": 30.867, "step": 1 }, { "epoch": 0.00012030919463019961, "grad_norm": 0.7718831896781921, "learning_rate": 3e-05, "loss": 11.0529, "step": 3 }, { "epoch": 0.00020051532438366602, "eval_loss": 2.840318441390991, "eval_runtime": 169.4803, "eval_samples_per_second": 61.954, "eval_steps_per_second": 30.977, "step": 5 }, { "epoch": 0.00024061838926039923, "grad_norm": 0.8364648222923279, "learning_rate": 6e-05, "loss": 10.4353, "step": 6 }, { "epoch": 0.00036092758389059883, "grad_norm": 0.810796856880188, "learning_rate": 9e-05, "loss": 11.7993, "step": 9 }, { "epoch": 0.00040103064876733204, "eval_loss": 2.8312199115753174, "eval_runtime": 167.9488, "eval_samples_per_second": 62.519, "eval_steps_per_second": 31.26, "step": 10 }, { "epoch": 0.00048123677852079846, "grad_norm": 1.1680842638015747, "learning_rate": 9.755282581475769e-05, "loss": 11.236, "step": 12 }, { "epoch": 0.000601545973150998, "grad_norm": 1.037770390510559, "learning_rate": 8.535533905932738e-05, "loss": 11.5245, "step": 15 }, { "epoch": 0.000601545973150998, "eval_loss": 2.8045854568481445, "eval_runtime": 167.5128, "eval_samples_per_second": 62.682, "eval_steps_per_second": 31.341, "step": 15 }, { "epoch": 0.0007218551677811977, "grad_norm": 1.0737652778625488, "learning_rate": 6.545084971874738e-05, "loss": 11.5629, "step": 18 }, { "epoch": 0.0008020612975346641, "eval_loss": 2.7786405086517334, "eval_runtime": 168.2878, "eval_samples_per_second": 62.393, "eval_steps_per_second": 31.197, "step": 20 }, { "epoch": 0.0008421643624113973, "grad_norm": 1.4385324716567993, "learning_rate": 4.2178276747988446e-05, "loss": 11.0936, "step": 21 }, { "epoch": 0.0009624735570415969, "grad_norm": 1.2842581272125244, "learning_rate": 2.061073738537635e-05, "loss": 10.5965, "step": 24 }, { "epoch": 0.00100257662191833, "eval_loss": 2.7664456367492676, "eval_runtime": 167.7922, "eval_samples_per_second": 62.577, "eval_steps_per_second": 31.289, "step": 25 }, { "epoch": 0.0010827827516717964, "grad_norm": 1.403336524963379, "learning_rate": 5.449673790581611e-06, "loss": 10.7527, "step": 27 }, { "epoch": 0.001203091946301996, "grad_norm": 1.1576800346374512, "learning_rate": 0.0, "loss": 11.1552, "step": 30 }, { "epoch": 0.001203091946301996, "eval_loss": 2.7641873359680176, "eval_runtime": 167.309, "eval_samples_per_second": 62.758, "eval_steps_per_second": 31.379, "step": 30 } ], "logging_steps": 3, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 129293195673600.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }