{ "best_metric": 1.7265019416809082, "best_model_checkpoint": "miner_id_24/checkpoint-25", "epoch": 0.0052999788000848, "eval_steps": 5, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00021199915200339198, "grad_norm": 1.0395910739898682, "learning_rate": 1e-05, "loss": 2.2386, "step": 1 }, { "epoch": 0.00021199915200339198, "eval_loss": 2.108657121658325, "eval_runtime": 276.9162, "eval_samples_per_second": 7.175, "eval_steps_per_second": 3.59, "step": 1 }, { "epoch": 0.00042399830400678397, "grad_norm": 1.0399056673049927, "learning_rate": 2e-05, "loss": 2.3599, "step": 2 }, { "epoch": 0.0006359974560101759, "grad_norm": 1.052390694618225, "learning_rate": 3e-05, "loss": 2.1135, "step": 3 }, { "epoch": 0.0008479966080135679, "grad_norm": 1.0873916149139404, "learning_rate": 4e-05, "loss": 1.9912, "step": 4 }, { "epoch": 0.00105999576001696, "grad_norm": 1.0538489818572998, "learning_rate": 5e-05, "loss": 2.2384, "step": 5 }, { "epoch": 0.00105999576001696, "eval_loss": 2.0780763626098633, "eval_runtime": 278.1139, "eval_samples_per_second": 7.145, "eval_steps_per_second": 3.574, "step": 5 }, { "epoch": 0.0012719949120203519, "grad_norm": 0.9845679998397827, "learning_rate": 6e-05, "loss": 2.0122, "step": 6 }, { "epoch": 0.001483994064023744, "grad_norm": 0.9107600450515747, "learning_rate": 7e-05, "loss": 1.9534, "step": 7 }, { "epoch": 0.0016959932160271359, "grad_norm": 0.9601123929023743, "learning_rate": 8e-05, "loss": 1.6085, "step": 8 }, { "epoch": 0.0019079923680305278, "grad_norm": 0.7956514954566956, "learning_rate": 9e-05, "loss": 2.171, "step": 9 }, { "epoch": 0.00211999152003392, "grad_norm": 0.6652023196220398, "learning_rate": 0.0001, "loss": 1.9525, "step": 10 }, { "epoch": 0.00211999152003392, "eval_loss": 1.8938112258911133, "eval_runtime": 278.2742, "eval_samples_per_second": 7.14, "eval_steps_per_second": 3.572, "step": 10 }, { "epoch": 0.002331990672037312, "grad_norm": 0.7144079208374023, "learning_rate": 9.890738003669029e-05, "loss": 1.7174, "step": 11 }, { "epoch": 0.0025439898240407037, "grad_norm": 0.8546844720840454, "learning_rate": 9.567727288213005e-05, "loss": 1.903, "step": 12 }, { "epoch": 0.002755988976044096, "grad_norm": 0.8503435254096985, "learning_rate": 9.045084971874738e-05, "loss": 2.1831, "step": 13 }, { "epoch": 0.002967988128047488, "grad_norm": 1.2312841415405273, "learning_rate": 8.345653031794292e-05, "loss": 1.8006, "step": 14 }, { "epoch": 0.0031799872800508796, "grad_norm": 0.9335196018218994, "learning_rate": 7.500000000000001e-05, "loss": 1.74, "step": 15 }, { "epoch": 0.0031799872800508796, "eval_loss": 1.8009980916976929, "eval_runtime": 279.0929, "eval_samples_per_second": 7.119, "eval_steps_per_second": 3.562, "step": 15 }, { "epoch": 0.0033919864320542717, "grad_norm": 1.0158390998840332, "learning_rate": 6.545084971874738e-05, "loss": 2.1792, "step": 16 }, { "epoch": 0.003603985584057664, "grad_norm": 0.8165982365608215, "learning_rate": 5.522642316338268e-05, "loss": 1.6872, "step": 17 }, { "epoch": 0.0038159847360610556, "grad_norm": 0.8424365520477295, "learning_rate": 4.477357683661734e-05, "loss": 1.8862, "step": 18 }, { "epoch": 0.004027983888064448, "grad_norm": 0.8149735331535339, "learning_rate": 3.4549150281252636e-05, "loss": 1.6499, "step": 19 }, { "epoch": 0.00423998304006784, "grad_norm": 0.8468019962310791, "learning_rate": 2.500000000000001e-05, "loss": 1.816, "step": 20 }, { "epoch": 0.00423998304006784, "eval_loss": 1.739220380783081, "eval_runtime": 281.1808, "eval_samples_per_second": 7.067, "eval_steps_per_second": 3.535, "step": 20 }, { "epoch": 0.0044519821920712315, "grad_norm": 0.87850421667099, "learning_rate": 1.6543469682057106e-05, "loss": 1.7883, "step": 21 }, { "epoch": 0.004663981344074624, "grad_norm": 0.8680698871612549, "learning_rate": 9.549150281252633e-06, "loss": 1.4036, "step": 22 }, { "epoch": 0.004875980496078016, "grad_norm": 0.7863142490386963, "learning_rate": 4.322727117869951e-06, "loss": 1.8092, "step": 23 }, { "epoch": 0.005087979648081407, "grad_norm": 0.7802051305770874, "learning_rate": 1.0926199633097157e-06, "loss": 1.7099, "step": 24 }, { "epoch": 0.0052999788000848, "grad_norm": 0.744226336479187, "learning_rate": 0.0, "loss": 1.5709, "step": 25 }, { "epoch": 0.0052999788000848, "eval_loss": 1.7265019416809082, "eval_runtime": 281.6034, "eval_samples_per_second": 7.056, "eval_steps_per_second": 3.53, "step": 25 } ], "logging_steps": 1, "max_steps": 25, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2500486653542400.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }