{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.11379800853485064, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005689900426742532, "eval_loss": 1.4702181816101074, "eval_runtime": 97.6278, "eval_samples_per_second": 7.58, "eval_steps_per_second": 3.79, "step": 1 }, { "epoch": 0.002844950213371266, "grad_norm": 0.15776292979717255, "learning_rate": 5e-05, "loss": 1.5423, "step": 5 }, { "epoch": 0.005689900426742532, "grad_norm": 0.3180772662162781, "learning_rate": 0.0001, "loss": 1.5495, "step": 10 }, { "epoch": 0.008534850640113799, "grad_norm": 0.2475329339504242, "learning_rate": 9.98292246503335e-05, "loss": 1.4748, "step": 15 }, { "epoch": 0.011379800853485065, "grad_norm": 0.23491647839546204, "learning_rate": 9.931806517013612e-05, "loss": 1.4243, "step": 20 }, { "epoch": 0.01422475106685633, "grad_norm": 0.156405970454216, "learning_rate": 9.847001329696653e-05, "loss": 1.349, "step": 25 }, { "epoch": 0.017069701280227598, "grad_norm": 0.1800297200679779, "learning_rate": 9.729086208503174e-05, "loss": 1.334, "step": 30 }, { "epoch": 0.01991465149359886, "grad_norm": 0.1841624230146408, "learning_rate": 9.578866633275288e-05, "loss": 1.3211, "step": 35 }, { "epoch": 0.02275960170697013, "grad_norm": 0.18861369788646698, "learning_rate": 9.397368756032445e-05, "loss": 1.3624, "step": 40 }, { "epoch": 0.025604551920341393, "grad_norm": 0.20811361074447632, "learning_rate": 9.185832391312644e-05, "loss": 1.2135, "step": 45 }, { "epoch": 0.02844950213371266, "grad_norm": 0.49080491065979004, "learning_rate": 8.945702546981969e-05, "loss": 1.0109, "step": 50 }, { "epoch": 0.02844950213371266, "eval_loss": 1.2867763042449951, "eval_runtime": 98.9605, "eval_samples_per_second": 7.478, "eval_steps_per_second": 3.739, "step": 50 }, { "epoch": 0.031294452347083924, "grad_norm": 0.28847643733024597, "learning_rate": 8.678619553365659e-05, "loss": 1.3137, "step": 55 }, { "epoch": 0.034139402560455195, "grad_norm": 0.1672595888376236, "learning_rate": 8.386407858128706e-05, "loss": 1.2642, "step": 60 }, { "epoch": 0.03698435277382646, "grad_norm": 0.20153048634529114, "learning_rate": 8.07106356344834e-05, "loss": 1.3451, "step": 65 }, { "epoch": 0.03982930298719772, "grad_norm": 0.18182030320167542, "learning_rate": 7.734740790612136e-05, "loss": 1.2577, "step": 70 }, { "epoch": 0.04267425320056899, "grad_norm": 0.18567194044589996, "learning_rate": 7.379736965185368e-05, "loss": 1.3163, "step": 75 }, { "epoch": 0.04551920341394026, "grad_norm": 0.19859260320663452, "learning_rate": 7.008477123264848e-05, "loss": 1.2875, "step": 80 }, { "epoch": 0.04836415362731152, "grad_norm": 0.19528040289878845, "learning_rate": 6.623497346023418e-05, "loss": 1.2905, "step": 85 }, { "epoch": 0.051209103840682786, "grad_norm": 0.2310878038406372, "learning_rate": 6.227427435703997e-05, "loss": 1.3571, "step": 90 }, { "epoch": 0.05405405405405406, "grad_norm": 0.27197933197021484, "learning_rate": 5.8229729514036705e-05, "loss": 1.1707, "step": 95 }, { "epoch": 0.05689900426742532, "grad_norm": 0.5292563438415527, "learning_rate": 5.4128967273616625e-05, "loss": 0.9571, "step": 100 }, { "epoch": 0.05689900426742532, "eval_loss": 1.2389965057373047, "eval_runtime": 98.9658, "eval_samples_per_second": 7.477, "eval_steps_per_second": 3.739, "step": 100 }, { "epoch": 0.059743954480796585, "grad_norm": 0.25443074107170105, "learning_rate": 5e-05, "loss": 1.2893, "step": 105 }, { "epoch": 0.06258890469416785, "grad_norm": 0.2228810042142868, "learning_rate": 4.5871032726383386e-05, "loss": 1.2617, "step": 110 }, { "epoch": 0.06543385490753911, "grad_norm": 0.27687934041023254, "learning_rate": 4.17702704859633e-05, "loss": 1.2612, "step": 115 }, { "epoch": 0.06827880512091039, "grad_norm": 0.21604830026626587, "learning_rate": 3.772572564296005e-05, "loss": 1.3271, "step": 120 }, { "epoch": 0.07112375533428165, "grad_norm": 0.22077901661396027, "learning_rate": 3.3765026539765834e-05, "loss": 1.2858, "step": 125 }, { "epoch": 0.07396870554765292, "grad_norm": 0.20174729824066162, "learning_rate": 2.991522876735154e-05, "loss": 1.2405, "step": 130 }, { "epoch": 0.07681365576102418, "grad_norm": 0.22198493778705597, "learning_rate": 2.6202630348146324e-05, "loss": 1.2382, "step": 135 }, { "epoch": 0.07965860597439545, "grad_norm": 0.2763294577598572, "learning_rate": 2.2652592093878666e-05, "loss": 1.3044, "step": 140 }, { "epoch": 0.08250355618776671, "grad_norm": 0.22474978864192963, "learning_rate": 1.928936436551661e-05, "loss": 1.1803, "step": 145 }, { "epoch": 0.08534850640113797, "grad_norm": 0.5697594881057739, "learning_rate": 1.6135921418712956e-05, "loss": 0.9433, "step": 150 }, { "epoch": 0.08534850640113797, "eval_loss": 1.222001314163208, "eval_runtime": 98.9441, "eval_samples_per_second": 7.479, "eval_steps_per_second": 3.739, "step": 150 }, { "epoch": 0.08819345661450925, "grad_norm": 0.22383780777454376, "learning_rate": 1.3213804466343421e-05, "loss": 1.2384, "step": 155 }, { "epoch": 0.09103840682788052, "grad_norm": 0.23186904191970825, "learning_rate": 1.0542974530180327e-05, "loss": 1.2749, "step": 160 }, { "epoch": 0.09388335704125178, "grad_norm": 0.2378578633069992, "learning_rate": 8.141676086873572e-06, "loss": 1.3229, "step": 165 }, { "epoch": 0.09672830725462304, "grad_norm": 0.2523314356803894, "learning_rate": 6.026312439675552e-06, "loss": 1.2946, "step": 170 }, { "epoch": 0.09957325746799431, "grad_norm": 0.22312402725219727, "learning_rate": 4.2113336672471245e-06, "loss": 1.2379, "step": 175 }, { "epoch": 0.10241820768136557, "grad_norm": 0.2241775393486023, "learning_rate": 2.7091379149682685e-06, "loss": 1.2604, "step": 180 }, { "epoch": 0.10526315789473684, "grad_norm": 0.2440503090620041, "learning_rate": 1.5299867030334814e-06, "loss": 1.2162, "step": 185 }, { "epoch": 0.10810810810810811, "grad_norm": 0.25313878059387207, "learning_rate": 6.819348298638839e-07, "loss": 1.2753, "step": 190 }, { "epoch": 0.11095305832147938, "grad_norm": 0.25487780570983887, "learning_rate": 1.7077534966650766e-07, "loss": 1.0624, "step": 195 }, { "epoch": 0.11379800853485064, "grad_norm": 0.5397162437438965, "learning_rate": 0.0, "loss": 0.9448, "step": 200 }, { "epoch": 0.11379800853485064, "eval_loss": 1.2195565700531006, "eval_runtime": 98.8875, "eval_samples_per_second": 7.483, "eval_steps_per_second": 3.742, "step": 200 } ], "logging_steps": 5, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1865438228263731e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }