{ "best_metric": 0.9848859906196594, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.12012012012012012, "eval_steps": 25, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0024024024024024023, "grad_norm": 2.0971031188964844, "learning_rate": 2e-05, "loss": 3.0187, "step": 1 }, { "epoch": 0.0024024024024024023, "eval_loss": 2.851088523864746, "eval_runtime": 48.9037, "eval_samples_per_second": 14.334, "eval_steps_per_second": 1.799, "step": 1 }, { "epoch": 0.004804804804804805, "grad_norm": 2.2249419689178467, "learning_rate": 4e-05, "loss": 2.6649, "step": 2 }, { "epoch": 0.007207207207207207, "grad_norm": 2.2943310737609863, "learning_rate": 6e-05, "loss": 3.0719, "step": 3 }, { "epoch": 0.00960960960960961, "grad_norm": 1.7306230068206787, "learning_rate": 8e-05, "loss": 2.6095, "step": 4 }, { "epoch": 0.012012012012012012, "grad_norm": 1.0318970680236816, "learning_rate": 0.0001, "loss": 2.3355, "step": 5 }, { "epoch": 0.014414414414414415, "grad_norm": 0.6773841381072998, "learning_rate": 9.987820251299122e-05, "loss": 1.6014, "step": 6 }, { "epoch": 0.016816816816816817, "grad_norm": 0.7722229957580566, "learning_rate": 9.951340343707852e-05, "loss": 1.8833, "step": 7 }, { "epoch": 0.01921921921921922, "grad_norm": 0.8358666896820068, "learning_rate": 9.890738003669029e-05, "loss": 1.8462, "step": 8 }, { "epoch": 0.021621621621621623, "grad_norm": 0.7132935523986816, "learning_rate": 9.806308479691595e-05, "loss": 1.5991, "step": 9 }, { "epoch": 0.024024024024024024, "grad_norm": 0.6344431042671204, "learning_rate": 9.698463103929542e-05, "loss": 1.4909, "step": 10 }, { "epoch": 0.026426426426426425, "grad_norm": 0.7708803415298462, "learning_rate": 9.567727288213005e-05, "loss": 1.4264, "step": 11 }, { "epoch": 0.02882882882882883, "grad_norm": 0.6311034560203552, "learning_rate": 9.414737964294636e-05, "loss": 1.3682, "step": 12 }, { "epoch": 0.03123123123123123, "grad_norm": 0.567393958568573, "learning_rate": 9.24024048078213e-05, "loss": 1.5798, "step": 13 }, { "epoch": 0.033633633633633635, "grad_norm": 0.513917088508606, "learning_rate": 9.045084971874738e-05, "loss": 1.382, "step": 14 }, { "epoch": 0.036036036036036036, "grad_norm": 0.46948641538619995, "learning_rate": 8.83022221559489e-05, "loss": 1.2904, "step": 15 }, { "epoch": 0.03843843843843844, "grad_norm": 0.4837779402732849, "learning_rate": 8.596699001693255e-05, "loss": 1.0935, "step": 16 }, { "epoch": 0.04084084084084084, "grad_norm": 0.6037317514419556, "learning_rate": 8.345653031794292e-05, "loss": 1.5422, "step": 17 }, { "epoch": 0.043243243243243246, "grad_norm": 0.45345234870910645, "learning_rate": 8.07830737662829e-05, "loss": 1.1688, "step": 18 }, { "epoch": 0.04564564564564565, "grad_norm": 0.4484889805316925, "learning_rate": 7.795964517353735e-05, "loss": 1.2741, "step": 19 }, { "epoch": 0.04804804804804805, "grad_norm": 0.46147358417510986, "learning_rate": 7.500000000000001e-05, "loss": 1.3749, "step": 20 }, { "epoch": 0.05045045045045045, "grad_norm": 0.41564714908599854, "learning_rate": 7.191855733945387e-05, "loss": 0.9641, "step": 21 }, { "epoch": 0.05285285285285285, "grad_norm": 0.416939914226532, "learning_rate": 6.873032967079561e-05, "loss": 0.8962, "step": 22 }, { "epoch": 0.05525525525525526, "grad_norm": 0.4956507086753845, "learning_rate": 6.545084971874738e-05, "loss": 0.8582, "step": 23 }, { "epoch": 0.05765765765765766, "grad_norm": 0.403537392616272, "learning_rate": 
6.209609477998338e-05, "loss": 0.9143, "step": 24 }, { "epoch": 0.06006006006006006, "grad_norm": 0.40698733925819397, "learning_rate": 5.868240888334653e-05, "loss": 0.873, "step": 25 }, { "epoch": 0.06006006006006006, "eval_loss": 1.116862416267395, "eval_runtime": 49.8248, "eval_samples_per_second": 14.069, "eval_steps_per_second": 1.766, "step": 25 }, { "epoch": 0.06246246246246246, "grad_norm": 0.35601821541786194, "learning_rate": 5.522642316338268e-05, "loss": 0.7875, "step": 26 }, { "epoch": 0.06486486486486487, "grad_norm": 0.4093438684940338, "learning_rate": 5.174497483512506e-05, "loss": 0.962, "step": 27 }, { "epoch": 0.06726726726726727, "grad_norm": 0.4381888508796692, "learning_rate": 4.825502516487497e-05, "loss": 1.0355, "step": 28 }, { "epoch": 0.06966966966966967, "grad_norm": 0.394925594329834, "learning_rate": 4.477357683661734e-05, "loss": 0.8719, "step": 29 }, { "epoch": 0.07207207207207207, "grad_norm": 0.39502450823783875, "learning_rate": 4.131759111665349e-05, "loss": 0.973, "step": 30 }, { "epoch": 0.07447447447447447, "grad_norm": 0.41608545184135437, "learning_rate": 3.790390522001662e-05, "loss": 0.7986, "step": 31 }, { "epoch": 0.07687687687687687, "grad_norm": 0.4361729919910431, "learning_rate": 3.4549150281252636e-05, "loss": 0.9382, "step": 32 }, { "epoch": 0.07927927927927927, "grad_norm": 0.42400476336479187, "learning_rate": 3.12696703292044e-05, "loss": 1.0115, "step": 33 }, { "epoch": 0.08168168168168168, "grad_norm": 0.39441099762916565, "learning_rate": 2.8081442660546125e-05, "loss": 0.8631, "step": 34 }, { "epoch": 0.08408408408408409, "grad_norm": 0.42576584219932556, "learning_rate": 2.500000000000001e-05, "loss": 1.0433, "step": 35 }, { "epoch": 0.08648648648648649, "grad_norm": 0.3904152512550354, "learning_rate": 2.2040354826462668e-05, "loss": 1.0188, "step": 36 }, { "epoch": 0.08888888888888889, "grad_norm": 0.5034300088882446, "learning_rate": 1.9216926233717085e-05, "loss": 1.1074, "step": 37 }, { "epoch": 0.0912912912912913, "grad_norm": 0.6272788643836975, "learning_rate": 1.6543469682057106e-05, "loss": 1.1985, "step": 38 }, { "epoch": 0.0936936936936937, "grad_norm": 0.5937858819961548, "learning_rate": 1.4033009983067452e-05, "loss": 1.3471, "step": 39 }, { "epoch": 0.0960960960960961, "grad_norm": 0.646436333656311, "learning_rate": 1.1697777844051105e-05, "loss": 1.353, "step": 40 }, { "epoch": 0.0984984984984985, "grad_norm": 0.5500105023384094, "learning_rate": 9.549150281252633e-06, "loss": 1.1414, "step": 41 }, { "epoch": 0.1009009009009009, "grad_norm": 0.6234367489814758, "learning_rate": 7.597595192178702e-06, "loss": 1.3546, "step": 42 }, { "epoch": 0.1033033033033033, "grad_norm": 0.5408471822738647, "learning_rate": 5.852620357053651e-06, "loss": 0.9866, "step": 43 }, { "epoch": 0.1057057057057057, "grad_norm": 0.8120220303535461, "learning_rate": 4.322727117869951e-06, "loss": 1.4077, "step": 44 }, { "epoch": 0.10810810810810811, "grad_norm": 0.654384195804596, "learning_rate": 3.0153689607045845e-06, "loss": 1.7303, "step": 45 }, { "epoch": 0.11051051051051052, "grad_norm": 0.6135900616645813, "learning_rate": 1.9369152030840556e-06, "loss": 1.4114, "step": 46 }, { "epoch": 0.11291291291291292, "grad_norm": 0.5442359447479248, "learning_rate": 1.0926199633097157e-06, "loss": 1.22, "step": 47 }, { "epoch": 0.11531531531531532, "grad_norm": 0.5599455237388611, "learning_rate": 4.865965629214819e-07, "loss": 1.5017, "step": 48 }, { "epoch": 0.11771771771771772, "grad_norm": 0.6104506254196167, "learning_rate": 
1.2179748700879012e-07, "loss": 1.4995, "step": 49 }, { "epoch": 0.12012012012012012, "grad_norm": 0.44341933727264404, "learning_rate": 0.0, "loss": 0.8638, "step": 50 }, { "epoch": 0.12012012012012012, "eval_loss": 0.9848859906196594, "eval_runtime": 49.8489, "eval_samples_per_second": 14.063, "eval_steps_per_second": 1.765, "step": 50 } ], "logging_steps": 1, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.1094434267136e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }