{ "best_metric": 0.7026871898897578, "best_model_checkpoint": "/content/temp_assamese/checkpoint-28000", "epoch": 2.0, "eval_steps": 2000, "global_step": 28386, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14091453533431975, "grad_norm": 12.538192749023438, "learning_rate": 4.648418234340873e-05, "loss": 2.2163, "step": 2000 }, { "epoch": 0.14091453533431975, "eval_accuracy": 0.6319857833787309, "eval_loss": 1.8646236658096313, "eval_runtime": 102.3117, "eval_samples_per_second": 116.966, "eval_steps_per_second": 7.311, "step": 2000 }, { "epoch": 0.2818290706686395, "grad_norm": 12.645801544189453, "learning_rate": 4.296131896005073e-05, "loss": 1.9456, "step": 4000 }, { "epoch": 0.2818290706686395, "eval_accuracy": 0.6494802758779904, "eval_loss": 1.749164342880249, "eval_runtime": 102.5618, "eval_samples_per_second": 116.681, "eval_steps_per_second": 7.293, "step": 4000 }, { "epoch": 0.4227436060029592, "grad_norm": 9.79688835144043, "learning_rate": 3.943845557669274e-05, "loss": 1.8391, "step": 6000 }, { "epoch": 0.4227436060029592, "eval_accuracy": 0.6606430993204072, "eval_loss": 1.6770141124725342, "eval_runtime": 102.5061, "eval_samples_per_second": 116.744, "eval_steps_per_second": 7.297, "step": 6000 }, { "epoch": 0.563658141337279, "grad_norm": 10.446520805358887, "learning_rate": 3.5917353625026426e-05, "loss": 1.7704, "step": 8000 }, { "epoch": 0.563658141337279, "eval_accuracy": 0.6706733344622967, "eval_loss": 1.6165672540664673, "eval_runtime": 102.6569, "eval_samples_per_second": 116.573, "eval_steps_per_second": 7.286, "step": 8000 }, { "epoch": 0.7045726766715987, "grad_norm": 9.16817569732666, "learning_rate": 3.23962516733601e-05, "loss": 1.7213, "step": 10000 }, { "epoch": 0.7045726766715987, "eval_accuracy": 0.6759152415500662, "eval_loss": 1.5817841291427612, "eval_runtime": 102.3347, "eval_samples_per_second": 116.94, "eval_steps_per_second": 7.309, "step": 10000 }, { "epoch": 0.8454872120059184, "grad_norm": 9.296396255493164, "learning_rate": 2.8875149721693794e-05, "loss": 1.6802, "step": 12000 }, { "epoch": 0.8454872120059184, "eval_accuracy": 0.6820033844378066, "eval_loss": 1.5402722358703613, "eval_runtime": 102.6712, "eval_samples_per_second": 116.557, "eval_steps_per_second": 7.285, "step": 12000 }, { "epoch": 0.9864017473402381, "grad_norm": 7.622576713562012, "learning_rate": 2.5354047770027478e-05, "loss": 1.6432, "step": 14000 }, { "epoch": 0.9864017473402381, "eval_accuracy": 0.6857634747617221, "eval_loss": 1.5153496265411377, "eval_runtime": 102.6162, "eval_samples_per_second": 116.619, "eval_steps_per_second": 7.289, "step": 14000 }, { "epoch": 1.127316282674558, "grad_norm": 11.516377449035645, "learning_rate": 2.1832945818361165e-05, "loss": 1.6074, "step": 16000 }, { "epoch": 1.127316282674558, "eval_accuracy": 0.6885159496543541, "eval_loss": 1.496454119682312, "eval_runtime": 102.8787, "eval_samples_per_second": 116.321, "eval_steps_per_second": 7.271, "step": 16000 }, { "epoch": 1.2682308180088775, "grad_norm": 9.480605125427246, "learning_rate": 1.831008243500317e-05, "loss": 1.5833, "step": 18000 }, { "epoch": 1.2682308180088775, "eval_accuracy": 0.6934179945828158, "eval_loss": 1.4677945375442505, "eval_runtime": 102.7153, "eval_samples_per_second": 116.506, "eval_steps_per_second": 7.282, "step": 18000 }, { "epoch": 1.4091453533431975, "grad_norm": 8.643658638000488, "learning_rate": 1.4788980483336856e-05, "loss": 1.5649, "step": 20000 }, { "epoch": 1.4091453533431975, "eval_accuracy": 0.6950099353567151, "eval_loss": 1.4508079290390015, "eval_runtime": 102.6391, "eval_samples_per_second": 116.593, "eval_steps_per_second": 7.288, "step": 20000 }, { "epoch": 1.550059888677517, "grad_norm": 7.6539506912231445, "learning_rate": 1.1267878531670542e-05, "loss": 1.553, "step": 22000 }, { "epoch": 1.550059888677517, "eval_accuracy": 0.698540482055296, "eval_loss": 1.436693787574768, "eval_runtime": 102.5486, "eval_samples_per_second": 116.696, "eval_steps_per_second": 7.294, "step": 22000 }, { "epoch": 1.690974424011837, "grad_norm": 8.063584327697754, "learning_rate": 7.746776580004228e-06, "loss": 1.5345, "step": 24000 }, { "epoch": 1.690974424011837, "eval_accuracy": 0.7001222876777317, "eval_loss": 1.4230775833129883, "eval_runtime": 102.8829, "eval_samples_per_second": 116.317, "eval_steps_per_second": 7.27, "step": 24000 }, { "epoch": 1.8318889593461565, "grad_norm": 8.720465660095215, "learning_rate": 4.2256746283379135e-06, "loss": 1.5261, "step": 26000 }, { "epoch": 1.8318889593461565, "eval_accuracy": 0.701337358949075, "eval_loss": 1.4157360792160034, "eval_runtime": 102.7046, "eval_samples_per_second": 116.519, "eval_steps_per_second": 7.283, "step": 26000 }, { "epoch": 1.9728034946804764, "grad_norm": 9.144937515258789, "learning_rate": 7.045726766715987e-07, "loss": 1.5148, "step": 28000 }, { "epoch": 1.9728034946804764, "eval_accuracy": 0.7026871898897578, "eval_loss": 1.4097787141799927, "eval_runtime": 102.5398, "eval_samples_per_second": 116.706, "eval_steps_per_second": 7.295, "step": 28000 }, { "epoch": 2.0, "step": 28386, "total_flos": 1.197729267088466e+17, "train_loss": 1.6903211268009264, "train_runtime": 8975.6005, "train_samples_per_second": 50.601, "train_steps_per_second": 3.163 } ], "logging_steps": 2000, "max_steps": 28386, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.197729267088466e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }