|
{ |
|
"best_metric": 0.7026871898897578, |
|
"best_model_checkpoint": "/content/temp_assamese/checkpoint-28000", |
|
"epoch": 2.0, |
|
"eval_steps": 2000, |
|
"global_step": 28386, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14091453533431975, |
|
"grad_norm": 12.538192749023438, |
|
"learning_rate": 4.648418234340873e-05, |
|
"loss": 2.2163, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.14091453533431975, |
|
"eval_accuracy": 0.6319857833787309, |
|
"eval_loss": 1.8646236658096313, |
|
"eval_runtime": 102.3117, |
|
"eval_samples_per_second": 116.966, |
|
"eval_steps_per_second": 7.311, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.2818290706686395, |
|
"grad_norm": 12.645801544189453, |
|
"learning_rate": 4.296131896005073e-05, |
|
"loss": 1.9456, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.2818290706686395, |
|
"eval_accuracy": 0.6494802758779904, |
|
"eval_loss": 1.749164342880249, |
|
"eval_runtime": 102.5618, |
|
"eval_samples_per_second": 116.681, |
|
"eval_steps_per_second": 7.293, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.4227436060029592, |
|
"grad_norm": 9.79688835144043, |
|
"learning_rate": 3.943845557669274e-05, |
|
"loss": 1.8391, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.4227436060029592, |
|
"eval_accuracy": 0.6606430993204072, |
|
"eval_loss": 1.6770141124725342, |
|
"eval_runtime": 102.5061, |
|
"eval_samples_per_second": 116.744, |
|
"eval_steps_per_second": 7.297, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.563658141337279, |
|
"grad_norm": 10.446520805358887, |
|
"learning_rate": 3.5917353625026426e-05, |
|
"loss": 1.7704, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.563658141337279, |
|
"eval_accuracy": 0.6706733344622967, |
|
"eval_loss": 1.6165672540664673, |
|
"eval_runtime": 102.6569, |
|
"eval_samples_per_second": 116.573, |
|
"eval_steps_per_second": 7.286, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.7045726766715987, |
|
"grad_norm": 9.16817569732666, |
|
"learning_rate": 3.23962516733601e-05, |
|
"loss": 1.7213, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.7045726766715987, |
|
"eval_accuracy": 0.6759152415500662, |
|
"eval_loss": 1.5817841291427612, |
|
"eval_runtime": 102.3347, |
|
"eval_samples_per_second": 116.94, |
|
"eval_steps_per_second": 7.309, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.8454872120059184, |
|
"grad_norm": 9.296396255493164, |
|
"learning_rate": 2.8875149721693794e-05, |
|
"loss": 1.6802, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.8454872120059184, |
|
"eval_accuracy": 0.6820033844378066, |
|
"eval_loss": 1.5402722358703613, |
|
"eval_runtime": 102.6712, |
|
"eval_samples_per_second": 116.557, |
|
"eval_steps_per_second": 7.285, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.9864017473402381, |
|
"grad_norm": 7.622576713562012, |
|
"learning_rate": 2.5354047770027478e-05, |
|
"loss": 1.6432, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.9864017473402381, |
|
"eval_accuracy": 0.6857634747617221, |
|
"eval_loss": 1.5153496265411377, |
|
"eval_runtime": 102.6162, |
|
"eval_samples_per_second": 116.619, |
|
"eval_steps_per_second": 7.289, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.127316282674558, |
|
"grad_norm": 11.516377449035645, |
|
"learning_rate": 2.1832945818361165e-05, |
|
"loss": 1.6074, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.127316282674558, |
|
"eval_accuracy": 0.6885159496543541, |
|
"eval_loss": 1.496454119682312, |
|
"eval_runtime": 102.8787, |
|
"eval_samples_per_second": 116.321, |
|
"eval_steps_per_second": 7.271, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.2682308180088775, |
|
"grad_norm": 9.480605125427246, |
|
"learning_rate": 1.831008243500317e-05, |
|
"loss": 1.5833, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.2682308180088775, |
|
"eval_accuracy": 0.6934179945828158, |
|
"eval_loss": 1.4677945375442505, |
|
"eval_runtime": 102.7153, |
|
"eval_samples_per_second": 116.506, |
|
"eval_steps_per_second": 7.282, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.4091453533431975, |
|
"grad_norm": 8.643658638000488, |
|
"learning_rate": 1.4788980483336856e-05, |
|
"loss": 1.5649, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.4091453533431975, |
|
"eval_accuracy": 0.6950099353567151, |
|
"eval_loss": 1.4508079290390015, |
|
"eval_runtime": 102.6391, |
|
"eval_samples_per_second": 116.593, |
|
"eval_steps_per_second": 7.288, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.550059888677517, |
|
"grad_norm": 7.6539506912231445, |
|
"learning_rate": 1.1267878531670542e-05, |
|
"loss": 1.553, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.550059888677517, |
|
"eval_accuracy": 0.698540482055296, |
|
"eval_loss": 1.436693787574768, |
|
"eval_runtime": 102.5486, |
|
"eval_samples_per_second": 116.696, |
|
"eval_steps_per_second": 7.294, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.690974424011837, |
|
"grad_norm": 8.063584327697754, |
|
"learning_rate": 7.746776580004228e-06, |
|
"loss": 1.5345, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.690974424011837, |
|
"eval_accuracy": 0.7001222876777317, |
|
"eval_loss": 1.4230775833129883, |
|
"eval_runtime": 102.8829, |
|
"eval_samples_per_second": 116.317, |
|
"eval_steps_per_second": 7.27, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.8318889593461565, |
|
"grad_norm": 8.720465660095215, |
|
"learning_rate": 4.2256746283379135e-06, |
|
"loss": 1.5261, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.8318889593461565, |
|
"eval_accuracy": 0.701337358949075, |
|
"eval_loss": 1.4157360792160034, |
|
"eval_runtime": 102.7046, |
|
"eval_samples_per_second": 116.519, |
|
"eval_steps_per_second": 7.283, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.9728034946804764, |
|
"grad_norm": 9.144937515258789, |
|
"learning_rate": 7.045726766715987e-07, |
|
"loss": 1.5148, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.9728034946804764, |
|
"eval_accuracy": 0.7026871898897578, |
|
"eval_loss": 1.4097787141799927, |
|
"eval_runtime": 102.5398, |
|
"eval_samples_per_second": 116.706, |
|
"eval_steps_per_second": 7.295, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 28386, |
|
"total_flos": 1.197729267088466e+17, |
|
"train_loss": 1.6903211268009264, |
|
"train_runtime": 8975.6005, |
|
"train_samples_per_second": 50.601, |
|
"train_steps_per_second": 3.163 |
|
} |
|
], |
|
"logging_steps": 2000, |
|
"max_steps": 28386, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 2000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.197729267088466e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|