{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9985869053226566,
  "eval_steps": 14,
  "global_step": 265,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0527555346208196,
      "grad_norm": 0.18791379034519196,
      "learning_rate": 0.0013333333333333333,
      "loss": 1.1165,
      "step": 14
    },
    {
      "epoch": 0.1055110692416392,
      "grad_norm": 1.2923468351364136,
      "learning_rate": 0.001044465935734187,
      "loss": 1.5197,
      "step": 28
    },
    {
      "epoch": 0.15826660386245878,
      "grad_norm": 0.27302563190460205,
      "learning_rate": 0.0006928203230275508,
      "loss": 0.9163,
      "step": 42
    },
    {
      "epoch": 0.2110221384832784,
      "grad_norm": 0.252996027469635,
      "learning_rate": 0.0005547001962252292,
      "loss": 0.8046,
      "step": 56
    },
    {
      "epoch": 0.263777673104098,
      "grad_norm": 0.25708699226379395,
      "learning_rate": 0.00047583095143088644,
      "loss": 0.7624,
      "step": 70
    },
    {
      "epoch": 0.31653320772491755,
      "grad_norm": 0.2488645762205124,
      "learning_rate": 0.000423207369515159,
      "loss": 0.7358,
      "step": 84
    },
    {
      "epoch": 0.36928874234573716,
      "grad_norm": 0.2357274889945984,
      "learning_rate": 0.00038490017945975053,
      "loss": 0.7009,
      "step": 98
    },
    {
      "epoch": 0.4220442769665568,
      "grad_norm": 0.25910037755966187,
      "learning_rate": 0.00035540932665545545,
      "loss": 0.6576,
      "step": 112
    },
    {
      "epoch": 0.47479981158737633,
      "grad_norm": 0.24194850027561188,
      "learning_rate": 0.00033180075816559865,
      "loss": 0.6456,
      "step": 126
    },
    {
      "epoch": 0.527555346208196,
      "grad_norm": 0.31878945231437683,
      "learning_rate": 0.0003123475237772121,
      "loss": 0.6035,
      "step": 140
    },
    {
      "epoch": 0.5803108808290155,
      "grad_norm": 0.23705270886421204,
      "learning_rate": 0.00029595817420019407,
      "loss": 0.6138,
      "step": 154
    },
    {
      "epoch": 0.6330664154498351,
      "grad_norm": 0.32103270292282104,
      "learning_rate": 0.0002819045914409638,
      "loss": 0.5941,
      "step": 168
    },
    {
      "epoch": 0.6858219500706547,
      "grad_norm": 0.2693799138069153,
      "learning_rate": 0.00026967994498529687,
      "loss": 0.5686,
      "step": 182
    },
    {
      "epoch": 0.7385774846914743,
      "grad_norm": 0.2850986123085022,
      "learning_rate": 0.0002589191112012619,
      "loss": 0.5854,
      "step": 196
    },
    {
      "epoch": 0.7913330193122939,
      "grad_norm": 0.2964475154876709,
      "learning_rate": 0.00024935149047701483,
      "loss": 0.5482,
      "step": 210
    },
    {
      "epoch": 0.8440885539331136,
      "grad_norm": 0.27225586771965027,
      "learning_rate": 0.0002407717061715384,
      "loss": 0.5447,
      "step": 224
    },
    {
      "epoch": 0.8968440885539332,
      "grad_norm": 0.24302007257938385,
      "learning_rate": 0.00023302069121418522,
      "loss": 0.5245,
      "step": 238
    },
    {
      "epoch": 0.9495996231747527,
      "grad_norm": 0.3157835900783539,
      "learning_rate": 0.00022597307314641284,
      "loss": 0.5328,
      "step": 252
    }
  ],
  "logging_steps": 14,
  "max_steps": 265,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 14,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.691462902673572e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}