{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.994764397905759,
"eval_steps": 500,
"global_step": 858,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.034904013961605584,
"grad_norm": 1.636154953792276,
"learning_rate": 5e-06,
"loss": 0.9609,
"step": 10
},
{
"epoch": 0.06980802792321117,
"grad_norm": 1.1847809904244873,
"learning_rate": 5e-06,
"loss": 0.8537,
"step": 20
},
{
"epoch": 0.10471204188481675,
"grad_norm": 1.1055793026372138,
"learning_rate": 5e-06,
"loss": 0.8208,
"step": 30
},
{
"epoch": 0.13961605584642234,
"grad_norm": 1.1454147166881017,
"learning_rate": 5e-06,
"loss": 0.8046,
"step": 40
},
{
"epoch": 0.17452006980802792,
"grad_norm": 0.8149143192535275,
"learning_rate": 5e-06,
"loss": 0.7882,
"step": 50
},
{
"epoch": 0.2094240837696335,
"grad_norm": 0.8125713341369675,
"learning_rate": 5e-06,
"loss": 0.7822,
"step": 60
},
{
"epoch": 0.2443280977312391,
"grad_norm": 0.6705477513854557,
"learning_rate": 5e-06,
"loss": 0.7746,
"step": 70
},
{
"epoch": 0.2792321116928447,
"grad_norm": 0.742108317973775,
"learning_rate": 5e-06,
"loss": 0.7674,
"step": 80
},
{
"epoch": 0.31413612565445026,
"grad_norm": 0.5830122580252405,
"learning_rate": 5e-06,
"loss": 0.7613,
"step": 90
},
{
"epoch": 0.34904013961605584,
"grad_norm": 0.6867621444893013,
"learning_rate": 5e-06,
"loss": 0.7581,
"step": 100
},
{
"epoch": 0.38394415357766143,
"grad_norm": 0.5706894443800514,
"learning_rate": 5e-06,
"loss": 0.7561,
"step": 110
},
{
"epoch": 0.418848167539267,
"grad_norm": 0.5933615440045283,
"learning_rate": 5e-06,
"loss": 0.7555,
"step": 120
},
{
"epoch": 0.4537521815008726,
"grad_norm": 0.5976459344111194,
"learning_rate": 5e-06,
"loss": 0.7479,
"step": 130
},
{
"epoch": 0.4886561954624782,
"grad_norm": 0.7595901956158283,
"learning_rate": 5e-06,
"loss": 0.7445,
"step": 140
},
{
"epoch": 0.5235602094240838,
"grad_norm": 0.5243859212111798,
"learning_rate": 5e-06,
"loss": 0.7453,
"step": 150
},
{
"epoch": 0.5584642233856894,
"grad_norm": 0.7053972260403277,
"learning_rate": 5e-06,
"loss": 0.7459,
"step": 160
},
{
"epoch": 0.5933682373472949,
"grad_norm": 0.7356815513429203,
"learning_rate": 5e-06,
"loss": 0.7406,
"step": 170
},
{
"epoch": 0.6282722513089005,
"grad_norm": 0.5406768959780917,
"learning_rate": 5e-06,
"loss": 0.7403,
"step": 180
},
{
"epoch": 0.6631762652705061,
"grad_norm": 0.5731257742921576,
"learning_rate": 5e-06,
"loss": 0.7414,
"step": 190
},
{
"epoch": 0.6980802792321117,
"grad_norm": 0.5893545114403889,
"learning_rate": 5e-06,
"loss": 0.7354,
"step": 200
},
{
"epoch": 0.7329842931937173,
"grad_norm": 0.6666154404813628,
"learning_rate": 5e-06,
"loss": 0.7378,
"step": 210
},
{
"epoch": 0.7678883071553229,
"grad_norm": 0.6379810550334492,
"learning_rate": 5e-06,
"loss": 0.736,
"step": 220
},
{
"epoch": 0.8027923211169284,
"grad_norm": 0.5761611687799336,
"learning_rate": 5e-06,
"loss": 0.7365,
"step": 230
},
{
"epoch": 0.837696335078534,
"grad_norm": 0.5490954549201844,
"learning_rate": 5e-06,
"loss": 0.7351,
"step": 240
},
{
"epoch": 0.8726003490401396,
"grad_norm": 0.5577745326979847,
"learning_rate": 5e-06,
"loss": 0.7305,
"step": 250
},
{
"epoch": 0.9075043630017452,
"grad_norm": 0.5309350088615197,
"learning_rate": 5e-06,
"loss": 0.7344,
"step": 260
},
{
"epoch": 0.9424083769633508,
"grad_norm": 0.5171633944749564,
"learning_rate": 5e-06,
"loss": 0.7312,
"step": 270
},
{
"epoch": 0.9773123909249564,
"grad_norm": 0.6439135188078838,
"learning_rate": 5e-06,
"loss": 0.729,
"step": 280
},
{
"epoch": 1.012216404886562,
"grad_norm": 0.7944207887971882,
"learning_rate": 5e-06,
"loss": 0.752,
"step": 290
},
{
"epoch": 1.0471204188481675,
"grad_norm": 0.6463556134515147,
"learning_rate": 5e-06,
"loss": 0.6937,
"step": 300
},
{
"epoch": 1.082024432809773,
"grad_norm": 0.9747756816715487,
"learning_rate": 5e-06,
"loss": 0.6922,
"step": 310
},
{
"epoch": 1.1169284467713787,
"grad_norm": 0.6041467541568463,
"learning_rate": 5e-06,
"loss": 0.6946,
"step": 320
},
{
"epoch": 1.1518324607329844,
"grad_norm": 0.6555191903371557,
"learning_rate": 5e-06,
"loss": 0.6905,
"step": 330
},
{
"epoch": 1.1867364746945899,
"grad_norm": 0.7808834156906888,
"learning_rate": 5e-06,
"loss": 0.6905,
"step": 340
},
{
"epoch": 1.2216404886561953,
"grad_norm": 0.689883618215288,
"learning_rate": 5e-06,
"loss": 0.6886,
"step": 350
},
{
"epoch": 1.256544502617801,
"grad_norm": 0.5458990467442779,
"learning_rate": 5e-06,
"loss": 0.6958,
"step": 360
},
{
"epoch": 1.2914485165794067,
"grad_norm": 0.49774723989961944,
"learning_rate": 5e-06,
"loss": 0.6872,
"step": 370
},
{
"epoch": 1.3263525305410122,
"grad_norm": 0.615067023750174,
"learning_rate": 5e-06,
"loss": 0.6913,
"step": 380
},
{
"epoch": 1.3612565445026177,
"grad_norm": 0.5120804275981703,
"learning_rate": 5e-06,
"loss": 0.6943,
"step": 390
},
{
"epoch": 1.3961605584642234,
"grad_norm": 0.6511934985434475,
"learning_rate": 5e-06,
"loss": 0.6922,
"step": 400
},
{
"epoch": 1.431064572425829,
"grad_norm": 0.6015850091580557,
"learning_rate": 5e-06,
"loss": 0.691,
"step": 410
},
{
"epoch": 1.4659685863874345,
"grad_norm": 0.5548178493075747,
"learning_rate": 5e-06,
"loss": 0.6876,
"step": 420
},
{
"epoch": 1.50087260034904,
"grad_norm": 0.6084101340671536,
"learning_rate": 5e-06,
"loss": 0.6871,
"step": 430
},
{
"epoch": 1.5357766143106457,
"grad_norm": 0.5785174417745115,
"learning_rate": 5e-06,
"loss": 0.6893,
"step": 440
},
{
"epoch": 1.5706806282722514,
"grad_norm": 0.6387925686406533,
"learning_rate": 5e-06,
"loss": 0.6889,
"step": 450
},
{
"epoch": 1.6055846422338569,
"grad_norm": 0.511039789752418,
"learning_rate": 5e-06,
"loss": 0.6882,
"step": 460
},
{
"epoch": 1.6404886561954624,
"grad_norm": 0.6303156357824996,
"learning_rate": 5e-06,
"loss": 0.6874,
"step": 470
},
{
"epoch": 1.675392670157068,
"grad_norm": 0.5463553041688999,
"learning_rate": 5e-06,
"loss": 0.6826,
"step": 480
},
{
"epoch": 1.7102966841186737,
"grad_norm": 0.6680053603003989,
"learning_rate": 5e-06,
"loss": 0.6887,
"step": 490
},
{
"epoch": 1.7452006980802792,
"grad_norm": 0.5861342009392054,
"learning_rate": 5e-06,
"loss": 0.6877,
"step": 500
},
{
"epoch": 1.7801047120418847,
"grad_norm": 0.5441609154940179,
"learning_rate": 5e-06,
"loss": 0.687,
"step": 510
},
{
"epoch": 1.8150087260034904,
"grad_norm": 0.493678213169674,
"learning_rate": 5e-06,
"loss": 0.6865,
"step": 520
},
{
"epoch": 1.849912739965096,
"grad_norm": 0.7325770535166638,
"learning_rate": 5e-06,
"loss": 0.6889,
"step": 530
},
{
"epoch": 1.8848167539267016,
"grad_norm": 0.6627130061862745,
"learning_rate": 5e-06,
"loss": 0.683,
"step": 540
},
{
"epoch": 1.919720767888307,
"grad_norm": 0.5392435344182795,
"learning_rate": 5e-06,
"loss": 0.6869,
"step": 550
},
{
"epoch": 1.9546247818499127,
"grad_norm": 0.5374264329462486,
"learning_rate": 5e-06,
"loss": 0.6874,
"step": 560
},
{
"epoch": 1.9895287958115184,
"grad_norm": 0.4520265683973087,
"learning_rate": 5e-06,
"loss": 0.686,
"step": 570
},
{
"epoch": 2.024432809773124,
"grad_norm": 0.7616966695399988,
"learning_rate": 5e-06,
"loss": 0.6906,
"step": 580
},
{
"epoch": 2.0593368237347294,
"grad_norm": 0.8040603166806708,
"learning_rate": 5e-06,
"loss": 0.6442,
"step": 590
},
{
"epoch": 2.094240837696335,
"grad_norm": 0.5161547369323151,
"learning_rate": 5e-06,
"loss": 0.6462,
"step": 600
},
{
"epoch": 2.1291448516579408,
"grad_norm": 0.5566643583686863,
"learning_rate": 5e-06,
"loss": 0.6465,
"step": 610
},
{
"epoch": 2.164048865619546,
"grad_norm": 0.7404248587220047,
"learning_rate": 5e-06,
"loss": 0.6501,
"step": 620
},
{
"epoch": 2.1989528795811517,
"grad_norm": 0.602474854437427,
"learning_rate": 5e-06,
"loss": 0.6451,
"step": 630
},
{
"epoch": 2.2338568935427574,
"grad_norm": 0.663987316506295,
"learning_rate": 5e-06,
"loss": 0.6454,
"step": 640
},
{
"epoch": 2.268760907504363,
"grad_norm": 0.513580773343669,
"learning_rate": 5e-06,
"loss": 0.6464,
"step": 650
},
{
"epoch": 2.303664921465969,
"grad_norm": 0.5572279672626476,
"learning_rate": 5e-06,
"loss": 0.6495,
"step": 660
},
{
"epoch": 2.338568935427574,
"grad_norm": 0.6124769197735679,
"learning_rate": 5e-06,
"loss": 0.6466,
"step": 670
},
{
"epoch": 2.3734729493891797,
"grad_norm": 0.6689033731877824,
"learning_rate": 5e-06,
"loss": 0.6487,
"step": 680
},
{
"epoch": 2.4083769633507854,
"grad_norm": 0.6242198004638967,
"learning_rate": 5e-06,
"loss": 0.6479,
"step": 690
},
{
"epoch": 2.4432809773123907,
"grad_norm": 0.5816264133167447,
"learning_rate": 5e-06,
"loss": 0.6473,
"step": 700
},
{
"epoch": 2.4781849912739964,
"grad_norm": 0.6182232441775428,
"learning_rate": 5e-06,
"loss": 0.6486,
"step": 710
},
{
"epoch": 2.513089005235602,
"grad_norm": 0.6438939308222409,
"learning_rate": 5e-06,
"loss": 0.6523,
"step": 720
},
{
"epoch": 2.5479930191972078,
"grad_norm": 0.6270289995094971,
"learning_rate": 5e-06,
"loss": 0.6467,
"step": 730
},
{
"epoch": 2.5828970331588135,
"grad_norm": 0.5230942315565851,
"learning_rate": 5e-06,
"loss": 0.6491,
"step": 740
},
{
"epoch": 2.6178010471204187,
"grad_norm": 0.5004924007290114,
"learning_rate": 5e-06,
"loss": 0.6519,
"step": 750
},
{
"epoch": 2.6527050610820244,
"grad_norm": 0.5669482852337735,
"learning_rate": 5e-06,
"loss": 0.6501,
"step": 760
},
{
"epoch": 2.68760907504363,
"grad_norm": 0.6432274167649737,
"learning_rate": 5e-06,
"loss": 0.6487,
"step": 770
},
{
"epoch": 2.7225130890052354,
"grad_norm": 0.5996384982145978,
"learning_rate": 5e-06,
"loss": 0.6521,
"step": 780
},
{
"epoch": 2.757417102966841,
"grad_norm": 0.5437537271625021,
"learning_rate": 5e-06,
"loss": 0.6502,
"step": 790
},
{
"epoch": 2.7923211169284468,
"grad_norm": 0.5375883439387915,
"learning_rate": 5e-06,
"loss": 0.6494,
"step": 800
},
{
"epoch": 2.8272251308900525,
"grad_norm": 0.9130751404600511,
"learning_rate": 5e-06,
"loss": 0.6521,
"step": 810
},
{
"epoch": 2.862129144851658,
"grad_norm": 0.7285776360956339,
"learning_rate": 5e-06,
"loss": 0.6497,
"step": 820
},
{
"epoch": 2.8970331588132634,
"grad_norm": 0.6108923967332701,
"learning_rate": 5e-06,
"loss": 0.6491,
"step": 830
},
{
"epoch": 2.931937172774869,
"grad_norm": 0.6234985887070743,
"learning_rate": 5e-06,
"loss": 0.6471,
"step": 840
},
{
"epoch": 2.966841186736475,
"grad_norm": 0.6458996378236107,
"learning_rate": 5e-06,
"loss": 0.648,
"step": 850
},
{
"epoch": 2.994764397905759,
"step": 858,
"total_flos": 1436763197276160.0,
"train_loss": 0.7012652700597589,
"train_runtime": 12908.7913,
"train_samples_per_second": 34.079,
"train_steps_per_second": 0.066
}
],
"logging_steps": 10,
"max_steps": 858,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1436763197276160.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}