File size: 18,150 Bytes
841a5a9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 312,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.003205128205128205,
"grad_norm": 469662.9144643782,
"learning_rate": 1.5625e-08,
"logits/chosen": -0.3432708978652954,
"logits/rejected": -0.332830011844635,
"logps/chosen": -140.40289306640625,
"logps/rejected": -115.87382507324219,
"loss": 120282.0,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.03205128205128205,
"grad_norm": 568441.5566994098,
"learning_rate": 1.5624999999999999e-07,
"logits/chosen": -0.5443148016929626,
"logits/rejected": -0.5515072345733643,
"logps/chosen": -89.99518585205078,
"logps/rejected": -90.88400268554688,
"loss": 125155.3333,
"rewards/accuracies": 0.3333333432674408,
"rewards/chosen": -0.000289025716483593,
"rewards/margins": -9.353376663057134e-05,
"rewards/rejected": -0.0001954919280251488,
"step": 10
},
{
"epoch": 0.0641025641025641,
"grad_norm": 464338.6645889823,
"learning_rate": 3.1249999999999997e-07,
"logits/chosen": -0.4427386224269867,
"logits/rejected": -0.4934562146663666,
"logps/chosen": -90.24401092529297,
"logps/rejected": -95.63074493408203,
"loss": 124284.2,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.003463043598458171,
"rewards/margins": 0.0009749190066941082,
"rewards/rejected": -0.004437962546944618,
"step": 20
},
{
"epoch": 0.09615384615384616,
"grad_norm": 480260.34201998485,
"learning_rate": 4.6874999999999996e-07,
"logits/chosen": -0.5152963399887085,
"logits/rejected": -0.5460027456283569,
"logps/chosen": -94.16231536865234,
"logps/rejected": -100.62825775146484,
"loss": 124351.825,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.006953537464141846,
"rewards/margins": 0.0006809952319599688,
"rewards/rejected": -0.007634532637894154,
"step": 30
},
{
"epoch": 0.1282051282051282,
"grad_norm": 433885.6131804333,
"learning_rate": 4.857142857142857e-07,
"logits/chosen": -0.5599047541618347,
"logits/rejected": -0.5487984418869019,
"logps/chosen": -93.6915512084961,
"logps/rejected": -95.92937469482422,
"loss": 124131.1375,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.008278829045593739,
"rewards/margins": 0.0008631674572825432,
"rewards/rejected": -0.009141995571553707,
"step": 40
},
{
"epoch": 0.16025641025641027,
"grad_norm": 491164.7661120398,
"learning_rate": 4.6785714285714283e-07,
"logits/chosen": -0.5146197080612183,
"logits/rejected": -0.49345073103904724,
"logps/chosen": -109.50101470947266,
"logps/rejected": -104.8797378540039,
"loss": 125644.0,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.006485734134912491,
"rewards/margins": 0.0027839418035000563,
"rewards/rejected": -0.00926967617124319,
"step": 50
},
{
"epoch": 0.19230769230769232,
"grad_norm": 558888.3723594319,
"learning_rate": 4.5e-07,
"logits/chosen": -0.6499918699264526,
"logits/rejected": -0.6553579568862915,
"logps/chosen": -114.01820373535156,
"logps/rejected": -111.94651794433594,
"loss": 124503.425,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.011266408488154411,
"rewards/margins": 0.002909548580646515,
"rewards/rejected": -0.014175957068800926,
"step": 60
},
{
"epoch": 0.22435897435897437,
"grad_norm": 536020.620286275,
"learning_rate": 4.3214285714285713e-07,
"logits/chosen": -0.5960813760757446,
"logits/rejected": -0.5772069692611694,
"logps/chosen": -87.95893859863281,
"logps/rejected": -90.57078552246094,
"loss": 124475.325,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.007508446462452412,
"rewards/margins": 0.0036646847147494555,
"rewards/rejected": -0.011173130944371223,
"step": 70
},
{
"epoch": 0.2564102564102564,
"grad_norm": 579545.0571782525,
"learning_rate": 4.142857142857143e-07,
"logits/chosen": -0.5821112394332886,
"logits/rejected": -0.528997540473938,
"logps/chosen": -87.23516082763672,
"logps/rejected": -85.477783203125,
"loss": 125293.725,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.012744182720780373,
"rewards/margins": 0.0017470993334427476,
"rewards/rejected": -0.014491280540823936,
"step": 80
},
{
"epoch": 0.28846153846153844,
"grad_norm": 702613.3359581099,
"learning_rate": 3.9642857142857137e-07,
"logits/chosen": -0.5383504629135132,
"logits/rejected": -0.4826398491859436,
"logps/chosen": -94.10514831542969,
"logps/rejected": -94.60591888427734,
"loss": 124419.85,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.006864988245069981,
"rewards/margins": 0.002892577787861228,
"rewards/rejected": -0.009757566265761852,
"step": 90
},
{
"epoch": 0.32051282051282054,
"grad_norm": 574962.0888861647,
"learning_rate": 3.785714285714285e-07,
"logits/chosen": -0.6083141565322876,
"logits/rejected": -0.6077857613563538,
"logps/chosen": -90.84620666503906,
"logps/rejected": -94.3597640991211,
"loss": 123532.0125,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.010823920369148254,
"rewards/margins": 0.001849750755354762,
"rewards/rejected": -0.01267367135733366,
"step": 100
},
{
"epoch": 0.3525641025641026,
"grad_norm": 582134.6500433815,
"learning_rate": 3.607142857142857e-07,
"logits/chosen": -0.5725646615028381,
"logits/rejected": -0.5323026776313782,
"logps/chosen": -79.6702651977539,
"logps/rejected": -76.59967041015625,
"loss": 124798.1,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.004521737806499004,
"rewards/margins": 0.0010833492269739509,
"rewards/rejected": -0.005605087615549564,
"step": 110
},
{
"epoch": 0.38461538461538464,
"grad_norm": 661819.0222539164,
"learning_rate": 3.4285714285714286e-07,
"logits/chosen": -0.5654035210609436,
"logits/rejected": -0.5707298517227173,
"logps/chosen": -73.98536682128906,
"logps/rejected": -84.55022430419922,
"loss": 125812.0125,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.007425880525261164,
"rewards/margins": 0.006358510348945856,
"rewards/rejected": -0.01378439087420702,
"step": 120
},
{
"epoch": 0.4166666666666667,
"grad_norm": 620371.8592043375,
"learning_rate": 3.25e-07,
"logits/chosen": -0.7043228149414062,
"logits/rejected": -0.7306665182113647,
"logps/chosen": -100.49541473388672,
"logps/rejected": -107.61614990234375,
"loss": 125617.475,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.006788595579564571,
"rewards/margins": 0.0021920702420175076,
"rewards/rejected": -0.008980666287243366,
"step": 130
},
{
"epoch": 0.44871794871794873,
"grad_norm": 633474.7695131563,
"learning_rate": 3.0714285714285716e-07,
"logits/chosen": -0.6854395270347595,
"logits/rejected": -0.627780556678772,
"logps/chosen": -91.88723754882812,
"logps/rejected": -87.9045639038086,
"loss": 124316.25,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.010259262286126614,
"rewards/margins": 0.0011669063242152333,
"rewards/rejected": -0.011426168493926525,
"step": 140
},
{
"epoch": 0.4807692307692308,
"grad_norm": 696715.0430078872,
"learning_rate": 2.892857142857143e-07,
"logits/chosen": -0.6097627282142639,
"logits/rejected": -0.645863950252533,
"logps/chosen": -102.78446197509766,
"logps/rejected": -106.5654525756836,
"loss": 123236.9375,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.012269060127437115,
"rewards/margins": 0.0026497889775782824,
"rewards/rejected": -0.014918850734829903,
"step": 150
},
{
"epoch": 0.5128205128205128,
"grad_norm": 787986.1807345189,
"learning_rate": 2.714285714285714e-07,
"logits/chosen": -0.5826394557952881,
"logits/rejected": -0.590654730796814,
"logps/chosen": -90.98385620117188,
"logps/rejected": -97.3979263305664,
"loss": 121865.35,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.011020239442586899,
"rewards/margins": 0.005359311122447252,
"rewards/rejected": -0.016379551962018013,
"step": 160
},
{
"epoch": 0.5448717948717948,
"grad_norm": 736957.014479021,
"learning_rate": 2.5357142857142855e-07,
"logits/chosen": -0.5794961452484131,
"logits/rejected": -0.6191390156745911,
"logps/chosen": -98.76277160644531,
"logps/rejected": -103.96248626708984,
"loss": 123882.0625,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.009380698204040527,
"rewards/margins": 0.0029742049518972635,
"rewards/rejected": -0.012354902923107147,
"step": 170
},
{
"epoch": 0.5769230769230769,
"grad_norm": 733809.4054912812,
"learning_rate": 2.357142857142857e-07,
"logits/chosen": -0.6393710970878601,
"logits/rejected": -0.6236029863357544,
"logps/chosen": -94.88532257080078,
"logps/rejected": -92.90126037597656,
"loss": 123955.3625,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.011159100569784641,
"rewards/margins": 0.0009313292102888227,
"rewards/rejected": -0.012090427801012993,
"step": 180
},
{
"epoch": 0.6089743589743589,
"grad_norm": 699287.0532059986,
"learning_rate": 2.1785714285714284e-07,
"logits/chosen": -0.534403920173645,
"logits/rejected": -0.5387021899223328,
"logps/chosen": -92.89164733886719,
"logps/rejected": -97.23823547363281,
"loss": 124855.6375,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.009661735966801643,
"rewards/margins": 0.004148019477725029,
"rewards/rejected": -0.013809755444526672,
"step": 190
},
{
"epoch": 0.6410256410256411,
"grad_norm": 694927.9752367702,
"learning_rate": 2e-07,
"logits/chosen": -0.696746289730072,
"logits/rejected": -0.7076197266578674,
"logps/chosen": -107.4284896850586,
"logps/rejected": -108.21855163574219,
"loss": 123333.7125,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.009755025617778301,
"rewards/margins": 0.004948892164975405,
"rewards/rejected": -0.014703919179737568,
"step": 200
},
{
"epoch": 0.6730769230769231,
"grad_norm": 664529.2459223642,
"learning_rate": 1.8214285714285714e-07,
"logits/chosen": -0.5494934320449829,
"logits/rejected": -0.5753802061080933,
"logps/chosen": -87.67240905761719,
"logps/rejected": -95.261474609375,
"loss": 123261.425,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.010124283842742443,
"rewards/margins": 0.006216048263013363,
"rewards/rejected": -0.016340332105755806,
"step": 210
},
{
"epoch": 0.7051282051282052,
"grad_norm": 752626.4895790943,
"learning_rate": 1.6428571428571429e-07,
"logits/chosen": -0.4907689094543457,
"logits/rejected": -0.5003972053527832,
"logps/chosen": -70.47917175292969,
"logps/rejected": -73.15069580078125,
"loss": 122213.175,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.007994843646883965,
"rewards/margins": 0.005159543361514807,
"rewards/rejected": -0.01315438561141491,
"step": 220
},
{
"epoch": 0.7371794871794872,
"grad_norm": 690877.8863774311,
"learning_rate": 1.4642857142857143e-07,
"logits/chosen": -0.6318084597587585,
"logits/rejected": -0.6108121275901794,
"logps/chosen": -103.8791732788086,
"logps/rejected": -110.17147064208984,
"loss": 122652.725,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.008601801469922066,
"rewards/margins": 0.00492085749283433,
"rewards/rejected": -0.013522659428417683,
"step": 230
},
{
"epoch": 0.7692307692307693,
"grad_norm": 706116.311213081,
"learning_rate": 1.2857142857142855e-07,
"logits/chosen": -0.6082527041435242,
"logits/rejected": -0.6249019503593445,
"logps/chosen": -85.2287826538086,
"logps/rejected": -85.55986785888672,
"loss": 123191.8125,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.009229953400790691,
"rewards/margins": 0.0028918907046318054,
"rewards/rejected": -0.012121843174099922,
"step": 240
},
{
"epoch": 0.8012820512820513,
"grad_norm": 762557.0917436344,
"learning_rate": 1.107142857142857e-07,
"logits/chosen": -0.5747382640838623,
"logits/rejected": -0.6171086430549622,
"logps/chosen": -94.86370086669922,
"logps/rejected": -107.7577896118164,
"loss": 124156.2875,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.01214513834565878,
"rewards/margins": 0.0048862299881875515,
"rewards/rejected": -0.017031369730830193,
"step": 250
},
{
"epoch": 0.8333333333333334,
"grad_norm": 720981.6104523474,
"learning_rate": 9.285714285714286e-08,
"logits/chosen": -0.6732273101806641,
"logits/rejected": -0.6552490592002869,
"logps/chosen": -93.73551940917969,
"logps/rejected": -95.43331146240234,
"loss": 125028.5375,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.00893234834074974,
"rewards/margins": 0.0013536241604015231,
"rewards/rejected": -0.010285971686244011,
"step": 260
},
{
"epoch": 0.8653846153846154,
"grad_norm": 632266.1767602823,
"learning_rate": 7.5e-08,
"logits/chosen": -0.6526715159416199,
"logits/rejected": -0.6659075617790222,
"logps/chosen": -104.94581604003906,
"logps/rejected": -123.0511245727539,
"loss": 122176.2,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.014885579235851765,
"rewards/margins": 0.005144301801919937,
"rewards/rejected": -0.020029881969094276,
"step": 270
},
{
"epoch": 0.8974358974358975,
"grad_norm": 886078.1218696759,
"learning_rate": 5.714285714285714e-08,
"logits/chosen": -0.7189252972602844,
"logits/rejected": -0.7166494131088257,
"logps/chosen": -110.89029693603516,
"logps/rejected": -116.58308410644531,
"loss": 123335.0875,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.011831143870949745,
"rewards/margins": 0.0017397601623088121,
"rewards/rejected": -0.013570902869105339,
"step": 280
},
{
"epoch": 0.9294871794871795,
"grad_norm": 651330.6783592023,
"learning_rate": 3.9285714285714285e-08,
"logits/chosen": -0.6643999814987183,
"logits/rejected": -0.6874372959136963,
"logps/chosen": -98.12127685546875,
"logps/rejected": -97.89227294921875,
"loss": 124226.45,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.011173027567565441,
"rewards/margins": 0.002893571276217699,
"rewards/rejected": -0.014066601172089577,
"step": 290
},
{
"epoch": 0.9615384615384616,
"grad_norm": 738901.7736935538,
"learning_rate": 2.142857142857143e-08,
"logits/chosen": -0.7286126017570496,
"logits/rejected": -0.6981081962585449,
"logps/chosen": -86.98307037353516,
"logps/rejected": -90.56110382080078,
"loss": 123651.1375,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.010809152387082577,
"rewards/margins": 0.00516370078548789,
"rewards/rejected": -0.01597285456955433,
"step": 300
},
{
"epoch": 0.9935897435897436,
"grad_norm": 792583.3715918568,
"learning_rate": 3.571428571428571e-09,
"logits/chosen": -0.6979326009750366,
"logits/rejected": -0.6595016717910767,
"logps/chosen": -99.2912368774414,
"logps/rejected": -106.7309341430664,
"loss": 125171.2,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.012163314037024975,
"rewards/margins": 0.002274113241583109,
"rewards/rejected": -0.014437426812946796,
"step": 310
},
{
"epoch": 1.0,
"step": 312,
"total_flos": 0.0,
"train_loss": 124066.95723157052,
"train_runtime": 2762.769,
"train_samples_per_second": 7.224,
"train_steps_per_second": 0.113
}
],
"logging_steps": 10,
"max_steps": 312,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}
|