error577 committed on
Commit 857135f · verified · 1 Parent(s): 4a8a86a

Training in progress, step 75, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:30c5726baa7de3e8afd59b201515892c486d5172c9ec6ea12dff4da33cb979b6
+oid sha256:261f8727997945eb87e0bb82facc9b388349955b33cdb62b6807af15ec1fdceb
 size 6804608

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:47b1ec98f4b0bee92c11b84853dc1756dadfa77499fe2a6d647f5d5f0420b3a0
+oid sha256:a6c1e6436794826f46c90dbc8a3888e0168b34c222f452abc2c1e417b5fed11c
 size 3633530

last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:602fb08d1680b53d6bd33a99882ac9da2eca9540b980d41f3e3fe7abb49c54f9
+oid sha256:682fa06fe8ef6e09ffdeef985646187c38172ce94e6a1d0b34ec5f6df8451d9d
 size 14244

last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a89ffc445067fef9d6d02bb3ff9e61d5e3209e6fa67c7259b3b364b90dbaa2cd
+oid sha256:f23e2214bcafb439ebc7528dcc283ef6218d509a276c0baff0743503ecbe3d92
 size 1064

last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
 "best_metric": null,
 "best_model_checkpoint": null,
-"epoch": 0.17185821697099893,
+"epoch": 0.2577873254564984,
 "eval_steps": 50,
-"global_step": 50,
+"global_step": 75,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -373,6 +373,181 @@
 "eval_samples_per_second": 29.26,
 "eval_steps_per_second": 29.26,
 "step": 50
+},
+{
+"epoch": 0.1752953813104189,
+"grad_norm": 0.3440859317779541,
+"learning_rate": 5.695865504800327e-05,
+"loss": 2.2643,
+"step": 51
+},
+{
+"epoch": 0.17873254564983887,
+"grad_norm": 0.3143952488899231,
+"learning_rate": 5.522642316338268e-05,
+"loss": 2.3741,
+"step": 52
+},
+{
+"epoch": 0.18216970998925885,
+"grad_norm": 0.5445249676704407,
+"learning_rate": 5.348782368720626e-05,
+"loss": 2.3737,
+"step": 53
+},
+{
+"epoch": 0.18560687432867884,
+"grad_norm": 0.29417288303375244,
+"learning_rate": 5.174497483512506e-05,
+"loss": 2.2626,
+"step": 54
+},
+{
+"epoch": 0.18904403866809882,
+"grad_norm": 0.2821301519870758,
+"learning_rate": 5e-05,
+"loss": 2.2226,
+"step": 55
+},
+{
+"epoch": 0.1924812030075188,
+"grad_norm": 0.5049765706062317,
+"learning_rate": 4.825502516487497e-05,
+"loss": 2.2056,
+"step": 56
+},
+{
+"epoch": 0.19591836734693877,
+"grad_norm": 0.2741824984550476,
+"learning_rate": 4.6512176312793736e-05,
+"loss": 2.0916,
+"step": 57
+},
+{
+"epoch": 0.19935553168635875,
+"grad_norm": 0.2932296097278595,
+"learning_rate": 4.477357683661734e-05,
+"loss": 2.1948,
+"step": 58
+},
+{
+"epoch": 0.20279269602577873,
+"grad_norm": 0.2703547477722168,
+"learning_rate": 4.3041344951996746e-05,
+"loss": 2.2377,
+"step": 59
+},
+{
+"epoch": 0.20622986036519872,
+"grad_norm": 0.5145007371902466,
+"learning_rate": 4.131759111665349e-05,
+"loss": 2.2336,
+"step": 60
+},
+{
+"epoch": 0.20966702470461868,
+"grad_norm": 0.3122769594192505,
+"learning_rate": 3.960441545911204e-05,
+"loss": 2.1879,
+"step": 61
+},
+{
+"epoch": 0.21310418904403866,
+"grad_norm": 0.2763988673686981,
+"learning_rate": 3.790390522001662e-05,
+"loss": 2.2149,
+"step": 62
+},
+{
+"epoch": 0.21654135338345865,
+"grad_norm": 0.31647786498069763,
+"learning_rate": 3.6218132209150045e-05,
+"loss": 2.1863,
+"step": 63
+},
+{
+"epoch": 0.21997851772287863,
+"grad_norm": 0.5407307744026184,
+"learning_rate": 3.4549150281252636e-05,
+"loss": 2.2516,
+"step": 64
+},
+{
+"epoch": 0.22341568206229862,
+"grad_norm": 0.31190451979637146,
+"learning_rate": 3.289899283371657e-05,
+"loss": 2.2059,
+"step": 65
+},
+{
+"epoch": 0.22685284640171857,
+"grad_norm": 0.3045297861099243,
+"learning_rate": 3.12696703292044e-05,
+"loss": 2.2161,
+"step": 66
+},
+{
+"epoch": 0.23029001074113856,
+"grad_norm": 0.3111981153488159,
+"learning_rate": 2.9663167846209998e-05,
+"loss": 2.2722,
+"step": 67
+},
+{
+"epoch": 0.23372717508055854,
+"grad_norm": 0.5072479844093323,
+"learning_rate": 2.8081442660546125e-05,
+"loss": 2.1672,
+"step": 68
+},
+{
+"epoch": 0.23716433941997853,
+"grad_norm": 0.4372091293334961,
+"learning_rate": 2.6526421860705473e-05,
+"loss": 2.3158,
+"step": 69
+},
+{
+"epoch": 0.24060150375939848,
+"grad_norm": 0.28215134143829346,
+"learning_rate": 2.500000000000001e-05,
+"loss": 2.2365,
+"step": 70
+},
+{
+"epoch": 0.24403866809881847,
+"grad_norm": 0.3402535915374756,
+"learning_rate": 2.350403678833976e-05,
+"loss": 2.2135,
+"step": 71
+},
+{
+"epoch": 0.24747583243823845,
+"grad_norm": 0.46998509764671326,
+"learning_rate": 2.2040354826462668e-05,
+"loss": 2.3625,
+"step": 72
+},
+{
+"epoch": 0.25091299677765844,
+"grad_norm": 0.2795167565345764,
+"learning_rate": 2.061073738537635e-05,
+"loss": 2.1809,
+"step": 73
+},
+{
+"epoch": 0.2543501611170784,
+"grad_norm": 0.37644481658935547,
+"learning_rate": 1.9216926233717085e-05,
+"loss": 2.2632,
+"step": 74
+},
+{
+"epoch": 0.2577873254564984,
+"grad_norm": 0.32024654746055603,
+"learning_rate": 1.7860619515673033e-05,
+"loss": 2.2528,
+"step": 75
 }
 ],
 "logging_steps": 1,
@@ -392,7 +567,7 @@
 "attributes": {}
 }
 },
-"total_flos": 1068216062312448.0,
+"total_flos": 1619041760575488.0,
 "train_batch_size": 1,
 "trial_name": null,
 "trial_params": null