ProgramInNonsense committed on
Commit 778b628 · verified · 1 Parent(s): fadf73d

Training in progress, step 3450, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:22a9ddd35b3df7abd3e6cd6114478403a890fcb1640bd7224d3483b461c24d4f
+oid sha256:b8a25559e511b85c5aae5dfc33cd837d580d3e1921f3ac475da346fab5e037f2
 size 39131224

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ff83a9aacb5f52cacc114f9c70f23b7038636a8b55d7f0f10853d92591bee1be
+oid sha256:97b79fda2582c0271274523fbb7a5c5cceaa62b5e25c60b4f9604c7a8d758368
 size 78504766

last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d2dd81a9d2a1c821b6acf845d8a2bc7b9c9a920083a1eb8a7ccd7a4bdc764e46
+oid sha256:402ea5c8966f4a4d2b897a51ce5cf2cdad9ce9ef440b6d8cad953b85ed9745a4
 size 14244

last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5af34678d4362657736a6697e6bc5d13d1a967b12f171df00bcc4a7612a9b8a2
+oid sha256:36ebe1553452961b2895e7bd4eec6f90aa710519c9026af0f70af6770050886c
 size 1256
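
The four binary files above are stored as Git LFS pointers: each pointer records only the spec version, the blob's SHA-256 object id, and its size in bytes, and this commit swaps the old object ids for new ones while the sizes stay constant. As a minimal sketch (not part of this repo), a pointer can be checked against a downloaded blob with only the standard library; the paths below are hypothetical placeholders.

```python
import hashlib
import os

def parse_lfs_pointer(pointer_path):
    """Read a Git LFS pointer file into a dict of its 'key value' fields."""
    fields = {}
    with open(pointer_path, "r", encoding="utf-8") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            if key:
                fields[key] = value
    return fields

def blob_matches_pointer(pointer_path, blob_path):
    """Compare a local blob's SHA-256 digest and byte size against its LFS pointer."""
    fields = parse_lfs_pointer(pointer_path)
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])

    digest = hashlib.sha256()
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            digest.update(chunk)

    return (digest.hexdigest() == expected_oid
            and os.path.getsize(blob_path) == expected_size)

# Hypothetical paths: in a fresh clone the pointer text sits at the file path
# until `git lfs pull` replaces it with the actual blob.
print(blob_matches_pointer("adapter_model.safetensors.pointer",
                           "last-checkpoint/adapter_model.safetensors"))
```
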
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.5112539529800415,
   "best_model_checkpoint": "./output/checkpoint-2550",
-  "epoch": 6.273764258555133,
+  "epoch": 6.55893536121673,
   "eval_steps": 150,
-  "global_step": 3300,
+  "global_step": 3450,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2493,6 +2493,119 @@
       "eval_samples_per_second": 9.01,
       "eval_steps_per_second": 9.01,
       "step": 3300
+    },
+    {
+      "epoch": 6.2927756653992395,
+      "grad_norm": 1.0838931798934937,
+      "learning_rate": 3.3236681277484654e-05,
+      "loss": 0.0245,
+      "step": 3310
+    },
+    {
+      "epoch": 6.311787072243346,
+      "grad_norm": 1.420167326927185,
+      "learning_rate": 3.2883208595437584e-05,
+      "loss": 0.0317,
+      "step": 3320
+    },
+    {
+      "epoch": 6.330798479087452,
+      "grad_norm": 0.5360614061355591,
+      "learning_rate": 3.2530953343684136e-05,
+      "loss": 0.0365,
+      "step": 3330
+    },
+    {
+      "epoch": 6.349809885931559,
+      "grad_norm": 3.0224130153656006,
+      "learning_rate": 3.217993000205799e-05,
+      "loss": 0.03,
+      "step": 3340
+    },
+    {
+      "epoch": 6.3688212927756656,
+      "grad_norm": 0.7197875380516052,
+      "learning_rate": 3.1830152999753903e-05,
+      "loss": 0.0358,
+      "step": 3350
+    },
+    {
+      "epoch": 6.387832699619771,
+      "grad_norm": 1.1931145191192627,
+      "learning_rate": 3.148163671473439e-05,
+      "loss": 0.0332,
+      "step": 3360
+    },
+    {
+      "epoch": 6.406844106463878,
+      "grad_norm": 0.8724992275238037,
+      "learning_rate": 3.113439547313892e-05,
+      "loss": 0.0292,
+      "step": 3370
+    },
+    {
+      "epoch": 6.425855513307985,
+      "grad_norm": 2.2905428409576416,
+      "learning_rate": 3.0788443548694874e-05,
+      "loss": 0.0369,
+      "step": 3380
+    },
+    {
+      "epoch": 6.444866920152092,
+      "grad_norm": 0.48562759160995483,
+      "learning_rate": 3.0443795162130876e-05,
+      "loss": 0.0309,
+      "step": 3390
+    },
+    {
+      "epoch": 6.4638783269961975,
+      "grad_norm": 1.1909234523773193,
+      "learning_rate": 3.0100464480592185e-05,
+      "loss": 0.0405,
+      "step": 3400
+    },
+    {
+      "epoch": 6.482889733840304,
+      "grad_norm": 1.2226147651672363,
+      "learning_rate": 2.9758465617058404e-05,
+      "loss": 0.0344,
+      "step": 3410
+    },
+    {
+      "epoch": 6.501901140684411,
+      "grad_norm": 0.8750718235969543,
+      "learning_rate": 2.9417812629763285e-05,
+      "loss": 0.0321,
+      "step": 3420
+    },
+    {
+      "epoch": 6.520912547528517,
+      "grad_norm": 1.249475121498108,
+      "learning_rate": 2.9078519521616894e-05,
+      "loss": 0.0329,
+      "step": 3430
+    },
+    {
+      "epoch": 6.5399239543726235,
+      "grad_norm": 1.1932651996612549,
+      "learning_rate": 2.8740600239630002e-05,
+      "loss": 0.033,
+      "step": 3440
+    },
+    {
+      "epoch": 6.55893536121673,
+      "grad_norm": 0.5336318016052246,
+      "learning_rate": 2.8404068674340714e-05,
+      "loss": 0.0323,
+      "step": 3450
+    },
+    {
+      "epoch": 6.55893536121673,
+      "eval_loss": 0.5230394005775452,
+      "eval_runtime": 52.7826,
+      "eval_samples_per_second": 8.867,
+      "eval_steps_per_second": 8.867,
+      "step": 3450
     }
   ],
   "logging_steps": 10,
@@ -2512,7 +2625,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3367221059607552.0,
+  "total_flos": 3521384583865344.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null