neuralwonderland committed
Commit bea0195 · verified · 1 Parent(s): d67eea0

Training in progress, step 3300, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:704458e22083d426be5e0b2430ec99e95658e2146eeda1abbadddcef1b66afa0
+oid sha256:d2eb2f4a1272ed7c45d0d57597219e288173a36ff1d96174f964cf75aa7e50f1
 size 69527352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:516e6b0d7cfd706f5b04b458cc6f13af606fbcb05d80be45f02aa990d2fa7939
+oid sha256:91afd86c3f0645431d67d9e9caef6058ba72bbca9804fa1b34ae225fb0fdcdfc
 size 139313554
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f7a7bdab08336c0f7233e606ce96075425fa9cf729719c53f2840e05d72ac534
+oid sha256:89543781f745d82510d3991bd8bd26751b68ca2499fbac19015521a55810e601
 size 14308
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c2ad8a27e92c879b969b5845f60871e76a73be3547e482cc45027df5fe072f15
+oid sha256:5af34678d4362657736a6697e6bc5d13d1a967b12f171df00bcc4a7612a9b8a2
 size 1256
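
Each of the four files above is stored as a Git LFS pointer: the repository tracks only the object's `oid sha256:` digest and byte `size`, while the payload lives in LFS storage. A minimal sketch, not part of the commit, of checking a locally downloaded file against the new pointer values (the local path is illustrative; the digest and size are copied from the adapter_model.safetensors pointer above):

# verify_lfs_pointer.py -- sketch only; assumes the checkpoint was pulled locally
import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    # Stream the file so large checkpoints do not have to fit in memory.
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

local_file = Path("last-checkpoint/adapter_model.safetensors")  # hypothetical local copy
expected_oid = "d2eb2f4a1272ed7c45d0d57597219e288173a36ff1d96174f964cf75aa7e50f1"  # from the pointer
expected_size = 69527352  # from the pointer

assert local_file.stat().st_size == expected_size, "size mismatch"
assert sha256_of(local_file) == expected_oid, "sha256 mismatch"
print("adapter_model.safetensors matches its LFS pointer")
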
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.801069974899292,
-  "best_model_checkpoint": "./output/checkpoint-3150",
-  "epoch": 0.39159622078567874,
+  "best_metric": 0.7923147678375244,
+  "best_model_checkpoint": "./output/checkpoint-3300",
+  "epoch": 0.4102436598707111,
   "eval_steps": 150,
-  "global_step": 3150,
+  "global_step": 3300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2380,6 +2380,119 @@
       "eval_samples_per_second": 8.834,
       "eval_steps_per_second": 8.834,
       "step": 3150
+    },
+    {
+      "epoch": 0.3928393833913476,
+      "grad_norm": 1.8093242645263672,
+      "learning_rate": 3.8674868876822395e-05,
+      "loss": 0.5608,
+      "step": 3160
+    },
+    {
+      "epoch": 0.3940825459970164,
+      "grad_norm": 1.6383775472640991,
+      "learning_rate": 3.83049049164295e-05,
+      "loss": 0.5706,
+      "step": 3170
+    },
+    {
+      "epoch": 0.39532570860268523,
+      "grad_norm": 1.7762494087219238,
+      "learning_rate": 3.793593552162978e-05,
+      "loss": 0.6272,
+      "step": 3180
+    },
+    {
+      "epoch": 0.39656887120835405,
+      "grad_norm": 1.989702582359314,
+      "learning_rate": 3.75679758593099e-05,
+      "loss": 0.6268,
+      "step": 3190
+    },
+    {
+      "epoch": 0.39781203381402286,
+      "grad_norm": 1.2394602298736572,
+      "learning_rate": 3.720104105485039e-05,
+      "loss": 0.5745,
+      "step": 3200
+    },
+    {
+      "epoch": 0.39905519641969167,
+      "grad_norm": 1.6666808128356934,
+      "learning_rate": 3.6835146191503885e-05,
+      "loss": 0.6287,
+      "step": 3210
+    },
+    {
+      "epoch": 0.40029835902536054,
+      "grad_norm": 0.926642119884491,
+      "learning_rate": 3.647030630977508e-05,
+      "loss": 0.6038,
+      "step": 3220
+    },
+    {
+      "epoch": 0.40154152163102935,
+      "grad_norm": 1.3358100652694702,
+      "learning_rate": 3.6106536406802524e-05,
+      "loss": 0.5941,
+      "step": 3230
+    },
+    {
+      "epoch": 0.40278468423669817,
+      "grad_norm": 1.339179277420044,
+      "learning_rate": 3.5743851435742176e-05,
+      "loss": 0.5888,
+      "step": 3240
+    },
+    {
+      "epoch": 0.404027846842367,
+      "grad_norm": 1.4704395532608032,
+      "learning_rate": 3.538226630515262e-05,
+      "loss": 0.5113,
+      "step": 3250
+    },
+    {
+      "epoch": 0.4052710094480358,
+      "grad_norm": 1.2576725482940674,
+      "learning_rate": 3.502179587838238e-05,
+      "loss": 0.5874,
+      "step": 3260
+    },
+    {
+      "epoch": 0.4065141720537046,
+      "grad_norm": 1.1804664134979248,
+      "learning_rate": 3.46624549729588e-05,
+      "loss": 0.6054,
+      "step": 3270
+    },
+    {
+      "epoch": 0.40775733465937347,
+      "grad_norm": 1.6472457647323608,
+      "learning_rate": 3.430425835997908e-05,
+      "loss": 0.6168,
+      "step": 3280
+    },
+    {
+      "epoch": 0.4090004972650423,
+      "grad_norm": 1.3699522018432617,
+      "learning_rate": 3.394722076350302e-05,
+      "loss": 0.5227,
+      "step": 3290
+    },
+    {
+      "epoch": 0.4102436598707111,
+      "grad_norm": 0.9297524690628052,
+      "learning_rate": 3.359135685994781e-05,
+      "loss": 0.5818,
+      "step": 3300
+    },
+    {
+      "epoch": 0.4102436598707111,
+      "eval_loss": 0.7923147678375244,
+      "eval_runtime": 53.7845,
+      "eval_samples_per_second": 9.296,
+      "eval_steps_per_second": 9.296,
+      "step": 3300
     }
   ],
   "logging_steps": 10,
@@ -2399,7 +2512,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.0945600710137856e+17,
+  "total_flos": 1.1468266790135808e+17,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null