cwaud committed on
Commit
b4185eb
1 Parent(s): 7cb0fc3

Training in progress, step 200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab64d18f054dfcdea19d99ed5b73097e8f275079ec51c2a5aed567137d2c1dc4
3
  size 42002136
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd3d66cb4b394c16d9f5180b30c893c4433cf7f698436fa62fd3ebe42686a21a
3
  size 42002136
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11a74b01c0d89bed8cf29448cc690a19b8b6deda66a64663fb9b39eedc0d66f2
3
  size 21822612
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74e1d32b6a73f8e6d5548b4ae4dcd2255ca42ca3b7df3d113fd09da31fb4bb06
3
  size 21822612
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aaed7d90a5f89319fda24880971a10d1089f97da9f01931c815ca0b6e7db0f70
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ca64702d15008eb5be59666b7714ec889b55ee755aef1a691dbe423dca958b1
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa716f361012bfea1a362903884260726692b93d0166a77b9b223670c0a7732f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca9a25c72339c898b564e0c464a3f6fc75bbeec408008928b7ed05533156b98c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0010948828264821275,
5
  "eval_steps": 50,
6
- "global_step": 195,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1404,6 +1404,49 @@
1404
  "learning_rate": 3.415506993330153e-07,
1405
  "loss": 0.6357,
1406
  "step": 195
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1407
  }
1408
  ],
1409
  "logging_steps": 1,
@@ -1418,12 +1461,12 @@
1418
  "should_evaluate": false,
1419
  "should_log": false,
1420
  "should_save": true,
1421
- "should_training_stop": false
1422
  },
1423
  "attributes": {}
1424
  }
1425
  },
1426
- "total_flos": 1.4406500688592896e+17,
1427
  "train_batch_size": 1,
1428
  "trial_name": null,
1429
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0011229567451098742,
5
  "eval_steps": 50,
6
+ "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1404
  "learning_rate": 3.415506993330153e-07,
1405
  "loss": 0.6357,
1406
  "step": 195
1407
+ },
1408
+ {
1409
+ "epoch": 0.001100497610207677,
1410
+ "grad_norm": 0.6193792819976807,
1411
+ "learning_rate": 2.1863727812254653e-07,
1412
+ "loss": 0.8182,
1413
+ "step": 196
1414
+ },
1415
+ {
1416
+ "epoch": 0.0011061123939332261,
1417
+ "grad_norm": 0.6518222093582153,
1418
+ "learning_rate": 1.230030851695263e-07,
1419
+ "loss": 0.7356,
1420
+ "step": 197
1421
+ },
1422
+ {
1423
+ "epoch": 0.0011117271776587756,
1424
+ "grad_norm": 0.6630931496620178,
1425
+ "learning_rate": 5.467426590739511e-08,
1426
+ "loss": 0.738,
1427
+ "step": 198
1428
+ },
1429
+ {
1430
+ "epoch": 0.001117341961384325,
1431
+ "grad_norm": 0.5655919909477234,
1432
+ "learning_rate": 1.3669500753099585e-08,
1433
+ "loss": 0.6549,
1434
+ "step": 199
1435
+ },
1436
+ {
1437
+ "epoch": 0.0011229567451098742,
1438
+ "grad_norm": 0.4997493624687195,
1439
+ "learning_rate": 0.0,
1440
+ "loss": 0.6157,
1441
+ "step": 200
1442
+ },
1443
+ {
1444
+ "epoch": 0.0011229567451098742,
1445
+ "eval_loss": 0.7180939316749573,
1446
+ "eval_runtime": 21573.751,
1447
+ "eval_samples_per_second": 1.738,
1448
+ "eval_steps_per_second": 1.738,
1449
+ "step": 200
1450
  }
1451
  ],
1452
  "logging_steps": 1,
 
1461
  "should_evaluate": false,
1462
  "should_log": false,
1463
  "should_save": true,
1464
+ "should_training_stop": true
1465
  },
1466
  "attributes": {}
1467
  }
1468
  },
1469
+ "total_flos": 1.477589814214656e+17,
1470
  "train_batch_size": 1,
1471
  "trial_name": null,
1472
  "trial_params": null