iamnguyen commited on
Commit
4817621
·
verified ·
1 Parent(s): 1b5f541

Training in progress, step 208, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b63e87e68bd6b0e7db190c08a005cbe1b19645de1654d4a6dbf7a6ef2dfcb4d
3
  size 479769104
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33d1f902086bf162f42a891580c1d8f009b186a1b247fa2e5d0f3c8b552ca438
3
  size 479769104
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3baa6c474e91338e28d2c11997d97f2255b26f7552ed683357ff6a193f69797
3
  size 240728084
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffbe6c82a241eec13776bec8b0245f431cae9a909fe4fa531b5a99f34a39e259
3
  size 240728084
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:089115101bfed8297ba3fb18cc84d56ea340bae11356e34eac025d2beac1caf3
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fb5768ef05fd83332549c08f206d2683f104437b7d63ada2cc0d97372b46d74
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.01241304905619208,
5
  "eval_steps": 500,
6
- "global_step": 192,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1351,6 +1351,118 @@
1351
  "learning_rate": 9.999855928766113e-06,
1352
  "loss": 1.4314,
1353
  "step": 192
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1354
  }
1355
  ],
1356
  "logging_steps": 1,
@@ -1370,7 +1482,7 @@
1370
  "attributes": {}
1371
  }
1372
  },
1373
- "total_flos": 1.232522107274281e+17,
1374
  "train_batch_size": 2,
1375
  "trial_name": null,
1376
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.013447469810874753,
5
  "eval_steps": 500,
6
+ "global_step": 208,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1351
  "learning_rate": 9.999855928766113e-06,
1352
  "loss": 1.4314,
1353
  "step": 192
1354
+ },
1355
+ {
1356
+ "epoch": 0.012477700353359746,
1357
+ "grad_norm": 3.9474074840545654,
1358
+ "learning_rate": 9.99984803593353e-06,
1359
+ "loss": 1.4435,
1360
+ "step": 193
1361
+ },
1362
+ {
1363
+ "epoch": 0.012542351650527413,
1364
+ "grad_norm": 4.373626232147217,
1365
+ "learning_rate": 9.999839932629732e-06,
1366
+ "loss": 1.3644,
1367
+ "step": 194
1368
+ },
1369
+ {
1370
+ "epoch": 0.01260700294769508,
1371
+ "grad_norm": 4.185675621032715,
1372
+ "learning_rate": 9.999831618855058e-06,
1373
+ "loss": 1.3399,
1374
+ "step": 195
1375
+ },
1376
+ {
1377
+ "epoch": 0.012671654244862747,
1378
+ "grad_norm": 4.8992109298706055,
1379
+ "learning_rate": 9.999823094609862e-06,
1380
+ "loss": 1.2623,
1381
+ "step": 196
1382
+ },
1383
+ {
1384
+ "epoch": 0.012736305542030415,
1385
+ "grad_norm": 4.317060470581055,
1386
+ "learning_rate": 9.999814359894501e-06,
1387
+ "loss": 1.5297,
1388
+ "step": 197
1389
+ },
1390
+ {
1391
+ "epoch": 0.012800956839198081,
1392
+ "grad_norm": 4.501911640167236,
1393
+ "learning_rate": 9.999805414709344e-06,
1394
+ "loss": 1.4305,
1395
+ "step": 198
1396
+ },
1397
+ {
1398
+ "epoch": 0.01286560813636575,
1399
+ "grad_norm": 4.288606643676758,
1400
+ "learning_rate": 9.999796259054765e-06,
1401
+ "loss": 1.4358,
1402
+ "step": 199
1403
+ },
1404
+ {
1405
+ "epoch": 0.012930259433533415,
1406
+ "grad_norm": 4.692774772644043,
1407
+ "learning_rate": 9.99978689293115e-06,
1408
+ "loss": 1.4448,
1409
+ "step": 200
1410
+ },
1411
+ {
1412
+ "epoch": 0.012994910730701083,
1413
+ "grad_norm": 4.893410682678223,
1414
+ "learning_rate": 9.999777316338897e-06,
1415
+ "loss": 1.4313,
1416
+ "step": 201
1417
+ },
1418
+ {
1419
+ "epoch": 0.01305956202786875,
1420
+ "grad_norm": 4.01968240737915,
1421
+ "learning_rate": 9.999767529278403e-06,
1422
+ "loss": 1.3831,
1423
+ "step": 202
1424
+ },
1425
+ {
1426
+ "epoch": 0.013124213325036417,
1427
+ "grad_norm": 4.3122076988220215,
1428
+ "learning_rate": 9.999757531750086e-06,
1429
+ "loss": 1.3605,
1430
+ "step": 203
1431
+ },
1432
+ {
1433
+ "epoch": 0.013188864622204083,
1434
+ "grad_norm": 3.9625604152679443,
1435
+ "learning_rate": 9.999747323754363e-06,
1436
+ "loss": 1.2944,
1437
+ "step": 204
1438
+ },
1439
+ {
1440
+ "epoch": 0.013253515919371751,
1441
+ "grad_norm": 4.135870456695557,
1442
+ "learning_rate": 9.999736905291664e-06,
1443
+ "loss": 1.3465,
1444
+ "step": 205
1445
+ },
1446
+ {
1447
+ "epoch": 0.013318167216539419,
1448
+ "grad_norm": 3.485560655593872,
1449
+ "learning_rate": 9.999726276362429e-06,
1450
+ "loss": 1.4901,
1451
+ "step": 206
1452
+ },
1453
+ {
1454
+ "epoch": 0.013382818513707085,
1455
+ "grad_norm": 4.223531246185303,
1456
+ "learning_rate": 9.999715436967104e-06,
1457
+ "loss": 1.4342,
1458
+ "step": 207
1459
+ },
1460
+ {
1461
+ "epoch": 0.013447469810874753,
1462
+ "grad_norm": 4.688872814178467,
1463
+ "learning_rate": 9.999704387106147e-06,
1464
+ "loss": 1.3735,
1465
+ "step": 208
1466
  }
1467
  ],
1468
  "logging_steps": 1,
 
1482
  "attributes": {}
1483
  }
1484
  },
1485
+ "total_flos": 1.334557816141824e+17,
1486
  "train_batch_size": 2,
1487
  "trial_name": null,
1488
  "trial_params": null