neuralwonderland commited on
Commit
eddedca
·
verified ·
1 Parent(s): ecc3bf6

Training in progress, step 1800, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e003bc47c34922f506575fa321af35e1688ba2653dd3f89f3a5d834309ab3d76
3
  size 524363632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6ea5a9a3274c25655abcb7508b3e99f46f4d2540a8c280c82263e9204a27608
3
  size 524363632
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84fa7f8a5315bdd769b48024d5184f26730eea2b9359d9041a6bb3be0351fea6
3
  size 1049049442
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5d2de183c2715bda0d33cd3b7f48104ab465f31540efdd87fd943f2ed1f8538
3
  size 1049049442
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1d03e882eb6f9e0ad39ba180eebc6702043d67d30929e93a57f986b1b21b0e3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f3f6154bfb670e4d26b41c9bbf8d801b028d680d7736d4c9bed1a88cf8f8189
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e89283fc8dd36108e52f8865fe43971e6827f17fd9271c3eadbc32f37159ae4
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3661adfcbd9db9cbf825371f6d2c689ac041ce2f98ed8dfcbef94d998a20dea
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.2118867635726929,
3
- "best_model_checkpoint": "./output/checkpoint-1650",
4
- "epoch": 0.07390817469204927,
5
  "eval_steps": 150,
6
- "global_step": 1650,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1250,6 +1250,119 @@
1250
  "eval_samples_per_second": 9.677,
1251
  "eval_steps_per_second": 9.677,
1252
  "step": 1650
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1253
  }
1254
  ],
1255
  "logging_steps": 10,
@@ -1269,7 +1382,7 @@
1269
  "attributes": {}
1270
  }
1271
  },
1272
- "total_flos": 2.12376665714688e+17,
1273
  "train_batch_size": 4,
1274
  "trial_name": null,
1275
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.2111696004867554,
3
+ "best_model_checkpoint": "./output/checkpoint-1800",
4
+ "epoch": 0.08062709966405375,
5
  "eval_steps": 150,
6
+ "global_step": 1800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1250
  "eval_samples_per_second": 9.677,
1251
  "eval_steps_per_second": 9.677,
1252
  "step": 1650
1253
+ },
1254
+ {
1255
+ "epoch": 0.07435610302351624,
1256
+ "grad_norm": 4.368626117706299,
1257
+ "learning_rate": 5.775563985622202e-06,
1258
+ "loss": 1.1,
1259
+ "step": 1660
1260
+ },
1261
+ {
1262
+ "epoch": 0.0748040313549832,
1263
+ "grad_norm": 6.341384410858154,
1264
+ "learning_rate": 5.755288828702987e-06,
1265
+ "loss": 1.0292,
1266
+ "step": 1670
1267
+ },
1268
+ {
1269
+ "epoch": 0.07525195968645017,
1270
+ "grad_norm": 5.869757652282715,
1271
+ "learning_rate": 5.734931242214204e-06,
1272
+ "loss": 1.0937,
1273
+ "step": 1680
1274
+ },
1275
+ {
1276
+ "epoch": 0.07569988801791713,
1277
+ "grad_norm": 4.857089042663574,
1278
+ "learning_rate": 5.7144920629764955e-06,
1279
+ "loss": 1.0987,
1280
+ "step": 1690
1281
+ },
1282
+ {
1283
+ "epoch": 0.0761478163493841,
1284
+ "grad_norm": 5.114626884460449,
1285
+ "learning_rate": 5.693972131164471e-06,
1286
+ "loss": 0.9623,
1287
+ "step": 1700
1288
+ },
1289
+ {
1290
+ "epoch": 0.07659574468085106,
1291
+ "grad_norm": 5.152310371398926,
1292
+ "learning_rate": 5.673372290272149e-06,
1293
+ "loss": 1.1423,
1294
+ "step": 1710
1295
+ },
1296
+ {
1297
+ "epoch": 0.07704367301231803,
1298
+ "grad_norm": 3.8204965591430664,
1299
+ "learning_rate": 5.652693387078309e-06,
1300
+ "loss": 1.0523,
1301
+ "step": 1720
1302
+ },
1303
+ {
1304
+ "epoch": 0.077491601343785,
1305
+ "grad_norm": 3.0346767902374268,
1306
+ "learning_rate": 5.631936271611667e-06,
1307
+ "loss": 1.0483,
1308
+ "step": 1730
1309
+ },
1310
+ {
1311
+ "epoch": 0.07793952967525196,
1312
+ "grad_norm": 4.436351299285889,
1313
+ "learning_rate": 5.611101797115939e-06,
1314
+ "loss": 1.0144,
1315
+ "step": 1740
1316
+ },
1317
+ {
1318
+ "epoch": 0.07838745800671892,
1319
+ "grad_norm": 5.614783763885498,
1320
+ "learning_rate": 5.5901908200147685e-06,
1321
+ "loss": 1.078,
1322
+ "step": 1750
1323
+ },
1324
+ {
1325
+ "epoch": 0.07883538633818589,
1326
+ "grad_norm": 4.0426926612854,
1327
+ "learning_rate": 5.56920419987652e-06,
1328
+ "loss": 1.2628,
1329
+ "step": 1760
1330
+ },
1331
+ {
1332
+ "epoch": 0.07928331466965285,
1333
+ "grad_norm": 5.30089807510376,
1334
+ "learning_rate": 5.5481427993789534e-06,
1335
+ "loss": 1.1257,
1336
+ "step": 1770
1337
+ },
1338
+ {
1339
+ "epoch": 0.07973124300111982,
1340
+ "grad_norm": 3.5508739948272705,
1341
+ "learning_rate": 5.527007484273746e-06,
1342
+ "loss": 1.0355,
1343
+ "step": 1780
1344
+ },
1345
+ {
1346
+ "epoch": 0.08017917133258678,
1347
+ "grad_norm": 4.027277946472168,
1348
+ "learning_rate": 5.5057991233509225e-06,
1349
+ "loss": 0.9196,
1350
+ "step": 1790
1351
+ },
1352
+ {
1353
+ "epoch": 0.08062709966405375,
1354
+ "grad_norm": 7.427858352661133,
1355
+ "learning_rate": 5.484518588403134e-06,
1356
+ "loss": 1.1913,
1357
+ "step": 1800
1358
+ },
1359
+ {
1360
+ "epoch": 0.08062709966405375,
1361
+ "eval_loss": 1.2111696004867554,
1362
+ "eval_runtime": 51.6854,
1363
+ "eval_samples_per_second": 9.674,
1364
+ "eval_steps_per_second": 9.674,
1365
+ "step": 1800
1366
  }
1367
  ],
1368
  "logging_steps": 10,
 
1382
  "attributes": {}
1383
  }
1384
  },
1385
+ "total_flos": 2.32740933331968e+17,
1386
  "train_batch_size": 4,
1387
  "trial_name": null,
1388
  "trial_params": null