wcyat commited on
Commit
67bed29
·
verified ·
1 Parent(s): 434d90e

Training in progress, step 1745, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02157e19d58ac06f26321b6e228876cad88492d24c90275b550f5447764cb1d9
3
  size 1304192904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d0a442c19a8f93a75a243371a4cc9498e64b1d948f140840330f017238dd9cf
3
  size 1304192904
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b3a0d4d40abc119125b2ab36120e3ac6a57c0081aecf560a4b9b10b29149dd2
3
  size 2608620781
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a4147d1d8a3b2da9638a83aa0c0935b498970d6bfc42578cbdab88e597a43dc
3
  size 2608620781
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26fb7a1297c736ec81c5ca20bf47879ae114a90a0c60269ef2a0eb3c2ec7224f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bd23c18ec75baba95a12a9dd10c0466198b046b24819b38c01287321191f8a8
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66e6f95527be7d742c182b1b25b1632bf2465fc58ebbe4ae2f736399e0d31f82
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f2751b167a4a17a996a86ab9c13e427252ce0aaabcc580369f95041bdc96dd5
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.2911098897457123,
3
  "best_model_checkpoint": "./results/checkpoint-280",
4
- "epoch": 4.2979942693409745,
5
  "eval_steps": 20,
6
- "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1207,6 +1207,198 @@
1207
  "eval_samples_per_second": 10.8,
1208
  "eval_steps_per_second": 2.734,
1209
  "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1210
  }
1211
  ],
1212
  "logging_steps": 20,
@@ -1221,12 +1413,12 @@
1221
  "should_evaluate": false,
1222
  "should_log": false,
1223
  "should_save": true,
1224
- "should_training_stop": false
1225
  },
1226
  "attributes": {}
1227
  }
1228
  },
1229
- "total_flos": 4811545247256468.0,
1230
  "train_batch_size": 4,
1231
  "trial_name": null,
1232
  "trial_params": null
 
1
  {
2
  "best_metric": 0.2911098897457123,
3
  "best_model_checkpoint": "./results/checkpoint-280",
4
+ "epoch": 5.0,
5
  "eval_steps": 20,
6
+ "global_step": 1745,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1207
  "eval_samples_per_second": 10.8,
1208
  "eval_steps_per_second": 2.734,
1209
  "step": 1500
1210
+ },
1211
+ {
1212
+ "epoch": 4.355300859598854,
1213
+ "grad_norm": 3.432967185974121,
1214
+ "learning_rate": 2.5787965616045845e-06,
1215
+ "loss": 0.0714,
1216
+ "step": 1520
1217
+ },
1218
+ {
1219
+ "epoch": 4.355300859598854,
1220
+ "eval_accuracy": 0.8987341772151899,
1221
+ "eval_loss": 0.5381926894187927,
1222
+ "eval_runtime": 15.6025,
1223
+ "eval_samples_per_second": 10.127,
1224
+ "eval_steps_per_second": 2.564,
1225
+ "step": 1520
1226
+ },
1227
+ {
1228
+ "epoch": 4.412607449856734,
1229
+ "grad_norm": 0.03264419734477997,
1230
+ "learning_rate": 2.3495702005730663e-06,
1231
+ "loss": 0.0617,
1232
+ "step": 1540
1233
+ },
1234
+ {
1235
+ "epoch": 4.412607449856734,
1236
+ "eval_accuracy": 0.8924050632911392,
1237
+ "eval_loss": 0.6029611229896545,
1238
+ "eval_runtime": 14.4132,
1239
+ "eval_samples_per_second": 10.962,
1240
+ "eval_steps_per_second": 2.775,
1241
+ "step": 1540
1242
+ },
1243
+ {
1244
+ "epoch": 4.469914040114613,
1245
+ "grad_norm": 0.06593719124794006,
1246
+ "learning_rate": 2.1203438395415473e-06,
1247
+ "loss": 0.0802,
1248
+ "step": 1560
1249
+ },
1250
+ {
1251
+ "epoch": 4.469914040114613,
1252
+ "eval_accuracy": 0.8924050632911392,
1253
+ "eval_loss": 0.567659318447113,
1254
+ "eval_runtime": 14.8121,
1255
+ "eval_samples_per_second": 10.667,
1256
+ "eval_steps_per_second": 2.7,
1257
+ "step": 1560
1258
+ },
1259
+ {
1260
+ "epoch": 4.527220630372493,
1261
+ "grad_norm": 0.1013946682214737,
1262
+ "learning_rate": 1.8911174785100289e-06,
1263
+ "loss": 0.2404,
1264
+ "step": 1580
1265
+ },
1266
+ {
1267
+ "epoch": 4.527220630372493,
1268
+ "eval_accuracy": 0.8987341772151899,
1269
+ "eval_loss": 0.5836894512176514,
1270
+ "eval_runtime": 14.7362,
1271
+ "eval_samples_per_second": 10.722,
1272
+ "eval_steps_per_second": 2.714,
1273
+ "step": 1580
1274
+ },
1275
+ {
1276
+ "epoch": 4.584527220630372,
1277
+ "grad_norm": 6.956309795379639,
1278
+ "learning_rate": 1.66189111747851e-06,
1279
+ "loss": 0.2311,
1280
+ "step": 1600
1281
+ },
1282
+ {
1283
+ "epoch": 4.584527220630372,
1284
+ "eval_accuracy": 0.8987341772151899,
1285
+ "eval_loss": 0.6191691160202026,
1286
+ "eval_runtime": 14.4896,
1287
+ "eval_samples_per_second": 10.904,
1288
+ "eval_steps_per_second": 2.761,
1289
+ "step": 1600
1290
+ },
1291
+ {
1292
+ "epoch": 4.641833810888253,
1293
+ "grad_norm": 0.13025854527950287,
1294
+ "learning_rate": 1.4326647564469915e-06,
1295
+ "loss": 0.0031,
1296
+ "step": 1620
1297
+ },
1298
+ {
1299
+ "epoch": 4.641833810888253,
1300
+ "eval_accuracy": 0.8987341772151899,
1301
+ "eval_loss": 0.6153239011764526,
1302
+ "eval_runtime": 14.714,
1303
+ "eval_samples_per_second": 10.738,
1304
+ "eval_steps_per_second": 2.718,
1305
+ "step": 1620
1306
+ },
1307
+ {
1308
+ "epoch": 4.699140401146132,
1309
+ "grad_norm": 0.02252735011279583,
1310
+ "learning_rate": 1.2034383954154729e-06,
1311
+ "loss": 0.1621,
1312
+ "step": 1640
1313
+ },
1314
+ {
1315
+ "epoch": 4.699140401146132,
1316
+ "eval_accuracy": 0.8924050632911392,
1317
+ "eval_loss": 0.6008380651473999,
1318
+ "eval_runtime": 14.6006,
1319
+ "eval_samples_per_second": 10.821,
1320
+ "eval_steps_per_second": 2.74,
1321
+ "step": 1640
1322
+ },
1323
+ {
1324
+ "epoch": 4.756446991404012,
1325
+ "grad_norm": 0.03680579736828804,
1326
+ "learning_rate": 9.742120343839543e-07,
1327
+ "loss": 0.0841,
1328
+ "step": 1660
1329
+ },
1330
+ {
1331
+ "epoch": 4.756446991404012,
1332
+ "eval_accuracy": 0.8987341772151899,
1333
+ "eval_loss": 0.5886847376823425,
1334
+ "eval_runtime": 14.6522,
1335
+ "eval_samples_per_second": 10.783,
1336
+ "eval_steps_per_second": 2.73,
1337
+ "step": 1660
1338
+ },
1339
+ {
1340
+ "epoch": 4.813753581661891,
1341
+ "grad_norm": 0.027355097234249115,
1342
+ "learning_rate": 7.449856733524357e-07,
1343
+ "loss": 0.0014,
1344
+ "step": 1680
1345
+ },
1346
+ {
1347
+ "epoch": 4.813753581661891,
1348
+ "eval_accuracy": 0.8987341772151899,
1349
+ "eval_loss": 0.586622416973114,
1350
+ "eval_runtime": 14.7046,
1351
+ "eval_samples_per_second": 10.745,
1352
+ "eval_steps_per_second": 2.72,
1353
+ "step": 1680
1354
+ },
1355
+ {
1356
+ "epoch": 4.871060171919771,
1357
+ "grad_norm": 0.011458040215075016,
1358
+ "learning_rate": 5.15759312320917e-07,
1359
+ "loss": 0.1199,
1360
+ "step": 1700
1361
+ },
1362
+ {
1363
+ "epoch": 4.871060171919771,
1364
+ "eval_accuracy": 0.8987341772151899,
1365
+ "eval_loss": 0.590861976146698,
1366
+ "eval_runtime": 14.6646,
1367
+ "eval_samples_per_second": 10.774,
1368
+ "eval_steps_per_second": 2.728,
1369
+ "step": 1700
1370
+ },
1371
+ {
1372
+ "epoch": 4.92836676217765,
1373
+ "grad_norm": 0.025075102224946022,
1374
+ "learning_rate": 2.865329512893983e-07,
1375
+ "loss": 0.0124,
1376
+ "step": 1720
1377
+ },
1378
+ {
1379
+ "epoch": 4.92836676217765,
1380
+ "eval_accuracy": 0.8987341772151899,
1381
+ "eval_loss": 0.5905599594116211,
1382
+ "eval_runtime": 14.686,
1383
+ "eval_samples_per_second": 10.759,
1384
+ "eval_steps_per_second": 2.724,
1385
+ "step": 1720
1386
+ },
1387
+ {
1388
+ "epoch": 4.98567335243553,
1389
+ "grad_norm": 0.021264472976326942,
1390
+ "learning_rate": 5.730659025787966e-08,
1391
+ "loss": 0.046,
1392
+ "step": 1740
1393
+ },
1394
+ {
1395
+ "epoch": 4.98567335243553,
1396
+ "eval_accuracy": 0.8987341772151899,
1397
+ "eval_loss": 0.5924892425537109,
1398
+ "eval_runtime": 14.595,
1399
+ "eval_samples_per_second": 10.826,
1400
+ "eval_steps_per_second": 2.741,
1401
+ "step": 1740
1402
  }
1403
  ],
1404
  "logging_steps": 20,
 
1413
  "should_evaluate": false,
1414
  "should_log": false,
1415
  "should_save": true,
1416
+ "should_training_stop": true
1417
  },
1418
  "attributes": {}
1419
  }
1420
  },
1421
+ "total_flos": 5599966461345732.0,
1422
  "train_batch_size": 4,
1423
  "trial_name": null,
1424
  "trial_params": null