neuralwonderland committed on
Commit
b834adb
·
verified ·
1 Parent(s): 7a573c0

Training in progress, step 4500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c10e16e3ac21d2c3067ac066149d6847ada80249a0d2201ec8b0747c1cc6cf62
3
  size 524363632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b6a6a42ffc6d380e2565f1fc5063348a1e34156d7e515f8f7da05f46e62ad60
3
  size 524363632
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:878ec53ac90deb1001fac14c0eae703bd28c1206475fc3b081fb43a039a2ed13
3
  size 1049049442
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e78a087bb6220991244d4fe57654663db01a4e71ac79cbc7cb3e84d260f51fe1
3
  size 1049049442
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3b9e5eea9433b8628e2a81dae4e1ee9ab1ee13a921f1ad76ef9904766aca9f5
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ad3318d3c187ce725fc1b6b7085bd28d07766f6a1219cb8fb8f59b2625444c2
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1f3a1c2778942fd3be1a9f139839cd53a1e492e182302e5e768461dfa2919be
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3803bd21b70ba2e397dcc504f146e8f1f5465da72c7ce9f40dd721afbea3d107
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.1951868534088135,
3
- "best_model_checkpoint": "./output/checkpoint-4350",
4
- "epoch": 0.1948488241881299,
5
  "eval_steps": 150,
6
- "global_step": 4350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3284,6 +3284,119 @@
3284
  "eval_samples_per_second": 9.682,
3285
  "eval_steps_per_second": 9.682,
3286
  "step": 4350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3287
  }
3288
  ],
3289
  "logging_steps": 10,
@@ -3303,7 +3416,7 @@
3303
  "attributes": {}
3304
  }
3305
  },
3306
- "total_flos": 5.6157580670976e+17,
3307
  "train_batch_size": 4,
3308
  "trial_name": null,
3309
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.1950809955596924,
3
+ "best_model_checkpoint": "./output/checkpoint-4500",
4
+ "epoch": 0.20156774916013437,
5
  "eval_steps": 150,
6
+ "global_step": 4500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3284
  "eval_samples_per_second": 9.682,
3285
  "eval_steps_per_second": 9.682,
3286
  "step": 4350
3287
+ },
3288
+ {
3289
+ "epoch": 0.19529675251959686,
3290
+ "grad_norm": 3.893348217010498,
3291
+ "learning_rate": 3.1129108238383095e-07,
3292
+ "loss": 1.2238,
3293
+ "step": 4360
3294
+ },
3295
+ {
3296
+ "epoch": 0.19574468085106383,
3297
+ "grad_norm": 3.704392433166504,
3298
+ "learning_rate": 3.017708529320604e-07,
3299
+ "loss": 1.0766,
3300
+ "step": 4370
3301
+ },
3302
+ {
3303
+ "epoch": 0.19619260918253079,
3304
+ "grad_norm": 4.406269073486328,
3305
+ "learning_rate": 2.923923666747357e-07,
3306
+ "loss": 0.9588,
3307
+ "step": 4380
3308
+ },
3309
+ {
3310
+ "epoch": 0.19664053751399777,
3311
+ "grad_norm": 6.578729152679443,
3312
+ "learning_rate": 2.8315600912469477e-07,
3313
+ "loss": 1.1622,
3314
+ "step": 4390
3315
+ },
3316
+ {
3317
+ "epoch": 0.19708846584546472,
3318
+ "grad_norm": 4.1804094314575195,
3319
+ "learning_rate": 2.740621599524189e-07,
3320
+ "loss": 1.1999,
3321
+ "step": 4400
3322
+ },
3323
+ {
3324
+ "epoch": 0.1975363941769317,
3325
+ "grad_norm": 6.192513465881348,
3326
+ "learning_rate": 2.651111929704303e-07,
3327
+ "loss": 1.1274,
3328
+ "step": 4410
3329
+ },
3330
+ {
3331
+ "epoch": 0.19798432250839865,
3332
+ "grad_norm": 4.356874942779541,
3333
+ "learning_rate": 2.563034761179223e-07,
3334
+ "loss": 1.0262,
3335
+ "step": 4420
3336
+ },
3337
+ {
3338
+ "epoch": 0.19843225083986563,
3339
+ "grad_norm": 4.435469627380371,
3340
+ "learning_rate": 2.476393714456384e-07,
3341
+ "loss": 1.1814,
3342
+ "step": 4430
3343
+ },
3344
+ {
3345
+ "epoch": 0.19888017917133258,
3346
+ "grad_norm": 3.9173505306243896,
3347
+ "learning_rate": 2.391192351009855e-07,
3348
+ "loss": 0.7984,
3349
+ "step": 4440
3350
+ },
3351
+ {
3352
+ "epoch": 0.19932810750279956,
3353
+ "grad_norm": 6.546506881713867,
3354
+ "learning_rate": 2.3074341731339837e-07,
3355
+ "loss": 1.168,
3356
+ "step": 4450
3357
+ },
3358
+ {
3359
+ "epoch": 0.1997760358342665,
3360
+ "grad_norm": 6.1646223068237305,
3361
+ "learning_rate": 2.225122623799407e-07,
3362
+ "loss": 1.2589,
3363
+ "step": 4460
3364
+ },
3365
+ {
3366
+ "epoch": 0.2002239641657335,
3367
+ "grad_norm": 3.210203170776367,
3368
+ "learning_rate": 2.1442610865115135e-07,
3369
+ "loss": 1.0636,
3370
+ "step": 4470
3371
+ },
3372
+ {
3373
+ "epoch": 0.20067189249720044,
3374
+ "grad_norm": 5.133816242218018,
3375
+ "learning_rate": 2.0648528851714077e-07,
3376
+ "loss": 1.0195,
3377
+ "step": 4480
3378
+ },
3379
+ {
3380
+ "epoch": 0.20111982082866742,
3381
+ "grad_norm": 4.449398517608643,
3382
+ "learning_rate": 1.9869012839392064e-07,
3383
+ "loss": 1.1007,
3384
+ "step": 4490
3385
+ },
3386
+ {
3387
+ "epoch": 0.20156774916013437,
3388
+ "grad_norm": 4.8083977699279785,
3389
+ "learning_rate": 1.9104094870999264e-07,
3390
+ "loss": 1.1975,
3391
+ "step": 4500
3392
+ },
3393
+ {
3394
+ "epoch": 0.20156774916013437,
3395
+ "eval_loss": 1.1950809955596924,
3396
+ "eval_runtime": 51.7311,
3397
+ "eval_samples_per_second": 9.665,
3398
+ "eval_steps_per_second": 9.665,
3399
+ "step": 4500
3400
  }
3401
  ],
3402
  "logging_steps": 10,
 
3416
  "attributes": {}
3417
  }
3418
  },
3419
+ "total_flos": 5.818050367543296e+17,
3420
  "train_batch_size": 4,
3421
  "trial_name": null,
3422
  "trial_params": null