neuralwonderland commited on
Commit
6b3e724
·
verified ·
1 Parent(s): 3e315b1

Training in progress, step 3150, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e76345f809b53997ae5b0156376662cc932fae1628a32a6daa74b18bf353691c
3
  size 69527352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:704458e22083d426be5e0b2430ec99e95658e2146eeda1abbadddcef1b66afa0
3
  size 69527352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9350ab8952c9068ba9cb6662b755e3ae064e0864a10cc19275a73e3493f1d699
3
  size 139313554
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:516e6b0d7cfd706f5b04b458cc6f13af606fbcb05d80be45f02aa990d2fa7939
3
  size 139313554
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:221fbae6356b068d1a273b00f61f9d4825a8ecf84836f89b11a087624e7b10f1
3
  size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7a7bdab08336c0f7233e606ce96075425fa9cf729719c53f2840e05d72ac534
3
  size 14308
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:125a0482b0838ad97cbdd22589ee6289ec41c1a06bf562da8714c95f5c3581c0
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2ad8a27e92c879b969b5845f60871e76a73be3547e482cc45027df5fe072f15
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.8055340647697449,
3
- "best_model_checkpoint": "./output/checkpoint-3000",
4
- "epoch": 0.37294878170064644,
5
  "eval_steps": 150,
6
- "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2267,6 +2267,119 @@
2267
  "eval_samples_per_second": 9.152,
2268
  "eval_steps_per_second": 9.152,
2269
  "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2270
  }
2271
  ],
2272
  "logging_steps": 10,
@@ -2286,7 +2399,7 @@
2286
  "attributes": {}
2287
  }
2288
  },
2289
- "total_flos": 1.0423543389447168e+17,
2290
  "train_batch_size": 16,
2291
  "trial_name": null,
2292
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.801069974899292,
3
+ "best_model_checkpoint": "./output/checkpoint-3150",
4
+ "epoch": 0.39159622078567874,
5
  "eval_steps": 150,
6
+ "global_step": 3150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2267
  "eval_samples_per_second": 9.152,
2268
  "eval_steps_per_second": 9.152,
2269
  "step": 3000
2270
+ },
2271
+ {
2272
+ "epoch": 0.37419194430631525,
2273
+ "grad_norm": 1.3308610916137695,
2274
+ "learning_rate": 4.433324295594166e-05,
2275
+ "loss": 0.6302,
2276
+ "step": 3010
2277
+ },
2278
+ {
2279
+ "epoch": 0.37543510691198406,
2280
+ "grad_norm": 1.3117073774337769,
2281
+ "learning_rate": 4.3950206840550585e-05,
2282
+ "loss": 0.5286,
2283
+ "step": 3020
2284
+ },
2285
+ {
2286
+ "epoch": 0.37667826951765293,
2287
+ "grad_norm": 1.3257042169570923,
2288
+ "learning_rate": 4.3567933234501746e-05,
2289
+ "loss": 0.6227,
2290
+ "step": 3030
2291
+ },
2292
+ {
2293
+ "epoch": 0.37792143212332174,
2294
+ "grad_norm": 1.614931583404541,
2295
+ "learning_rate": 4.318643785156579e-05,
2296
+ "loss": 0.5531,
2297
+ "step": 3040
2298
+ },
2299
+ {
2300
+ "epoch": 0.37916459472899056,
2301
+ "grad_norm": 1.4024949073791504,
2302
+ "learning_rate": 4.280573637352371e-05,
2303
+ "loss": 0.6107,
2304
+ "step": 3050
2305
+ },
2306
+ {
2307
+ "epoch": 0.38040775733465937,
2308
+ "grad_norm": 1.3442318439483643,
2309
+ "learning_rate": 4.242584444952216e-05,
2310
+ "loss": 0.619,
2311
+ "step": 3060
2312
+ },
2313
+ {
2314
+ "epoch": 0.3816509199403282,
2315
+ "grad_norm": 1.6472797393798828,
2316
+ "learning_rate": 4.204677769543019e-05,
2317
+ "loss": 0.6219,
2318
+ "step": 3070
2319
+ },
2320
+ {
2321
+ "epoch": 0.382894082545997,
2322
+ "grad_norm": 1.226382851600647,
2323
+ "learning_rate": 4.16685516931974e-05,
2324
+ "loss": 0.5669,
2325
+ "step": 3080
2326
+ },
2327
+ {
2328
+ "epoch": 0.38413724515166586,
2329
+ "grad_norm": 1.2685925960540771,
2330
+ "learning_rate": 4.1291181990213286e-05,
2331
+ "loss": 0.5875,
2332
+ "step": 3090
2333
+ },
2334
+ {
2335
+ "epoch": 0.3853804077573347,
2336
+ "grad_norm": 2.1690385341644287,
2337
+ "learning_rate": 4.0914684098668286e-05,
2338
+ "loss": 0.6366,
2339
+ "step": 3100
2340
+ },
2341
+ {
2342
+ "epoch": 0.3866235703630035,
2343
+ "grad_norm": 1.2350751161575317,
2344
+ "learning_rate": 4.053907349491608e-05,
2345
+ "loss": 0.6034,
2346
+ "step": 3110
2347
+ },
2348
+ {
2349
+ "epoch": 0.3878667329686723,
2350
+ "grad_norm": 1.4489704370498657,
2351
+ "learning_rate": 4.016436561883746e-05,
2352
+ "loss": 0.6346,
2353
+ "step": 3120
2354
+ },
2355
+ {
2356
+ "epoch": 0.3891098955743411,
2357
+ "grad_norm": 1.332980990409851,
2358
+ "learning_rate": 3.979057587320554e-05,
2359
+ "loss": 0.5858,
2360
+ "step": 3130
2361
+ },
2362
+ {
2363
+ "epoch": 0.3903530581800099,
2364
+ "grad_norm": 1.391718864440918,
2365
+ "learning_rate": 3.941771962305274e-05,
2366
+ "loss": 0.7218,
2367
+ "step": 3140
2368
+ },
2369
+ {
2370
+ "epoch": 0.39159622078567874,
2371
+ "grad_norm": 1.269722819328308,
2372
+ "learning_rate": 3.9045812195039125e-05,
2373
+ "loss": 0.5846,
2374
+ "step": 3150
2375
+ },
2376
+ {
2377
+ "epoch": 0.39159622078567874,
2378
+ "eval_loss": 0.801069974899292,
2379
+ "eval_runtime": 56.6006,
2380
+ "eval_samples_per_second": 8.834,
2381
+ "eval_steps_per_second": 8.834,
2382
+ "step": 3150
2383
  }
2384
  ],
2385
  "logging_steps": 10,
 
2399
  "attributes": {}
2400
  }
2401
  },
2402
+ "total_flos": 1.0945600710137856e+17,
2403
  "train_batch_size": 16,
2404
  "trial_name": null,
2405
  "trial_params": null