neuralwonderland committed on
Commit
1cae8a8
·
verified ·
1 Parent(s): 8c6526e

Training in progress, step 3150, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a24ccb34ee670ad31c12d9251a8af3e2d476a5cb241dfcacfd09b83fd31da13
3
  size 524363632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89cc1745b4500ad6cf558198aa9b4e987065634d082f54bd44c3c89a26dd0906
3
  size 524363632
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2be2c6a4a3b357e1b920c33a1c22c1fc7950fd6a88fee358b8557a1d6be1999f
3
  size 1049049442
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96cf2b77c6232af1bdf1652020d2d1e3b99db45a4d0a35dc7260f2f620431494
3
  size 1049049442
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9dec7d48193c1bf07d35e7fafa4ead566a1f9c6126b97351f8b8095ac049ca3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:469b796b4ae7827da357c6ad4d389b1a5f899bd0c5614fe11b27f1430a7f9bdc
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3c408f4c434a323d7fe8a30b3b55f0cf203ab417bbc4794626805f567e54301
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a2801bc6a2e1667d6f68fb7b82fe94994bd9e743692539b17302be0c7385f74
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.199351191520691,
3
- "best_model_checkpoint": "./output/checkpoint-3000",
4
- "epoch": 0.1343784994400896,
5
  "eval_steps": 150,
6
- "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2267,6 +2267,119 @@
2267
  "eval_samples_per_second": 9.696,
2268
  "eval_steps_per_second": 9.696,
2269
  "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2270
  }
2271
  ],
2272
  "logging_steps": 10,
@@ -2286,7 +2399,7 @@
2286
  "attributes": {}
2287
  }
2288
  },
2289
- "total_flos": 3.849433190903808e+17,
2290
  "train_batch_size": 4,
2291
  "trial_name": null,
2292
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.1985480785369873,
3
+ "best_model_checkpoint": "./output/checkpoint-3150",
4
+ "epoch": 0.14109742441209405,
5
  "eval_steps": 150,
6
+ "global_step": 3150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2267
  "eval_samples_per_second": 9.696,
2268
  "eval_steps_per_second": 9.696,
2269
  "step": 3000
2270
+ },
2271
+ {
2272
+ "epoch": 0.13482642777155654,
2273
+ "grad_norm": 4.307104587554932,
2274
+ "learning_rate": 2.6599945773564997e-06,
2275
+ "loss": 1.1743,
2276
+ "step": 3010
2277
+ },
2278
+ {
2279
+ "epoch": 0.13527435610302352,
2280
+ "grad_norm": 4.9457221031188965,
2281
+ "learning_rate": 2.6370124104330357e-06,
2282
+ "loss": 1.1287,
2283
+ "step": 3020
2284
+ },
2285
+ {
2286
+ "epoch": 0.13572228443449047,
2287
+ "grad_norm": 3.17401385307312,
2288
+ "learning_rate": 2.614075994070105e-06,
2289
+ "loss": 1.1686,
2290
+ "step": 3030
2291
+ },
2292
+ {
2293
+ "epoch": 0.13617021276595745,
2294
+ "grad_norm": 6.098177433013916,
2295
+ "learning_rate": 2.591186271093948e-06,
2296
+ "loss": 1.1546,
2297
+ "step": 3040
2298
+ },
2299
+ {
2300
+ "epoch": 0.1366181410974244,
2301
+ "grad_norm": 4.12905216217041,
2302
+ "learning_rate": 2.568344182411423e-06,
2303
+ "loss": 1.0909,
2304
+ "step": 3050
2305
+ },
2306
+ {
2307
+ "epoch": 0.13706606942889138,
2308
+ "grad_norm": 4.946627616882324,
2309
+ "learning_rate": 2.5455506669713293e-06,
2310
+ "loss": 1.2223,
2311
+ "step": 3060
2312
+ },
2313
+ {
2314
+ "epoch": 0.13751399776035833,
2315
+ "grad_norm": 4.25789737701416,
2316
+ "learning_rate": 2.522806661725812e-06,
2317
+ "loss": 1.0383,
2318
+ "step": 3070
2319
+ },
2320
+ {
2321
+ "epoch": 0.1379619260918253,
2322
+ "grad_norm": 6.536715030670166,
2323
+ "learning_rate": 2.5001131015918444e-06,
2324
+ "loss": 0.9992,
2325
+ "step": 3080
2326
+ },
2327
+ {
2328
+ "epoch": 0.13840985442329226,
2329
+ "grad_norm": 5.861030578613281,
2330
+ "learning_rate": 2.4774709194127973e-06,
2331
+ "loss": 1.1678,
2332
+ "step": 3090
2333
+ },
2334
+ {
2335
+ "epoch": 0.13885778275475924,
2336
+ "grad_norm": 4.58046293258667,
2337
+ "learning_rate": 2.4548810459200973e-06,
2338
+ "loss": 1.2545,
2339
+ "step": 3100
2340
+ },
2341
+ {
2342
+ "epoch": 0.1393057110862262,
2343
+ "grad_norm": 6.048022270202637,
2344
+ "learning_rate": 2.4323444096949647e-06,
2345
+ "loss": 1.0531,
2346
+ "step": 3110
2347
+ },
2348
+ {
2349
+ "epoch": 0.13975363941769317,
2350
+ "grad_norm": 5.86400842666626,
2351
+ "learning_rate": 2.409861937130248e-06,
2352
+ "loss": 1.1093,
2353
+ "step": 3120
2354
+ },
2355
+ {
2356
+ "epoch": 0.14020156774916012,
2357
+ "grad_norm": 3.7916102409362793,
2358
+ "learning_rate": 2.3874345523923327e-06,
2359
+ "loss": 1.1048,
2360
+ "step": 3130
2361
+ },
2362
+ {
2363
+ "epoch": 0.1406494960806271,
2364
+ "grad_norm": 4.009166717529297,
2365
+ "learning_rate": 2.3650631773831644e-06,
2366
+ "loss": 1.0198,
2367
+ "step": 3140
2368
+ },
2369
+ {
2370
+ "epoch": 0.14109742441209405,
2371
+ "grad_norm": 4.695572853088379,
2372
+ "learning_rate": 2.3427487317023477e-06,
2373
+ "loss": 1.1909,
2374
+ "step": 3150
2375
+ },
2376
+ {
2377
+ "epoch": 0.14109742441209405,
2378
+ "eval_loss": 1.1985480785369873,
2379
+ "eval_runtime": 51.6619,
2380
+ "eval_samples_per_second": 9.678,
2381
+ "eval_steps_per_second": 9.678,
2382
+ "step": 3150
2383
  }
2384
  ],
2385
  "logging_steps": 10,
 
2399
  "attributes": {}
2400
  }
2401
  },
2402
+ "total_flos": 4.04330569814016e+17,
2403
  "train_batch_size": 4,
2404
  "trial_name": null,
2405
  "trial_params": null