mamung commited on
Commit
eec3232
·
verified ·
1 Parent(s): 6e43112

Training in progress, step 80, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52330c76a8a9f3b5dee8f7f17ddb538429d1d38a0e61212061e577eddc0373b4
3
  size 97728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca64e1bc21a66cf4f936b2ed516611c1088c1ad25b25c70c0440ed384e73b515
3
  size 97728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:650ade0b447b4f51faaa60eaeb027a75fcd48144fe2cce6b7ccd28894963b953
3
  size 212298
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db697e1991b34936f0d77b86b5e578a88bd35b26f670092c6126c393e25ef29d
3
  size 212298
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e665c30820065f7c9fc581e9809a0f34116f84f4d40f3dcc68893910287ab3e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24eab67923497bc06cae1eaf0b8b5de49eefa88609da76e041852bb21060b40a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6131b86a23c4fb6c3b22cd21126d9949412029d60f6fd460c8c4560b239dc156
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d1e4429a3b99e660815786d3a7846a2fc0f39f97c2ce4dd14d2b15e5d88b2c2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.553610503282276,
5
  "eval_steps": 8,
6
- "global_step": 72,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -255,6 +255,28 @@
255
  "eval_samples_per_second": 133.994,
256
  "eval_steps_per_second": 67.687,
257
  "step": 72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
  }
259
  ],
260
  "logging_steps": 3,
@@ -274,7 +296,7 @@
274
  "attributes": {}
275
  }
276
  },
277
- "total_flos": 30886558040064.0,
278
  "train_batch_size": 2,
279
  "trial_name": null,
280
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.833698030634573,
5
  "eval_steps": 8,
6
+ "global_step": 80,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
255
  "eval_samples_per_second": 133.994,
256
  "eval_steps_per_second": 67.687,
257
  "step": 72
258
+ },
259
+ {
260
+ "epoch": 2.658643326039387,
261
+ "grad_norm": 0.1350039690732956,
262
+ "learning_rate": 1.339745962155613e-05,
263
+ "loss": 10.279,
264
+ "step": 75
265
+ },
266
+ {
267
+ "epoch": 2.763676148796499,
268
+ "grad_norm": 0.09172733873128891,
269
+ "learning_rate": 7.163206698392744e-06,
270
+ "loss": 10.0236,
271
+ "step": 78
272
+ },
273
+ {
274
+ "epoch": 2.833698030634573,
275
+ "eval_loss": 10.288338661193848,
276
+ "eval_runtime": 0.7231,
277
+ "eval_samples_per_second": 134.151,
278
+ "eval_steps_per_second": 67.767,
279
+ "step": 80
280
  }
281
  ],
282
  "logging_steps": 3,
 
296
  "attributes": {}
297
  }
298
  },
299
+ "total_flos": 34334732845056.0,
300
  "train_batch_size": 2,
301
  "trial_name": null,
302
  "trial_params": null