mamung committed on
Commit 4d2f5c8 · verified · 1 Parent(s): b3aab5a

Training in progress, step 64, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e99d093626ee59249ef6a651400719dc92cda3d23205108e8c0ed1d1f467de95
+oid sha256:cb4bbb5372f29a6caf128538be1b6f05973475bb5e1a9806bcb4c94754837795
 size 97728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f441a5bbf6bd228867549861adab261f2f188ce88f7c3fa4b87682af7a8e6bf0
+oid sha256:2bef68d4704428cd2075b9af0de2c4a7528e31bdc93d8faac3410566b8b58c44
 size 212298
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c2ccd39fda0bf0ecc3227ac1a7a11b99683295c5148b14a2c9b0f8e73bbf6887
+oid sha256:344338431e33a6e2f5674878a19c239f840bfbb7767463e4bb420c416a152c1a
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:54fd3d743640bd98861785afa48759ae85e753ebdc3bb34f138e5c40ed09d555
+oid sha256:9f5fef290c2d601f702708df10bf5f98083660f03273309905f9ee205d8b58af
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.9770240700218817,
+  "epoch": 2.273522975929978,
   "eval_steps": 8,
-  "global_step": 56,
+  "global_step": 64,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -197,6 +197,35 @@
       "eval_samples_per_second": 134.919,
       "eval_steps_per_second": 68.155,
       "step": 56
+    },
+    {
+      "epoch": 2.0284463894967177,
+      "grad_norm": 0.2121211737394333,
+      "learning_rate": 8.107487556395901e-05,
+      "loss": 13.6014,
+      "step": 57
+    },
+    {
+      "epoch": 2.1334792122538295,
+      "grad_norm": 0.11470862478017807,
+      "learning_rate": 6.729320366825784e-05,
+      "loss": 10.2428,
+      "step": 60
+    },
+    {
+      "epoch": 2.238512035010941,
+      "grad_norm": 0.0803636685013771,
+      "learning_rate": 5.417734782725896e-05,
+      "loss": 10.1202,
+      "step": 63
+    },
+    {
+      "epoch": 2.273522975929978,
+      "eval_loss": 10.290419578552246,
+      "eval_runtime": 0.7218,
+      "eval_samples_per_second": 134.394,
+      "eval_steps_per_second": 67.89,
+      "step": 64
     }
   ],
   "logging_steps": 3,
@@ -216,7 +245,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 24030303485952.0,
+  "total_flos": 27451748253696.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null