Commit baaa040 (verified) · neuralwonderland committed · Parent(s): 8af3885

Training in progress, step 4350, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e6b78acdd25633c93d9a84817fabd0deb4875c5954b9c3f63ef04ead20b369ed
+oid sha256:c10e16e3ac21d2c3067ac066149d6847ada80249a0d2201ec8b0747c1cc6cf62
 size 524363632
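This diff touches a Git LFS pointer rather than the binary itself: the oid sha256 field is the SHA-256 digest of the stored file and size is its length in bytes. After downloading the actual adapter weights, a quick integrity check is to hash the local file and compare it against the pointer; the same check applies to the other LFS-tracked files below. A minimal sketch in Python, assuming the file sits at the same relative path locally:

import hashlib
from pathlib import Path

# Hypothetical local copy of the adapter weights referenced by the pointer above.
adapter_path = Path("last-checkpoint/adapter_model.safetensors")
expected_oid = "c10e16e3ac21d2c3067ac066149d6847ada80249a0d2201ec8b0747c1cc6cf62"

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    # Stream in 1 MiB chunks so a ~524 MB checkpoint never has to fit in memory.
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

print("size matches:", adapter_path.stat().st_size == 524363632)
print("oid matches:", sha256_of(adapter_path) == expected_oid)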
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1c5e89b18a6b91f7700ccc031244c743f903100ca90edb74470d2a1d7ac8bceb
-size 1049049378
+oid sha256:878ec53ac90deb1001fac14c0eae703bd28c1206475fc3b081fb43a039a2ed13
+size 1049049442
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a60ac613274fc65034ee410771d705cc6f3b3ae8982f6d7b8a50e62bf600cd66
+oid sha256:d3b9e5eea9433b8628e2a81dae4e1ee9ab1ee13a921f1ad76ef9904766aca9f5
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:820726d6ef8ac6d9f9ca34e9eb1daa2e5ba6a674a46b1cafa3773bfd40fd74af
+oid sha256:c1f3a1c2778942fd3be1a9f139839cd53a1e492e182302e5e768461dfa2919be
 size 1256
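Together with the adapter weights, optimizer.pt, scheduler.pt, and rng_state.pth carry the optimizer moments, learning-rate-scheduler state, and random-number-generator state needed to resume training exactly at step 4350; with the transformers Trainer that typically happens via trainer.train(resume_from_checkpoint="last-checkpoint"), which reloads all of them automatically. For a quick look at what those files contain, a hedged sketch (paths assume a local download; the exact dictionary layout varies with the optimizer and transformers version):

import torch

# weights_only=False because these checkpoints hold plain Python objects
# (param groups, RNG seed structures), not just tensors.
optim_state = torch.load("last-checkpoint/optimizer.pt", map_location="cpu", weights_only=False)
sched_state = torch.load("last-checkpoint/scheduler.pt", map_location="cpu", weights_only=False)
rng_state = torch.load("last-checkpoint/rng_state.pth", map_location="cpu", weights_only=False)

print(list(optim_state.keys()))   # typically ['state', 'param_groups']
print(sched_state)                # scheduler step counter and last learning rate(s)
print(type(rng_state))            # usually a dict of Python/NumPy/CPU/CUDA RNG states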
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.1952238082885742,
-  "best_model_checkpoint": "./output/checkpoint-4050",
-  "epoch": 0.1881298992161254,
+  "best_metric": 1.1951868534088135,
+  "best_model_checkpoint": "./output/checkpoint-4350",
+  "epoch": 0.1948488241881299,
   "eval_steps": 150,
-  "global_step": 4200,
+  "global_step": 4350,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3171,6 +3171,119 @@
       "eval_samples_per_second": 9.682,
       "eval_steps_per_second": 9.682,
       "step": 4200
+    },
+    {
+      "epoch": 0.1885778275475924,
+      "grad_norm": 4.277423858642578,
+      "learning_rate": 4.7082387189226646e-07,
+      "loss": 1.0834,
+      "step": 4210
+    },
+    {
+      "epoch": 0.18902575587905934,
+      "grad_norm": 3.7345645427703857,
+      "learning_rate": 4.5922767833421454e-07,
+      "loss": 1.255,
+      "step": 4220
+    },
+    {
+      "epoch": 0.18947368421052632,
+      "grad_norm": 5.163575172424316,
+      "learning_rate": 4.477667555372326e-07,
+      "loss": 1.1317,
+      "step": 4230
+    },
+    {
+      "epoch": 0.18992161254199327,
+      "grad_norm": 5.2220892906188965,
+      "learning_rate": 4.364415746149678e-07,
+      "loss": 1.0966,
+      "step": 4240
+    },
+    {
+      "epoch": 0.19036954087346025,
+      "grad_norm": 5.796306610107422,
+      "learning_rate": 4.2525260110124964e-07,
+      "loss": 1.0268,
+      "step": 4250
+    },
+    {
+      "epoch": 0.1908174692049272,
+      "grad_norm": 4.295403003692627,
+      "learning_rate": 4.1420029493095623e-07,
+      "loss": 1.0465,
+      "step": 4260
+    },
+    {
+      "epoch": 0.19126539753639418,
+      "grad_norm": 5.671868324279785,
+      "learning_rate": 4.032851104211036e-07,
+      "loss": 1.2124,
+      "step": 4270
+    },
+    {
+      "epoch": 0.19171332586786113,
+      "grad_norm": 4.053644180297852,
+      "learning_rate": 3.925074962521762e-07,
+      "loss": 1.0574,
+      "step": 4280
+    },
+    {
+      "epoch": 0.1921612541993281,
+      "grad_norm": 3.7694053649902344,
+      "learning_rate": 3.818678954496787e-07,
+      "loss": 1.0604,
+      "step": 4290
+    },
+    {
+      "epoch": 0.19260918253079506,
+      "grad_norm": 4.982527256011963,
+      "learning_rate": 3.713667453659287e-07,
+      "loss": 1.1518,
+      "step": 4300
+    },
+    {
+      "epoch": 0.19305711086226204,
+      "grad_norm": 5.036848545074463,
+      "learning_rate": 3.6100447766207473e-07,
+      "loss": 1.0251,
+      "step": 4310
+    },
+    {
+      "epoch": 0.193505039193729,
+      "grad_norm": 5.744006633758545,
+      "learning_rate": 3.5078151829035693e-07,
+      "loss": 1.0103,
+      "step": 4320
+    },
+    {
+      "epoch": 0.19395296752519597,
+      "grad_norm": 3.843419075012207,
+      "learning_rate": 3.4069828747659405e-07,
+      "loss": 1.0053,
+      "step": 4330
+    },
+    {
+      "epoch": 0.19440089585666293,
+      "grad_norm": 4.357511043548584,
+      "learning_rate": 3.3075519970291144e-07,
+      "loss": 1.202,
+      "step": 4340
+    },
+    {
+      "epoch": 0.1948488241881299,
+      "grad_norm": 6.164062976837158,
+      "learning_rate": 3.209526636907036e-07,
+      "loss": 1.1136,
+      "step": 4350
+    },
+    {
+      "epoch": 0.1948488241881299,
+      "eval_loss": 1.1951868534088135,
+      "eval_runtime": 51.6432,
+      "eval_samples_per_second": 9.682,
+      "eval_steps_per_second": 9.682,
+      "step": 4350
     }
   ],
   "logging_steps": 10,
@@ -3190,7 +3303,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.412158843609088e+17,
+  "total_flos": 5.6157580670976e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null