neuralwonderland committed
Commit 9efac7c · verified · 1 Parent(s): 63d111c

Training in progress, step 3000, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0627010dbcdd1bd4804a95111399ce8e7a21c77842832aaa6fdacef2dedea7fe
+oid sha256:e76345f809b53997ae5b0156376662cc932fae1628a32a6daa74b18bf353691c
 size 69527352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ec5d8c6905e253f2885c2f1916a8f76e6d4109ec1face63a4a28a3ddb2164b0a
+oid sha256:9350ab8952c9068ba9cb6662b755e3ae064e0864a10cc19275a73e3493f1d699
 size 139313554
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e8ef6d693325d1213db02dd00b4b8455f5cdb1b3db60f7539a13f7a9a5a3fbf3
+oid sha256:221fbae6356b068d1a273b00f61f9d4825a8ecf84836f89b11a087624e7b10f1
 size 14308
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:675b3a9e5fe7372c3c43243a8218917ed6d7710114a3dd64818b1e7e3d4370e0
+oid sha256:125a0482b0838ad97cbdd22589ee6289ec41c1a06bf562da8714c95f5c3581c0
 size 1256
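
Each binary above is tracked through Git LFS, so the repository stores only a three-line pointer (spec version, sha256 oid, byte size) and each diff simply swaps the oid for the step-3000 artifact. A minimal sketch, assuming the payload has already been downloaded alongside its pointer text (the file paths below are placeholders, not part of this commit), for checking that a local file matches a pointer:

```python
import hashlib
from pathlib import Path


def verify_lfs_pointer(pointer_path: str, payload_path: str) -> bool:
    """Check a downloaded payload against the oid/size recorded in its LFS pointer."""
    # Each pointer line is "key value"; parse the three lines into a dict.
    fields = dict(
        line.split(" ", 1)
        for line in Path(pointer_path).read_text().splitlines()
        if line
    )
    expected_oid = fields["oid"].split(":", 1)[1]  # strip the "sha256:" prefix
    expected_size = int(fields["size"])

    payload = Path(payload_path).read_bytes()
    return (
        hashlib.sha256(payload).hexdigest() == expected_oid
        and len(payload) == expected_size
    )


# Hypothetical usage against the adapter weights from this checkpoint:
# verify_lfs_pointer("adapter_model.safetensors.pointer",
#                    "last-checkpoint/adapter_model.safetensors")
```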
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.813827633857727,
-  "best_model_checkpoint": "./output/checkpoint-2850",
-  "epoch": 0.35430134261561413,
+  "best_metric": 0.8055340647697449,
+  "best_model_checkpoint": "./output/checkpoint-3000",
+  "epoch": 0.37294878170064644,
   "eval_steps": 150,
-  "global_step": 2850,
+  "global_step": 3000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2154,6 +2154,119 @@
       "eval_samples_per_second": 8.853,
       "eval_steps_per_second": 8.853,
       "step": 2850
+    },
+    {
+      "epoch": 0.35554450522128295,
+      "grad_norm": 1.4959031343460083,
+      "learning_rate": 5.015951009982081e-05,
+      "loss": 0.6377,
+      "step": 2860
+    },
+    {
+      "epoch": 0.35678766782695176,
+      "grad_norm": 1.524834394454956,
+      "learning_rate": 4.976694176268278e-05,
+      "loss": 0.6132,
+      "step": 2870
+    },
+    {
+      "epoch": 0.3580308304326206,
+      "grad_norm": 1.1156436204910278,
+      "learning_rate": 4.937489683169692e-05,
+      "loss": 0.5865,
+      "step": 2880
+    },
+    {
+      "epoch": 0.3592739930382894,
+      "grad_norm": 1.4648617506027222,
+      "learning_rate": 4.8983391422294786e-05,
+      "loss": 0.5489,
+      "step": 2890
+    },
+    {
+      "epoch": 0.36051715564395825,
+      "grad_norm": 0.9886593222618103,
+      "learning_rate": 4.8592441627730355e-05,
+      "loss": 0.5844,
+      "step": 2900
+    },
+    {
+      "epoch": 0.36176031824962707,
+      "grad_norm": 1.7449545860290527,
+      "learning_rate": 4.820206351841842e-05,
+      "loss": 0.617,
+      "step": 2910
+    },
+    {
+      "epoch": 0.3630034808552959,
+      "grad_norm": 1.375961184501648,
+      "learning_rate": 4.781227314127405e-05,
+      "loss": 0.6068,
+      "step": 2920
+    },
+    {
+      "epoch": 0.3642466434609647,
+      "grad_norm": 1.08450448513031,
+      "learning_rate": 4.7423086519052966e-05,
+      "loss": 0.6064,
+      "step": 2930
+    },
+    {
+      "epoch": 0.3654898060666335,
+      "grad_norm": 1.6093776226043701,
+      "learning_rate": 4.703451964969287e-05,
+      "loss": 0.6449,
+      "step": 2940
+    },
+    {
+      "epoch": 0.3667329686723023,
+      "grad_norm": 1.2619701623916626,
+      "learning_rate": 4.66465885056558e-05,
+      "loss": 0.5688,
+      "step": 2950
+    },
+    {
+      "epoch": 0.3679761312779712,
+      "grad_norm": 1.2892088890075684,
+      "learning_rate": 4.62593090332717e-05,
+      "loss": 0.5941,
+      "step": 2960
+    },
+    {
+      "epoch": 0.36921929388364,
+      "grad_norm": 1.0506901741027832,
+      "learning_rate": 4.587269715208281e-05,
+      "loss": 0.5454,
+      "step": 2970
+    },
+    {
+      "epoch": 0.3704624564893088,
+      "grad_norm": 1.6574184894561768,
+      "learning_rate": 4.5486768754189305e-05,
+      "loss": 0.5409,
+      "step": 2980
+    },
+    {
+      "epoch": 0.3717056190949776,
+      "grad_norm": 1.1367279291152954,
+      "learning_rate": 4.510153970359606e-05,
+      "loss": 0.6502,
+      "step": 2990
+    },
+    {
+      "epoch": 0.37294878170064644,
+      "grad_norm": 1.1561517715454102,
+      "learning_rate": 4.4717025835560476e-05,
+      "loss": 0.5962,
+      "step": 3000
+    },
+    {
+      "epoch": 0.37294878170064644,
+      "eval_loss": 0.8055340647697449,
+      "eval_runtime": 54.6339,
+      "eval_samples_per_second": 9.152,
+      "eval_steps_per_second": 9.152,
+      "step": 3000
     }
   ],
   "logging_steps": 10,
@@ -2173,7 +2286,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9.896048668870656e+16,
+  "total_flos": 1.0423543389447168e+17,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null