neuralwonderland commited on
Commit
aaba86c
·
verified ·
1 Parent(s): 092e596

Training in progress, step 3000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3041ee336d7872f0f16235d3f8aa2ee8994195865960c90c216a2032fad6491
3
  size 524363632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a24ccb34ee670ad31c12d9251a8af3e2d476a5cb241dfcacfd09b83fd31da13
3
  size 524363632
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79516741c68ad2bd3abfbf03e3596fe2cea5d265b0ab7fa9fae53e880850771b
3
  size 1049049442
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2be2c6a4a3b357e1b920c33a1c22c1fc7950fd6a88fee358b8557a1d6be1999f
3
  size 1049049442
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e060ed0cba0b653c0ae1781d1ecb988273d6035f4be3c081c9210d24bdbfd30
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9dec7d48193c1bf07d35e7fafa4ead566a1f9c6126b97351f8b8095ac049ca3
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fb5a07357869015d5baa75c47552309f3268fa1ab75403f011820118b2afca5
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3c408f4c434a323d7fe8a30b3b55f0cf203ab417bbc4794626805f567e54301
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.2028086185455322,
3
- "best_model_checkpoint": "./output/checkpoint-2850",
4
- "epoch": 0.1276595744680851,
5
  "eval_steps": 150,
6
- "global_step": 2850,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2154,6 +2154,119 @@
2154
  "eval_samples_per_second": 9.683,
2155
  "eval_steps_per_second": 9.683,
2156
  "step": 2850
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2157
  }
2158
  ],
2159
  "logging_steps": 10,
@@ -2173,7 +2286,7 @@
2173
  "attributes": {}
2174
  }
2175
  },
2176
- "total_flos": 3.65468828746752e+17,
2177
  "train_batch_size": 4,
2178
  "trial_name": null,
2179
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.199351191520691,
3
+ "best_model_checkpoint": "./output/checkpoint-3000",
4
+ "epoch": 0.1343784994400896,
5
  "eval_steps": 150,
6
+ "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2154
  "eval_samples_per_second": 9.683,
2155
  "eval_steps_per_second": 9.683,
2156
  "step": 2850
2157
+ },
2158
+ {
2159
+ "epoch": 0.12810750279955208,
2160
+ "grad_norm": 5.54340124130249,
2161
+ "learning_rate": 3.009570605989249e-06,
2162
+ "loss": 0.999,
2163
+ "step": 2860
2164
+ },
2165
+ {
2166
+ "epoch": 0.12855543113101903,
2167
+ "grad_norm": 3.859863758087158,
2168
+ "learning_rate": 2.986016505760967e-06,
2169
+ "loss": 1.025,
2170
+ "step": 2870
2171
+ },
2172
+ {
2173
+ "epoch": 0.129003359462486,
2174
+ "grad_norm": 5.119099140167236,
2175
+ "learning_rate": 2.962493809901815e-06,
2176
+ "loss": 1.3963,
2177
+ "step": 2880
2178
+ },
2179
+ {
2180
+ "epoch": 0.12945128779395296,
2181
+ "grad_norm": 5.8379130363464355,
2182
+ "learning_rate": 2.9390034853376875e-06,
2183
+ "loss": 1.0822,
2184
+ "step": 2890
2185
+ },
2186
+ {
2187
+ "epoch": 0.12989921612541994,
2188
+ "grad_norm": 3.261016845703125,
2189
+ "learning_rate": 2.9155464976638217e-06,
2190
+ "loss": 1.0526,
2191
+ "step": 2900
2192
+ },
2193
+ {
2194
+ "epoch": 0.1303471444568869,
2195
+ "grad_norm": 3.678527355194092,
2196
+ "learning_rate": 2.8921238111051057e-06,
2197
+ "loss": 1.1167,
2198
+ "step": 2910
2199
+ },
2200
+ {
2201
+ "epoch": 0.13079507278835387,
2202
+ "grad_norm": 4.787365436553955,
2203
+ "learning_rate": 2.8687363884764434e-06,
2204
+ "loss": 1.0829,
2205
+ "step": 2920
2206
+ },
2207
+ {
2208
+ "epoch": 0.13124300111982082,
2209
+ "grad_norm": 3.475607395172119,
2210
+ "learning_rate": 2.8453851911431783e-06,
2211
+ "loss": 1.0801,
2212
+ "step": 2930
2213
+ },
2214
+ {
2215
+ "epoch": 0.1316909294512878,
2216
+ "grad_norm": 6.456125736236572,
2217
+ "learning_rate": 2.822071178981572e-06,
2218
+ "loss": 1.1287,
2219
+ "step": 2940
2220
+ },
2221
+ {
2222
+ "epoch": 0.13213885778275475,
2223
+ "grad_norm": 3.778585910797119,
2224
+ "learning_rate": 2.7987953103393484e-06,
2225
+ "loss": 1.1359,
2226
+ "step": 2950
2227
+ },
2228
+ {
2229
+ "epoch": 0.13258678611422173,
2230
+ "grad_norm": 3.37793231010437,
2231
+ "learning_rate": 2.7755585419963026e-06,
2232
+ "loss": 1.0584,
2233
+ "step": 2960
2234
+ },
2235
+ {
2236
+ "epoch": 0.13303471444568868,
2237
+ "grad_norm": 5.2485575675964355,
2238
+ "learning_rate": 2.7523618291249687e-06,
2239
+ "loss": 1.2037,
2240
+ "step": 2970
2241
+ },
2242
+ {
2243
+ "epoch": 0.13348264277715566,
2244
+ "grad_norm": 4.524936676025391,
2245
+ "learning_rate": 2.729206125251359e-06,
2246
+ "loss": 0.9778,
2247
+ "step": 2980
2248
+ },
2249
+ {
2250
+ "epoch": 0.1339305711086226,
2251
+ "grad_norm": 5.820756912231445,
2252
+ "learning_rate": 2.7060923822157638e-06,
2253
+ "loss": 1.0351,
2254
+ "step": 2990
2255
+ },
2256
+ {
2257
+ "epoch": 0.1343784994400896,
2258
+ "grad_norm": 5.031400680541992,
2259
+ "learning_rate": 2.6830215501336288e-06,
2260
+ "loss": 1.1926,
2261
+ "step": 3000
2262
+ },
2263
+ {
2264
+ "epoch": 0.1343784994400896,
2265
+ "eval_loss": 1.199351191520691,
2266
+ "eval_runtime": 51.5688,
2267
+ "eval_samples_per_second": 9.696,
2268
+ "eval_steps_per_second": 9.696,
2269
+ "step": 3000
2270
  }
2271
  ],
2272
  "logging_steps": 10,
 
2286
  "attributes": {}
2287
  }
2288
  },
2289
+ "total_flos": 3.849433190903808e+17,
2290
  "train_batch_size": 4,
2291
  "trial_name": null,
2292
  "trial_params": null