alicegoesdown committed (verified)
Commit: c842c9e
Parent: d9c15fc

Training in progress, step 3000, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c17afe8aa543f5af10efb74fc10799331d25a8f627686d95c8a71afc3cbe5c6c
+ oid sha256:7269ef19389d0dabe344eecadf2f972b366ebd89f995c229031ba1a7c0136bcf
  size 653434568
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f01cfac957f228d5650c7481df5c462565f2a7a2e58978cdd5dc28d2358d55f9
+ oid sha256:c2708ab75f4e02836f24126a1366e7f282df027f699dacde707967213963694b
  size 1288533754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:49bafec96b9a846e62e9e8699c2d75ea8c8c9553d8e2c96fc7f4442a85631bec
+ oid sha256:e4772898ab80a86c6bfdd33cc0ed48892e379bff8e26992078eed7695e28768d
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8ab1810ab1c70a1ca84d15fe5a2260ec41b4353db69d8cddb555e23347249850
+ oid sha256:fd6f337e19410a5cf93f3278d2c00428279e72ac3740a98521bd0469626afca4
  size 1256
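
All four files above are Git LFS pointers rather than the binaries themselves: each change swaps in the sha256 oid of the step-3000 artifact while the recorded byte size stays the same. As a minimal sketch (not part of this commit; the helper name is illustrative and the paths assume the blobs were pulled locally), a downloaded blob can be checked against its pointer like so:

import hashlib

def verify_lfs_blob(blob_path, expected_oid, expected_size):
    # Hash the local file in chunks and compare with the pointer's oid and size.
    digest = hashlib.sha256()
    size = 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_oid and size == expected_size

# e.g. the new adapter weights from this commit:
# verify_lfs_blob("last-checkpoint/adapter_model.safetensors",
#                 "7269ef19389d0dabe344eecadf2f972b366ebd89f995c229031ba1a7c0136bcf",
#                 653434568)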
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 1.5608752965927124,
- "best_model_checkpoint": "./output/checkpoint-2850",
- "epoch": 0.25270438020925695,
+ "best_metric": 1.5497733354568481,
+ "best_model_checkpoint": "./output/checkpoint-3000",
+ "epoch": 0.26600461074658627,
  "eval_steps": 150,
- "global_step": 2850,
+ "global_step": 3000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -2154,6 +2154,119 @@
  "eval_samples_per_second": 8.428,
  "eval_steps_per_second": 8.428,
  "step": 2850
+ },
+ {
+ "epoch": 0.25359106224507894,
+ "grad_norm": 7.351998329162598,
+ "learning_rate": 4.012760807985666e-05,
+ "loss": 1.5118,
+ "step": 2860
+ },
+ {
+ "epoch": 0.25447774428090086,
+ "grad_norm": 9.371225357055664,
+ "learning_rate": 3.9813553410146234e-05,
+ "loss": 1.5299,
+ "step": 2870
+ },
+ {
+ "epoch": 0.25536442631672285,
+ "grad_norm": 7.500007152557373,
+ "learning_rate": 3.949991746535754e-05,
+ "loss": 1.5855,
+ "step": 2880
+ },
+ {
+ "epoch": 0.2562511083525448,
+ "grad_norm": 7.6006903648376465,
+ "learning_rate": 3.918671313783584e-05,
+ "loss": 1.5459,
+ "step": 2890
+ },
+ {
+ "epoch": 0.2571377903883667,
+ "grad_norm": 6.81592321395874,
+ "learning_rate": 3.8873953302184295e-05,
+ "loss": 1.361,
+ "step": 2900
+ },
+ {
+ "epoch": 0.2580244724241887,
+ "grad_norm": 6.851174831390381,
+ "learning_rate": 3.856165081473475e-05,
+ "loss": 1.2751,
+ "step": 2910
+ },
+ {
+ "epoch": 0.2589111544600106,
+ "grad_norm": 8.746306419372559,
+ "learning_rate": 3.824981851301925e-05,
+ "loss": 1.3964,
+ "step": 2920
+ },
+ {
+ "epoch": 0.2597978364958326,
+ "grad_norm": 8.92397689819336,
+ "learning_rate": 3.7938469215242386e-05,
+ "loss": 1.5833,
+ "step": 2930
+ },
+ {
+ "epoch": 0.26068451853165453,
+ "grad_norm": 12.532337188720703,
+ "learning_rate": 3.762761571975431e-05,
+ "loss": 1.754,
+ "step": 2940
+ },
+ {
+ "epoch": 0.2615712005674765,
+ "grad_norm": 7.304866313934326,
+ "learning_rate": 3.731727080452465e-05,
+ "loss": 1.5328,
+ "step": 2950
+ },
+ {
+ "epoch": 0.26245788260329844,
+ "grad_norm": 7.864557266235352,
+ "learning_rate": 3.700744722661737e-05,
+ "loss": 1.5286,
+ "step": 2960
+ },
+ {
+ "epoch": 0.2633445646391204,
+ "grad_norm": 6.201906204223633,
+ "learning_rate": 3.669815772166626e-05,
+ "loss": 1.5775,
+ "step": 2970
+ },
+ {
+ "epoch": 0.26423124667494235,
+ "grad_norm": 8.181777954101562,
+ "learning_rate": 3.6389415003351454e-05,
+ "loss": 1.6203,
+ "step": 2980
+ },
+ {
+ "epoch": 0.26511792871076434,
+ "grad_norm": 8.13985824584961,
+ "learning_rate": 3.608123176287686e-05,
+ "loss": 1.4212,
+ "step": 2990
+ },
+ {
+ "epoch": 0.26600461074658627,
+ "grad_norm": 7.873915672302246,
+ "learning_rate": 3.577362066844839e-05,
+ "loss": 1.4327,
+ "step": 3000
+ },
+ {
+ "epoch": 0.26600461074658627,
+ "eval_loss": 1.5497733354568481,
+ "eval_runtime": 59.3515,
+ "eval_samples_per_second": 8.424,
+ "eval_steps_per_second": 8.424,
+ "step": 3000
  }
  ],
  "logging_steps": 10,
@@ -2173,7 +2286,7 @@
  "attributes": {}
  }
  },
- "total_flos": 9.597091296097567e+17,
+ "total_flos": 1.0103518774980772e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null