Training in progress, step 3000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 653434568
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7269ef19389d0dabe344eecadf2f972b366ebd89f995c229031ba1a7c0136bcf
|
3 |
size 653434568
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1288533754
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2708ab75f4e02836f24126a1366e7f282df027f699dacde707967213963694b
|
3 |
size 1288533754
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e4772898ab80a86c6bfdd33cc0ed48892e379bff8e26992078eed7695e28768d
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1256
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd6f337e19410a5cf93f3278d2c00428279e72ac3740a98521bd0469626afca4
|
3 |
size 1256
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 1.
|
3 |
-
"best_model_checkpoint": "./output/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 150,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2154,6 +2154,119 @@
|
|
2154 |
"eval_samples_per_second": 8.428,
|
2155 |
"eval_steps_per_second": 8.428,
|
2156 |
"step": 2850
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2157 |
}
|
2158 |
],
|
2159 |
"logging_steps": 10,
|
@@ -2173,7 +2286,7 @@
|
|
2173 |
"attributes": {}
|
2174 |
}
|
2175 |
},
|
2176 |
-
"total_flos":
|
2177 |
"train_batch_size": 8,
|
2178 |
"trial_name": null,
|
2179 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 1.5497733354568481,
|
3 |
+
"best_model_checkpoint": "./output/checkpoint-3000",
|
4 |
+
"epoch": 0.26600461074658627,
|
5 |
"eval_steps": 150,
|
6 |
+
"global_step": 3000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2154 |
"eval_samples_per_second": 8.428,
|
2155 |
"eval_steps_per_second": 8.428,
|
2156 |
"step": 2850
|
2157 |
+
},
|
2158 |
+
{
|
2159 |
+
"epoch": 0.25359106224507894,
|
2160 |
+
"grad_norm": 7.351998329162598,
|
2161 |
+
"learning_rate": 4.012760807985666e-05,
|
2162 |
+
"loss": 1.5118,
|
2163 |
+
"step": 2860
|
2164 |
+
},
|
2165 |
+
{
|
2166 |
+
"epoch": 0.25447774428090086,
|
2167 |
+
"grad_norm": 9.371225357055664,
|
2168 |
+
"learning_rate": 3.9813553410146234e-05,
|
2169 |
+
"loss": 1.5299,
|
2170 |
+
"step": 2870
|
2171 |
+
},
|
2172 |
+
{
|
2173 |
+
"epoch": 0.25536442631672285,
|
2174 |
+
"grad_norm": 7.500007152557373,
|
2175 |
+
"learning_rate": 3.949991746535754e-05,
|
2176 |
+
"loss": 1.5855,
|
2177 |
+
"step": 2880
|
2178 |
+
},
|
2179 |
+
{
|
2180 |
+
"epoch": 0.2562511083525448,
|
2181 |
+
"grad_norm": 7.6006903648376465,
|
2182 |
+
"learning_rate": 3.918671313783584e-05,
|
2183 |
+
"loss": 1.5459,
|
2184 |
+
"step": 2890
|
2185 |
+
},
|
2186 |
+
{
|
2187 |
+
"epoch": 0.2571377903883667,
|
2188 |
+
"grad_norm": 6.81592321395874,
|
2189 |
+
"learning_rate": 3.8873953302184295e-05,
|
2190 |
+
"loss": 1.361,
|
2191 |
+
"step": 2900
|
2192 |
+
},
|
2193 |
+
{
|
2194 |
+
"epoch": 0.2580244724241887,
|
2195 |
+
"grad_norm": 6.851174831390381,
|
2196 |
+
"learning_rate": 3.856165081473475e-05,
|
2197 |
+
"loss": 1.2751,
|
2198 |
+
"step": 2910
|
2199 |
+
},
|
2200 |
+
{
|
2201 |
+
"epoch": 0.2589111544600106,
|
2202 |
+
"grad_norm": 8.746306419372559,
|
2203 |
+
"learning_rate": 3.824981851301925e-05,
|
2204 |
+
"loss": 1.3964,
|
2205 |
+
"step": 2920
|
2206 |
+
},
|
2207 |
+
{
|
2208 |
+
"epoch": 0.2597978364958326,
|
2209 |
+
"grad_norm": 8.92397689819336,
|
2210 |
+
"learning_rate": 3.7938469215242386e-05,
|
2211 |
+
"loss": 1.5833,
|
2212 |
+
"step": 2930
|
2213 |
+
},
|
2214 |
+
{
|
2215 |
+
"epoch": 0.26068451853165453,
|
2216 |
+
"grad_norm": 12.532337188720703,
|
2217 |
+
"learning_rate": 3.762761571975431e-05,
|
2218 |
+
"loss": 1.754,
|
2219 |
+
"step": 2940
|
2220 |
+
},
|
2221 |
+
{
|
2222 |
+
"epoch": 0.2615712005674765,
|
2223 |
+
"grad_norm": 7.304866313934326,
|
2224 |
+
"learning_rate": 3.731727080452465e-05,
|
2225 |
+
"loss": 1.5328,
|
2226 |
+
"step": 2950
|
2227 |
+
},
|
2228 |
+
{
|
2229 |
+
"epoch": 0.26245788260329844,
|
2230 |
+
"grad_norm": 7.864557266235352,
|
2231 |
+
"learning_rate": 3.700744722661737e-05,
|
2232 |
+
"loss": 1.5286,
|
2233 |
+
"step": 2960
|
2234 |
+
},
|
2235 |
+
{
|
2236 |
+
"epoch": 0.2633445646391204,
|
2237 |
+
"grad_norm": 6.201906204223633,
|
2238 |
+
"learning_rate": 3.669815772166626e-05,
|
2239 |
+
"loss": 1.5775,
|
2240 |
+
"step": 2970
|
2241 |
+
},
|
2242 |
+
{
|
2243 |
+
"epoch": 0.26423124667494235,
|
2244 |
+
"grad_norm": 8.181777954101562,
|
2245 |
+
"learning_rate": 3.6389415003351454e-05,
|
2246 |
+
"loss": 1.6203,
|
2247 |
+
"step": 2980
|
2248 |
+
},
|
2249 |
+
{
|
2250 |
+
"epoch": 0.26511792871076434,
|
2251 |
+
"grad_norm": 8.13985824584961,
|
2252 |
+
"learning_rate": 3.608123176287686e-05,
|
2253 |
+
"loss": 1.4212,
|
2254 |
+
"step": 2990
|
2255 |
+
},
|
2256 |
+
{
|
2257 |
+
"epoch": 0.26600461074658627,
|
2258 |
+
"grad_norm": 7.873915672302246,
|
2259 |
+
"learning_rate": 3.577362066844839e-05,
|
2260 |
+
"loss": 1.4327,
|
2261 |
+
"step": 3000
|
2262 |
+
},
|
2263 |
+
{
|
2264 |
+
"epoch": 0.26600461074658627,
|
2265 |
+
"eval_loss": 1.5497733354568481,
|
2266 |
+
"eval_runtime": 59.3515,
|
2267 |
+
"eval_samples_per_second": 8.424,
|
2268 |
+
"eval_steps_per_second": 8.424,
|
2269 |
+
"step": 3000
|
2270 |
}
|
2271 |
],
|
2272 |
"logging_steps": 10,
|
|
|
2286 |
"attributes": {}
|
2287 |
}
|
2288 |
},
|
2289 |
+
"total_flos": 1.0103518774980772e+18,
|
2290 |
"train_batch_size": 8,
|
2291 |
"trial_name": null,
|
2292 |
"trial_params": null
|