step 10800

Files changed:
- model.safetensors  +1 -1
- trainer_state.json  +600 -4
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c4cb33bf85bff9330bbdf1b1dc24b34597251a190543a6a00cf8fe1845f36985
 size 352324400
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.
-  "best_model_checkpoint": "checkpoints/BEE-spoke-data-bert-plus-L8-v1.0-allNLI_matryoshka-synthetic-text-similarity-Mar-07_22-56/checkpoint-
-  "epoch": 0.
+  "best_metric": 0.9642074088296352,
+  "best_model_checkpoint": "checkpoints/BEE-spoke-data-bert-plus-L8-v1.0-allNLI_matryoshka-synthetic-text-similarity-Mar-07_22-56/checkpoint-10800",
+  "epoch": 0.8683417085427135,
   "eval_steps": 300,
-  "global_step":
+  "global_step": 10800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -15179,6 +15179,602 @@
       "learning_rate": 3.6397748592870546e-06,
       "loss": 0.0043,
       "step": 10400
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 0.4659731984138489,
+      "learning_rate": 3.630840704011436e-06,
+      "loss": 0.0064,
+      "step": 10405
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 0.8237358331680298,
+      "learning_rate": 3.6219065487358176e-06,
+      "loss": 0.0089,
+      "step": 10410
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 0.2651444971561432,
+      "learning_rate": 3.6129723934601986e-06,
+      "loss": 0.0088,
+      "step": 10415
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 0.2811392545700073,
+      "learning_rate": 3.60403823818458e-06,
+      "loss": 0.0045,
+      "step": 10420
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 0.26526081562042236,
+      "learning_rate": 3.595104082908961e-06,
+      "loss": 0.0077,
+      "step": 10425
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 0.31172770261764526,
+      "learning_rate": 3.586169927633343e-06,
+      "loss": 0.0066,
+      "step": 10430
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 0.24446207284927368,
+      "learning_rate": 3.577235772357724e-06,
+      "loss": 0.0095,
+      "step": 10435
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 0.36921826004981995,
+      "learning_rate": 3.568301617082105e-06,
+      "loss": 0.0092,
+      "step": 10440
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 0.37674766778945923,
+      "learning_rate": 3.5593674618064865e-06,
+      "loss": 0.0068,
+      "step": 10445
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 0.3865291178226471,
+      "learning_rate": 3.5504333065308676e-06,
+      "loss": 0.0101,
+      "step": 10450
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 0.27692753076553345,
+      "learning_rate": 3.5414991512552494e-06,
+      "loss": 0.0118,
+      "step": 10455
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 0.3851732611656189,
+      "learning_rate": 3.5325649959796305e-06,
+      "loss": 0.0064,
+      "step": 10460
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 0.3045642673969269,
+      "learning_rate": 3.5236308407040115e-06,
+      "loss": 0.006,
+      "step": 10465
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 0.33417442440986633,
+      "learning_rate": 3.514696685428393e-06,
+      "loss": 0.0053,
+      "step": 10470
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 0.25378555059432983,
+      "learning_rate": 3.505762530152774e-06,
+      "loss": 0.0054,
+      "step": 10475
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 0.30482515692710876,
+      "learning_rate": 3.496828374877156e-06,
+      "loss": 0.006,
+      "step": 10480
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 0.3786448836326599,
+      "learning_rate": 3.487894219601537e-06,
+      "loss": 0.0069,
+      "step": 10485
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 0.4568743407726288,
+      "learning_rate": 3.478960064325918e-06,
+      "loss": 0.007,
+      "step": 10490
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 0.4057961702346802,
+      "learning_rate": 3.4700259090503e-06,
+      "loss": 0.0072,
+      "step": 10495
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 0.4396592080593109,
+      "learning_rate": 3.461091753774681e-06,
+      "loss": 0.0068,
+      "step": 10500
+    },
+    {
+      "epoch": 0.84,
+      "eval_loss": 0.0074067204259335995,
+      "eval_pearson_cosine": 0.9673687223458171,
+      "eval_pearson_dot": 0.9653915362220337,
+      "eval_pearson_euclidean": 0.9564555475438925,
+      "eval_pearson_manhattan": 0.9551556309301102,
+      "eval_pearson_max": 0.9673687223458171,
+      "eval_runtime": 426.0441,
+      "eval_samples_per_second": 1.174,
+      "eval_spearman_cosine": 0.9609645158580634,
+      "eval_spearman_dot": 0.955628814515258,
+      "eval_spearman_euclidean": 0.9613639734558937,
+      "eval_spearman_manhattan": 0.9601420805683222,
+      "eval_spearman_max": 0.9613639734558937,
+      "eval_steps_per_second": 1.174,
+      "step": 10500
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 0.33185890316963196,
+      "learning_rate": 3.4521575984990624e-06,
+      "loss": 0.0108,
+      "step": 10505
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 0.42153021693229675,
+      "learning_rate": 3.4432234432234434e-06,
+      "loss": 0.0111,
+      "step": 10510
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 0.3933052122592926,
+      "learning_rate": 3.4342892879478245e-06,
+      "loss": 0.0053,
+      "step": 10515
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 0.37213289737701416,
+      "learning_rate": 3.4253551326722063e-06,
+      "loss": 0.0052,
+      "step": 10520
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 0.3832128643989563,
+      "learning_rate": 3.4164209773965874e-06,
+      "loss": 0.0083,
+      "step": 10525
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 0.2269567996263504,
+      "learning_rate": 3.407486822120969e-06,
+      "loss": 0.0046,
+      "step": 10530
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 0.3588921129703522,
+      "learning_rate": 3.39855266684535e-06,
+      "loss": 0.0085,
+      "step": 10535
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 0.2792292833328247,
+      "learning_rate": 3.389618511569731e-06,
+      "loss": 0.0064,
+      "step": 10540
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 0.30247360467910767,
+      "learning_rate": 3.380684356294113e-06,
+      "loss": 0.009,
+      "step": 10545
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 0.33265256881713867,
+      "learning_rate": 3.371750201018494e-06,
+      "loss": 0.0087,
+      "step": 10550
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 0.41412991285324097,
+      "learning_rate": 3.3628160457428753e-06,
+      "loss": 0.0059,
+      "step": 10555
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 0.31760454177856445,
+      "learning_rate": 3.3538818904672563e-06,
+      "loss": 0.0062,
+      "step": 10560
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 0.3177451193332672,
+      "learning_rate": 3.3449477351916382e-06,
+      "loss": 0.0072,
+      "step": 10565
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 0.45792147517204285,
+      "learning_rate": 3.3360135799160193e-06,
+      "loss": 0.0075,
+      "step": 10570
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 0.29566365480422974,
+      "learning_rate": 3.3270794246404003e-06,
+      "loss": 0.0059,
+      "step": 10575
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 0.32841619849205017,
+      "learning_rate": 3.318145269364782e-06,
+      "loss": 0.0108,
+      "step": 10580
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 0.341621458530426,
+      "learning_rate": 3.3092111140891632e-06,
+      "loss": 0.0128,
+      "step": 10585
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 0.423700213432312,
+      "learning_rate": 3.3002769588135447e-06,
+      "loss": 0.0075,
+      "step": 10590
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 0.3334985673427582,
+      "learning_rate": 3.2913428035379257e-06,
+      "loss": 0.0085,
+      "step": 10595
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 0.4427613317966461,
+      "learning_rate": 3.2824086482623068e-06,
+      "loss": 0.0098,
+      "step": 10600
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 0.510867714881897,
+      "learning_rate": 3.2734744929866887e-06,
+      "loss": 0.007,
+      "step": 10605
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 0.2945081293582916,
+      "learning_rate": 3.2645403377110697e-06,
+      "loss": 0.0095,
+      "step": 10610
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 0.3742575943470001,
+      "learning_rate": 3.255606182435451e-06,
+      "loss": 0.0052,
+      "step": 10615
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 0.3119674026966095,
+      "learning_rate": 3.246672027159832e-06,
+      "loss": 0.009,
+      "step": 10620
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 0.2969549894332886,
+      "learning_rate": 3.2377378718842132e-06,
+      "loss": 0.007,
+      "step": 10625
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 0.3154788315296173,
+      "learning_rate": 3.228803716608595e-06,
+      "loss": 0.0083,
+      "step": 10630
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 0.33745089173316956,
+      "learning_rate": 3.219869561332976e-06,
+      "loss": 0.0056,
+      "step": 10635
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 0.36266443133354187,
+      "learning_rate": 3.2109354060573576e-06,
+      "loss": 0.0087,
+      "step": 10640
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 0.40266337990760803,
+      "learning_rate": 3.2020012507817387e-06,
+      "loss": 0.0055,
+      "step": 10645
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 0.3595188558101654,
+      "learning_rate": 3.19306709550612e-06,
+      "loss": 0.0199,
+      "step": 10650
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 0.3521510064601898,
+      "learning_rate": 3.1841329402305016e-06,
+      "loss": 0.0082,
+      "step": 10655
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 0.3168518841266632,
+      "learning_rate": 3.1751987849548826e-06,
+      "loss": 0.009,
+      "step": 10660
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 0.4278966188430786,
+      "learning_rate": 3.166264629679264e-06,
+      "loss": 0.0075,
+      "step": 10665
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 0.379189670085907,
+      "learning_rate": 3.1573304744036455e-06,
+      "loss": 0.008,
+      "step": 10670
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 0.361432820558548,
+      "learning_rate": 3.1483963191280266e-06,
+      "loss": 0.0069,
+      "step": 10675
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 0.20192596316337585,
+      "learning_rate": 3.139462163852408e-06,
+      "loss": 0.0049,
+      "step": 10680
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 0.4057570695877075,
+      "learning_rate": 3.130528008576789e-06,
+      "loss": 0.0058,
+      "step": 10685
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 0.2621855139732361,
+      "learning_rate": 3.121593853301171e-06,
+      "loss": 0.007,
+      "step": 10690
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 0.4035142958164215,
+      "learning_rate": 3.112659698025552e-06,
+      "loss": 0.0077,
+      "step": 10695
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 0.35792276263237,
+      "learning_rate": 3.103725542749933e-06,
+      "loss": 0.0127,
+      "step": 10700
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 0.48556092381477356,
+      "learning_rate": 3.0947913874743145e-06,
+      "loss": 0.008,
+      "step": 10705
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 0.33445674180984497,
+      "learning_rate": 3.0858572321986955e-06,
+      "loss": 0.0059,
+      "step": 10710
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 0.4826265871524811,
+      "learning_rate": 3.0769230769230774e-06,
+      "loss": 0.0089,
+      "step": 10715
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 0.42223629355430603,
+      "learning_rate": 3.0679889216474585e-06,
+      "loss": 0.0066,
+      "step": 10720
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 0.45981764793395996,
+      "learning_rate": 3.05905476637184e-06,
+      "loss": 0.0085,
+      "step": 10725
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 0.2595252990722656,
+      "learning_rate": 3.050120611096221e-06,
+      "loss": 0.0063,
+      "step": 10730
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 0.5080291032791138,
+      "learning_rate": 3.0411864558206024e-06,
+      "loss": 0.0094,
+      "step": 10735
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 0.32294949889183044,
+      "learning_rate": 3.032252300544984e-06,
+      "loss": 0.0058,
+      "step": 10740
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 0.4228246808052063,
+      "learning_rate": 3.023318145269365e-06,
+      "loss": 0.0059,
+      "step": 10745
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 0.3359188139438629,
+      "learning_rate": 3.0143839899937464e-06,
+      "loss": 0.0062,
+      "step": 10750
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 0.2471199929714203,
+      "learning_rate": 3.005449834718128e-06,
+      "loss": 0.0066,
+      "step": 10755
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 0.8795719742774963,
+      "learning_rate": 2.996515679442509e-06,
+      "loss": 0.0129,
+      "step": 10760
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 1.3051950931549072,
+      "learning_rate": 2.9875815241668904e-06,
+      "loss": 0.0128,
+      "step": 10765
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 0.3568212389945984,
+      "learning_rate": 2.9786473688912714e-06,
+      "loss": 0.0061,
+      "step": 10770
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 0.3010600805282593,
+      "learning_rate": 2.9697132136156533e-06,
+      "loss": 0.0065,
+      "step": 10775
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 0.38424891233444214,
+      "learning_rate": 2.9607790583400343e-06,
+      "loss": 0.0078,
+      "step": 10780
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 0.309994101524353,
+      "learning_rate": 2.9518449030644154e-06,
+      "loss": 0.0086,
+      "step": 10785
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 0.37481045722961426,
+      "learning_rate": 2.942910747788797e-06,
+      "loss": 0.0122,
+      "step": 10790
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 0.432425856590271,
+      "learning_rate": 2.933976592513178e-06,
+      "loss": 0.0065,
+      "step": 10795
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 0.27843984961509705,
+      "learning_rate": 2.9250424372375598e-06,
+      "loss": 0.0076,
+      "step": 10800
+    },
+    {
+      "epoch": 0.87,
+      "eval_loss": 0.007469375152140856,
+      "eval_pearson_cosine": 0.9681370387245068,
+      "eval_pearson_dot": 0.9657977526847813,
+      "eval_pearson_euclidean": 0.957258084356729,
+      "eval_pearson_manhattan": 0.956023768146456,
+      "eval_pearson_max": 0.9681370387245068,
+      "eval_runtime": 426.2283,
+      "eval_samples_per_second": 1.173,
+      "eval_spearman_cosine": 0.9642074088296352,
+      "eval_spearman_dot": 0.9586846987387948,
+      "eval_spearman_euclidean": 0.9637806871227486,
+      "eval_spearman_manhattan": 0.9624153696614787,
+      "eval_spearman_max": 0.9642074088296352,
+      "eval_steps_per_second": 1.173,
+      "step": 10800
     }
   ],
   "logging_steps": 5,