End of training
Browse files- README.md +13 -12
- all_results.json +23 -23
- eval_results.json +9 -9
- predict_results.json +8 -8
- predictions.txt +0 -0
- tb/events.out.tfevents.1725579302.2a66098fac87.9264.1 +3 -0
- train.log +48 -0
- train_results.json +7 -7
- trainer_state.json +157 -157
README.md
CHANGED
@@ -3,9 +3,10 @@ library_name: transformers
|
|
3 |
license: apache-2.0
|
4 |
base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
|
5 |
tags:
|
|
|
6 |
- generated_from_trainer
|
7 |
datasets:
|
8 |
-
- drugtemist-85-ner
|
9 |
metrics:
|
10 |
- precision
|
11 |
- recall
|
@@ -18,24 +19,24 @@ model-index:
|
|
18 |
name: Token Classification
|
19 |
type: token-classification
|
20 |
dataset:
|
21 |
-
name: drugtemist-85-ner
|
22 |
-
type: drugtemist-85-ner
|
23 |
config: DrugTEMIST NER
|
24 |
split: validation
|
25 |
args: DrugTEMIST NER
|
26 |
metrics:
|
27 |
- name: Precision
|
28 |
type: precision
|
29 |
-
value: 0.
|
30 |
- name: Recall
|
31 |
type: recall
|
32 |
-
value: 0.
|
33 |
- name: F1
|
34 |
type: f1
|
35 |
-
value: 0.
|
36 |
- name: Accuracy
|
37 |
type: accuracy
|
38 |
-
value: 0.
|
39 |
---
|
40 |
|
41 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
@@ -43,12 +44,12 @@ should probably proofread and complete it, then remove this comment. -->
|
|
43 |
|
44 |
# output
|
45 |
|
46 |
-
This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the drugtemist-85-ner dataset.
|
47 |
It achieves the following results on the evaluation set:
|
48 |
-
- Loss: 0.
|
49 |
-
- Precision: 0.
|
50 |
-
- Recall: 0.
|
51 |
-
- F1: 0.
|
52 |
- Accuracy: 0.9989
|
53 |
|
54 |
## Model description
|
|
|
3 |
license: apache-2.0
|
4 |
base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
|
5 |
tags:
|
6 |
+
- token-classification
|
7 |
- generated_from_trainer
|
8 |
datasets:
|
9 |
+
- Rodrigo1771/drugtemist-85-ner
|
10 |
metrics:
|
11 |
- precision
|
12 |
- recall
|
|
|
19 |
name: Token Classification
|
20 |
type: token-classification
|
21 |
dataset:
|
22 |
+
name: Rodrigo1771/drugtemist-85-ner
|
23 |
+
type: Rodrigo1771/drugtemist-85-ner
|
24 |
config: DrugTEMIST NER
|
25 |
split: validation
|
26 |
args: DrugTEMIST NER
|
27 |
metrics:
|
28 |
- name: Precision
|
29 |
type: precision
|
30 |
+
value: 0.9461187214611873
|
31 |
- name: Recall
|
32 |
type: recall
|
33 |
+
value: 0.9522058823529411
|
34 |
- name: F1
|
35 |
type: f1
|
36 |
+
value: 0.9491525423728814
|
37 |
- name: Accuracy
|
38 |
type: accuracy
|
39 |
+
value: 0.9989426998228679
|
40 |
---
|
41 |
|
42 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
|
|
44 |
|
45 |
# output
|
46 |
|
47 |
+
This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the Rodrigo1771/drugtemist-85-ner dataset.
|
48 |
It achieves the following results on the evaluation set:
|
49 |
+
- Loss: 0.0048
|
50 |
+
- Precision: 0.9461
|
51 |
+
- Recall: 0.9522
|
52 |
+
- F1: 0.9492
|
53 |
- Accuracy: 0.9989
|
54 |
|
55 |
## Model description
|
all_results.json
CHANGED
@@ -1,26 +1,26 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"eval_accuracy": 0.
|
4 |
-
"eval_f1": 0.
|
5 |
-
"eval_loss": 0.
|
6 |
-
"eval_precision": 0.
|
7 |
-
"eval_recall": 0.
|
8 |
-
"eval_runtime":
|
9 |
"eval_samples": 6810,
|
10 |
-
"eval_samples_per_second":
|
11 |
-
"eval_steps_per_second":
|
12 |
-
"predict_accuracy": 0.
|
13 |
-
"predict_f1": 0.
|
14 |
-
"predict_loss": 0.
|
15 |
-
"predict_precision": 0.
|
16 |
-
"predict_recall": 0.
|
17 |
-
"predict_runtime": 28.
|
18 |
-
"predict_samples_per_second":
|
19 |
-
"predict_steps_per_second":
|
20 |
-
"total_flos": 1.
|
21 |
-
"train_loss": 0.
|
22 |
-
"train_runtime":
|
23 |
-
"train_samples":
|
24 |
-
"train_samples_per_second":
|
25 |
-
"train_steps_per_second": 3.
|
26 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 10.0,
|
3 |
+
"eval_accuracy": 0.9989426998228679,
|
4 |
+
"eval_f1": 0.9491525423728814,
|
5 |
+
"eval_loss": 0.004777050111442804,
|
6 |
+
"eval_precision": 0.9461187214611873,
|
7 |
+
"eval_recall": 0.9522058823529411,
|
8 |
+
"eval_runtime": 13.9476,
|
9 |
"eval_samples": 6810,
|
10 |
+
"eval_samples_per_second": 488.256,
|
11 |
+
"eval_steps_per_second": 61.086,
|
12 |
+
"predict_accuracy": 0.9987478324070453,
|
13 |
+
"predict_f1": 0.9243073407597828,
|
14 |
+
"predict_loss": 0.005894536152482033,
|
15 |
+
"predict_precision": 0.9069506726457399,
|
16 |
+
"predict_recall": 0.9423412929528246,
|
17 |
+
"predict_runtime": 28.2179,
|
18 |
+
"predict_samples_per_second": 517.899,
|
19 |
+
"predict_steps_per_second": 64.746,
|
20 |
+
"total_flos": 1.4714840952259542e+16,
|
21 |
+
"train_loss": 0.002772659832779558,
|
22 |
+
"train_runtime": 1349.0548,
|
23 |
+
"train_samples": 29797,
|
24 |
+
"train_samples_per_second": 220.873,
|
25 |
+
"train_steps_per_second": 3.454
|
26 |
}
|
eval_results.json
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"eval_accuracy": 0.
|
4 |
-
"eval_f1": 0.
|
5 |
-
"eval_loss": 0.
|
6 |
-
"eval_precision": 0.
|
7 |
-
"eval_recall": 0.
|
8 |
-
"eval_runtime":
|
9 |
"eval_samples": 6810,
|
10 |
-
"eval_samples_per_second":
|
11 |
-
"eval_steps_per_second":
|
12 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 10.0,
|
3 |
+
"eval_accuracy": 0.9989426998228679,
|
4 |
+
"eval_f1": 0.9491525423728814,
|
5 |
+
"eval_loss": 0.004777050111442804,
|
6 |
+
"eval_precision": 0.9461187214611873,
|
7 |
+
"eval_recall": 0.9522058823529411,
|
8 |
+
"eval_runtime": 13.9476,
|
9 |
"eval_samples": 6810,
|
10 |
+
"eval_samples_per_second": 488.256,
|
11 |
+
"eval_steps_per_second": 61.086
|
12 |
}
|
predict_results.json
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
{
|
2 |
-
"predict_accuracy": 0.
|
3 |
-
"predict_f1": 0.
|
4 |
-
"predict_loss": 0.
|
5 |
-
"predict_precision": 0.
|
6 |
-
"predict_recall": 0.
|
7 |
-
"predict_runtime": 28.
|
8 |
-
"predict_samples_per_second":
|
9 |
-
"predict_steps_per_second":
|
10 |
}
|
|
|
1 |
{
|
2 |
+
"predict_accuracy": 0.9987478324070453,
|
3 |
+
"predict_f1": 0.9243073407597828,
|
4 |
+
"predict_loss": 0.005894536152482033,
|
5 |
+
"predict_precision": 0.9069506726457399,
|
6 |
+
"predict_recall": 0.9423412929528246,
|
7 |
+
"predict_runtime": 28.2179,
|
8 |
+
"predict_samples_per_second": 517.899,
|
9 |
+
"predict_steps_per_second": 64.746
|
10 |
}
|
predictions.txt
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
tb/events.out.tfevents.1725579302.2a66098fac87.9264.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2ddb06f8e19acc3cfe83ba96582312f0eda7c54cb22293014a850cd5aff12d3d
|
3 |
+
size 560
|
train.log
CHANGED
@@ -1450,3 +1450,51 @@ Training completed. Do not forget to share your model on huggingface.co/models =
|
|
1450 |
{'eval_loss': 0.005793666001409292, 'eval_precision': 0.9347826086956522, 'eval_recall': 0.9485294117647058, 'eval_f1': 0.9416058394160585, 'eval_accuracy': 0.9989083718950389, 'eval_runtime': 14.4595, 'eval_samples_per_second': 470.971, 'eval_steps_per_second': 58.923, 'epoch': 10.0}
|
1451 |
{'train_runtime': 1349.0548, 'train_samples_per_second': 220.873, 'train_steps_per_second': 3.454, 'train_loss': 0.002772659832779558, 'epoch': 10.0}
|
1452 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1453 |
0%| | 0/852 [00:00<?, ?it/s]
|
1454 |
1%| | 10/852 [00:00<00:09, 89.77it/s]
|
1455 |
2%|▏ | 19/852 [00:00<00:10, 77.96it/s]
|
1456 |
3%|▎ | 27/852 [00:00<00:10, 78.13it/s]
|
1457 |
4%|▍ | 36/852 [00:00<00:10, 78.74it/s]
|
1458 |
5%|▌ | 45/852 [00:00<00:10, 80.47it/s]
|
1459 |
6%|▋ | 54/852 [00:00<00:09, 81.97it/s]
|
1460 |
7%|▋ | 63/852 [00:00<00:09, 81.96it/s]
|
1461 |
8%|▊ | 72/852 [00:00<00:09, 80.15it/s]
|
1462 |
10%|▉ | 81/852 [00:01<00:09, 80.19it/s]
|
1463 |
11%|█ | 90/852 [00:01<00:09, 80.63it/s]
|
1464 |
12%|█▏ | 99/852 [00:01<00:09, 80.42it/s]
|
1465 |
13%|█▎ | 108/852 [00:01<00:09, 79.74it/s]
|
1466 |
14%|█▎ | 117/852 [00:01<00:09, 80.86it/s]
|
1467 |
15%|█▍ | 126/852 [00:01<00:09, 78.38it/s]
|
1468 |
16%|█▌ | 135/852 [00:01<00:09, 79.16it/s]
|
1469 |
17%|█▋ | 143/852 [00:01<00:08, 79.17it/s]
|
1470 |
18%|█▊ | 151/852 [00:01<00:08, 78.56it/s]
|
1471 |
19%|█▉ | 160/852 [00:01<00:08, 81.22it/s]
|
1472 |
20%|█▉ | 169/852 [00:02<00:08, 81.16it/s]
|
1473 |
21%|██ | 178/852 [00:02<00:08, 81.76it/s]
|
1474 |
22%|██▏ | 187/852 [00:02<00:08, 82.46it/s]
|
1475 |
23%|██▎ | 196/852 [00:02<00:07, 82.29it/s]
|
1476 |
24%|██▍ | 205/852 [00:02<00:07, 83.12it/s]
|
1477 |
25%|██▌ | 214/852 [00:02<00:07, 80.08it/s]
|
1478 |
26%|██▌ | 223/852 [00:02<00:07, 81.13it/s]
|
1479 |
27%|██▋ | 232/852 [00:02<00:07, 81.21it/s]
|
1480 |
28%|██▊ | 241/852 [00:03<00:07, 78.10it/s]
|
1481 |
29%|██▉ | 250/852 [00:03<00:07, 79.34it/s]
|
1482 |
30%|███ | 259/852 [00:03<00:07, 80.76it/s]
|
1483 |
31%|███▏ | 268/852 [00:03<00:07, 74.97it/s]
|
1484 |
33%|███▎ | 277/852 [00:03<00:07, 77.71it/s]
|
1485 |
34%|███▎ | 286/852 [00:03<00:07, 80.09it/s]
|
1486 |
35%|███▍ | 295/852 [00:03<00:06, 80.08it/s]
|
1487 |
36%|███▌ | 304/852 [00:03<00:06, 81.59it/s]
|
1488 |
37%|███▋ | 313/852 [00:03<00:06, 80.30it/s]
|
1489 |
38%|███▊ | 322/852 [00:04<00:06, 82.39it/s]
|
1490 |
39%|███▉ | 331/852 [00:04<00:06, 82.16it/s]
|
1491 |
40%|███▉ | 340/852 [00:04<00:06, 82.30it/s]
|
1492 |
41%|████ | 349/852 [00:04<00:06, 82.47it/s]
|
1493 |
42%|████▏ | 358/852 [00:04<00:06, 80.49it/s]
|
1494 |
43%|████▎ | 367/852 [00:04<00:05, 81.17it/s]
|
1495 |
44%|████▍ | 376/852 [00:04<00:05, 81.82it/s]
|
1496 |
45%|████▌ | 385/852 [00:04<00:05, 81.26it/s]
|
1497 |
46%|████▌ | 394/852 [00:04<00:05, 81.23it/s]
|
1498 |
47%|████▋ | 403/852 [00:05<00:05, 81.17it/s]
|
1499 |
48%|████▊ | 412/852 [00:05<00:05, 79.22it/s]
|
1500 |
49%|████▉ | 421/852 [00:05<00:05, 80.98it/s]
|
1501 |
50%|█████ | 430/852 [00:05<00:05, 80.11it/s]
|
1502 |
52%|█████▏ | 439/852 [00:05<00:05, 81.83it/s]
|
1503 |
53%|█████▎ | 448/852 [00:05<00:04, 82.37it/s]
|
1504 |
54%|█████▎ | 457/852 [00:05<00:04, 83.25it/s]
|
1505 |
55%|█████▍ | 466/852 [00:05<00:04, 80.61it/s]
|
1506 |
56%|█████▌ | 475/852 [00:05<00:04, 77.21it/s]
|
1507 |
57%|█████▋ | 484/852 [00:06<00:04, 77.43it/s]
|
1508 |
58%|█████▊ | 493/852 [00:06<00:04, 79.71it/s]
|
1509 |
59%|█████▉ | 502/852 [00:06<00:04, 81.52it/s]
|
1510 |
60%|█████▉ | 511/852 [00:06<00:04, 81.79it/s]
|
1511 |
61%|██████ | 520/852 [00:06<00:04, 82.93it/s]
|
1512 |
62%|██████▏ | 529/852 [00:06<00:03, 80.76it/s]
|
1513 |
63%|██████▎ | 538/852 [00:06<00:03, 82.65it/s]
|
1514 |
64%|██████▍ | 547/852 [00:06<00:03, 83.43it/s]
|
1515 |
65%|██████▌ | 556/852 [00:06<00:03, 80.56it/s]
|
1516 |
66%|██████▋ | 565/852 [00:06<00:03, 82.97it/s]
|
1517 |
67%|██████▋ | 574/852 [00:07<00:03, 83.67it/s]
|
1518 |
68%|██████▊ | 583/852 [00:07<00:03, 83.26it/s]
|
1519 |
69%|██████▉ | 592/852 [00:07<00:03, 82.65it/s]
|
1520 |
71%|███████ | 601/852 [00:07<00:03, 82.89it/s]
|
1521 |
72%|███████▏ | 610/852 [00:07<00:02, 83.17it/s]
|
1522 |
73%|███████▎ | 619/852 [00:07<00:02, 80.86it/s]
|
1523 |
74%|███████▎ | 628/852 [00:07<00:02, 80.15it/s]
|
1524 |
75%|███████▍ | 637/852 [00:07<00:02, 80.82it/s]
|
1525 |
76%|███████▌ | 646/852 [00:07<00:02, 78.59it/s]
|
1526 |
77%|███████▋ | 655/852 [00:08<00:02, 80.46it/s]
|
1527 |
78%|███████▊ | 664/852 [00:08<00:02, 81.10it/s]
|
1528 |
79%|███████▉ | 673/852 [00:08<00:02, 80.96it/s]
|
1529 |
80%|████████ | 682/852 [00:08<00:02, 81.12it/s]
|
1530 |
81%|████████ | 691/852 [00:08<00:01, 82.73it/s]
|
1531 |
82%|████████▏ | 700/852 [00:08<00:01, 83.32it/s]
|
1532 |
83%|████████▎ | 709/852 [00:08<00:01, 84.29it/s]
|
1533 |
84%|████████▍ | 718/852 [00:08<00:01, 83.48it/s]
|
1534 |
85%|████████▌ | 727/852 [00:08<00:01, 83.89it/s]
|
1535 |
86%|████████▋ | 736/852 [00:09<00:01, 83.25it/s]
|
1536 |
87%|████████▋ | 745/852 [00:09<00:01, 84.23it/s]
|
1537 |
88%|████████▊ | 754/852 [00:09<00:01, 84.42it/s]
|
1538 |
90%|████████▉ | 763/852 [00:09<00:01, 85.20it/s]
|
1539 |
91%|█████████ | 772/852 [00:09<00:00, 83.63it/s]
|
1540 |
92%|█████████▏| 781/852 [00:09<00:00, 82.15it/s]
|
1541 |
93%|█████████▎| 790/852 [00:09<00:00, 82.20it/s]
|
1542 |
94%|█████████▍| 799/852 [00:09<00:00, 82.36it/s]
|
1543 |
95%|█████████▍| 808/852 [00:09<00:00, 83.98it/s]
|
1544 |
96%|█████████▌| 817/852 [00:10<00:00, 82.39it/s]
|
1545 |
97%|█████████▋| 826/852 [00:10<00:00, 83.62it/s]
|
1546 |
98%|█████████▊| 835/852 [00:10<00:00, 83.71it/s]
|
1547 |
99%|█████████▉| 844/852 [00:10<00:00, 82.35it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1548 |
0%| | 0/1827 [00:00<?, ?it/s]
|
1549 |
1%| | 10/1827 [00:00<00:19, 90.92it/s]
|
1550 |
1%| | 20/1827 [00:00<00:22, 80.87it/s]
|
1551 |
2%|▏ | 29/1827 [00:00<00:21, 82.68it/s]
|
1552 |
2%|▏ | 38/1827 [00:00<00:21, 81.95it/s]
|
1553 |
3%|▎ | 47/1827 [00:00<00:21, 82.63it/s]
|
1554 |
3%|▎ | 56/1827 [00:00<00:21, 82.47it/s]
|
1555 |
4%|▎ | 65/1827 [00:00<00:21, 80.47it/s]
|
1556 |
4%|▍ | 74/1827 [00:00<00:21, 81.66it/s]
|
1557 |
5%|▍ | 83/1827 [00:01<00:21, 82.68it/s]
|
1558 |
5%|▌ | 92/1827 [00:01<00:21, 82.58it/s]
|
1559 |
6%|▌ | 101/1827 [00:01<00:20, 84.52it/s]
|
1560 |
6%|▌ | 110/1827 [00:01<00:20, 85.29it/s]
|
1561 |
7%|▋ | 119/1827 [00:01<00:20, 83.68it/s]
|
1562 |
7%|▋ | 128/1827 [00:01<00:20, 82.79it/s]
|
1563 |
7%|▋ | 137/1827 [00:01<00:20, 83.78it/s]
|
1564 |
8%|▊ | 146/1827 [00:01<00:19, 84.13it/s]
|
1565 |
8%|▊ | 155/1827 [00:01<00:19, 83.64it/s]
|
1566 |
9%|▉ | 164/1827 [00:01<00:20, 81.81it/s]
|
1567 |
9%|▉ | 173/1827 [00:02<00:19, 82.80it/s]
|
1568 |
10%|▉ | 182/1827 [00:02<00:19, 84.13it/s]
|
1569 |
10%|█ | 191/1827 [00:02<00:19, 84.70it/s]
|
1570 |
11%|█ | 200/1827 [00:02<00:19, 83.62it/s]
|
1571 |
11%|█▏ | 209/1827 [00:02<00:19, 84.42it/s]
|
1572 |
12%|█▏ | 218/1827 [00:02<00:19, 83.10it/s]
|
1573 |
12%|█▏ | 227/1827 [00:02<00:19, 83.75it/s]
|
1574 |
13%|█▎ | 236/1827 [00:02<00:19, 80.40it/s]
|
1575 |
13%|█▎ | 245/1827 [00:02<00:19, 81.38it/s]
|
1576 |
14%|█▍ | 254/1827 [00:03<00:19, 81.71it/s]
|
1577 |
14%|█▍ | 263/1827 [00:03<00:19, 80.90it/s]
|
1578 |
15%|█▍ | 272/1827 [00:03<00:18, 83.07it/s]
|
1579 |
15%|█▌ | 281/1827 [00:03<00:18, 84.44it/s]
|
1580 |
16%|█▌ | 290/1827 [00:03<00:18, 83.94it/s]
|
1581 |
16%|█▋ | 299/1827 [00:03<00:18, 84.45it/s]
|
1582 |
17%|█▋ | 308/1827 [00:03<00:18, 83.65it/s]
|
1583 |
17%|█▋ | 317/1827 [00:03<00:18, 83.14it/s]
|
1584 |
18%|█▊ | 326/1827 [00:03<00:17, 84.05it/s]
|
1585 |
18%|█▊ | 335/1827 [00:04<00:17, 83.67it/s]
|
1586 |
19%|█▉ | 344/1827 [00:04<00:17, 84.53it/s]
|
1587 |
19%|█▉ | 353/1827 [00:04<00:18, 79.63it/s]
|
1588 |
20%|█▉ | 362/1827 [00:04<00:18, 80.95it/s]
|
1589 |
20%|██ | 371/1827 [00:04<00:17, 82.08it/s]
|
1590 |
21%|██ | 380/1827 [00:04<00:17, 83.24it/s]
|
1591 |
21%|██▏ | 389/1827 [00:04<00:17, 82.54it/s]
|
1592 |
22%|██▏ | 398/1827 [00:04<00:17, 81.91it/s]
|
1593 |
22%|██▏ | 407/1827 [00:04<00:17, 83.46it/s]
|
1594 |
23%|██▎ | 416/1827 [00:05<00:17, 81.20it/s]
|
1595 |
23%|██▎ | 425/1827 [00:05<00:17, 82.39it/s]
|
1596 |
24%|██▍ | 434/1827 [00:05<00:17, 81.62it/s]
|
1597 |
24%|██▍ | 443/1827 [00:05<00:17, 80.18it/s]
|
1598 |
25%|██▍ | 452/1827 [00:05<00:16, 82.39it/s]
|
1599 |
25%|██▌ | 461/1827 [00:05<00:16, 83.63it/s]
|
1600 |
26%|██▌ | 470/1827 [00:05<00:16, 83.17it/s]
|
1601 |
26%|██▌ | 479/1827 [00:05<00:16, 81.50it/s]
|
1602 |
27%|██▋ | 488/1827 [00:05<00:16, 81.09it/s]
|
1603 |
27%|██▋ | 497/1827 [00:06<00:16, 80.25it/s]
|
1604 |
28%|██▊ | 506/1827 [00:06<00:16, 81.31it/s]
|
1605 |
28%|██▊ | 515/1827 [00:06<00:15, 82.50it/s]
|
1606 |
29%|██▊ | 524/1827 [00:06<00:15, 84.16it/s]
|
1607 |
29%|██▉ | 533/1827 [00:06<00:15, 83.22it/s]
|
1608 |
30%|██▉ | 542/1827 [00:06<00:15, 84.11it/s]
|
1609 |
30%|███ | 551/1827 [00:06<00:15, 83.08it/s]
|
1610 |
31%|███ | 560/1827 [00:06<00:15, 82.78it/s]
|
1611 |
31%|███ | 569/1827 [00:06<00:15, 83.19it/s]
|
1612 |
32%|███▏ | 578/1827 [00:06<00:15, 82.89it/s]
|
1613 |
32%|███▏ | 587/1827 [00:07<00:14, 82.68it/s]
|
1614 |
33%|███▎ | 596/1827 [00:07<00:14, 82.70it/s]
|
1615 |
33%|███▎ | 605/1827 [00:07<00:14, 82.71it/s]
|
1616 |
34%|███▎ | 614/1827 [00:07<00:14, 82.53it/s]
|
1617 |
34%|███▍ | 623/1827 [00:07<00:14, 83.35it/s]
|
1618 |
35%|███▍ | 632/1827 [00:07<00:14, 83.93it/s]
|
1619 |
35%|███▌ | 641/1827 [00:07<00:14, 84.33it/s]
|
1620 |
36%|███▌ | 650/1827 [00:07<00:14, 79.40it/s]
|
1621 |
36%|███▌ | 659/1827 [00:07<00:14, 79.16it/s]
|
1622 |
37%|███▋ | 668/1827 [00:08<00:14, 80.89it/s]
|
1623 |
37%|███▋ | 677/1827 [00:08<00:14, 78.78it/s]
|
1624 |
38%|███▊ | 686/1827 [00:08<00:14, 80.15it/s]
|
1625 |
38%|███▊ | 695/1827 [00:08<00:13, 81.68it/s]
|
1626 |
39%|███▊ | 704/1827 [00:08<00:14, 78.28it/s]
|
1627 |
39%|███▉ | 713/1827 [00:08<00:13, 80.50it/s]
|
1628 |
40%|███▉ | 722/1827 [00:08<00:13, 82.68it/s]
|
1629 |
40%|████ | 731/1827 [00:08<00:13, 83.49it/s]
|
1630 |
41%|████ | 740/1827 [00:08<00:13, 82.99it/s]
|
1631 |
41%|████ | 749/1827 [00:09<00:12, 84.43it/s]
|
1632 |
41%|████▏ | 758/1827 [00:09<00:12, 84.86it/s]
|
1633 |
42%|████▏ | 767/1827 [00:09<00:12, 85.25it/s]
|
1634 |
42%|████▏ | 776/1827 [00:09<00:12, 85.62it/s]
|
1635 |
43%|████▎ | 785/1827 [00:09<00:12, 82.78it/s]
|
1636 |
43%|████▎ | 794/1827 [00:09<00:12, 84.06it/s]
|
1637 |
44%|████▍ | 803/1827 [00:09<00:12, 83.01it/s]
|
1638 |
44%|████▍ | 812/1827 [00:09<00:12, 83.78it/s]
|
1639 |
45%|████▍ | 821/1827 [00:09<00:11, 84.28it/s]
|
1640 |
45%|████▌ | 830/1827 [00:10<00:11, 84.61it/s]
|
1641 |
46%|████▌ | 839/1827 [00:10<00:11, 83.72it/s]
|
1642 |
46%|████▋ | 848/1827 [00:10<00:11, 85.15it/s]
|
1643 |
47%|████▋ | 857/1827 [00:10<00:11, 85.42it/s]
|
1644 |
47%|████▋ | 866/1827 [00:10<00:11, 85.71it/s]
|
1645 |
48%|████▊ | 875/1827 [00:10<00:11, 86.06it/s]
|
1646 |
48%|████▊ | 884/1827 [00:10<00:10, 86.43it/s]
|
1647 |
49%|████▉ | 893/1827 [00:10<00:10, 85.57it/s]
|
1648 |
49%|████▉ | 902/1827 [00:10<00:10, 86.11it/s]
|
1649 |
50%|████▉ | 911/1827 [00:10<00:10, 86.50it/s]
|
1650 |
50%|█████ | 920/1827 [00:11<00:10, 86.07it/s]
|
1651 |
51%|█████ | 929/1827 [00:11<00:10, 86.11it/s]
|
1652 |
51%|█████▏ | 938/1827 [00:11<00:10, 82.86it/s]
|
1653 |
52%|█████▏ | 947/1827 [00:11<00:10, 81.36it/s]
|
1654 |
52%|█████▏ | 956/1827 [00:11<00:10, 82.29it/s]
|
1655 |
53%|█████▎ | 965/1827 [00:11<00:10, 83.27it/s]
|
1656 |
53%|█████▎ | 974/1827 [00:11<00:10, 83.79it/s]
|
1657 |
54%|█████▍ | 983/1827 [00:11<00:10, 83.54it/s]
|
1658 |
54%|█████▍ | 992/1827 [00:11<00:10, 83.43it/s]
|
1659 |
55%|█████▍ | 1001/1827 [00:12<00:09, 84.18it/s]
|
1660 |
55%|█████▌ | 1010/1827 [00:12<00:09, 83.88it/s]
|
1661 |
56%|█████▌ | 1019/1827 [00:12<00:09, 84.69it/s]
|
1662 |
56%|█████▋ | 1028/1827 [00:12<00:09, 85.70it/s]
|
1663 |
57%|█████▋ | 1037/1827 [00:12<00:09, 84.01it/s]
|
1664 |
57%|█████▋ | 1046/1827 [00:12<00:09, 84.57it/s]
|
1665 |
58%|█████▊ | 1055/1827 [00:12<00:09, 84.49it/s]
|
1666 |
58%|█████▊ | 1064/1827 [00:12<00:09, 84.45it/s]
|
1667 |
59%|█████▊ | 1073/1827 [00:12<00:08, 85.10it/s]
|
1668 |
59%|█████▉ | 1082/1827 [00:13<00:08, 85.50it/s]
|
1669 |
60%|█████▉ | 1091/1827 [00:13<00:08, 85.18it/s]
|
1670 |
60%|██████ | 1100/1827 [00:13<00:08, 85.85it/s]
|
1671 |
61%|██████ | 1109/1827 [00:13<00:08, 85.34it/s]
|
1672 |
61%|██████ | 1118/1827 [00:13<00:08, 83.96it/s]
|
1673 |
62%|██████▏ | 1127/1827 [00:13<00:08, 83.91it/s]
|
1674 |
62%|██████▏ | 1136/1827 [00:13<00:08, 84.94it/s]
|
1675 |
63%|██████▎ | 1145/1827 [00:13<00:08, 84.62it/s]
|
1676 |
63%|██████▎ | 1154/1827 [00:13<00:07, 84.47it/s]
|
1677 |
64%|██████▎ | 1163/1827 [00:13<00:08, 82.48it/s]
|
1678 |
64%|██████▍ | 1172/1827 [00:14<00:07, 83.17it/s]
|
1679 |
65%|██████▍ | 1181/1827 [00:14<00:07, 81.16it/s]
|
1680 |
65%|██████▌ | 1190/1827 [00:14<00:07, 83.08it/s]
|
1681 |
66%|██████▌ | 1199/1827 [00:14<00:07, 83.48it/s]
|
1682 |
66%|██████▌ | 1208/1827 [00:14<00:07, 84.04it/s]
|
1683 |
67%|██████▋ | 1217/1827 [00:14<00:07, 82.40it/s]
|
1684 |
67%|██████▋ | 1226/1827 [00:14<00:07, 82.82it/s]
|
1685 |
68%|██████▊ | 1235/1827 [00:14<00:07, 83.66it/s]
|
1686 |
68%|██████▊ | 1244/1827 [00:14<00:06, 84.34it/s]
|
1687 |
69%|██████▊ | 1253/1827 [00:15<00:07, 81.83it/s]
|
1688 |
69%|██████▉ | 1262/1827 [00:15<00:06, 81.92it/s]
|
1689 |
70%|██████▉ | 1271/1827 [00:15<00:06, 82.56it/s]
|
1690 |
70%|███████ | 1280/1827 [00:15<00:06, 83.63it/s]
|
1691 |
71%|███████ | 1289/1827 [00:15<00:06, 84.21it/s]
|
1692 |
71%|███████ | 1298/1827 [00:15<00:06, 85.46it/s]
|
1693 |
72%|███████▏ | 1307/1827 [00:15<00:06, 85.59it/s]
|
1694 |
72%|███████▏ | 1316/1827 [00:15<00:05, 86.19it/s]
|
1695 |
73%|███████▎ | 1325/1827 [00:15<00:05, 85.74it/s]
|
1696 |
73%|███████▎ | 1334/1827 [00:16<00:05, 86.33it/s]
|
1697 |
74%|███████▎ | 1343/1827 [00:16<00:05, 84.87it/s]
|
1698 |
74%|███████▍ | 1352/1827 [00:16<00:05, 85.11it/s]
|
1699 |
74%|███████▍ | 1361/1827 [00:16<00:05, 85.88it/s]
|
1700 |
75%|███████▍ | 1370/1827 [00:16<00:05, 85.65it/s]
|
1701 |
75%|███████▌ | 1379/1827 [00:16<00:05, 85.85it/s]
|
1702 |
76%|███████▌ | 1388/1827 [00:16<00:05, 85.76it/s]
|
1703 |
76%|███████▋ | 1397/1827 [00:16<00:05, 85.56it/s]
|
1704 |
77%|███████▋ | 1406/1827 [00:16<00:04, 85.93it/s]
|
1705 |
77%|███████▋ | 1415/1827 [00:16<00:04, 85.21it/s]
|
1706 |
78%|███████▊ | 1424/1827 [00:17<00:04, 84.33it/s]
|
1707 |
78%|███████▊ | 1433/1827 [00:17<00:04, 83.44it/s]
|
1708 |
79%|███████▉ | 1442/1827 [00:17<00:04, 81.50it/s]
|
1709 |
79%|███████▉ | 1451/1827 [00:17<00:04, 82.65it/s]
|
1710 |
80%|███████▉ | 1460/1827 [00:17<00:04, 83.66it/s]
|
1711 |
80%|████████ | 1469/1827 [00:17<00:04, 81.75it/s]
|
1712 |
81%|████████ | 1478/1827 [00:17<00:04, 81.42it/s]
|
1713 |
81%|████████▏ | 1487/1827 [00:17<00:04, 82.24it/s]
|
1714 |
82%|████████▏ | 1496/1827 [00:17<00:04, 80.08it/s]
|
1715 |
82%|████████▏ | 1505/1827 [00:18<00:03, 80.52it/s]
|
1716 |
83%|████████▎ | 1514/1827 [00:18<00:03, 82.13it/s]
|
1717 |
83%|████████▎ | 1523/1827 [00:18<00:03, 82.92it/s]
|
1718 |
84%|████████▍ | 1532/1827 [00:18<00:03, 82.73it/s]
|
1719 |
84%|████████▍ | 1541/1827 [00:18<00:03, 83.28it/s]
|
1720 |
85%|████████▍ | 1550/1827 [00:18<00:03, 82.29it/s]
|
1721 |
85%|████████▌ | 1559/1827 [00:18<00:03, 82.49it/s]
|
1722 |
86%|████████▌ | 1568/1827 [00:18<00:03, 82.39it/s]
|
1723 |
86%|████████▋ | 1577/1827 [00:18<00:03, 82.15it/s]
|
1724 |
87%|████████▋ | 1586/1827 [00:19<00:02, 82.43it/s]
|
1725 |
87%|████████▋ | 1595/1827 [00:19<00:02, 81.93it/s]
|
1726 |
88%|████████▊ | 1604/1827 [00:19<00:02, 82.96it/s]
|
1727 |
88%|████████▊ | 1613/1827 [00:19<00:02, 83.75it/s]
|
1728 |
89%|████████▉ | 1622/1827 [00:19<00:02, 78.81it/s]
|
1729 |
89%|████████▉ | 1630/1827 [00:19<00:02, 76.60it/s]
|
1730 |
90%|████████▉ | 1638/1827 [00:19<00:02, 77.25it/s]
|
1731 |
90%|█████████ | 1647/1827 [00:19<00:02, 79.86it/s]
|
1732 |
91%|█████████ | 1656/1827 [00:19<00:02, 81.63it/s]
|
1733 |
91%|█████████ | 1665/1827 [00:20<00:01, 81.29it/s]
|
1734 |
92%|█████████▏| 1674/1827 [00:20<00:01, 82.22it/s]
|
1735 |
92%|█████████▏| 1683/1827 [00:20<00:01, 84.30it/s]
|
1736 |
93%|█████████▎| 1692/1827 [00:20<00:01, 82.83it/s]
|
1737 |
93%|█████████▎| 1701/1827 [00:20<00:01, 83.31it/s]
|
1738 |
94%|█████████▎| 1710/1827 [00:20<00:01, 84.27it/s]
|
1739 |
94%|█████████▍| 1719/1827 [00:20<00:01, 81.37it/s]
|
1740 |
95%|█████████▍| 1728/1827 [00:20<00:01, 82.82it/s]
|
1741 |
95%|█████████▌| 1737/1827 [00:20<00:01, 83.49it/s]
|
1742 |
96%|█████████▌| 1746/1827 [00:20<00:00, 84.78it/s]
|
1743 |
96%|█████████▌| 1755/1827 [00:21<00:00, 85.12it/s]
|
1744 |
97%|█████████▋| 1764/1827 [00:21<00:00, 81.96it/s]
|
1745 |
97%|█████████▋| 1773/1827 [00:21<00:00, 82.90it/s]
|
1746 |
98%|█████████▊| 1782/1827 [00:21<00:00, 84.18it/s]
|
1747 |
98%|█████████▊| 1791/1827 [00:21<00:00, 84.49it/s]
|
1748 |
99%|█████████▊| 1800/1827 [00:21<00:00, 81.26it/s]
|
1749 |
99%|█████████▉| 1809/1827 [00:21<00:00, 82.04it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1450 |
{'eval_loss': 0.005793666001409292, 'eval_precision': 0.9347826086956522, 'eval_recall': 0.9485294117647058, 'eval_f1': 0.9416058394160585, 'eval_accuracy': 0.9989083718950389, 'eval_runtime': 14.4595, 'eval_samples_per_second': 470.971, 'eval_steps_per_second': 58.923, 'epoch': 10.0}
|
1451 |
{'train_runtime': 1349.0548, 'train_samples_per_second': 220.873, 'train_steps_per_second': 3.454, 'train_loss': 0.002772659832779558, 'epoch': 10.0}
|
1452 |
|
1453 |
+
***** train metrics *****
|
1454 |
+
epoch = 10.0
|
1455 |
+
total_flos = 13704263GF
|
1456 |
+
train_loss = 0.0028
|
1457 |
+
train_runtime = 0:22:29.05
|
1458 |
+
train_samples = 29797
|
1459 |
+
train_samples_per_second = 220.873
|
1460 |
+
train_steps_per_second = 3.454
|
1461 |
+
09/05/2024 23:34:48 - INFO - __main__ - *** Evaluate ***
|
1462 |
+
[INFO|trainer.py:811] 2024-09-05 23:34:48,810 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, ner_tags, id. If tokens, ner_tags, id are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
|
1463 |
+
[INFO|trainer.py:3819] 2024-09-05 23:34:48,813 >>
|
1464 |
+
***** Running Evaluation *****
|
1465 |
+
[INFO|trainer.py:3821] 2024-09-05 23:34:48,813 >> Num examples = 6810
|
1466 |
+
[INFO|trainer.py:3824] 2024-09-05 23:34:48,813 >> Batch size = 8
|
1467 |
+
|
1468 |
0%| | 0/852 [00:00<?, ?it/s]
|
1469 |
1%| | 10/852 [00:00<00:09, 89.77it/s]
|
1470 |
2%|▏ | 19/852 [00:00<00:10, 77.96it/s]
|
1471 |
3%|▎ | 27/852 [00:00<00:10, 78.13it/s]
|
1472 |
4%|▍ | 36/852 [00:00<00:10, 78.74it/s]
|
1473 |
5%|▌ | 45/852 [00:00<00:10, 80.47it/s]
|
1474 |
6%|▋ | 54/852 [00:00<00:09, 81.97it/s]
|
1475 |
7%|▋ | 63/852 [00:00<00:09, 81.96it/s]
|
1476 |
8%|▊ | 72/852 [00:00<00:09, 80.15it/s]
|
1477 |
10%|▉ | 81/852 [00:01<00:09, 80.19it/s]
|
1478 |
11%|█ | 90/852 [00:01<00:09, 80.63it/s]
|
1479 |
12%|█▏ | 99/852 [00:01<00:09, 80.42it/s]
|
1480 |
13%|█▎ | 108/852 [00:01<00:09, 79.74it/s]
|
1481 |
14%|█▎ | 117/852 [00:01<00:09, 80.86it/s]
|
1482 |
15%|█▍ | 126/852 [00:01<00:09, 78.38it/s]
|
1483 |
16%|█▌ | 135/852 [00:01<00:09, 79.16it/s]
|
1484 |
17%|█▋ | 143/852 [00:01<00:08, 79.17it/s]
|
1485 |
18%|█▊ | 151/852 [00:01<00:08, 78.56it/s]
|
1486 |
19%|█▉ | 160/852 [00:01<00:08, 81.22it/s]
|
1487 |
20%|█▉ | 169/852 [00:02<00:08, 81.16it/s]
|
1488 |
21%|██ | 178/852 [00:02<00:08, 81.76it/s]
|
1489 |
22%|██▏ | 187/852 [00:02<00:08, 82.46it/s]
|
1490 |
23%|██▎ | 196/852 [00:02<00:07, 82.29it/s]
|
1491 |
24%|██▍ | 205/852 [00:02<00:07, 83.12it/s]
|
1492 |
25%|██▌ | 214/852 [00:02<00:07, 80.08it/s]
|
1493 |
26%|██▌ | 223/852 [00:02<00:07, 81.13it/s]
|
1494 |
27%|██▋ | 232/852 [00:02<00:07, 81.21it/s]
|
1495 |
28%|██▊ | 241/852 [00:03<00:07, 78.10it/s]
|
1496 |
29%|██▉ | 250/852 [00:03<00:07, 79.34it/s]
|
1497 |
30%|███ | 259/852 [00:03<00:07, 80.76it/s]
|
1498 |
31%|███▏ | 268/852 [00:03<00:07, 74.97it/s]
|
1499 |
33%|███▎ | 277/852 [00:03<00:07, 77.71it/s]
|
1500 |
34%|███▎ | 286/852 [00:03<00:07, 80.09it/s]
|
1501 |
35%|███▍ | 295/852 [00:03<00:06, 80.08it/s]
|
1502 |
36%|███▌ | 304/852 [00:03<00:06, 81.59it/s]
|
1503 |
37%|███▋ | 313/852 [00:03<00:06, 80.30it/s]
|
1504 |
38%|███▊ | 322/852 [00:04<00:06, 82.39it/s]
|
1505 |
39%|███▉ | 331/852 [00:04<00:06, 82.16it/s]
|
1506 |
40%|███▉ | 340/852 [00:04<00:06, 82.30it/s]
|
1507 |
41%|████ | 349/852 [00:04<00:06, 82.47it/s]
|
1508 |
42%|████▏ | 358/852 [00:04<00:06, 80.49it/s]
|
1509 |
43%|████▎ | 367/852 [00:04<00:05, 81.17it/s]
|
1510 |
44%|████▍ | 376/852 [00:04<00:05, 81.82it/s]
|
1511 |
45%|████▌ | 385/852 [00:04<00:05, 81.26it/s]
|
1512 |
46%|████▌ | 394/852 [00:04<00:05, 81.23it/s]
|
1513 |
47%|████▋ | 403/852 [00:05<00:05, 81.17it/s]
|
1514 |
48%|████▊ | 412/852 [00:05<00:05, 79.22it/s]
|
1515 |
49%|████▉ | 421/852 [00:05<00:05, 80.98it/s]
|
1516 |
50%|█████ | 430/852 [00:05<00:05, 80.11it/s]
|
1517 |
52%|█████▏ | 439/852 [00:05<00:05, 81.83it/s]
|
1518 |
53%|█████▎ | 448/852 [00:05<00:04, 82.37it/s]
|
1519 |
54%|█████▎ | 457/852 [00:05<00:04, 83.25it/s]
|
1520 |
55%|█████▍ | 466/852 [00:05<00:04, 80.61it/s]
|
1521 |
56%|█████▌ | 475/852 [00:05<00:04, 77.21it/s]
|
1522 |
57%|█████▋ | 484/852 [00:06<00:04, 77.43it/s]
|
1523 |
58%|█████▊ | 493/852 [00:06<00:04, 79.71it/s]
|
1524 |
59%|█████▉ | 502/852 [00:06<00:04, 81.52it/s]
|
1525 |
60%|█████▉ | 511/852 [00:06<00:04, 81.79it/s]
|
1526 |
61%|██████ | 520/852 [00:06<00:04, 82.93it/s]
|
1527 |
62%|██████▏ | 529/852 [00:06<00:03, 80.76it/s]
|
1528 |
63%|██████▎ | 538/852 [00:06<00:03, 82.65it/s]
|
1529 |
64%|██████▍ | 547/852 [00:06<00:03, 83.43it/s]
|
1530 |
65%|██████▌ | 556/852 [00:06<00:03, 80.56it/s]
|
1531 |
66%|██████▋ | 565/852 [00:06<00:03, 82.97it/s]
|
1532 |
67%|██████▋ | 574/852 [00:07<00:03, 83.67it/s]
|
1533 |
68%|██████▊ | 583/852 [00:07<00:03, 83.26it/s]
|
1534 |
69%|██████▉ | 592/852 [00:07<00:03, 82.65it/s]
|
1535 |
71%|███████ | 601/852 [00:07<00:03, 82.89it/s]
|
1536 |
72%|███████▏ | 610/852 [00:07<00:02, 83.17it/s]
|
1537 |
73%|███████▎ | 619/852 [00:07<00:02, 80.86it/s]
|
1538 |
74%|███████▎ | 628/852 [00:07<00:02, 80.15it/s]
|
1539 |
75%|███████▍ | 637/852 [00:07<00:02, 80.82it/s]
|
1540 |
76%|███████▌ | 646/852 [00:07<00:02, 78.59it/s]
|
1541 |
77%|███████▋ | 655/852 [00:08<00:02, 80.46it/s]
|
1542 |
78%|███████▊ | 664/852 [00:08<00:02, 81.10it/s]
|
1543 |
79%|███████▉ | 673/852 [00:08<00:02, 80.96it/s]
|
1544 |
80%|████████ | 682/852 [00:08<00:02, 81.12it/s]
|
1545 |
81%|████████ | 691/852 [00:08<00:01, 82.73it/s]
|
1546 |
82%|████████▏ | 700/852 [00:08<00:01, 83.32it/s]
|
1547 |
83%|████████▎ | 709/852 [00:08<00:01, 84.29it/s]
|
1548 |
84%|████████▍ | 718/852 [00:08<00:01, 83.48it/s]
|
1549 |
85%|████████▌ | 727/852 [00:08<00:01, 83.89it/s]
|
1550 |
86%|████████▋ | 736/852 [00:09<00:01, 83.25it/s]
|
1551 |
87%|████████▋ | 745/852 [00:09<00:01, 84.23it/s]
|
1552 |
88%|████████▊ | 754/852 [00:09<00:01, 84.42it/s]
|
1553 |
90%|████████▉ | 763/852 [00:09<00:01, 85.20it/s]
|
1554 |
91%|█████████ | 772/852 [00:09<00:00, 83.63it/s]
|
1555 |
92%|█████████▏| 781/852 [00:09<00:00, 82.15it/s]
|
1556 |
93%|█████████▎| 790/852 [00:09<00:00, 82.20it/s]
|
1557 |
94%|█████████▍| 799/852 [00:09<00:00, 82.36it/s]
|
1558 |
95%|█████████▍| 808/852 [00:09<00:00, 83.98it/s]
|
1559 |
96%|█████████▌| 817/852 [00:10<00:00, 82.39it/s]
|
1560 |
97%|█████████▋| 826/852 [00:10<00:00, 83.62it/s]
|
1561 |
98%|█████████▊| 835/852 [00:10<00:00, 83.71it/s]
|
1562 |
99%|█████████▉| 844/852 [00:10<00:00, 82.35it/s]
|
1563 |
+
***** eval metrics *****
|
1564 |
+
epoch = 10.0
|
1565 |
+
eval_accuracy = 0.9989
|
1566 |
+
eval_f1 = 0.9492
|
1567 |
+
eval_loss = 0.0048
|
1568 |
+
eval_precision = 0.9461
|
1569 |
+
eval_recall = 0.9522
|
1570 |
+
eval_runtime = 0:00:13.94
|
1571 |
+
eval_samples = 6810
|
1572 |
+
eval_samples_per_second = 488.256
|
1573 |
+
eval_steps_per_second = 61.086
|
1574 |
+
09/05/2024 23:35:02 - INFO - __main__ - *** Predict ***
|
1575 |
+
[INFO|trainer.py:811] 2024-09-05 23:35:02,765 >> The following columns in the test set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, ner_tags, id. If tokens, ner_tags, id are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
|
1576 |
+
[INFO|trainer.py:3819] 2024-09-05 23:35:02,767 >>
|
1577 |
+
***** Running Prediction *****
|
1578 |
+
[INFO|trainer.py:3821] 2024-09-05 23:35:02,767 >> Num examples = 14614
|
1579 |
+
[INFO|trainer.py:3824] 2024-09-05 23:35:02,767 >> Batch size = 8
|
1580 |
+
|
1581 |
0%| | 0/1827 [00:00<?, ?it/s]
|
1582 |
1%| | 10/1827 [00:00<00:19, 90.92it/s]
|
1583 |
1%| | 20/1827 [00:00<00:22, 80.87it/s]
|
1584 |
2%|▏ | 29/1827 [00:00<00:21, 82.68it/s]
|
1585 |
2%|▏ | 38/1827 [00:00<00:21, 81.95it/s]
|
1586 |
3%|▎ | 47/1827 [00:00<00:21, 82.63it/s]
|
1587 |
3%|▎ | 56/1827 [00:00<00:21, 82.47it/s]
|
1588 |
4%|▎ | 65/1827 [00:00<00:21, 80.47it/s]
|
1589 |
4%|▍ | 74/1827 [00:00<00:21, 81.66it/s]
|
1590 |
5%|▍ | 83/1827 [00:01<00:21, 82.68it/s]
|
1591 |
5%|▌ | 92/1827 [00:01<00:21, 82.58it/s]
|
1592 |
6%|▌ | 101/1827 [00:01<00:20, 84.52it/s]
|
1593 |
6%|▌ | 110/1827 [00:01<00:20, 85.29it/s]
|
1594 |
7%|▋ | 119/1827 [00:01<00:20, 83.68it/s]
|
1595 |
7%|▋ | 128/1827 [00:01<00:20, 82.79it/s]
|
1596 |
7%|▋ | 137/1827 [00:01<00:20, 83.78it/s]
|
1597 |
8%|▊ | 146/1827 [00:01<00:19, 84.13it/s]
|
1598 |
8%|▊ | 155/1827 [00:01<00:19, 83.64it/s]
|
1599 |
9%|▉ | 164/1827 [00:01<00:20, 81.81it/s]
|
1600 |
9%|▉ | 173/1827 [00:02<00:19, 82.80it/s]
|
1601 |
10%|▉ | 182/1827 [00:02<00:19, 84.13it/s]
|
1602 |
10%|█ | 191/1827 [00:02<00:19, 84.70it/s]
|
1603 |
11%|█ | 200/1827 [00:02<00:19, 83.62it/s]
|
1604 |
11%|█▏ | 209/1827 [00:02<00:19, 84.42it/s]
|
1605 |
12%|█▏ | 218/1827 [00:02<00:19, 83.10it/s]
|
1606 |
12%|█▏ | 227/1827 [00:02<00:19, 83.75it/s]
|
1607 |
13%|█▎ | 236/1827 [00:02<00:19, 80.40it/s]
|
1608 |
13%|█▎ | 245/1827 [00:02<00:19, 81.38it/s]
|
1609 |
14%|█▍ | 254/1827 [00:03<00:19, 81.71it/s]
|
1610 |
14%|█▍ | 263/1827 [00:03<00:19, 80.90it/s]
|
1611 |
15%|█▍ | 272/1827 [00:03<00:18, 83.07it/s]
|
1612 |
15%|█▌ | 281/1827 [00:03<00:18, 84.44it/s]
|
1613 |
16%|█▌ | 290/1827 [00:03<00:18, 83.94it/s]
|
1614 |
16%|█▋ | 299/1827 [00:03<00:18, 84.45it/s]
|
1615 |
17%|█▋ | 308/1827 [00:03<00:18, 83.65it/s]
|
1616 |
17%|█▋ | 317/1827 [00:03<00:18, 83.14it/s]
|
1617 |
18%|█▊ | 326/1827 [00:03<00:17, 84.05it/s]
|
1618 |
18%|█▊ | 335/1827 [00:04<00:17, 83.67it/s]
|
1619 |
19%|█▉ | 344/1827 [00:04<00:17, 84.53it/s]
|
1620 |
19%|█▉ | 353/1827 [00:04<00:18, 79.63it/s]
|
1621 |
20%|█▉ | 362/1827 [00:04<00:18, 80.95it/s]
|
1622 |
20%|██ | 371/1827 [00:04<00:17, 82.08it/s]
|
1623 |
21%|██ | 380/1827 [00:04<00:17, 83.24it/s]
|
1624 |
21%|██▏ | 389/1827 [00:04<00:17, 82.54it/s]
|
1625 |
22%|██▏ | 398/1827 [00:04<00:17, 81.91it/s]
|
1626 |
22%|██▏ | 407/1827 [00:04<00:17, 83.46it/s]
|
1627 |
23%|██▎ | 416/1827 [00:05<00:17, 81.20it/s]
|
1628 |
23%|██▎ | 425/1827 [00:05<00:17, 82.39it/s]
|
1629 |
24%|██▍ | 434/1827 [00:05<00:17, 81.62it/s]
|
1630 |
24%|██▍ | 443/1827 [00:05<00:17, 80.18it/s]
|
1631 |
25%|██▍ | 452/1827 [00:05<00:16, 82.39it/s]
|
1632 |
25%|██▌ | 461/1827 [00:05<00:16, 83.63it/s]
|
1633 |
26%|██▌ | 470/1827 [00:05<00:16, 83.17it/s]
|
1634 |
26%|██▌ | 479/1827 [00:05<00:16, 81.50it/s]
|
1635 |
27%|██▋ | 488/1827 [00:05<00:16, 81.09it/s]
|
1636 |
27%|██▋ | 497/1827 [00:06<00:16, 80.25it/s]
|
1637 |
28%|██▊ | 506/1827 [00:06<00:16, 81.31it/s]
|
1638 |
28%|██▊ | 515/1827 [00:06<00:15, 82.50it/s]
|
1639 |
29%|██▊ | 524/1827 [00:06<00:15, 84.16it/s]
|
1640 |
29%|██▉ | 533/1827 [00:06<00:15, 83.22it/s]
|
1641 |
30%|██▉ | 542/1827 [00:06<00:15, 84.11it/s]
|
1642 |
30%|███ | 551/1827 [00:06<00:15, 83.08it/s]
|
1643 |
31%|███ | 560/1827 [00:06<00:15, 82.78it/s]
|
1644 |
31%|███ | 569/1827 [00:06<00:15, 83.19it/s]
|
1645 |
32%|███▏ | 578/1827 [00:06<00:15, 82.89it/s]
|
1646 |
32%|███▏ | 587/1827 [00:07<00:14, 82.68it/s]
|
1647 |
33%|███▎ | 596/1827 [00:07<00:14, 82.70it/s]
|
1648 |
33%|███▎ | 605/1827 [00:07<00:14, 82.71it/s]
|
1649 |
34%|███▎ | 614/1827 [00:07<00:14, 82.53it/s]
|
1650 |
34%|███▍ | 623/1827 [00:07<00:14, 83.35it/s]
|
1651 |
35%|███▍ | 632/1827 [00:07<00:14, 83.93it/s]
|
1652 |
35%|███▌ | 641/1827 [00:07<00:14, 84.33it/s]
|
1653 |
36%|███▌ | 650/1827 [00:07<00:14, 79.40it/s]
|
1654 |
36%|███▌ | 659/1827 [00:07<00:14, 79.16it/s]
|
1655 |
37%|███▋ | 668/1827 [00:08<00:14, 80.89it/s]
|
1656 |
37%|███▋ | 677/1827 [00:08<00:14, 78.78it/s]
|
1657 |
38%|███▊ | 686/1827 [00:08<00:14, 80.15it/s]
|
1658 |
38%|███▊ | 695/1827 [00:08<00:13, 81.68it/s]
|
1659 |
39%|███▊ | 704/1827 [00:08<00:14, 78.28it/s]
|
1660 |
39%|███▉ | 713/1827 [00:08<00:13, 80.50it/s]
|
1661 |
40%|███▉ | 722/1827 [00:08<00:13, 82.68it/s]
|
1662 |
40%|████ | 731/1827 [00:08<00:13, 83.49it/s]
|
1663 |
41%|████ | 740/1827 [00:08<00:13, 82.99it/s]
|
1664 |
41%|████ | 749/1827 [00:09<00:12, 84.43it/s]
|
1665 |
41%|████▏ | 758/1827 [00:09<00:12, 84.86it/s]
|
1666 |
42%|████▏ | 767/1827 [00:09<00:12, 85.25it/s]
|
1667 |
42%|████▏ | 776/1827 [00:09<00:12, 85.62it/s]
|
1668 |
43%|████▎ | 785/1827 [00:09<00:12, 82.78it/s]
|
1669 |
43%|████▎ | 794/1827 [00:09<00:12, 84.06it/s]
|
1670 |
44%|████▍ | 803/1827 [00:09<00:12, 83.01it/s]
|
1671 |
44%|████▍ | 812/1827 [00:09<00:12, 83.78it/s]
|
1672 |
45%|████▍ | 821/1827 [00:09<00:11, 84.28it/s]
|
1673 |
45%|████▌ | 830/1827 [00:10<00:11, 84.61it/s]
|
1674 |
46%|████▌ | 839/1827 [00:10<00:11, 83.72it/s]
|
1675 |
46%|████▋ | 848/1827 [00:10<00:11, 85.15it/s]
|
1676 |
47%|████▋ | 857/1827 [00:10<00:11, 85.42it/s]
|
1677 |
47%|████▋ | 866/1827 [00:10<00:11, 85.71it/s]
|
1678 |
48%|████▊ | 875/1827 [00:10<00:11, 86.06it/s]
|
1679 |
48%|████▊ | 884/1827 [00:10<00:10, 86.43it/s]
|
1680 |
49%|████▉ | 893/1827 [00:10<00:10, 85.57it/s]
|
1681 |
49%|████▉ | 902/1827 [00:10<00:10, 86.11it/s]
|
1682 |
50%|████▉ | 911/1827 [00:10<00:10, 86.50it/s]
|
1683 |
50%|█████ | 920/1827 [00:11<00:10, 86.07it/s]
|
1684 |
51%|█████ | 929/1827 [00:11<00:10, 86.11it/s]
|
1685 |
51%|█████▏ | 938/1827 [00:11<00:10, 82.86it/s]
|
1686 |
52%|█████▏ | 947/1827 [00:11<00:10, 81.36it/s]
|
1687 |
52%|█████▏ | 956/1827 [00:11<00:10, 82.29it/s]
|
1688 |
53%|█████▎ | 965/1827 [00:11<00:10, 83.27it/s]
|
1689 |
53%|█████▎ | 974/1827 [00:11<00:10, 83.79it/s]
|
1690 |
54%|█████▍ | 983/1827 [00:11<00:10, 83.54it/s]
|
1691 |
54%|█████▍ | 992/1827 [00:11<00:10, 83.43it/s]
|
1692 |
55%|█████▍ | 1001/1827 [00:12<00:09, 84.18it/s]
|
1693 |
55%|█████▌ | 1010/1827 [00:12<00:09, 83.88it/s]
|
1694 |
56%|█████▌ | 1019/1827 [00:12<00:09, 84.69it/s]
|
1695 |
56%|█████▋ | 1028/1827 [00:12<00:09, 85.70it/s]
|
1696 |
57%|█████▋ | 1037/1827 [00:12<00:09, 84.01it/s]
|
1697 |
57%|█████▋ | 1046/1827 [00:12<00:09, 84.57it/s]
|
1698 |
58%|█████▊ | 1055/1827 [00:12<00:09, 84.49it/s]
|
1699 |
58%|████��▊ | 1064/1827 [00:12<00:09, 84.45it/s]
|
1700 |
59%|█████▊ | 1073/1827 [00:12<00:08, 85.10it/s]
|
1701 |
59%|█████▉ | 1082/1827 [00:13<00:08, 85.50it/s]
|
1702 |
60%|█████▉ | 1091/1827 [00:13<00:08, 85.18it/s]
|
1703 |
60%|██████ | 1100/1827 [00:13<00:08, 85.85it/s]
|
1704 |
61%|██████ | 1109/1827 [00:13<00:08, 85.34it/s]
|
1705 |
61%|██████ | 1118/1827 [00:13<00:08, 83.96it/s]
|
1706 |
62%|██████▏ | 1127/1827 [00:13<00:08, 83.91it/s]
|
1707 |
62%|██████▏ | 1136/1827 [00:13<00:08, 84.94it/s]
|
1708 |
63%|██████▎ | 1145/1827 [00:13<00:08, 84.62it/s]
|
1709 |
63%|██████▎ | 1154/1827 [00:13<00:07, 84.47it/s]
|
1710 |
64%|██████▎ | 1163/1827 [00:13<00:08, 82.48it/s]
|
1711 |
64%|██████▍ | 1172/1827 [00:14<00:07, 83.17it/s]
|
1712 |
65%|██████▍ | 1181/1827 [00:14<00:07, 81.16it/s]
|
1713 |
65%|██████▌ | 1190/1827 [00:14<00:07, 83.08it/s]
|
1714 |
66%|██████▌ | 1199/1827 [00:14<00:07, 83.48it/s]
|
1715 |
66%|██████▌ | 1208/1827 [00:14<00:07, 84.04it/s]
|
1716 |
67%|██████▋ | 1217/1827 [00:14<00:07, 82.40it/s]
|
1717 |
67%|██████▋ | 1226/1827 [00:14<00:07, 82.82it/s]
|
1718 |
68%|██████▊ | 1235/1827 [00:14<00:07, 83.66it/s]
|
1719 |
68%|██████▊ | 1244/1827 [00:14<00:06, 84.34it/s]
|
1720 |
69%|██████▊ | 1253/1827 [00:15<00:07, 81.83it/s]
|
1721 |
69%|██████▉ | 1262/1827 [00:15<00:06, 81.92it/s]
|
1722 |
70%|██████▉ | 1271/1827 [00:15<00:06, 82.56it/s]
|
1723 |
70%|███████ | 1280/1827 [00:15<00:06, 83.63it/s]
|
1724 |
71%|███████ | 1289/1827 [00:15<00:06, 84.21it/s]
|
1725 |
71%|███████ | 1298/1827 [00:15<00:06, 85.46it/s]
|
1726 |
72%|███████▏ | 1307/1827 [00:15<00:06, 85.59it/s]
|
1727 |
72%|███████▏ | 1316/1827 [00:15<00:05, 86.19it/s]
|
1728 |
73%|███████▎ | 1325/1827 [00:15<00:05, 85.74it/s]
|
1729 |
73%|███████▎ | 1334/1827 [00:16<00:05, 86.33it/s]
|
1730 |
74%|███████▎ | 1343/1827 [00:16<00:05, 84.87it/s]
|
1731 |
74%|███████▍ | 1352/1827 [00:16<00:05, 85.11it/s]
|
1732 |
74%|███████▍ | 1361/1827 [00:16<00:05, 85.88it/s]
|
1733 |
75%|███████▍ | 1370/1827 [00:16<00:05, 85.65it/s]
|
1734 |
75%|███████▌ | 1379/1827 [00:16<00:05, 85.85it/s]
|
1735 |
76%|███████▌ | 1388/1827 [00:16<00:05, 85.76it/s]
|
1736 |
76%|███████▋ | 1397/1827 [00:16<00:05, 85.56it/s]
|
1737 |
77%|███████▋ | 1406/1827 [00:16<00:04, 85.93it/s]
|
1738 |
77%|███████▋ | 1415/1827 [00:16<00:04, 85.21it/s]
|
1739 |
78%|███████▊ | 1424/1827 [00:17<00:04, 84.33it/s]
|
1740 |
78%|███████▊ | 1433/1827 [00:17<00:04, 83.44it/s]
|
1741 |
79%|███████▉ | 1442/1827 [00:17<00:04, 81.50it/s]
|
1742 |
79%|███████▉ | 1451/1827 [00:17<00:04, 82.65it/s]
|
1743 |
80%|███████▉ | 1460/1827 [00:17<00:04, 83.66it/s]
|
1744 |
80%|████████ | 1469/1827 [00:17<00:04, 81.75it/s]
|
1745 |
81%|████████ | 1478/1827 [00:17<00:04, 81.42it/s]
|
1746 |
81%|████████▏ | 1487/1827 [00:17<00:04, 82.24it/s]
|
1747 |
82%|████████▏ | 1496/1827 [00:17<00:04, 80.08it/s]
|
1748 |
82%|████████▏ | 1505/1827 [00:18<00:03, 80.52it/s]
|
1749 |
83%|████████▎ | 1514/1827 [00:18<00:03, 82.13it/s]
|
1750 |
83%|████████▎ | 1523/1827 [00:18<00:03, 82.92it/s]
|
1751 |
84%|████████▍ | 1532/1827 [00:18<00:03, 82.73it/s]
|
1752 |
84%|████████▍ | 1541/1827 [00:18<00:03, 83.28it/s]
|
1753 |
85%|████████▍ | 1550/1827 [00:18<00:03, 82.29it/s]
|
1754 |
85%|████████▌ | 1559/1827 [00:18<00:03, 82.49it/s]
|
1755 |
86%|████████▌ | 1568/1827 [00:18<00:03, 82.39it/s]
|
1756 |
86%|████████▋ | 1577/1827 [00:18<00:03, 82.15it/s]
|
1757 |
87%|████████▋ | 1586/1827 [00:19<00:02, 82.43it/s]
|
1758 |
87%|████████▋ | 1595/1827 [00:19<00:02, 81.93it/s]
|
1759 |
88%|████████▊ | 1604/1827 [00:19<00:02, 82.96it/s]
|
1760 |
88%|████████▊ | 1613/1827 [00:19<00:02, 83.75it/s]
|
1761 |
89%|████████▉ | 1622/1827 [00:19<00:02, 78.81it/s]
|
1762 |
89%|████████▉ | 1630/1827 [00:19<00:02, 76.60it/s]
|
1763 |
90%|████████▉ | 1638/1827 [00:19<00:02, 77.25it/s]
|
1764 |
90%|█████████ | 1647/1827 [00:19<00:02, 79.86it/s]
|
1765 |
91%|█████████ | 1656/1827 [00:19<00:02, 81.63it/s]
|
1766 |
91%|█████████ | 1665/1827 [00:20<00:01, 81.29it/s]
|
1767 |
92%|█████████▏| 1674/1827 [00:20<00:01, 82.22it/s]
|
1768 |
92%|█████████▏| 1683/1827 [00:20<00:01, 84.30it/s]
|
1769 |
93%|█████████▎| 1692/1827 [00:20<00:01, 82.83it/s]
|
1770 |
93%|█████████▎| 1701/1827 [00:20<00:01, 83.31it/s]
|
1771 |
94%|█████████▎| 1710/1827 [00:20<00:01, 84.27it/s]
|
1772 |
94%|█████████▍| 1719/1827 [00:20<00:01, 81.37it/s]
|
1773 |
95%|█████████▍| 1728/1827 [00:20<00:01, 82.82it/s]
|
1774 |
95%|█████████▌| 1737/1827 [00:20<00:01, 83.49it/s]
|
1775 |
96%|█████████▌| 1746/1827 [00:20<00:00, 84.78it/s]
|
1776 |
96%|█████████▌| 1755/1827 [00:21<00:00, 85.12it/s]
|
1777 |
97%|█████████▋| 1764/1827 [00:21<00:00, 81.96it/s]
|
1778 |
97%|█████████▋| 1773/1827 [00:21<00:00, 82.90it/s]
|
1779 |
98%|█████████▊| 1782/1827 [00:21<00:00, 84.18it/s]
|
1780 |
98%|█████████▊| 1791/1827 [00:21<00:00, 84.49it/s]
|
1781 |
99%|█████████▊| 1800/1827 [00:21<00:00, 81.26it/s]
|
1782 |
99%|█████████▉| 1809/1827 [00:21<00:00, 82.04it/s]
|
1783 |
+
[INFO|trainer.py:3503] 2024-09-05 23:35:31,629 >> Saving model checkpoint to /content/dissertation/scripts/ner/output
|
1784 |
+
[INFO|configuration_utils.py:472] 2024-09-05 23:35:31,631 >> Configuration saved in /content/dissertation/scripts/ner/output/config.json
|
1785 |
+
[INFO|modeling_utils.py:2799] 2024-09-05 23:35:32,981 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
|
1786 |
+
[INFO|tokenization_utils_base.py:2684] 2024-09-05 23:35:32,982 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
|
1787 |
+
[INFO|tokenization_utils_base.py:2693] 2024-09-05 23:35:32,982 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
|
1788 |
+
***** predict metrics *****
|
1789 |
+
predict_accuracy = 0.9987
|
1790 |
+
predict_f1 = 0.9243
|
1791 |
+
predict_loss = 0.0059
|
1792 |
+
predict_precision = 0.907
|
1793 |
+
predict_recall = 0.9423
|
1794 |
+
predict_runtime = 0:00:28.21
|
1795 |
+
predict_samples_per_second = 517.899
|
1796 |
+
predict_steps_per_second = 64.746
|
1797 |
+
|
train_results.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"total_flos": 1.
|
4 |
-
"train_loss": 0.
|
5 |
-
"train_runtime":
|
6 |
-
"train_samples":
|
7 |
-
"train_samples_per_second":
|
8 |
-
"train_steps_per_second": 3.
|
9 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 10.0,
|
3 |
+
"total_flos": 1.4714840952259542e+16,
|
4 |
+
"train_loss": 0.002772659832779558,
|
5 |
+
"train_runtime": 1349.0548,
|
6 |
+
"train_samples": 29797,
|
7 |
+
"train_samples_per_second": 220.873,
|
8 |
+
"train_steps_per_second": 3.454
|
9 |
}
|
trainer_state.json
CHANGED
@@ -1,208 +1,208 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
-
"epoch": 0
|
13 |
-
"eval_accuracy": 0.
|
14 |
-
"eval_f1": 0.
|
15 |
-
"eval_loss": 0.
|
16 |
-
"eval_precision": 0.
|
17 |
-
"eval_recall": 0.
|
18 |
-
"eval_runtime":
|
19 |
-
"eval_samples_per_second":
|
20 |
-
"eval_steps_per_second":
|
21 |
-
"step":
|
22 |
-
},
|
23 |
-
{
|
24 |
-
"epoch": 1.
|
25 |
-
"grad_norm": 0.
|
26 |
-
"learning_rate": 4.
|
27 |
-
"loss": 0.
|
28 |
"step": 500
|
29 |
},
|
30 |
{
|
31 |
"epoch": 2.0,
|
32 |
-
"eval_accuracy": 0.
|
33 |
-
"eval_f1": 0.
|
34 |
-
"eval_loss": 0.
|
35 |
-
"eval_precision": 0.
|
36 |
-
"eval_recall": 0.
|
37 |
-
"eval_runtime": 14.
|
38 |
-
"eval_samples_per_second":
|
39 |
-
"eval_steps_per_second": 60.
|
40 |
-
"step":
|
41 |
-
},
|
42 |
-
{
|
43 |
-
"epoch": 2.
|
44 |
-
"grad_norm": 0.
|
45 |
-
"learning_rate": 3.
|
46 |
-
"loss": 0.
|
47 |
"step": 1000
|
48 |
},
|
49 |
{
|
50 |
-
"epoch":
|
51 |
-
"eval_accuracy": 0.
|
52 |
-
"eval_f1": 0.
|
53 |
-
"eval_loss": 0.
|
54 |
-
"eval_precision": 0.
|
55 |
-
"eval_recall": 0.
|
56 |
-
"eval_runtime":
|
57 |
-
"eval_samples_per_second":
|
58 |
-
"eval_steps_per_second":
|
59 |
-
"step":
|
60 |
},
|
61 |
{
|
62 |
-
"epoch": 3.
|
63 |
-
"grad_norm": 0.
|
64 |
-
"learning_rate": 3.
|
65 |
-
"loss": 0.
|
66 |
"step": 1500
|
67 |
},
|
68 |
{
|
69 |
"epoch": 4.0,
|
70 |
-
"eval_accuracy": 0.
|
71 |
-
"eval_f1": 0.
|
72 |
-
"eval_loss": 0.
|
73 |
-
"eval_precision": 0.
|
74 |
-
"eval_recall": 0.
|
75 |
-
"eval_runtime": 14.
|
76 |
-
"eval_samples_per_second":
|
77 |
-
"eval_steps_per_second": 59.
|
78 |
-
"step":
|
79 |
-
},
|
80 |
-
{
|
81 |
-
"epoch": 4.
|
82 |
-
"grad_norm": 0.
|
83 |
-
"learning_rate": 2.
|
84 |
-
"loss": 0.
|
85 |
"step": 2000
|
86 |
},
|
87 |
{
|
88 |
-
"epoch":
|
89 |
-
"eval_accuracy": 0.
|
90 |
-
"eval_f1": 0.
|
91 |
-
"eval_loss": 0.
|
92 |
-
"eval_precision": 0.
|
93 |
-
"eval_recall": 0.
|
94 |
-
"eval_runtime":
|
95 |
-
"eval_samples_per_second":
|
96 |
-
"eval_steps_per_second":
|
97 |
-
"step":
|
98 |
},
|
99 |
{
|
100 |
-
"epoch": 5.
|
101 |
-
"grad_norm": 0.
|
102 |
-
"learning_rate": 2.
|
103 |
-
"loss": 0.
|
104 |
"step": 2500
|
105 |
},
|
106 |
{
|
107 |
"epoch": 6.0,
|
108 |
-
"eval_accuracy": 0.
|
109 |
-
"eval_f1": 0.
|
110 |
-
"eval_loss": 0.
|
111 |
-
"eval_precision": 0.
|
112 |
-
"eval_recall": 0.
|
113 |
-
"eval_runtime": 14.
|
114 |
-
"eval_samples_per_second":
|
115 |
-
"eval_steps_per_second":
|
116 |
-
"step":
|
117 |
-
},
|
118 |
-
{
|
119 |
-
"epoch": 6.
|
120 |
-
"grad_norm": 0.
|
121 |
-
"learning_rate": 1.
|
122 |
-
"loss": 0.
|
123 |
"step": 3000
|
124 |
},
|
125 |
{
|
126 |
-
"epoch":
|
127 |
-
"eval_accuracy": 0.
|
128 |
-
"eval_f1": 0.
|
129 |
-
"eval_loss": 0.
|
130 |
-
"eval_precision": 0.
|
131 |
-
"eval_recall": 0.
|
132 |
-
"eval_runtime":
|
133 |
-
"eval_samples_per_second":
|
134 |
-
"eval_steps_per_second":
|
135 |
-
"step":
|
136 |
},
|
137 |
{
|
138 |
-
"epoch": 7.
|
139 |
-
"grad_norm": 0.
|
140 |
-
"learning_rate": 1.
|
141 |
-
"loss": 0.
|
142 |
"step": 3500
|
143 |
},
|
144 |
{
|
145 |
"epoch": 8.0,
|
146 |
-
"eval_accuracy": 0.
|
147 |
-
"eval_f1": 0.
|
148 |
-
"eval_loss": 0.
|
149 |
-
"eval_precision": 0.
|
150 |
-
"eval_recall": 0.
|
151 |
-
"eval_runtime":
|
152 |
-
"eval_samples_per_second":
|
153 |
-
"eval_steps_per_second":
|
154 |
-
"step":
|
155 |
-
},
|
156 |
-
{
|
157 |
-
"epoch": 8.
|
158 |
-
"grad_norm": 0.
|
159 |
-
"learning_rate":
|
160 |
-
"loss": 0.
|
161 |
"step": 4000
|
162 |
},
|
163 |
{
|
164 |
-
"epoch":
|
165 |
-
"eval_accuracy": 0.
|
166 |
-
"eval_f1": 0.
|
167 |
-
"eval_loss": 0.
|
168 |
-
"eval_precision": 0.
|
169 |
-
"eval_recall": 0.
|
170 |
-
"eval_runtime":
|
171 |
-
"eval_samples_per_second":
|
172 |
-
"eval_steps_per_second":
|
173 |
-
"step":
|
174 |
},
|
175 |
{
|
176 |
-
"epoch": 9.
|
177 |
-
"grad_norm": 0.
|
178 |
-
"learning_rate":
|
179 |
-
"loss": 0.
|
180 |
"step": 4500
|
181 |
},
|
182 |
{
|
183 |
-
"epoch":
|
184 |
-
"eval_accuracy": 0.
|
185 |
-
"eval_f1": 0.
|
186 |
-
"eval_loss": 0.
|
187 |
-
"eval_precision": 0.
|
188 |
-
"eval_recall": 0.
|
189 |
-
"eval_runtime": 14.
|
190 |
-
"eval_samples_per_second":
|
191 |
-
"eval_steps_per_second":
|
192 |
-
"step":
|
193 |
-
},
|
194 |
-
{
|
195 |
-
"epoch":
|
196 |
-
"step":
|
197 |
-
"total_flos": 1.
|
198 |
-
"train_loss": 0.
|
199 |
-
"train_runtime":
|
200 |
-
"train_samples_per_second":
|
201 |
-
"train_steps_per_second": 3.
|
202 |
}
|
203 |
],
|
204 |
"logging_steps": 500,
|
205 |
-
"max_steps":
|
206 |
"num_input_tokens_seen": 0,
|
207 |
"num_train_epochs": 10,
|
208 |
"save_steps": 500,
|
@@ -218,7 +218,7 @@
|
|
218 |
"attributes": {}
|
219 |
}
|
220 |
},
|
221 |
-
"total_flos": 1.
|
222 |
"train_batch_size": 32,
|
223 |
"trial_name": null,
|
224 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.9491525423728814,
|
3 |
+
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-2330",
|
4 |
+
"epoch": 10.0,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 4660,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
+
"epoch": 1.0,
|
13 |
+
"eval_accuracy": 0.9989495654084337,
|
14 |
+
"eval_f1": 0.9351598173515981,
|
15 |
+
"eval_loss": 0.0030670168343931437,
|
16 |
+
"eval_precision": 0.9292196007259528,
|
17 |
+
"eval_recall": 0.9411764705882353,
|
18 |
+
"eval_runtime": 13.9946,
|
19 |
+
"eval_samples_per_second": 486.615,
|
20 |
+
"eval_steps_per_second": 60.88,
|
21 |
+
"step": 466
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"epoch": 1.0729613733905579,
|
25 |
+
"grad_norm": 0.07174628973007202,
|
26 |
+
"learning_rate": 4.4635193133047216e-05,
|
27 |
+
"loss": 0.0199,
|
28 |
"step": 500
|
29 |
},
|
30 |
{
|
31 |
"epoch": 2.0,
|
32 |
+
"eval_accuracy": 0.9989358342373021,
|
33 |
+
"eval_f1": 0.9386834986474301,
|
34 |
+
"eval_loss": 0.0030621723271906376,
|
35 |
+
"eval_precision": 0.9212389380530973,
|
36 |
+
"eval_recall": 0.9568014705882353,
|
37 |
+
"eval_runtime": 14.0045,
|
38 |
+
"eval_samples_per_second": 486.272,
|
39 |
+
"eval_steps_per_second": 60.838,
|
40 |
+
"step": 932
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"epoch": 2.1459227467811157,
|
44 |
+
"grad_norm": 0.13906870782375336,
|
45 |
+
"learning_rate": 3.927038626609442e-05,
|
46 |
+
"loss": 0.0026,
|
47 |
"step": 1000
|
48 |
},
|
49 |
{
|
50 |
+
"epoch": 3.0,
|
51 |
+
"eval_accuracy": 0.9989152374806047,
|
52 |
+
"eval_f1": 0.9360919540229885,
|
53 |
+
"eval_loss": 0.004003152716904879,
|
54 |
+
"eval_precision": 0.9365225390984361,
|
55 |
+
"eval_recall": 0.9356617647058824,
|
56 |
+
"eval_runtime": 13.9451,
|
57 |
+
"eval_samples_per_second": 488.343,
|
58 |
+
"eval_steps_per_second": 61.097,
|
59 |
+
"step": 1398
|
60 |
},
|
61 |
{
|
62 |
+
"epoch": 3.218884120171674,
|
63 |
+
"grad_norm": 0.14111244678497314,
|
64 |
+
"learning_rate": 3.3905579399141636e-05,
|
65 |
+
"loss": 0.0011,
|
66 |
"step": 1500
|
67 |
},
|
68 |
{
|
69 |
"epoch": 4.0,
|
70 |
+
"eval_accuracy": 0.9987230010847625,
|
71 |
+
"eval_f1": 0.9308584686774942,
|
72 |
+
"eval_loss": 0.005216046702116728,
|
73 |
+
"eval_precision": 0.9400187441424555,
|
74 |
+
"eval_recall": 0.921875,
|
75 |
+
"eval_runtime": 14.2384,
|
76 |
+
"eval_samples_per_second": 478.283,
|
77 |
+
"eval_steps_per_second": 59.838,
|
78 |
+
"step": 1864
|
79 |
+
},
|
80 |
+
{
|
81 |
+
"epoch": 4.291845493562231,
|
82 |
+
"grad_norm": 0.09000600874423981,
|
83 |
+
"learning_rate": 2.8540772532188842e-05,
|
84 |
+
"loss": 0.001,
|
85 |
"step": 2000
|
86 |
},
|
87 |
{
|
88 |
+
"epoch": 5.0,
|
89 |
+
"eval_accuracy": 0.9989426998228679,
|
90 |
+
"eval_f1": 0.9491525423728814,
|
91 |
+
"eval_loss": 0.004777050111442804,
|
92 |
+
"eval_precision": 0.9461187214611873,
|
93 |
+
"eval_recall": 0.9522058823529411,
|
94 |
+
"eval_runtime": 13.9397,
|
95 |
+
"eval_samples_per_second": 488.533,
|
96 |
+
"eval_steps_per_second": 61.12,
|
97 |
+
"step": 2330
|
98 |
},
|
99 |
{
|
100 |
+
"epoch": 5.364806866952789,
|
101 |
+
"grad_norm": 0.0045097870752215385,
|
102 |
+
"learning_rate": 2.3175965665236052e-05,
|
103 |
+
"loss": 0.0005,
|
104 |
"step": 2500
|
105 |
},
|
106 |
{
|
107 |
"epoch": 6.0,
|
108 |
+
"eval_accuracy": 0.9988809095527758,
|
109 |
+
"eval_f1": 0.9448244414044688,
|
110 |
+
"eval_loss": 0.004644877277314663,
|
111 |
+
"eval_precision": 0.9375565610859729,
|
112 |
+
"eval_recall": 0.9522058823529411,
|
113 |
+
"eval_runtime": 14.0259,
|
114 |
+
"eval_samples_per_second": 485.531,
|
115 |
+
"eval_steps_per_second": 60.745,
|
116 |
+
"step": 2796
|
117 |
+
},
|
118 |
+
{
|
119 |
+
"epoch": 6.437768240343348,
|
120 |
+
"grad_norm": 0.029634617269039154,
|
121 |
+
"learning_rate": 1.7811158798283262e-05,
|
122 |
+
"loss": 0.0004,
|
123 |
"step": 3000
|
124 |
},
|
125 |
{
|
126 |
+
"epoch": 7.0,
|
127 |
+
"eval_accuracy": 0.9989564309939994,
|
128 |
+
"eval_f1": 0.9446460980036298,
|
129 |
+
"eval_loss": 0.0049773636274039745,
|
130 |
+
"eval_precision": 0.9327956989247311,
|
131 |
+
"eval_recall": 0.9568014705882353,
|
132 |
+
"eval_runtime": 13.9217,
|
133 |
+
"eval_samples_per_second": 489.166,
|
134 |
+
"eval_steps_per_second": 61.2,
|
135 |
+
"step": 3262
|
136 |
},
|
137 |
{
|
138 |
+
"epoch": 7.510729613733906,
|
139 |
+
"grad_norm": 0.003798937890678644,
|
140 |
+
"learning_rate": 1.2446351931330473e-05,
|
141 |
+
"loss": 0.0002,
|
142 |
"step": 3500
|
143 |
},
|
144 |
{
|
145 |
"epoch": 8.0,
|
146 |
+
"eval_accuracy": 0.9989221030661705,
|
147 |
+
"eval_f1": 0.9435520881138136,
|
148 |
+
"eval_loss": 0.005484889727085829,
|
149 |
+
"eval_precision": 0.9422548120989918,
|
150 |
+
"eval_recall": 0.9448529411764706,
|
151 |
+
"eval_runtime": 13.8923,
|
152 |
+
"eval_samples_per_second": 490.2,
|
153 |
+
"eval_steps_per_second": 61.329,
|
154 |
+
"step": 3728
|
155 |
+
},
|
156 |
+
{
|
157 |
+
"epoch": 8.583690987124463,
|
158 |
+
"grad_norm": 0.0004711664514616132,
|
159 |
+
"learning_rate": 7.0815450643776825e-06,
|
160 |
+
"loss": 0.0001,
|
161 |
"step": 4000
|
162 |
},
|
163 |
{
|
164 |
+
"epoch": 9.0,
|
165 |
+
"eval_accuracy": 0.9989426998228679,
|
166 |
+
"eval_f1": 0.9441903019213176,
|
167 |
+
"eval_loss": 0.0057435426861047745,
|
168 |
+
"eval_precision": 0.9398907103825137,
|
169 |
+
"eval_recall": 0.9485294117647058,
|
170 |
+
"eval_runtime": 13.9755,
|
171 |
+
"eval_samples_per_second": 487.281,
|
172 |
+
"eval_steps_per_second": 60.964,
|
173 |
+
"step": 4194
|
174 |
},
|
175 |
{
|
176 |
+
"epoch": 9.656652360515022,
|
177 |
+
"grad_norm": 0.005946693476289511,
|
178 |
+
"learning_rate": 1.7167381974248929e-06,
|
179 |
+
"loss": 0.0001,
|
180 |
"step": 4500
|
181 |
},
|
182 |
{
|
183 |
+
"epoch": 10.0,
|
184 |
+
"eval_accuracy": 0.9989083718950389,
|
185 |
+
"eval_f1": 0.9416058394160585,
|
186 |
+
"eval_loss": 0.005793666001409292,
|
187 |
+
"eval_precision": 0.9347826086956522,
|
188 |
+
"eval_recall": 0.9485294117647058,
|
189 |
+
"eval_runtime": 14.4595,
|
190 |
+
"eval_samples_per_second": 470.971,
|
191 |
+
"eval_steps_per_second": 58.923,
|
192 |
+
"step": 4660
|
193 |
+
},
|
194 |
+
{
|
195 |
+
"epoch": 10.0,
|
196 |
+
"step": 4660,
|
197 |
+
"total_flos": 1.4714840952259542e+16,
|
198 |
+
"train_loss": 0.002772659832779558,
|
199 |
+
"train_runtime": 1349.0548,
|
200 |
+
"train_samples_per_second": 220.873,
|
201 |
+
"train_steps_per_second": 3.454
|
202 |
}
|
203 |
],
|
204 |
"logging_steps": 500,
|
205 |
+
"max_steps": 4660,
|
206 |
"num_input_tokens_seen": 0,
|
207 |
"num_train_epochs": 10,
|
208 |
"save_steps": 500,
|
|
|
218 |
"attributes": {}
|
219 |
}
|
220 |
},
|
221 |
+
"total_flos": 1.4714840952259542e+16,
|
222 |
"train_batch_size": 32,
|
223 |
"trial_name": null,
|
224 |
"trial_params": null
|