End of training
Browse files- README.md +8 -7
- all_results.json +26 -0
- eval_results.json +12 -0
- predict_results.json +10 -0
- predictions.txt +0 -0
- tb/events.out.tfevents.1725573899.df0b2d2cc7fe.2457.1 +3 -0
- train.log +48 -0
- train_results.json +9 -0
- trainer_state.json +232 -0
README.md
CHANGED
@@ -3,9 +3,10 @@ library_name: transformers
|
|
3 |
license: apache-2.0
|
4 |
base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
|
5 |
tags:
|
|
|
6 |
- generated_from_trainer
|
7 |
datasets:
|
8 |
-
- cantemist-85-ner
|
9 |
metrics:
|
10 |
- precision
|
11 |
- recall
|
@@ -18,8 +19,8 @@ model-index:
|
|
18 |
name: Token Classification
|
19 |
type: token-classification
|
20 |
dataset:
|
21 |
-
name: cantemist-85-ner
|
22 |
-
type: cantemist-85-ner
|
23 |
config: CantemistNer
|
24 |
split: validation
|
25 |
args: CantemistNer
|
@@ -35,7 +36,7 @@ model-index:
|
|
35 |
value: 0.8506998444790046
|
36 |
- name: Accuracy
|
37 |
type: accuracy
|
38 |
-
value: 0.9916486929514279
|
39 |
---
|
40 |
|
41 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
@@ -43,13 +44,13 @@ should probably proofread and complete it, then remove this comment. -->
|
|
43 |
|
44 |
# output
|
45 |
|
46 |
-
This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the cantemist-85-ner dataset.
|
47 |
It achieves the following results on the evaluation set:
|
48 |
-
- Loss: 0.0503
|
49 |
- Precision: 0.8399
|
50 |
- Recall: 0.8618
|
51 |
- F1: 0.8507
|
52 |
-
- Accuracy: 0.9916
|
53 |
|
54 |
## Model description
|
55 |
|
|
|
3 |
license: apache-2.0
|
4 |
base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
|
5 |
tags:
|
6 |
+
- token-classification
|
7 |
- generated_from_trainer
|
8 |
datasets:
|
9 |
+
- Rodrigo1771/cantemist-85-ner
|
10 |
metrics:
|
11 |
- precision
|
12 |
- recall
|
|
|
19 |
name: Token Classification
|
20 |
type: token-classification
|
21 |
dataset:
|
22 |
+
name: Rodrigo1771/cantemist-85-ner
|
23 |
+
type: Rodrigo1771/cantemist-85-ner
|
24 |
config: CantemistNer
|
25 |
split: validation
|
26 |
args: CantemistNer
|
|
|
36 |
value: 0.8506998444790046
|
37 |
- name: Accuracy
|
38 |
type: accuracy
|
39 |
+
value: 0.9916544445403043
|
40 |
---
|
41 |
|
42 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
|
|
44 |
|
45 |
# output
|
46 |
|
47 |
+
This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the Rodrigo1771/cantemist-85-ner dataset.
|
48 |
It achieves the following results on the evaluation set:
|
49 |
+
- Loss: 0.0496
|
50 |
- Precision: 0.8399
|
51 |
- Recall: 0.8618
|
52 |
- F1: 0.8507
|
53 |
+
- Accuracy: 0.9917
|
54 |
|
55 |
## Model description
|
56 |
|
all_results.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 10.0,
|
3 |
+
"eval_accuracy": 0.9916544445403043,
|
4 |
+
"eval_f1": 0.8506998444790046,
|
5 |
+
"eval_loss": 0.04956069961190224,
|
6 |
+
"eval_precision": 0.8399232245681382,
|
7 |
+
"eval_recall": 0.8617565970854667,
|
8 |
+
"eval_runtime": 15.5432,
|
9 |
+
"eval_samples": 7354,
|
10 |
+
"eval_samples_per_second": 473.133,
|
11 |
+
"eval_steps_per_second": 59.19,
|
12 |
+
"predict_accuracy": 0.9919921133225054,
|
13 |
+
"predict_f1": 0.8561699809316262,
|
14 |
+
"predict_loss": 0.04697508364915848,
|
15 |
+
"predict_precision": 0.8359042553191489,
|
16 |
+
"predict_recall": 0.8774427694025684,
|
17 |
+
"predict_runtime": 23.0034,
|
18 |
+
"predict_samples_per_second": 471.149,
|
19 |
+
"predict_steps_per_second": 58.904,
|
20 |
+
"total_flos": 1.5369860684670966e+16,
|
21 |
+
"train_loss": 0.009731636565258824,
|
22 |
+
"train_runtime": 1413.3317,
|
23 |
+
"train_samples": 32675,
|
24 |
+
"train_samples_per_second": 231.191,
|
25 |
+
"train_steps_per_second": 3.616
|
26 |
+
}
|
eval_results.json
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 10.0,
|
3 |
+
"eval_accuracy": 0.9916544445403043,
|
4 |
+
"eval_f1": 0.8506998444790046,
|
5 |
+
"eval_loss": 0.04956069961190224,
|
6 |
+
"eval_precision": 0.8399232245681382,
|
7 |
+
"eval_recall": 0.8617565970854667,
|
8 |
+
"eval_runtime": 15.5432,
|
9 |
+
"eval_samples": 7354,
|
10 |
+
"eval_samples_per_second": 473.133,
|
11 |
+
"eval_steps_per_second": 59.19
|
12 |
+
}
|
predict_results.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"predict_accuracy": 0.9919921133225054,
|
3 |
+
"predict_f1": 0.8561699809316262,
|
4 |
+
"predict_loss": 0.04697508364915848,
|
5 |
+
"predict_precision": 0.8359042553191489,
|
6 |
+
"predict_recall": 0.8774427694025684,
|
7 |
+
"predict_runtime": 23.0034,
|
8 |
+
"predict_samples_per_second": 471.149,
|
9 |
+
"predict_steps_per_second": 58.904
|
10 |
+
}
|
predictions.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tb/events.out.tfevents.1725573899.df0b2d2cc7fe.2457.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6423da8ec0617924aeaa8bb9f423f80167211d3925beb4a31c9c0f6412f1122c
|
3 |
+
size 560
|
train.log
CHANGED
@@ -1562,3 +1562,51 @@ Training completed. Do not forget to share your model on huggingface.co/models =
|
|
1562 |
{'eval_loss': 0.05025585740804672, 'eval_precision': 0.8399232245681382, 'eval_recall': 0.8617565970854667, 'eval_f1': 0.8506998444790046, 'eval_accuracy': 0.9916486929514279, 'eval_runtime': 16.4037, 'eval_samples_per_second': 448.314, 'eval_steps_per_second': 56.085, 'epoch': 10.0}
|
1563 |
{'train_runtime': 1413.3317, 'train_samples_per_second': 231.191, 'train_steps_per_second': 3.616, 'train_loss': 0.009731636565258824, 'epoch': 10.0}
|
1564 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1565 |
0%| | 0/920 [00:00<?, ?it/s]
|
1566 |
1%| | 9/920 [00:00<00:10, 84.54it/s]
|
1567 |
2%|▏ | 18/920 [00:00<00:12, 73.05it/s]
|
1568 |
3%|▎ | 26/920 [00:00<00:11, 74.96it/s]
|
1569 |
4%|▍ | 35/920 [00:00<00:11, 77.46it/s]
|
1570 |
5%|▍ | 43/920 [00:00<00:11, 77.65it/s]
|
1571 |
6%|▌ | 52/920 [00:00<00:10, 79.04it/s]
|
1572 |
7%|▋ | 60/920 [00:00<00:11, 75.68it/s]
|
1573 |
7%|▋ | 68/920 [00:00<00:11, 76.64it/s]
|
1574 |
8%|▊ | 77/920 [00:00<00:10, 78.76it/s]
|
1575 |
9%|▉ | 85/920 [00:01<00:10, 79.00it/s]
|
1576 |
10%|█ | 94/920 [00:01<00:10, 79.41it/s]
|
1577 |
11%|█ | 103/920 [00:01<00:10, 79.93it/s]
|
1578 |
12%|█▏ | 111/920 [00:01<00:10, 79.52it/s]
|
1579 |
13%|█▎ | 120/920 [00:01<00:09, 80.07it/s]
|
1580 |
14%|█▍ | 129/920 [00:01<00:09, 81.24it/s]
|
1581 |
15%|█▌ | 138/920 [00:01<00:09, 81.96it/s]
|
1582 |
16%|█▌ | 147/920 [00:01<00:09, 82.29it/s]
|
1583 |
17%|█▋ | 156/920 [00:01<00:09, 80.75it/s]
|
1584 |
18%|█▊ | 165/920 [00:02<00:09, 81.61it/s]
|
1585 |
19%|█▉ | 174/920 [00:02<00:09, 82.72it/s]
|
1586 |
20%|█▉ | 183/920 [00:02<00:08, 82.84it/s]
|
1587 |
21%|██ | 192/920 [00:02<00:08, 81.41it/s]
|
1588 |
22%|██▏ | 201/920 [00:02<00:08, 80.35it/s]
|
1589 |
23%|██▎ | 210/920 [00:02<00:09, 77.43it/s]
|
1590 |
24%|██▍ | 219/920 [00:02<00:08, 79.21it/s]
|
1591 |
25%|██▍ | 228/920 [00:02<00:08, 80.10it/s]
|
1592 |
26%|██▌ | 237/920 [00:02<00:08, 81.55it/s]
|
1593 |
27%|██▋ | 246/920 [00:03<00:08, 82.11it/s]
|
1594 |
28%|██▊ | 255/920 [00:03<00:08, 81.17it/s]
|
1595 |
29%|██▊ | 264/920 [00:03<00:08, 78.82it/s]
|
1596 |
30%|██▉ | 272/920 [00:03<00:08, 78.57it/s]
|
1597 |
30%|███ | 280/920 [00:03<00:08, 78.09it/s]
|
1598 |
31%|███▏ | 288/920 [00:03<00:08, 78.11it/s]
|
1599 |
32%|███▏ | 297/920 [00:03<00:07, 78.87it/s]
|
1600 |
33%|███▎ | 306/920 [00:03<00:07, 79.62it/s]
|
1601 |
34%|███▍ | 315/920 [00:03<00:07, 80.07it/s]
|
1602 |
35%|███▌ | 324/920 [00:04<00:07, 79.49it/s]
|
1603 |
36%|███▌ | 332/920 [00:04<00:07, 78.63it/s]
|
1604 |
37%|███▋ | 340/920 [00:04<00:07, 77.32it/s]
|
1605 |
38%|███▊ | 349/920 [00:04<00:07, 78.42it/s]
|
1606 |
39%|███▉ | 357/920 [00:04<00:07, 78.09it/s]
|
1607 |
40%|███▉ | 365/920 [00:04<00:07, 78.40it/s]
|
1608 |
41%|████ | 374/920 [00:04<00:06, 79.90it/s]
|
1609 |
42%|████▏ | 383/920 [00:04<00:06, 81.28it/s]
|
1610 |
43%|████▎ | 392/920 [00:04<00:06, 78.97it/s]
|
1611 |
43%|████▎ | 400/920 [00:05<00:06, 77.62it/s]
|
1612 |
44%|████▍ | 408/920 [00:05<00:06, 76.16it/s]
|
1613 |
45%|████▌ | 417/920 [00:05<00:06, 77.84it/s]
|
1614 |
46%|████▋ | 426/920 [00:05<00:06, 78.34it/s]
|
1615 |
47%|████▋ | 435/920 [00:05<00:06, 79.22it/s]
|
1616 |
48%|████▊ | 444/920 [00:05<00:05, 80.13it/s]
|
1617 |
49%|████▉ | 453/920 [00:05<00:05, 81.19it/s]
|
1618 |
50%|█████ | 462/920 [00:05<00:05, 81.32it/s]
|
1619 |
51%|█████ | 471/920 [00:05<00:05, 80.10it/s]
|
1620 |
52%|█████▏ | 480/920 [00:06<00:05, 81.21it/s]
|
1621 |
53%|█████▎ | 489/920 [00:06<00:05, 81.06it/s]
|
1622 |
54%|█████▍ | 498/920 [00:06<00:05, 81.04it/s]
|
1623 |
55%|█████▌ | 507/920 [00:06<00:05, 81.72it/s]
|
1624 |
56%|█████▌ | 516/920 [00:06<00:04, 81.77it/s]
|
1625 |
57%|█████▋ | 525/920 [00:06<00:04, 82.08it/s]
|
1626 |
58%|█████▊ | 534/920 [00:06<00:04, 81.28it/s]
|
1627 |
59%|█████▉ | 543/920 [00:06<00:04, 81.25it/s]
|
1628 |
60%|██████ | 552/920 [00:06<00:04, 78.20it/s]
|
1629 |
61%|██████ | 560/920 [00:07<00:04, 75.62it/s]
|
1630 |
62%|██████▏ | 568/920 [00:07<00:04, 76.50it/s]
|
1631 |
63%|██████▎ | 577/920 [00:07<00:04, 78.56it/s]
|
1632 |
64%|██████▎ | 585/920 [00:07<00:04, 77.87it/s]
|
1633 |
65%|██████▍ | 594/920 [00:07<00:04, 79.73it/s]
|
1634 |
66%|██████▌ | 603/920 [00:07<00:03, 81.43it/s]
|
1635 |
67%|██████▋ | 612/920 [00:07<00:03, 81.46it/s]
|
1636 |
68%|██████▊ | 621/920 [00:07<00:03, 80.50it/s]
|
1637 |
68%|██████▊ | 630/920 [00:07<00:03, 79.92it/s]
|
1638 |
69%|██████▉ | 639/920 [00:08<00:03, 80.53it/s]
|
1639 |
70%|███████ | 648/920 [00:08<00:03, 81.62it/s]
|
1640 |
71%|███████▏ | 657/920 [00:08<00:03, 81.38it/s]
|
1641 |
72%|███████▏ | 666/920 [00:08<00:03, 81.64it/s]
|
1642 |
73%|███████▎ | 675/920 [00:08<00:02, 82.37it/s]
|
1643 |
74%|███████▍ | 684/920 [00:08<00:02, 83.02it/s]
|
1644 |
75%|███████▌ | 693/920 [00:08<00:02, 81.40it/s]
|
1645 |
76%|███████▋ | 702/920 [00:08<00:02, 81.01it/s]
|
1646 |
77%|███████▋ | 711/920 [00:08<00:02, 80.04it/s]
|
1647 |
78%|███████▊ | 720/920 [00:09<00:02, 81.29it/s]
|
1648 |
79%|███████▉ | 729/920 [00:09<00:02, 82.40it/s]
|
1649 |
80%|████████ | 738/920 [00:09<00:02, 81.32it/s]
|
1650 |
81%|████████ | 747/920 [00:09<00:02, 78.84it/s]
|
1651 |
82%|████████▏ | 756/920 [00:09<00:02, 79.45it/s]
|
1652 |
83%|████████▎ | 765/920 [00:09<00:01, 80.42it/s]
|
1653 |
84%|████████▍ | 774/920 [00:09<00:01, 81.22it/s]
|
1654 |
85%|████████▌ | 783/920 [00:09<00:01, 80.34it/s]
|
1655 |
86%|████████▌ | 792/920 [00:09<00:01, 79.82it/s]
|
1656 |
87%|████████▋ | 800/920 [00:10<00:01, 79.49it/s]
|
1657 |
88%|████████▊ | 808/920 [00:10<00:01, 78.30it/s]
|
1658 |
89%|████████▉ | 817/920 [00:10<00:01, 79.52it/s]
|
1659 |
90%|████████▉ | 826/920 [00:10<00:01, 80.37it/s]
|
1660 |
91%|█████████ | 835/920 [00:10<00:01, 81.87it/s]
|
1661 |
92%|█████████▏| 844/920 [00:10<00:00, 80.79it/s]
|
1662 |
93%|█████████▎| 853/920 [00:10<00:00, 76.52it/s]
|
1663 |
94%|█████████▎| 861/920 [00:10<00:00, 75.56it/s]
|
1664 |
95%|█████████▍| 870/920 [00:10<00:00, 77.00it/s]
|
1665 |
96%|█████████▌| 879/920 [00:11<00:00, 78.67it/s]
|
1666 |
97%|█████████▋| 888/920 [00:11<00:00, 79.25it/s]
|
1667 |
97%|█████████▋| 896/920 [00:11<00:00, 76.28it/s]
|
1668 |
98%|█████████▊| 905/920 [00:11<00:00, 77.41it/s]
|
1669 |
99%|█████████▉| 914/920 [00:11<00:00, 79.26it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1670 |
0%| | 0/1355 [00:00<?, ?it/s]
|
1671 |
1%| | 9/1355 [00:00<00:15, 89.65it/s]
|
1672 |
1%|▏ | 18/1355 [00:00<00:16, 78.98it/s]
|
1673 |
2%|▏ | 27/1355 [00:00<00:16, 80.91it/s]
|
1674 |
3%|▎ | 36/1355 [00:00<00:16, 80.26it/s]
|
1675 |
3%|▎ | 45/1355 [00:00<00:16, 79.74it/s]
|
1676 |
4%|▍ | 53/1355 [00:00<00:17, 76.43it/s]
|
1677 |
5%|▍ | 61/1355 [00:00<00:16, 76.79it/s]
|
1678 |
5%|▌ | 69/1355 [00:00<00:16, 77.27it/s]
|
1679 |
6%|▌ | 78/1355 [00:00<00:16, 78.60it/s]
|
1680 |
6%|▋ | 87/1355 [00:01<00:15, 79.45it/s]
|
1681 |
7%|▋ | 95/1355 [00:01<00:15, 79.17it/s]
|
1682 |
8%|▊ | 103/1355 [00:01<00:15, 79.17it/s]
|
1683 |
8%|▊ | 111/1355 [00:01<00:16, 77.62it/s]
|
1684 |
9%|▉ | 119/1355 [00:01<00:16, 76.95it/s]
|
1685 |
9%|▉ | 128/1355 [00:01<00:15, 77.99it/s]
|
1686 |
10%|█ | 137/1355 [00:01<00:15, 78.88it/s]
|
1687 |
11%|█ | 145/1355 [00:01<00:15, 76.54it/s]
|
1688 |
11%|█▏ | 153/1355 [00:01<00:16, 74.41it/s]
|
1689 |
12%|█▏ | 161/1355 [00:02<00:17, 70.18it/s]
|
1690 |
12%|█▏ | 169/1355 [00:02<00:16, 71.76it/s]
|
1691 |
13%|█▎ | 178/1355 [00:02<00:15, 74.18it/s]
|
1692 |
14%|█▍ | 187/1355 [00:02<00:15, 76.84it/s]
|
1693 |
14%|█▍ | 196/1355 [00:02<00:14, 78.67it/s]
|
1694 |
15%|█▌ | 205/1355 [00:02<00:14, 79.95it/s]
|
1695 |
16%|█▌ | 214/1355 [00:02<00:14, 81.03it/s]
|
1696 |
16%|█▋ | 223/1355 [00:02<00:14, 79.30it/s]
|
1697 |
17%|█▋ | 232/1355 [00:02<00:14, 79.76it/s]
|
1698 |
18%|█▊ | 240/1355 [00:03<00:14, 78.82it/s]
|
1699 |
18%|█▊ | 249/1355 [00:03<00:13, 79.55it/s]
|
1700 |
19%|█▉ | 258/1355 [00:03<00:13, 80.77it/s]
|
1701 |
20%|█▉ | 267/1355 [00:03<00:13, 82.26it/s]
|
1702 |
20%|██ | 276/1355 [00:03<00:12, 83.16it/s]
|
1703 |
21%|██ | 285/1355 [00:03<00:13, 81.80it/s]
|
1704 |
22%|██▏ | 294/1355 [00:03<00:13, 81.16it/s]
|
1705 |
22%|██▏ | 303/1355 [00:03<00:12, 82.02it/s]
|
1706 |
23%|██▎ | 312/1355 [00:03<00:12, 83.08it/s]
|
1707 |
24%|██▎ | 321/1355 [00:04<00:12, 83.67it/s]
|
1708 |
24%|██▍ | 330/1355 [00:04<00:12, 84.72it/s]
|
1709 |
25%|██▌ | 339/1355 [00:04<00:11, 85.11it/s]
|
1710 |
26%|██▌ | 348/1355 [00:04<00:11, 85.47it/s]
|
1711 |
26%|██▋ | 357/1355 [00:04<00:11, 85.91it/s]
|
1712 |
27%|██▋ | 366/1355 [00:04<00:11, 86.27it/s]
|
1713 |
28%|██▊ | 375/1355 [00:04<00:11, 85.79it/s]
|
1714 |
28%|██▊ | 384/1355 [00:04<00:11, 85.61it/s]
|
1715 |
29%|██▉ | 393/1355 [00:04<00:11, 85.43it/s]
|
1716 |
30%|██▉ | 402/1355 [00:04<00:11, 85.11it/s]
|
1717 |
30%|███ | 411/1355 [00:05<00:11, 85.66it/s]
|
1718 |
31%|███ | 420/1355 [00:05<00:11, 84.67it/s]
|
1719 |
32%|███▏ | 429/1355 [00:05<00:10, 85.02it/s]
|
1720 |
32%|███▏ | 438/1355 [00:05<00:10, 84.20it/s]
|
1721 |
33%|███▎ | 447/1355 [00:05<00:10, 84.53it/s]
|
1722 |
34%|███▎ | 456/1355 [00:05<00:11, 81.47it/s]
|
1723 |
34%|███▍ | 465/1355 [00:05<00:10, 81.24it/s]
|
1724 |
35%|███▍ | 474/1355 [00:05<00:10, 81.95it/s]
|
1725 |
36%|███▌ | 483/1355 [00:05<00:10, 82.64it/s]
|
1726 |
36%|███▋ | 492/1355 [00:06<00:10, 83.13it/s]
|
1727 |
37%|███▋ | 501/1355 [00:06<00:10, 82.28it/s]
|
1728 |
38%|███▊ | 510/1355 [00:06<00:10, 79.36it/s]
|
1729 |
38%|███▊ | 518/1355 [00:06<00:10, 78.05it/s]
|
1730 |
39%|███▉ | 527/1355 [00:06<00:10, 79.16it/s]
|
1731 |
40%|███▉ | 536/1355 [00:06<00:10, 79.94it/s]
|
1732 |
40%|████ | 545/1355 [00:06<00:10, 78.66it/s]
|
1733 |
41%|████ | 554/1355 [00:06<00:10, 80.00it/s]
|
1734 |
42%|████▏ | 563/1355 [00:06<00:09, 81.81it/s]
|
1735 |
42%|████▏ | 572/1355 [00:07<00:09, 82.19it/s]
|
1736 |
43%|████▎ | 581/1355 [00:07<00:09, 80.37it/s]
|
1737 |
44%|████▎ | 590/1355 [00:07<00:09, 81.18it/s]
|
1738 |
44%|████▍ | 599/1355 [00:07<00:09, 80.25it/s]
|
1739 |
45%|████▍ | 608/1355 [00:07<00:09, 77.59it/s]
|
1740 |
46%|████▌ | 617/1355 [00:07<00:09, 79.00it/s]
|
1741 |
46%|████▌ | 625/1355 [00:07<00:09, 78.08it/s]
|
1742 |
47%|████▋ | 633/1355 [00:07<00:09, 78.24it/s]
|
1743 |
47%|████▋ | 642/1355 [00:07<00:08, 79.32it/s]
|
1744 |
48%|████▊ | 651/1355 [00:08<00:08, 80.44it/s]
|
1745 |
49%|████▊ | 660/1355 [00:08<00:08, 82.22it/s]
|
1746 |
49%|████▉ | 669/1355 [00:08<00:09, 76.21it/s]
|
1747 |
50%|█████ | 678/1355 [00:08<00:08, 77.55it/s]
|
1748 |
51%|█████ | 687/1355 [00:08<00:08, 78.58it/s]
|
1749 |
51%|█████▏ | 696/1355 [00:08<00:08, 79.87it/s]
|
1750 |
52%|█████▏ | 705/1355 [00:08<00:08, 78.11it/s]
|
1751 |
53%|█████▎ | 714/1355 [00:08<00:07, 80.50it/s]
|
1752 |
53%|█████▎ | 723/1355 [00:08<00:07, 80.12it/s]
|
1753 |
54%|█████▍ | 732/1355 [00:09<00:07, 80.88it/s]
|
1754 |
55%|█████▍ | 741/1355 [00:09<00:07, 78.39it/s]
|
1755 |
55%|█████▌ | 750/1355 [00:09<00:07, 79.64it/s]
|
1756 |
56%|█████▌ | 758/1355 [00:09<00:07, 78.30it/s]
|
1757 |
57%|█████▋ | 766/1355 [00:09<00:07, 78.43it/s]
|
1758 |
57%|█████▋ | 775/1355 [00:09<00:07, 81.16it/s]
|
1759 |
58%|█████▊ | 784/1355 [00:09<00:07, 76.55it/s]
|
1760 |
59%|█████▊ | 793/1355 [00:09<00:07, 78.18it/s]
|
1761 |
59%|█████▉ | 801/1355 [00:09<00:07, 78.59it/s]
|
1762 |
60%|█████▉ | 810/1355 [00:10<00:06, 81.02it/s]
|
1763 |
60%|██████ | 819/1355 [00:10<00:06, 81.88it/s]
|
1764 |
61%|██████ | 828/1355 [00:10<00:06, 81.33it/s]
|
1765 |
62%|██████▏ | 837/1355 [00:10<00:06, 80.79it/s]
|
1766 |
62%|██████▏ | 846/1355 [00:10<00:06, 80.11it/s]
|
1767 |
63%|██████▎ | 855/1355 [00:10<00:06, 80.41it/s]
|
1768 |
64%|██████▍ | 864/1355 [00:10<00:06, 81.38it/s]
|
1769 |
64%|██████▍ | 873/1355 [00:10<00:05, 82.00it/s]
|
1770 |
65%|██████▌ | 882/1355 [00:10<00:05, 82.54it/s]
|
1771 |
66%|██████▌ | 891/1355 [00:11<00:05, 82.98it/s]
|
1772 |
66%|██████▋ | 900/1355 [00:11<00:05, 82.84it/s]
|
1773 |
67%|██████▋ | 909/1355 [00:11<00:05, 83.28it/s]
|
1774 |
68%|██████▊ | 918/1355 [00:11<00:05, 83.30it/s]
|
1775 |
68%|██████▊ | 927/1355 [00:11<00:05, 80.41it/s]
|
1776 |
69%|██████▉ | 936/1355 [00:11<00:05, 79.47it/s]
|
1777 |
70%|██████▉ | 945/1355 [00:11<00:05, 80.81it/s]
|
1778 |
70%|███████ | 954/1355 [00:11<00:04, 81.93it/s]
|
1779 |
71%|███████ | 963/1355 [00:11<00:04, 80.07it/s]
|
1780 |
72%|███████▏ | 972/1355 [00:12<00:05, 76.41it/s]
|
1781 |
72%|███████▏ | 980/1355 [00:12<00:04, 77.20it/s]
|
1782 |
73%|███████▎ | 988/1355 [00:12<00:04, 77.77it/s]
|
1783 |
74%|███████▎ | 996/1355 [00:12<00:04, 78.35it/s]
|
1784 |
74%|███████▍ | 1005/1355 [00:12<00:04, 80.44it/s]
|
1785 |
75%|███████▍ | 1014/1355 [00:12<00:04, 79.12it/s]
|
1786 |
75%|███████▌ | 1022/1355 [00:12<00:04, 78.30it/s]
|
1787 |
76%|███████▌ | 1030/1355 [00:12<00:04, 78.23it/s]
|
1788 |
77%|███████▋ | 1039/1355 [00:12<00:03, 79.57it/s]
|
1789 |
77%|███████▋ | 1048/1355 [00:13<00:03, 81.05it/s]
|
1790 |
78%|███████▊ | 1057/1355 [00:13<00:03, 79.79it/s]
|
1791 |
79%|███████▊ | 1066/1355 [00:13<00:03, 81.05it/s]
|
1792 |
79%|███████▉ | 1075/1355 [00:13<00:03, 81.44it/s]
|
1793 |
80%|████████ | 1084/1355 [00:13<00:03, 80.30it/s]
|
1794 |
81%|████████ | 1093/1355 [00:13<00:03, 80.00it/s]
|
1795 |
81%|████████▏ | 1102/1355 [00:13<00:03, 80.32it/s]
|
1796 |
82%|████████▏ | 1111/1355 [00:13<00:03, 76.79it/s]
|
1797 |
83%|████████▎ | 1119/1355 [00:13<00:03, 76.96it/s]
|
1798 |
83%|████████▎ | 1127/1355 [00:14<00:02, 76.68it/s]
|
1799 |
84%|████████▍ | 1136/1355 [00:14<00:02, 76.22it/s]
|
1800 |
85%|████████▍ | 1145/1355 [00:14<00:02, 78.00it/s]
|
1801 |
85%|████████▌ | 1153/1355 [00:14<00:02, 77.95it/s]
|
1802 |
86%|████████▌ | 1161/1355 [00:14<00:02, 76.16it/s]
|
1803 |
86%|████████▋ | 1169/1355 [00:14<00:02, 76.13it/s]
|
1804 |
87%|████████▋ | 1177/1355 [00:14<00:02, 77.20it/s]
|
1805 |
88%|████████▊ | 1186/1355 [00:14<00:02, 77.96it/s]
|
1806 |
88%|████████▊ | 1195/1355 [00:14<00:02, 78.39it/s]
|
1807 |
89%|████████▉ | 1203/1355 [00:15<00:01, 78.18it/s]
|
1808 |
89%|████████▉ | 1212/1355 [00:15<00:01, 79.03it/s]
|
1809 |
90%|█████████ | 1220/1355 [00:15<00:01, 76.62it/s]
|
1810 |
91%|█████████ | 1229/1355 [00:15<00:01, 77.65it/s]
|
1811 |
91%|█████████▏| 1237/1355 [00:15<00:01, 77.55it/s]
|
1812 |
92%|█████████▏| 1245/1355 [00:15<00:01, 77.78it/s]
|
1813 |
92%|█████████▏| 1253/1355 [00:15<00:01, 78.26it/s]
|
1814 |
93%|█████████▎| 1262/1355 [00:15<00:01, 79.17it/s]
|
1815 |
94%|█████████▎| 1270/1355 [00:15<00:01, 79.22it/s]
|
1816 |
94%|█████████▍| 1279/1355 [00:16<00:00, 80.05it/s]
|
1817 |
95%|█████████▌| 1288/1355 [00:16<00:00, 81.28it/s]
|
1818 |
96%|█████████▌| 1297/1355 [00:16<00:00, 80.80it/s]
|
1819 |
96%|█████████▋| 1306/1355 [00:16<00:00, 79.04it/s]
|
1820 |
97%|█████████▋| 1314/1355 [00:16<00:00, 79.26it/s]
|
1821 |
98%|█████████▊| 1322/1355 [00:16<00:00, 79.15it/s]
|
1822 |
98%|█████████▊| 1330/1355 [00:16<00:00, 79.26it/s]
|
1823 |
99%|█████████▊| 1338/1355 [00:16<00:00, 76.91it/s]
|
1824 |
99%|█████████▉| 1346/1355 [00:16<00:00, 77.65it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1562 |
{'eval_loss': 0.05025585740804672, 'eval_precision': 0.8399232245681382, 'eval_recall': 0.8617565970854667, 'eval_f1': 0.8506998444790046, 'eval_accuracy': 0.9916486929514279, 'eval_runtime': 16.4037, 'eval_samples_per_second': 448.314, 'eval_steps_per_second': 56.085, 'epoch': 10.0}
|
1563 |
{'train_runtime': 1413.3317, 'train_samples_per_second': 231.191, 'train_steps_per_second': 3.616, 'train_loss': 0.009731636565258824, 'epoch': 10.0}
|
1564 |
|
1565 |
+
***** train metrics *****
|
1566 |
+
epoch = 10.0
|
1567 |
+
total_flos = 14314298GF
|
1568 |
+
train_loss = 0.0097
|
1569 |
+
train_runtime = 0:23:33.33
|
1570 |
+
train_samples = 32675
|
1571 |
+
train_samples_per_second = 231.191
|
1572 |
+
train_steps_per_second = 3.616
|
1573 |
+
09/05/2024 22:04:44 - INFO - __main__ - *** Evaluate ***
|
1574 |
+
[INFO|trainer.py:811] 2024-09-05 22:04:44,308 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, id, ner_tags. If tokens, id, ner_tags are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
|
1575 |
+
[INFO|trainer.py:3819] 2024-09-05 22:04:44,311 >>
|
1576 |
+
***** Running Evaluation *****
|
1577 |
+
[INFO|trainer.py:3821] 2024-09-05 22:04:44,311 >> Num examples = 7354
|
1578 |
+
[INFO|trainer.py:3824] 2024-09-05 22:04:44,311 >> Batch size = 8
|
1579 |
+
|
1580 |
0%| | 0/920 [00:00<?, ?it/s]
|
1581 |
1%| | 9/920 [00:00<00:10, 84.54it/s]
|
1582 |
2%|▏ | 18/920 [00:00<00:12, 73.05it/s]
|
1583 |
3%|▎ | 26/920 [00:00<00:11, 74.96it/s]
|
1584 |
4%|▍ | 35/920 [00:00<00:11, 77.46it/s]
|
1585 |
5%|▍ | 43/920 [00:00<00:11, 77.65it/s]
|
1586 |
6%|▌ | 52/920 [00:00<00:10, 79.04it/s]
|
1587 |
7%|▋ | 60/920 [00:00<00:11, 75.68it/s]
|
1588 |
7%|▋ | 68/920 [00:00<00:11, 76.64it/s]
|
1589 |
8%|▊ | 77/920 [00:00<00:10, 78.76it/s]
|
1590 |
9%|▉ | 85/920 [00:01<00:10, 79.00it/s]
|
1591 |
10%|█ | 94/920 [00:01<00:10, 79.41it/s]
|
1592 |
11%|█ | 103/920 [00:01<00:10, 79.93it/s]
|
1593 |
12%|█▏ | 111/920 [00:01<00:10, 79.52it/s]
|
1594 |
13%|█▎ | 120/920 [00:01<00:09, 80.07it/s]
|
1595 |
14%|█▍ | 129/920 [00:01<00:09, 81.24it/s]
|
1596 |
15%|█▌ | 138/920 [00:01<00:09, 81.96it/s]
|
1597 |
16%|█▌ | 147/920 [00:01<00:09, 82.29it/s]
|
1598 |
17%|█▋ | 156/920 [00:01<00:09, 80.75it/s]
|
1599 |
18%|█▊ | 165/920 [00:02<00:09, 81.61it/s]
|
1600 |
19%|█▉ | 174/920 [00:02<00:09, 82.72it/s]
|
1601 |
20%|█▉ | 183/920 [00:02<00:08, 82.84it/s]
|
1602 |
21%|██ | 192/920 [00:02<00:08, 81.41it/s]
|
1603 |
22%|██▏ | 201/920 [00:02<00:08, 80.35it/s]
|
1604 |
23%|██▎ | 210/920 [00:02<00:09, 77.43it/s]
|
1605 |
24%|██▍ | 219/920 [00:02<00:08, 79.21it/s]
|
1606 |
25%|██▍ | 228/920 [00:02<00:08, 80.10it/s]
|
1607 |
26%|██▌ | 237/920 [00:02<00:08, 81.55it/s]
|
1608 |
27%|██▋ | 246/920 [00:03<00:08, 82.11it/s]
|
1609 |
28%|██▊ | 255/920 [00:03<00:08, 81.17it/s]
|
1610 |
29%|██▊ | 264/920 [00:03<00:08, 78.82it/s]
|
1611 |
30%|██▉ | 272/920 [00:03<00:08, 78.57it/s]
|
1612 |
30%|███ | 280/920 [00:03<00:08, 78.09it/s]
|
1613 |
31%|███▏ | 288/920 [00:03<00:08, 78.11it/s]
|
1614 |
32%|███▏ | 297/920 [00:03<00:07, 78.87it/s]
|
1615 |
33%|███▎ | 306/920 [00:03<00:07, 79.62it/s]
|
1616 |
34%|███▍ | 315/920 [00:03<00:07, 80.07it/s]
|
1617 |
35%|███▌ | 324/920 [00:04<00:07, 79.49it/s]
|
1618 |
36%|███▌ | 332/920 [00:04<00:07, 78.63it/s]
|
1619 |
37%|███▋ | 340/920 [00:04<00:07, 77.32it/s]
|
1620 |
38%|███▊ | 349/920 [00:04<00:07, 78.42it/s]
|
1621 |
39%|███▉ | 357/920 [00:04<00:07, 78.09it/s]
|
1622 |
40%|███▉ | 365/920 [00:04<00:07, 78.40it/s]
|
1623 |
41%|████ | 374/920 [00:04<00:06, 79.90it/s]
|
1624 |
42%|████▏ | 383/920 [00:04<00:06, 81.28it/s]
|
1625 |
43%|████▎ | 392/920 [00:04<00:06, 78.97it/s]
|
1626 |
43%|████▎ | 400/920 [00:05<00:06, 77.62it/s]
|
1627 |
44%|████▍ | 408/920 [00:05<00:06, 76.16it/s]
|
1628 |
45%|████▌ | 417/920 [00:05<00:06, 77.84it/s]
|
1629 |
46%|████▋ | 426/920 [00:05<00:06, 78.34it/s]
|
1630 |
47%|████▋ | 435/920 [00:05<00:06, 79.22it/s]
|
1631 |
48%|████▊ | 444/920 [00:05<00:05, 80.13it/s]
|
1632 |
49%|████▉ | 453/920 [00:05<00:05, 81.19it/s]
|
1633 |
50%|█████ | 462/920 [00:05<00:05, 81.32it/s]
|
1634 |
51%|█████ | 471/920 [00:05<00:05, 80.10it/s]
|
1635 |
52%|█████▏ | 480/920 [00:06<00:05, 81.21it/s]
|
1636 |
53%|█████▎ | 489/920 [00:06<00:05, 81.06it/s]
|
1637 |
54%|█████▍ | 498/920 [00:06<00:05, 81.04it/s]
|
1638 |
55%|█████▌ | 507/920 [00:06<00:05, 81.72it/s]
|
1639 |
56%|█████▌ | 516/920 [00:06<00:04, 81.77it/s]
|
1640 |
57%|█████▋ | 525/920 [00:06<00:04, 82.08it/s]
|
1641 |
58%|█████▊ | 534/920 [00:06<00:04, 81.28it/s]
|
1642 |
59%|█████▉ | 543/920 [00:06<00:04, 81.25it/s]
|
1643 |
60%|██████ | 552/920 [00:06<00:04, 78.20it/s]
|
1644 |
61%|██████ | 560/920 [00:07<00:04, 75.62it/s]
|
1645 |
62%|██████▏ | 568/920 [00:07<00:04, 76.50it/s]
|
1646 |
63%|██████▎ | 577/920 [00:07<00:04, 78.56it/s]
|
1647 |
64%|██████▎ | 585/920 [00:07<00:04, 77.87it/s]
|
1648 |
65%|██████▍ | 594/920 [00:07<00:04, 79.73it/s]
|
1649 |
66%|██████▌ | 603/920 [00:07<00:03, 81.43it/s]
|
1650 |
67%|██████▋ | 612/920 [00:07<00:03, 81.46it/s]
|
1651 |
68%|██████▊ | 621/920 [00:07<00:03, 80.50it/s]
|
1652 |
68%|██████▊ | 630/920 [00:07<00:03, 79.92it/s]
|
1653 |
69%|██████▉ | 639/920 [00:08<00:03, 80.53it/s]
|
1654 |
70%|███████ | 648/920 [00:08<00:03, 81.62it/s]
|
1655 |
71%|███████▏ | 657/920 [00:08<00:03, 81.38it/s]
|
1656 |
72%|███████▏ | 666/920 [00:08<00:03, 81.64it/s]
|
1657 |
73%|███████▎ | 675/920 [00:08<00:02, 82.37it/s]
|
1658 |
74%|███████▍ | 684/920 [00:08<00:02, 83.02it/s]
|
1659 |
75%|███████▌ | 693/920 [00:08<00:02, 81.40it/s]
|
1660 |
76%|███████▋ | 702/920 [00:08<00:02, 81.01it/s]
|
1661 |
77%|███████▋ | 711/920 [00:08<00:02, 80.04it/s]
|
1662 |
78%|███████▊ | 720/920 [00:09<00:02, 81.29it/s]
|
1663 |
79%|███████▉ | 729/920 [00:09<00:02, 82.40it/s]
|
1664 |
80%|████████ | 738/920 [00:09<00:02, 81.32it/s]
|
1665 |
81%|████████ | 747/920 [00:09<00:02, 78.84it/s]
|
1666 |
82%|████████▏ | 756/920 [00:09<00:02, 79.45it/s]
|
1667 |
83%|████████▎ | 765/920 [00:09<00:01, 80.42it/s]
|
1668 |
84%|████████▍ | 774/920 [00:09<00:01, 81.22it/s]
|
1669 |
85%|████████▌ | 783/920 [00:09<00:01, 80.34it/s]
|
1670 |
86%|████████▌ | 792/920 [00:09<00:01, 79.82it/s]
|
1671 |
87%|████████▋ | 800/920 [00:10<00:01, 79.49it/s]
|
1672 |
88%|████████▊ | 808/920 [00:10<00:01, 78.30it/s]
|
1673 |
89%|████████▉ | 817/920 [00:10<00:01, 79.52it/s]
|
1674 |
90%|████████▉ | 826/920 [00:10<00:01, 80.37it/s]
|
1675 |
91%|█████████ | 835/920 [00:10<00:01, 81.87it/s]
|
1676 |
92%|█████████▏| 844/920 [00:10<00:00, 80.79it/s]
|
1677 |
93%|█████████▎| 853/920 [00:10<00:00, 76.52it/s]
|
1678 |
94%|█████████▎| 861/920 [00:10<00:00, 75.56it/s]
|
1679 |
95%|█████████▍| 870/920 [00:10<00:00, 77.00it/s]
|
1680 |
96%|█████████▌| 879/920 [00:11<00:00, 78.67it/s]
|
1681 |
97%|█████████▋| 888/920 [00:11<00:00, 79.25it/s]
|
1682 |
97%|█████████▋| 896/920 [00:11<00:00, 76.28it/s]
|
1683 |
98%|█████████▊| 905/920 [00:11<00:00, 77.41it/s]
|
1684 |
99%|█████████▉| 914/920 [00:11<00:00, 79.26it/s]
|
1685 |
+
***** eval metrics *****
|
1686 |
+
epoch = 10.0
|
1687 |
+
eval_accuracy = 0.9917
|
1688 |
+
eval_f1 = 0.8507
|
1689 |
+
eval_loss = 0.0496
|
1690 |
+
eval_precision = 0.8399
|
1691 |
+
eval_recall = 0.8618
|
1692 |
+
eval_runtime = 0:00:15.54
|
1693 |
+
eval_samples = 7354
|
1694 |
+
eval_samples_per_second = 473.133
|
1695 |
+
eval_steps_per_second = 59.19
|
1696 |
+
09/05/2024 22:04:59 - INFO - __main__ - *** Predict ***
|
1697 |
+
[INFO|trainer.py:811] 2024-09-05 22:04:59,857 >> The following columns in the test set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, id, ner_tags. If tokens, id, ner_tags are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
|
1698 |
+
[INFO|trainer.py:3819] 2024-09-05 22:04:59,859 >>
|
1699 |
+
***** Running Prediction *****
|
1700 |
+
[INFO|trainer.py:3821] 2024-09-05 22:04:59,859 >> Num examples = 10838
|
1701 |
+
[INFO|trainer.py:3824] 2024-09-05 22:04:59,859 >> Batch size = 8
|
1702 |
+
|
1703 |
0%| | 0/1355 [00:00<?, ?it/s]
|
1704 |
1%| | 9/1355 [00:00<00:15, 89.65it/s]
|
1705 |
1%|▏ | 18/1355 [00:00<00:16, 78.98it/s]
|
1706 |
2%|▏ | 27/1355 [00:00<00:16, 80.91it/s]
|
1707 |
3%|▎ | 36/1355 [00:00<00:16, 80.26it/s]
|
1708 |
3%|▎ | 45/1355 [00:00<00:16, 79.74it/s]
|
1709 |
4%|▍ | 53/1355 [00:00<00:17, 76.43it/s]
|
1710 |
5%|▍ | 61/1355 [00:00<00:16, 76.79it/s]
|
1711 |
5%|▌ | 69/1355 [00:00<00:16, 77.27it/s]
|
1712 |
6%|▌ | 78/1355 [00:00<00:16, 78.60it/s]
|
1713 |
6%|▋ | 87/1355 [00:01<00:15, 79.45it/s]
|
1714 |
7%|▋ | 95/1355 [00:01<00:15, 79.17it/s]
|
1715 |
8%|▊ | 103/1355 [00:01<00:15, 79.17it/s]
|
1716 |
8%|▊ | 111/1355 [00:01<00:16, 77.62it/s]
|
1717 |
9%|▉ | 119/1355 [00:01<00:16, 76.95it/s]
|
1718 |
9%|▉ | 128/1355 [00:01<00:15, 77.99it/s]
|
1719 |
10%|█ | 137/1355 [00:01<00:15, 78.88it/s]
|
1720 |
11%|█ | 145/1355 [00:01<00:15, 76.54it/s]
|
1721 |
11%|█▏ | 153/1355 [00:01<00:16, 74.41it/s]
|
1722 |
12%|█▏ | 161/1355 [00:02<00:17, 70.18it/s]
|
1723 |
12%|█▏ | 169/1355 [00:02<00:16, 71.76it/s]
|
1724 |
13%|█▎ | 178/1355 [00:02<00:15, 74.18it/s]
|
1725 |
14%|█▍ | 187/1355 [00:02<00:15, 76.84it/s]
|
1726 |
14%|█▍ | 196/1355 [00:02<00:14, 78.67it/s]
|
1727 |
15%|█▌ | 205/1355 [00:02<00:14, 79.95it/s]
|
1728 |
16%|█▌ | 214/1355 [00:02<00:14, 81.03it/s]
|
1729 |
16%|█▋ | 223/1355 [00:02<00:14, 79.30it/s]
|
1730 |
17%|█▋ | 232/1355 [00:02<00:14, 79.76it/s]
|
1731 |
18%|█▊ | 240/1355 [00:03<00:14, 78.82it/s]
|
1732 |
18%|█▊ | 249/1355 [00:03<00:13, 79.55it/s]
|
1733 |
19%|█▉ | 258/1355 [00:03<00:13, 80.77it/s]
|
1734 |
20%|█▉ | 267/1355 [00:03<00:13, 82.26it/s]
|
1735 |
20%|██ | 276/1355 [00:03<00:12, 83.16it/s]
|
1736 |
21%|██ | 285/1355 [00:03<00:13, 81.80it/s]
|
1737 |
22%|██▏ | 294/1355 [00:03<00:13, 81.16it/s]
|
1738 |
22%|██▏ | 303/1355 [00:03<00:12, 82.02it/s]
|
1739 |
23%|██▎ | 312/1355 [00:03<00:12, 83.08it/s]
|
1740 |
24%|██▎ | 321/1355 [00:04<00:12, 83.67it/s]
|
1741 |
24%|██▍ | 330/1355 [00:04<00:12, 84.72it/s]
|
1742 |
25%|██▌ | 339/1355 [00:04<00:11, 85.11it/s]
|
1743 |
26%|██▌ | 348/1355 [00:04<00:11, 85.47it/s]
|
1744 |
26%|██▋ | 357/1355 [00:04<00:11, 85.91it/s]
|
1745 |
27%|██▋ | 366/1355 [00:04<00:11, 86.27it/s]
|
1746 |
28%|██▊ | 375/1355 [00:04<00:11, 85.79it/s]
|
1747 |
28%|██▊ | 384/1355 [00:04<00:11, 85.61it/s]
|
1748 |
29%|██▉ | 393/1355 [00:04<00:11, 85.43it/s]
|
1749 |
30%|██▉ | 402/1355 [00:04<00:11, 85.11it/s]
|
1750 |
30%|███ | 411/1355 [00:05<00:11, 85.66it/s]
|
1751 |
31%|███ | 420/1355 [00:05<00:11, 84.67it/s]
|
1752 |
32%|███▏ | 429/1355 [00:05<00:10, 85.02it/s]
|
1753 |
32%|███▏ | 438/1355 [00:05<00:10, 84.20it/s]
|
1754 |
33%|███▎ | 447/1355 [00:05<00:10, 84.53it/s]
|
1755 |
34%|███▎ | 456/1355 [00:05<00:11, 81.47it/s]
|
1756 |
34%|███▍ | 465/1355 [00:05<00:10, 81.24it/s]
|
1757 |
35%|███▍ | 474/1355 [00:05<00:10, 81.95it/s]
|
1758 |
36%|███▌ | 483/1355 [00:05<00:10, 82.64it/s]
|
1759 |
36%|███▋ | 492/1355 [00:06<00:10, 83.13it/s]
|
1760 |
37%|███▋ | 501/1355 [00:06<00:10, 82.28it/s]
|
1761 |
38%|███▊ | 510/1355 [00:06<00:10, 79.36it/s]
|
1762 |
38%|███▊ | 518/1355 [00:06<00:10, 78.05it/s]
|
1763 |
39%|███▉ | 527/1355 [00:06<00:10, 79.16it/s]
|
1764 |
40%|███▉ | 536/1355 [00:06<00:10, 79.94it/s]
|
1765 |
40%|████ | 545/1355 [00:06<00:10, 78.66it/s]
|
1766 |
41%|████ | 554/1355 [00:06<00:10, 80.00it/s]
|
1767 |
42%|████▏ | 563/1355 [00:06<00:09, 81.81it/s]
|
1768 |
42%|████▏ | 572/1355 [00:07<00:09, 82.19it/s]
|
1769 |
43%|████▎ | 581/1355 [00:07<00:09, 80.37it/s]
|
1770 |
44%|████▎ | 590/1355 [00:07<00:09, 81.18it/s]
|
1771 |
44%|████▍ | 599/1355 [00:07<00:09, 80.25it/s]
|
1772 |
45%|████▍ | 608/1355 [00:07<00:09, 77.59it/s]
|
1773 |
46%|████▌ | 617/1355 [00:07<00:09, 79.00it/s]
|
1774 |
46%|████▌ | 625/1355 [00:07<00:09, 78.08it/s]
|
1775 |
47%|████▋ | 633/1355 [00:07<00:09, 78.24it/s]
|
1776 |
47%|████▋ | 642/1355 [00:07<00:08, 79.32it/s]
|
1777 |
48%|████▊ | 651/1355 [00:08<00:08, 80.44it/s]
|
1778 |
49%|████▊ | 660/1355 [00:08<00:08, 82.22it/s]
|
1779 |
49%|████▉ | 669/1355 [00:08<00:09, 76.21it/s]
|
1780 |
50%|█████ | 678/1355 [00:08<00:08, 77.55it/s]
|
1781 |
51%|█████ | 687/1355 [00:08<00:08, 78.58it/s]
|
1782 |
51%|█████▏ | 696/1355 [00:08<00:08, 79.87it/s]
|
1783 |
52%|█████▏ | 705/1355 [00:08<00:08, 78.11it/s]
|
1784 |
53%|█████▎ | 714/1355 [00:08<00:07, 80.50it/s]
|
1785 |
53%|█████▎ | 723/1355 [00:08<00:07, 80.12it/s]
|
1786 |
54%|█████▍ | 732/1355 [00:09<00:07, 80.88it/s]
|
1787 |
55%|█████▍ | 741/1355 [00:09<00:07, 78.39it/s]
|
1788 |
55%|█████▌ | 750/1355 [00:09<00:07, 79.64it/s]
|
1789 |
56%|█████▌ | 758/1355 [00:09<00:07, 78.30it/s]
|
1790 |
57%|█████▋ | 766/1355 [00:09<00:07, 78.43it/s]
|
1791 |
57%|█████▋ | 775/1355 [00:09<00:07, 81.16it/s]
|
1792 |
58%|█████▊ | 784/1355 [00:09<00:07, 76.55it/s]
|
1793 |
59%|█████▊ | 793/1355 [00:09<00:07, 78.18it/s]
|
1794 |
59%|█████▉ | 801/1355 [00:09<00:07, 78.59it/s]
|
1795 |
60%|█████▉ | 810/1355 [00:10<00:06, 81.02it/s]
|
1796 |
60%|██████ | 819/1355 [00:10<00:06, 81.88it/s]
|
1797 |
61%|██████ | 828/1355 [00:10<00:06, 81.33it/s]
|
1798 |
62%|██████▏ | 837/1355 [00:10<00:06, 80.79it/s]
|
1799 |
62%|██████▏ | 846/1355 [00:10<00:06, 80.11it/s]
|
1800 |
63%|██████▎ | 855/1355 [00:10<00:06, 80.41it/s]
|
1801 |
64%|██████▍ | 864/1355 [00:10<00:06, 81.38it/s]
|
1802 |
64%|██████▍ | 873/1355 [00:10<00:05, 82.00it/s]
|
1803 |
65%|██████▌ | 882/1355 [00:10<00:05, 82.54it/s]
|
1804 |
66%|██████▌ | 891/1355 [00:11<00:05, 82.98it/s]
|
1805 |
66%|██████▋ | 900/1355 [00:11<00:05, 82.84it/s]
|
1806 |
67%|██████▋ | 909/1355 [00:11<00:05, 83.28it/s]
|
1807 |
68%|██████▊ | 918/1355 [00:11<00:05, 83.30it/s]
|
1808 |
68%|██████▊ | 927/1355 [00:11<00:05, 80.41it/s]
|
1809 |
69%|██████▉ | 936/1355 [00:11<00:05, 79.47it/s]
|
1810 |
70%|██████▉ | 945/1355 [00:11<00:05, 80.81it/s]
|
1811 |
70%|███████ | 954/1355 [00:11<00:04, 81.93it/s]
|
1812 |
71%|███████ | 963/1355 [00:11<00:04, 80.07it/s]
|
1813 |
72%|███████▏ | 972/1355 [00:12<00:05, 76.41it/s]
|
1814 |
72%|███████▏ | 980/1355 [00:12<00:04, 77.20it/s]
|
1815 |
73%|███████▎ | 988/1355 [00:12<00:04, 77.77it/s]
|
1816 |
74%|███████▎ | 996/1355 [00:12<00:04, 78.35it/s]
|
1817 |
74%|███████▍ | 1005/1355 [00:12<00:04, 80.44it/s]
|
1818 |
75%|███████▍ | 1014/1355 [00:12<00:04, 79.12it/s]
|
1819 |
75%|███████▌ | 1022/1355 [00:12<00:04, 78.30it/s]
|
1820 |
76%|███████▌ | 1030/1355 [00:12<00:04, 78.23it/s]
|
1821 |
77%|███████▋ | 1039/1355 [00:12<00:03, 79.57it/s]
|
1822 |
77%|███████▋ | 1048/1355 [00:13<00:03, 81.05it/s]
|
1823 |
78%|███████▊ | 1057/1355 [00:13<00:03, 79.79it/s]
|
1824 |
79%|███████▊ | 1066/1355 [00:13<00:03, 81.05it/s]
|
1825 |
79%|███████▉ | 1075/1355 [00:13<00:03, 81.44it/s]
|
1826 |
80%|████████ | 1084/1355 [00:13<00:03, 80.30it/s]
|
1827 |
81%|████████ | 1093/1355 [00:13<00:03, 80.00it/s]
|
1828 |
81%|████████▏ | 1102/1355 [00:13<00:03, 80.32it/s]
|
1829 |
82%|████████▏ | 1111/1355 [00:13<00:03, 76.79it/s]
|
1830 |
83%|████████▎ | 1119/1355 [00:13<00:03, 76.96it/s]
|
1831 |
83%|████████▎ | 1127/1355 [00:14<00:02, 76.68it/s]
|
1832 |
84%|████████▍ | 1136/1355 [00:14<00:02, 76.22it/s]
|
1833 |
85%|████████▍ | 1145/1355 [00:14<00:02, 78.00it/s]
|
1834 |
85%|████████▌ | 1153/1355 [00:14<00:02, 77.95it/s]
|
1835 |
86%|████████▌ | 1161/1355 [00:14<00:02, 76.16it/s]
|
1836 |
86%|████████▋ | 1169/1355 [00:14<00:02, 76.13it/s]
|
1837 |
87%|████████▋ | 1177/1355 [00:14<00:02, 77.20it/s]
|
1838 |
88%|████████▊ | 1186/1355 [00:14<00:02, 77.96it/s]
|
1839 |
88%|████████▊ | 1195/1355 [00:14<00:02, 78.39it/s]
|
1840 |
89%|████████▉ | 1203/1355 [00:15<00:01, 78.18it/s]
|
1841 |
89%|████████▉ | 1212/1355 [00:15<00:01, 79.03it/s]
|
1842 |
90%|█████████ | 1220/1355 [00:15<00:01, 76.62it/s]
|
1843 |
91%|█████████ | 1229/1355 [00:15<00:01, 77.65it/s]
|
1844 |
91%|█████████▏| 1237/1355 [00:15<00:01, 77.55it/s]
|
1845 |
92%|█████████▏| 1245/1355 [00:15<00:01, 77.78it/s]
|
1846 |
92%|█████████▏| 1253/1355 [00:15<00:01, 78.26it/s]
|
1847 |
93%|█████████▎| 1262/1355 [00:15<00:01, 79.17it/s]
|
1848 |
94%|█████████▎| 1270/1355 [00:15<00:01, 79.22it/s]
|
1849 |
94%|█████████▍| 1279/1355 [00:16<00:00, 80.05it/s]
|
1850 |
95%|█████████▌| 1288/1355 [00:16<00:00, 81.28it/s]
|
1851 |
96%|█████████▌| 1297/1355 [00:16<00:00, 80.80it/s]
|
1852 |
96%|█████████▋| 1306/1355 [00:16<00:00, 79.04it/s]
|
1853 |
97%|█████████▋| 1314/1355 [00:16<00:00, 79.26it/s]
|
1854 |
98%|█████████▊| 1322/1355 [00:16<00:00, 79.15it/s]
|
1855 |
98%|█████████▊| 1330/1355 [00:16<00:00, 79.26it/s]
|
1856 |
99%|█████████▊| 1338/1355 [00:16<00:00, 76.91it/s]
|
1857 |
99%|█████████▉| 1346/1355 [00:16<00:00, 77.65it/s]
|
1858 |
+
[INFO|trainer.py:3503] 2024-09-05 22:05:23,351 >> Saving model checkpoint to /content/dissertation/scripts/ner/output
|
1859 |
+
[INFO|configuration_utils.py:472] 2024-09-05 22:05:23,353 >> Configuration saved in /content/dissertation/scripts/ner/output/config.json
|
1860 |
+
[INFO|modeling_utils.py:2799] 2024-09-05 22:05:24,590 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
|
1861 |
+
[INFO|tokenization_utils_base.py:2684] 2024-09-05 22:05:24,591 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
|
1862 |
+
[INFO|tokenization_utils_base.py:2693] 2024-09-05 22:05:24,591 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
|
1863 |
+
***** predict metrics *****
|
1864 |
+
predict_accuracy = 0.992
|
1865 |
+
predict_f1 = 0.8562
|
1866 |
+
predict_loss = 0.047
|
1867 |
+
predict_precision = 0.8359
|
1868 |
+
predict_recall = 0.8774
|
1869 |
+
predict_runtime = 0:00:23.00
|
1870 |
+
predict_samples_per_second = 471.149
|
1871 |
+
predict_steps_per_second = 58.904
|
1872 |
+
|
train_results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 10.0,
|
3 |
+
"total_flos": 1.5369860684670966e+16,
|
4 |
+
"train_loss": 0.009731636565258824,
|
5 |
+
"train_runtime": 1413.3317,
|
6 |
+
"train_samples": 32675,
|
7 |
+
"train_samples_per_second": 231.191,
|
8 |
+
"train_steps_per_second": 3.616
|
9 |
+
}
|
trainer_state.json
ADDED
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.8506998444790046,
|
3 |
+
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-4599",
|
4 |
+
"epoch": 10.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 5110,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.9784735812133072,
|
13 |
+
"grad_norm": 0.2284504771232605,
|
14 |
+
"learning_rate": 4.510763209393347e-05,
|
15 |
+
"loss": 0.0542,
|
16 |
+
"step": 500
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 1.0,
|
20 |
+
"eval_accuracy": 0.9894745923561384,
|
21 |
+
"eval_f1": 0.7720770551211139,
|
22 |
+
"eval_loss": 0.027118448168039322,
|
23 |
+
"eval_precision": 0.7485207100591716,
|
24 |
+
"eval_recall": 0.7971642378889326,
|
25 |
+
"eval_runtime": 15.6605,
|
26 |
+
"eval_samples_per_second": 469.589,
|
27 |
+
"eval_steps_per_second": 58.746,
|
28 |
+
"step": 511
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"epoch": 1.9569471624266144,
|
32 |
+
"grad_norm": 0.6956239342689514,
|
33 |
+
"learning_rate": 4.021526418786693e-05,
|
34 |
+
"loss": 0.0184,
|
35 |
+
"step": 1000
|
36 |
+
},
|
37 |
+
{
|
38 |
+
"epoch": 2.0,
|
39 |
+
"eval_accuracy": 0.9906364133091766,
|
40 |
+
"eval_f1": 0.8196286472148541,
|
41 |
+
"eval_loss": 0.027706459164619446,
|
42 |
+
"eval_precision": 0.7897042716319824,
|
43 |
+
"eval_recall": 0.8519102008664828,
|
44 |
+
"eval_runtime": 15.4779,
|
45 |
+
"eval_samples_per_second": 475.128,
|
46 |
+
"eval_steps_per_second": 59.439,
|
47 |
+
"step": 1022
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"epoch": 2.935420743639922,
|
51 |
+
"grad_norm": 0.465753972530365,
|
52 |
+
"learning_rate": 3.53228962818004e-05,
|
53 |
+
"loss": 0.0103,
|
54 |
+
"step": 1500
|
55 |
+
},
|
56 |
+
{
|
57 |
+
"epoch": 3.0,
|
58 |
+
"eval_accuracy": 0.9913726166853594,
|
59 |
+
"eval_f1": 0.8360814742967991,
|
60 |
+
"eval_loss": 0.03051806427538395,
|
61 |
+
"eval_precision": 0.8237767584097859,
|
62 |
+
"eval_recall": 0.848759354076408,
|
63 |
+
"eval_runtime": 15.7367,
|
64 |
+
"eval_samples_per_second": 467.314,
|
65 |
+
"eval_steps_per_second": 58.462,
|
66 |
+
"step": 1533
|
67 |
+
},
|
68 |
+
{
|
69 |
+
"epoch": 3.9138943248532287,
|
70 |
+
"grad_norm": 0.46147385239601135,
|
71 |
+
"learning_rate": 3.0430528375733857e-05,
|
72 |
+
"loss": 0.0058,
|
73 |
+
"step": 2000
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"epoch": 4.0,
|
77 |
+
"eval_accuracy": 0.9913208523854715,
|
78 |
+
"eval_f1": 0.8364197530864197,
|
79 |
+
"eval_loss": 0.032038912177085876,
|
80 |
+
"eval_precision": 0.8196597353497165,
|
81 |
+
"eval_recall": 0.8538794801102796,
|
82 |
+
"eval_runtime": 15.567,
|
83 |
+
"eval_samples_per_second": 472.41,
|
84 |
+
"eval_steps_per_second": 59.099,
|
85 |
+
"step": 2044
|
86 |
+
},
|
87 |
+
{
|
88 |
+
"epoch": 4.892367906066536,
|
89 |
+
"grad_norm": 0.1297295242547989,
|
90 |
+
"learning_rate": 2.553816046966732e-05,
|
91 |
+
"loss": 0.0041,
|
92 |
+
"step": 2500
|
93 |
+
},
|
94 |
+
{
|
95 |
+
"epoch": 5.0,
|
96 |
+
"eval_accuracy": 0.9916544445403043,
|
97 |
+
"eval_f1": 0.8406766325727772,
|
98 |
+
"eval_loss": 0.037402600049972534,
|
99 |
+
"eval_precision": 0.8396856581532417,
|
100 |
+
"eval_recall": 0.8416699487987397,
|
101 |
+
"eval_runtime": 15.6627,
|
102 |
+
"eval_samples_per_second": 469.523,
|
103 |
+
"eval_steps_per_second": 58.738,
|
104 |
+
"step": 2555
|
105 |
+
},
|
106 |
+
{
|
107 |
+
"epoch": 5.870841487279844,
|
108 |
+
"grad_norm": 0.04726821184158325,
|
109 |
+
"learning_rate": 2.064579256360078e-05,
|
110 |
+
"loss": 0.0026,
|
111 |
+
"step": 3000
|
112 |
+
},
|
113 |
+
{
|
114 |
+
"epoch": 6.0,
|
115 |
+
"eval_accuracy": 0.9916601961291807,
|
116 |
+
"eval_f1": 0.8435241258058215,
|
117 |
+
"eval_loss": 0.04270775243639946,
|
118 |
+
"eval_precision": 0.8368217054263566,
|
119 |
+
"eval_recall": 0.8503347774714455,
|
120 |
+
"eval_runtime": 15.5727,
|
121 |
+
"eval_samples_per_second": 472.237,
|
122 |
+
"eval_steps_per_second": 59.078,
|
123 |
+
"step": 3066
|
124 |
+
},
|
125 |
+
{
|
126 |
+
"epoch": 6.8493150684931505,
|
127 |
+
"grad_norm": 0.25813549757003784,
|
128 |
+
"learning_rate": 1.5753424657534248e-05,
|
129 |
+
"loss": 0.0015,
|
130 |
+
"step": 3500
|
131 |
+
},
|
132 |
+
{
|
133 |
+
"epoch": 7.0,
|
134 |
+
"eval_accuracy": 0.9911828142524373,
|
135 |
+
"eval_f1": 0.8397768801692633,
|
136 |
+
"eval_loss": 0.045068006962537766,
|
137 |
+
"eval_precision": 0.8206766917293233,
|
138 |
+
"eval_recall": 0.85978731784167,
|
139 |
+
"eval_runtime": 15.5993,
|
140 |
+
"eval_samples_per_second": 471.433,
|
141 |
+
"eval_steps_per_second": 58.977,
|
142 |
+
"step": 3577
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 7.8277886497064575,
|
146 |
+
"grad_norm": 0.22853463888168335,
|
147 |
+
"learning_rate": 1.086105675146771e-05,
|
148 |
+
"loss": 0.0013,
|
149 |
+
"step": 4000
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 8.0,
|
153 |
+
"eval_accuracy": 0.991637189773675,
|
154 |
+
"eval_f1": 0.8470906630581867,
|
155 |
+
"eval_loss": 0.04481955245137215,
|
156 |
+
"eval_precision": 0.8318147304479878,
|
157 |
+
"eval_recall": 0.8629381646317448,
|
158 |
+
"eval_runtime": 15.9638,
|
159 |
+
"eval_samples_per_second": 460.669,
|
160 |
+
"eval_steps_per_second": 57.631,
|
161 |
+
"step": 4088
|
162 |
+
},
|
163 |
+
{
|
164 |
+
"epoch": 8.806262230919765,
|
165 |
+
"grad_norm": 0.009220785461366177,
|
166 |
+
"learning_rate": 5.9686888454011745e-06,
|
167 |
+
"loss": 0.0007,
|
168 |
+
"step": 4500
|
169 |
+
},
|
170 |
+
{
|
171 |
+
"epoch": 9.0,
|
172 |
+
"eval_accuracy": 0.9916544445403043,
|
173 |
+
"eval_f1": 0.8506998444790046,
|
174 |
+
"eval_loss": 0.04956069961190224,
|
175 |
+
"eval_precision": 0.8399232245681382,
|
176 |
+
"eval_recall": 0.8617565970854667,
|
177 |
+
"eval_runtime": 15.6877,
|
178 |
+
"eval_samples_per_second": 468.774,
|
179 |
+
"eval_steps_per_second": 58.645,
|
180 |
+
"step": 4599
|
181 |
+
},
|
182 |
+
{
|
183 |
+
"epoch": 9.784735812133073,
|
184 |
+
"grad_norm": 0.5473453402519226,
|
185 |
+
"learning_rate": 1.076320939334638e-06,
|
186 |
+
"loss": 0.0006,
|
187 |
+
"step": 5000
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"epoch": 10.0,
|
191 |
+
"eval_accuracy": 0.9916486929514279,
|
192 |
+
"eval_f1": 0.8506998444790046,
|
193 |
+
"eval_loss": 0.05025585740804672,
|
194 |
+
"eval_precision": 0.8399232245681382,
|
195 |
+
"eval_recall": 0.8617565970854667,
|
196 |
+
"eval_runtime": 16.4037,
|
197 |
+
"eval_samples_per_second": 448.314,
|
198 |
+
"eval_steps_per_second": 56.085,
|
199 |
+
"step": 5110
|
200 |
+
},
|
201 |
+
{
|
202 |
+
"epoch": 10.0,
|
203 |
+
"step": 5110,
|
204 |
+
"total_flos": 1.5369860684670966e+16,
|
205 |
+
"train_loss": 0.009731636565258824,
|
206 |
+
"train_runtime": 1413.3317,
|
207 |
+
"train_samples_per_second": 231.191,
|
208 |
+
"train_steps_per_second": 3.616
|
209 |
+
}
|
210 |
+
],
|
211 |
+
"logging_steps": 500,
|
212 |
+
"max_steps": 5110,
|
213 |
+
"num_input_tokens_seen": 0,
|
214 |
+
"num_train_epochs": 10,
|
215 |
+
"save_steps": 500,
|
216 |
+
"stateful_callbacks": {
|
217 |
+
"TrainerControl": {
|
218 |
+
"args": {
|
219 |
+
"should_epoch_stop": false,
|
220 |
+
"should_evaluate": false,
|
221 |
+
"should_log": false,
|
222 |
+
"should_save": true,
|
223 |
+
"should_training_stop": true
|
224 |
+
},
|
225 |
+
"attributes": {}
|
226 |
+
}
|
227 |
+
},
|
228 |
+
"total_flos": 1.5369860684670966e+16,
|
229 |
+
"train_batch_size": 32,
|
230 |
+
"trial_name": null,
|
231 |
+
"trial_params": null
|
232 |
+
}
|