End of training
Browse files- README.md +13 -12
- all_results.json +26 -0
- eval_results.json +12 -0
- predict_results.json +10 -0
- predictions.txt +0 -0
- tb/events.out.tfevents.1725475205.a5c501872057.1590.1 +3 -0
- train.log +48 -0
- train_results.json +9 -0
- trainer_state.json +190 -0
README.md
CHANGED
@@ -3,9 +3,10 @@ library_name: transformers
|
|
3 |
license: apache-2.0
|
4 |
base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
|
5 |
tags:
|
|
|
6 |
- generated_from_trainer
|
7 |
datasets:
|
8 |
-
- symptemist-75-ner
|
9 |
metrics:
|
10 |
- precision
|
11 |
- recall
|
@@ -18,24 +19,24 @@ model-index:
|
|
18 |
name: Token Classification
|
19 |
type: token-classification
|
20 |
dataset:
|
21 |
-
name: symptemist-75-ner
|
22 |
-
type: symptemist-75-ner
|
23 |
config: SympTEMIST NER
|
24 |
split: validation
|
25 |
args: SympTEMIST NER
|
26 |
metrics:
|
27 |
- name: Precision
|
28 |
type: precision
|
29 |
-
value: 0.
|
30 |
- name: Recall
|
31 |
type: recall
|
32 |
-
value: 0.
|
33 |
- name: F1
|
34 |
type: f1
|
35 |
-
value: 0.
|
36 |
- name: Accuracy
|
37 |
type: accuracy
|
38 |
-
value: 0.
|
39 |
---
|
40 |
|
41 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
@@ -43,13 +44,13 @@ should probably proofread and complete it, then remove this comment. -->
|
|
43 |
|
44 |
# output
|
45 |
|
46 |
-
This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the symptemist-75-ner dataset.
|
47 |
It achieves the following results on the evaluation set:
|
48 |
-
- Loss: 0.
|
49 |
-
- Precision: 0.
|
50 |
-
- Recall: 0.
|
51 |
- F1: 0.6985
|
52 |
-
- Accuracy: 0.
|
53 |
|
54 |
## Model description
|
55 |
|
|
|
3 |
license: apache-2.0
|
4 |
base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
|
5 |
tags:
|
6 |
+
- token-classification
|
7 |
- generated_from_trainer
|
8 |
datasets:
|
9 |
+
- Rodrigo1771/symptemist-75-ner
|
10 |
metrics:
|
11 |
- precision
|
12 |
- recall
|
|
|
19 |
name: Token Classification
|
20 |
type: token-classification
|
21 |
dataset:
|
22 |
+
name: Rodrigo1771/symptemist-75-ner
|
23 |
+
type: Rodrigo1771/symptemist-75-ner
|
24 |
config: SympTEMIST NER
|
25 |
split: validation
|
26 |
args: SympTEMIST NER
|
27 |
metrics:
|
28 |
- name: Precision
|
29 |
type: precision
|
30 |
+
value: 0.6896
|
31 |
- name: Recall
|
32 |
type: recall
|
33 |
+
value: 0.7077175697865353
|
34 |
- name: F1
|
35 |
type: f1
|
36 |
+
value: 0.6985413290113451
|
37 |
- name: Accuracy
|
38 |
type: accuracy
|
39 |
+
value: 0.9496936058263018
|
40 |
---
|
41 |
|
42 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
|
|
44 |
|
45 |
# output
|
46 |
|
47 |
+
This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the Rodrigo1771/symptemist-75-ner dataset.
|
48 |
It achieves the following results on the evaluation set:
|
49 |
+
- Loss: 0.3089
|
50 |
+
- Precision: 0.6896
|
51 |
+
- Recall: 0.7077
|
52 |
- F1: 0.6985
|
53 |
+
- Accuracy: 0.9497
|
54 |
|
55 |
## Model description
|
56 |
|
all_results.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 10.0,
|
3 |
+
"eval_accuracy": 0.9496936058263018,
|
4 |
+
"eval_f1": 0.6985413290113451,
|
5 |
+
"eval_loss": 0.3088673949241638,
|
6 |
+
"eval_precision": 0.6896,
|
7 |
+
"eval_recall": 0.7077175697865353,
|
8 |
+
"eval_runtime": 5.5622,
|
9 |
+
"eval_samples": 2519,
|
10 |
+
"eval_samples_per_second": 452.882,
|
11 |
+
"eval_steps_per_second": 56.633,
|
12 |
+
"predict_accuracy": 0.9466344311112421,
|
13 |
+
"predict_f1": 0.6937328822297406,
|
14 |
+
"predict_loss": 0.33691754937171936,
|
15 |
+
"predict_precision": 0.694516129032258,
|
16 |
+
"predict_recall": 0.6929514000643707,
|
17 |
+
"predict_runtime": 8.8756,
|
18 |
+
"predict_samples_per_second": 455.971,
|
19 |
+
"predict_steps_per_second": 57.01,
|
20 |
+
"total_flos": 7718163558521760.0,
|
21 |
+
"train_loss": 0.03527186407196906,
|
22 |
+
"train_runtime": 706.0488,
|
23 |
+
"train_samples": 15848,
|
24 |
+
"train_samples_per_second": 224.46,
|
25 |
+
"train_steps_per_second": 3.513
|
26 |
+
}
|
eval_results.json
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 10.0,
|
3 |
+
"eval_accuracy": 0.9496936058263018,
|
4 |
+
"eval_f1": 0.6985413290113451,
|
5 |
+
"eval_loss": 0.3088673949241638,
|
6 |
+
"eval_precision": 0.6896,
|
7 |
+
"eval_recall": 0.7077175697865353,
|
8 |
+
"eval_runtime": 5.5622,
|
9 |
+
"eval_samples": 2519,
|
10 |
+
"eval_samples_per_second": 452.882,
|
11 |
+
"eval_steps_per_second": 56.633
|
12 |
+
}
|
predict_results.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"predict_accuracy": 0.9466344311112421,
|
3 |
+
"predict_f1": 0.6937328822297406,
|
4 |
+
"predict_loss": 0.33691754937171936,
|
5 |
+
"predict_precision": 0.694516129032258,
|
6 |
+
"predict_recall": 0.6929514000643707,
|
7 |
+
"predict_runtime": 8.8756,
|
8 |
+
"predict_samples_per_second": 455.971,
|
9 |
+
"predict_steps_per_second": 57.01
|
10 |
+
}
|
predictions.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tb/events.out.tfevents.1725475205.a5c501872057.1590.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac1c4dd87538fa491bda1fedf86880bd54d355c94e361fe11b34dd49f825123b
|
3 |
+
size 560
|
train.log
CHANGED
@@ -860,3 +860,51 @@ Training completed. Do not forget to share your model on huggingface.co/models =
|
|
860 |
{'eval_loss': 0.33284899592399597, 'eval_precision': 0.6814159292035398, 'eval_recall': 0.7164750957854407, 'eval_f1': 0.6985058697972252, 'eval_accuracy': 0.9498861047835991, 'eval_runtime': 5.5975, 'eval_samples_per_second': 450.025, 'eval_steps_per_second': 56.276, 'epoch': 10.0}
|
861 |
{'train_runtime': 706.0488, 'train_samples_per_second': 224.46, 'train_steps_per_second': 3.513, 'train_loss': 0.03527186407196906, 'epoch': 10.0}
|
862 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
863 |
0%| | 0/315 [00:00<?, ?it/s]
|
864 |
3%|▎ | 9/315 [00:00<00:03, 80.04it/s]
|
865 |
6%|▌ | 18/315 [00:00<00:03, 81.66it/s]
|
866 |
9%|▊ | 27/315 [00:00<00:03, 81.02it/s]
|
867 |
11%|█▏ | 36/315 [00:00<00:03, 82.17it/s]
|
868 |
14%|█▍ | 45/315 [00:00<00:03, 82.54it/s]
|
869 |
17%|█▋ | 54/315 [00:00<00:03, 82.93it/s]
|
870 |
20%|██ | 63/315 [00:00<00:03, 80.55it/s]
|
871 |
23%|██▎ | 72/315 [00:00<00:02, 81.25it/s]
|
872 |
26%|██▌ | 81/315 [00:01<00:02, 79.65it/s]
|
873 |
28%|██▊ | 89/315 [00:01<00:02, 79.00it/s]
|
874 |
31%|███ | 97/315 [00:01<00:02, 78.04it/s]
|
875 |
34%|███▎ | 106/315 [00:01<00:02, 79.53it/s]
|
876 |
37%|███▋ | 115/315 [00:01<00:02, 80.97it/s]
|
877 |
39%|███▉ | 124/315 [00:01<00:02, 79.09it/s]
|
878 |
42%|████▏ | 133/315 [00:01<00:02, 79.74it/s]
|
879 |
45%|████▌ | 142/315 [00:01<00:02, 79.98it/s]
|
880 |
48%|████▊ | 151/315 [00:01<00:02, 81.79it/s]
|
881 |
51%|█████ | 160/315 [00:01<00:01, 81.15it/s]
|
882 |
54%|█████▎ | 169/315 [00:02<00:01, 81.16it/s]
|
883 |
57%|█████▋ | 178/315 [00:02<00:01, 81.46it/s]
|
884 |
59%|█████▉ | 187/315 [00:02<00:01, 81.11it/s]
|
885 |
62%|██████▏ | 196/315 [00:02<00:01, 80.61it/s]
|
886 |
65%|██████▌ | 205/315 [00:02<00:01, 78.27it/s]
|
887 |
68%|██████▊ | 214/315 [00:02<00:01, 80.07it/s]
|
888 |
71%|███████ | 223/315 [00:02<00:01, 80.98it/s]
|
889 |
74%|███████▎ | 232/315 [00:02<00:01, 82.35it/s]
|
890 |
77%|███████▋ | 241/315 [00:02<00:00, 81.14it/s]
|
891 |
79%|███████▉ | 250/315 [00:03<00:00, 81.51it/s]
|
892 |
82%|████████▏ | 259/315 [00:03<00:00, 81.21it/s]
|
893 |
85%|████████▌ | 268/315 [00:03<00:00, 81.50it/s]
|
894 |
88%|████████▊ | 277/315 [00:03<00:00, 82.85it/s]
|
895 |
91%|█████████ | 286/315 [00:03<00:00, 80.71it/s]
|
896 |
94%|█████████▎| 295/315 [00:03<00:00, 80.77it/s]
|
897 |
97%|█████████▋| 304/315 [00:03<00:00, 81.83it/s]
|
898 |
99%|█████████▉| 313/315 [00:03<00:00, 82.13it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
899 |
0%| | 0/506 [00:00<?, ?it/s]
|
900 |
2%|▏ | 10/506 [00:00<00:05, 95.19it/s]
|
901 |
4%|▍ | 20/506 [00:00<00:05, 85.84it/s]
|
902 |
6%|▌ | 29/506 [00:00<00:05, 85.46it/s]
|
903 |
8%|▊ | 38/506 [00:00<00:05, 83.57it/s]
|
904 |
9%|▉ | 47/506 [00:00<00:05, 83.68it/s]
|
905 |
11%|█ | 56/506 [00:00<00:05, 84.03it/s]
|
906 |
13%|█▎ | 65/506 [00:00<00:05, 82.78it/s]
|
907 |
15%|█▍ | 74/506 [00:00<00:05, 82.71it/s]
|
908 |
16%|█▋ | 83/506 [00:01<00:05, 77.99it/s]
|
909 |
18%|█▊ | 91/506 [00:01<00:05, 78.20it/s]
|
910 |
20%|█▉ | 100/506 [00:01<00:05, 79.97it/s]
|
911 |
22%|██▏ | 109/506 [00:01<00:04, 79.45it/s]
|
912 |
23%|██▎ | 118/506 [00:01<00:04, 80.16it/s]
|
913 |
25%|██▌ | 127/506 [00:01<00:04, 78.08it/s]
|
914 |
27%|██▋ | 135/506 [00:01<00:05, 72.06it/s]
|
915 |
28%|██▊ | 144/506 [00:01<00:04, 75.38it/s]
|
916 |
30%|███ | 152/506 [00:01<00:04, 75.99it/s]
|
917 |
32%|███▏ | 160/506 [00:02<00:04, 74.53it/s]
|
918 |
33%|███▎ | 168/506 [00:02<00:04, 76.02it/s]
|
919 |
35%|███▍ | 177/506 [00:02<00:04, 77.49it/s]
|
920 |
37%|███▋ | 186/506 [00:02<00:04, 79.41it/s]
|
921 |
39%|███▊ | 195/506 [00:02<00:03, 79.79it/s]
|
922 |
40%|████ | 204/506 [00:02<00:03, 80.39it/s]
|
923 |
42%|████▏ | 213/506 [00:02<00:03, 80.49it/s]
|
924 |
44%|████▍ | 222/506 [00:02<00:03, 78.46it/s]
|
925 |
45%|████▌ | 230/506 [00:02<00:03, 77.13it/s]
|
926 |
47%|████▋ | 238/506 [00:03<00:03, 77.47it/s]
|
927 |
49%|████▉ | 247/506 [00:03<00:03, 79.02it/s]
|
928 |
50%|█████ | 255/506 [00:03<00:03, 78.64it/s]
|
929 |
52%|█████▏ | 264/506 [00:03<00:03, 80.02it/s]
|
930 |
54%|█████▍ | 273/506 [00:03<00:02, 81.15it/s]
|
931 |
56%|█████▌ | 282/506 [00:03<00:02, 80.47it/s]
|
932 |
58%|█████▊ | 291/506 [00:03<00:02, 80.42it/s]
|
933 |
59%|█████▉ | 300/506 [00:03<00:02, 81.29it/s]
|
934 |
61%|██████ | 309/506 [00:03<00:02, 80.93it/s]
|
935 |
63%|██████▎ | 318/506 [00:03<00:02, 81.34it/s]
|
936 |
65%|██████▍ | 327/506 [00:04<00:02, 82.42it/s]
|
937 |
66%|██████▋ | 336/506 [00:04<00:02, 82.95it/s]
|
938 |
68%|██████▊ | 345/506 [00:04<00:01, 83.14it/s]
|
939 |
70%|██████▉ | 354/506 [00:04<00:01, 79.24it/s]
|
940 |
72%|███████▏ | 362/506 [00:04<00:01, 75.32it/s]
|
941 |
73%|███████▎ | 370/506 [00:04<00:01, 72.65it/s]
|
942 |
75%|███████▍ | 378/506 [00:04<00:01, 71.57it/s]
|
943 |
76%|███████▋ | 386/506 [00:04<00:01, 69.45it/s]
|
944 |
78%|███████▊ | 393/506 [00:05<00:01, 69.06it/s]
|
945 |
79%|███████▉ | 401/506 [00:05<00:01, 69.56it/s]
|
946 |
81%|████████ | 409/506 [00:05<00:01, 70.60it/s]
|
947 |
82%|████████▏ | 417/506 [00:05<00:01, 73.12it/s]
|
948 |
84%|████████▍ | 425/506 [00:05<00:01, 74.82it/s]
|
949 |
86%|████████▌ | 434/506 [00:05<00:00, 76.80it/s]
|
950 |
88%|████████▊ | 443/506 [00:05<00:00, 78.43it/s]
|
951 |
89%|████████▉ | 451/506 [00:05<00:00, 78.15it/s]
|
952 |
91%|█████████ | 460/506 [00:05<00:00, 80.40it/s]
|
953 |
93%|█████████▎| 469/506 [00:05<00:00, 81.42it/s]
|
954 |
94%|█████████▍| 478/506 [00:06<00:00, 82.24it/s]
|
955 |
96%|█████████▌| 487/506 [00:06<00:00, 80.66it/s]
|
956 |
98%|█████████▊| 496/506 [00:06<00:00, 81.16it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
860 |
{'eval_loss': 0.33284899592399597, 'eval_precision': 0.6814159292035398, 'eval_recall': 0.7164750957854407, 'eval_f1': 0.6985058697972252, 'eval_accuracy': 0.9498861047835991, 'eval_runtime': 5.5975, 'eval_samples_per_second': 450.025, 'eval_steps_per_second': 56.276, 'epoch': 10.0}
|
861 |
{'train_runtime': 706.0488, 'train_samples_per_second': 224.46, 'train_steps_per_second': 3.513, 'train_loss': 0.03527186407196906, 'epoch': 10.0}
|
862 |
|
863 |
+
***** train metrics *****
|
864 |
+
epoch = 10.0
|
865 |
+
total_flos = 7188099GF
|
866 |
+
train_loss = 0.0353
|
867 |
+
train_runtime = 0:11:46.04
|
868 |
+
train_samples = 15848
|
869 |
+
train_samples_per_second = 224.46
|
870 |
+
train_steps_per_second = 3.513
|
871 |
+
09/04/2024 18:39:59 - INFO - __main__ - *** Evaluate ***
|
872 |
+
[INFO|trainer.py:811] 2024-09-04 18:39:59,625 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, id, tokens. If ner_tags, id, tokens are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
|
873 |
+
[INFO|trainer.py:3819] 2024-09-04 18:39:59,627 >>
|
874 |
+
***** Running Evaluation *****
|
875 |
+
[INFO|trainer.py:3821] 2024-09-04 18:39:59,627 >> Num examples = 2519
|
876 |
+
[INFO|trainer.py:3824] 2024-09-04 18:39:59,627 >> Batch size = 8
|
877 |
+
|
878 |
0%| | 0/315 [00:00<?, ?it/s]
|
879 |
3%|▎ | 9/315 [00:00<00:03, 80.04it/s]
|
880 |
6%|▌ | 18/315 [00:00<00:03, 81.66it/s]
|
881 |
9%|▊ | 27/315 [00:00<00:03, 81.02it/s]
|
882 |
11%|█▏ | 36/315 [00:00<00:03, 82.17it/s]
|
883 |
14%|█▍ | 45/315 [00:00<00:03, 82.54it/s]
|
884 |
17%|█▋ | 54/315 [00:00<00:03, 82.93it/s]
|
885 |
20%|██ | 63/315 [00:00<00:03, 80.55it/s]
|
886 |
23%|██▎ | 72/315 [00:00<00:02, 81.25it/s]
|
887 |
26%|██▌ | 81/315 [00:01<00:02, 79.65it/s]
|
888 |
28%|██▊ | 89/315 [00:01<00:02, 79.00it/s]
|
889 |
31%|███ | 97/315 [00:01<00:02, 78.04it/s]
|
890 |
34%|███▎ | 106/315 [00:01<00:02, 79.53it/s]
|
891 |
37%|███▋ | 115/315 [00:01<00:02, 80.97it/s]
|
892 |
39%|███▉ | 124/315 [00:01<00:02, 79.09it/s]
|
893 |
42%|████▏ | 133/315 [00:01<00:02, 79.74it/s]
|
894 |
45%|████▌ | 142/315 [00:01<00:02, 79.98it/s]
|
895 |
48%|████▊ | 151/315 [00:01<00:02, 81.79it/s]
|
896 |
51%|█████ | 160/315 [00:01<00:01, 81.15it/s]
|
897 |
54%|█████▎ | 169/315 [00:02<00:01, 81.16it/s]
|
898 |
57%|█████▋ | 178/315 [00:02<00:01, 81.46it/s]
|
899 |
59%|█████▉ | 187/315 [00:02<00:01, 81.11it/s]
|
900 |
62%|██████▏ | 196/315 [00:02<00:01, 80.61it/s]
|
901 |
65%|██████▌ | 205/315 [00:02<00:01, 78.27it/s]
|
902 |
68%|██████▊ | 214/315 [00:02<00:01, 80.07it/s]
|
903 |
71%|███████ | 223/315 [00:02<00:01, 80.98it/s]
|
904 |
74%|███████▎ | 232/315 [00:02<00:01, 82.35it/s]
|
905 |
77%|███████▋ | 241/315 [00:02<00:00, 81.14it/s]
|
906 |
79%|███████▉ | 250/315 [00:03<00:00, 81.51it/s]
|
907 |
82%|████████▏ | 259/315 [00:03<00:00, 81.21it/s]
|
908 |
85%|████████▌ | 268/315 [00:03<00:00, 81.50it/s]
|
909 |
88%|████████▊ | 277/315 [00:03<00:00, 82.85it/s]
|
910 |
91%|█████████ | 286/315 [00:03<00:00, 80.71it/s]
|
911 |
94%|█████████▎| 295/315 [00:03<00:00, 80.77it/s]
|
912 |
97%|█████████▋| 304/315 [00:03<00:00, 81.83it/s]
|
913 |
99%|█████████▉| 313/315 [00:03<00:00, 82.13it/s]
|
914 |
+
***** eval metrics *****
|
915 |
+
epoch = 10.0
|
916 |
+
eval_accuracy = 0.9497
|
917 |
+
eval_f1 = 0.6985
|
918 |
+
eval_loss = 0.3089
|
919 |
+
eval_precision = 0.6896
|
920 |
+
eval_recall = 0.7077
|
921 |
+
eval_runtime = 0:00:05.56
|
922 |
+
eval_samples = 2519
|
923 |
+
eval_samples_per_second = 452.882
|
924 |
+
eval_steps_per_second = 56.633
|
925 |
+
09/04/2024 18:40:05 - INFO - __main__ - *** Predict ***
|
926 |
+
[INFO|trainer.py:811] 2024-09-04 18:40:05,192 >> The following columns in the test set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, id, tokens. If ner_tags, id, tokens are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
|
927 |
+
[INFO|trainer.py:3819] 2024-09-04 18:40:05,194 >>
|
928 |
+
***** Running Prediction *****
|
929 |
+
[INFO|trainer.py:3821] 2024-09-04 18:40:05,194 >> Num examples = 4047
|
930 |
+
[INFO|trainer.py:3824] 2024-09-04 18:40:05,194 >> Batch size = 8
|
931 |
+
|
932 |
0%| | 0/506 [00:00<?, ?it/s]
|
933 |
2%|▏ | 10/506 [00:00<00:05, 95.19it/s]
|
934 |
4%|▍ | 20/506 [00:00<00:05, 85.84it/s]
|
935 |
6%|▌ | 29/506 [00:00<00:05, 85.46it/s]
|
936 |
8%|▊ | 38/506 [00:00<00:05, 83.57it/s]
|
937 |
9%|▉ | 47/506 [00:00<00:05, 83.68it/s]
|
938 |
11%|█ | 56/506 [00:00<00:05, 84.03it/s]
|
939 |
13%|█▎ | 65/506 [00:00<00:05, 82.78it/s]
|
940 |
15%|█▍ | 74/506 [00:00<00:05, 82.71it/s]
|
941 |
16%|█▋ | 83/506 [00:01<00:05, 77.99it/s]
|
942 |
18%|█▊ | 91/506 [00:01<00:05, 78.20it/s]
|
943 |
20%|█▉ | 100/506 [00:01<00:05, 79.97it/s]
|
944 |
22%|██▏ | 109/506 [00:01<00:04, 79.45it/s]
|
945 |
23%|██▎ | 118/506 [00:01<00:04, 80.16it/s]
|
946 |
25%|██▌ | 127/506 [00:01<00:04, 78.08it/s]
|
947 |
27%|██▋ | 135/506 [00:01<00:05, 72.06it/s]
|
948 |
28%|██▊ | 144/506 [00:01<00:04, 75.38it/s]
|
949 |
30%|███ | 152/506 [00:01<00:04, 75.99it/s]
|
950 |
32%|███▏ | 160/506 [00:02<00:04, 74.53it/s]
|
951 |
33%|███▎ | 168/506 [00:02<00:04, 76.02it/s]
|
952 |
35%|███▍ | 177/506 [00:02<00:04, 77.49it/s]
|
953 |
37%|███▋ | 186/506 [00:02<00:04, 79.41it/s]
|
954 |
39%|███▊ | 195/506 [00:02<00:03, 79.79it/s]
|
955 |
40%|████ | 204/506 [00:02<00:03, 80.39it/s]
|
956 |
42%|████▏ | 213/506 [00:02<00:03, 80.49it/s]
|
957 |
44%|████▍ | 222/506 [00:02<00:03, 78.46it/s]
|
958 |
45%|████▌ | 230/506 [00:02<00:03, 77.13it/s]
|
959 |
47%|████▋ | 238/506 [00:03<00:03, 77.47it/s]
|
960 |
49%|████▉ | 247/506 [00:03<00:03, 79.02it/s]
|
961 |
50%|█████ | 255/506 [00:03<00:03, 78.64it/s]
|
962 |
52%|█████▏ | 264/506 [00:03<00:03, 80.02it/s]
|
963 |
54%|█████▍ | 273/506 [00:03<00:02, 81.15it/s]
|
964 |
56%|█████▌ | 282/506 [00:03<00:02, 80.47it/s]
|
965 |
58%|█████▊ | 291/506 [00:03<00:02, 80.42it/s]
|
966 |
59%|█████▉ | 300/506 [00:03<00:02, 81.29it/s]
|
967 |
61%|██████ | 309/506 [00:03<00:02, 80.93it/s]
|
968 |
63%|██████▎ | 318/506 [00:03<00:02, 81.34it/s]
|
969 |
65%|██████▍ | 327/506 [00:04<00:02, 82.42it/s]
|
970 |
66%|██████▋ | 336/506 [00:04<00:02, 82.95it/s]
|
971 |
68%|██████▊ | 345/506 [00:04<00:01, 83.14it/s]
|
972 |
70%|██████▉ | 354/506 [00:04<00:01, 79.24it/s]
|
973 |
72%|███████▏ | 362/506 [00:04<00:01, 75.32it/s]
|
974 |
73%|███████▎ | 370/506 [00:04<00:01, 72.65it/s]
|
975 |
75%|███████▍ | 378/506 [00:04<00:01, 71.57it/s]
|
976 |
76%|███████▋ | 386/506 [00:04<00:01, 69.45it/s]
|
977 |
78%|███████▊ | 393/506 [00:05<00:01, 69.06it/s]
|
978 |
79%|███████▉ | 401/506 [00:05<00:01, 69.56it/s]
|
979 |
81%|████████ | 409/506 [00:05<00:01, 70.60it/s]
|
980 |
82%|████████▏ | 417/506 [00:05<00:01, 73.12it/s]
|
981 |
84%|████████▍ | 425/506 [00:05<00:01, 74.82it/s]
|
982 |
86%|████████▌ | 434/506 [00:05<00:00, 76.80it/s]
|
983 |
88%|████████▊ | 443/506 [00:05<00:00, 78.43it/s]
|
984 |
89%|████████▉ | 451/506 [00:05<00:00, 78.15it/s]
|
985 |
91%|█████████ | 460/506 [00:05<00:00, 80.40it/s]
|
986 |
93%|█████████▎| 469/506 [00:05<00:00, 81.42it/s]
|
987 |
94%|█████████▍| 478/506 [00:06<00:00, 82.24it/s]
|
988 |
96%|█████████▌| 487/506 [00:06<00:00, 80.66it/s]
|
989 |
98%|█████████▊| 496/506 [00:06<00:00, 81.16it/s]
|
990 |
+
[INFO|trainer.py:3503] 2024-09-04 18:40:14,231 >> Saving model checkpoint to /content/dissertation/scripts/ner/output
|
991 |
+
[INFO|configuration_utils.py:472] 2024-09-04 18:40:14,233 >> Configuration saved in /content/dissertation/scripts/ner/output/config.json
|
992 |
+
[INFO|modeling_utils.py:2799] 2024-09-04 18:40:15,628 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
|
993 |
+
[INFO|tokenization_utils_base.py:2684] 2024-09-04 18:40:15,629 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
|
994 |
+
[INFO|tokenization_utils_base.py:2693] 2024-09-04 18:40:15,629 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
|
995 |
+
***** predict metrics *****
|
996 |
+
predict_accuracy = 0.9466
|
997 |
+
predict_f1 = 0.6937
|
998 |
+
predict_loss = 0.3369
|
999 |
+
predict_precision = 0.6945
|
1000 |
+
predict_recall = 0.693
|
1001 |
+
predict_runtime = 0:00:08.87
|
1002 |
+
predict_samples_per_second = 455.971
|
1003 |
+
predict_steps_per_second = 57.01
|
1004 |
+
|
train_results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 10.0,
|
3 |
+
"total_flos": 7718163558521760.0,
|
4 |
+
"train_loss": 0.03527186407196906,
|
5 |
+
"train_runtime": 706.0488,
|
6 |
+
"train_samples": 15848,
|
7 |
+
"train_samples_per_second": 224.46,
|
8 |
+
"train_steps_per_second": 3.513
|
9 |
+
}
|
trainer_state.json
ADDED
@@ -0,0 +1,190 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.6985413290113451,
|
3 |
+
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-1736",
|
4 |
+
"epoch": 10.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 2480,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 1.0,
|
13 |
+
"eval_accuracy": 0.9477686162533286,
|
14 |
+
"eval_f1": 0.63506625891947,
|
15 |
+
"eval_loss": 0.16486208140850067,
|
16 |
+
"eval_precision": 0.5941821649976157,
|
17 |
+
"eval_recall": 0.6819923371647509,
|
18 |
+
"eval_runtime": 5.5136,
|
19 |
+
"eval_samples_per_second": 456.871,
|
20 |
+
"eval_steps_per_second": 57.132,
|
21 |
+
"step": 248
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"epoch": 2.0,
|
25 |
+
"eval_accuracy": 0.9476402836151304,
|
26 |
+
"eval_f1": 0.6630581867388362,
|
27 |
+
"eval_loss": 0.18148483335971832,
|
28 |
+
"eval_precision": 0.6557815845824411,
|
29 |
+
"eval_recall": 0.6704980842911877,
|
30 |
+
"eval_runtime": 5.4181,
|
31 |
+
"eval_samples_per_second": 464.925,
|
32 |
+
"eval_steps_per_second": 58.139,
|
33 |
+
"step": 496
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"epoch": 2.0161290322580645,
|
37 |
+
"grad_norm": 0.6250831484794617,
|
38 |
+
"learning_rate": 3.991935483870968e-05,
|
39 |
+
"loss": 0.134,
|
40 |
+
"step": 500
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"epoch": 3.0,
|
44 |
+
"eval_accuracy": 0.9491802752735089,
|
45 |
+
"eval_f1": 0.6810897435897436,
|
46 |
+
"eval_loss": 0.2111387550830841,
|
47 |
+
"eval_precision": 0.6651017214397497,
|
48 |
+
"eval_recall": 0.6978653530377669,
|
49 |
+
"eval_runtime": 5.4844,
|
50 |
+
"eval_samples_per_second": 459.302,
|
51 |
+
"eval_steps_per_second": 57.435,
|
52 |
+
"step": 744
|
53 |
+
},
|
54 |
+
{
|
55 |
+
"epoch": 4.0,
|
56 |
+
"eval_accuracy": 0.9488434020982386,
|
57 |
+
"eval_f1": 0.6900026518164943,
|
58 |
+
"eval_loss": 0.25230270624160767,
|
59 |
+
"eval_precision": 0.6692386831275721,
|
60 |
+
"eval_recall": 0.7120963327859879,
|
61 |
+
"eval_runtime": 5.4481,
|
62 |
+
"eval_samples_per_second": 462.36,
|
63 |
+
"eval_steps_per_second": 57.818,
|
64 |
+
"step": 992
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"epoch": 4.032258064516129,
|
68 |
+
"grad_norm": 0.7998089790344238,
|
69 |
+
"learning_rate": 2.9838709677419357e-05,
|
70 |
+
"loss": 0.026,
|
71 |
+
"step": 1000
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"epoch": 5.0,
|
75 |
+
"eval_accuracy": 0.9490840257948603,
|
76 |
+
"eval_f1": 0.6847083552285864,
|
77 |
+
"eval_loss": 0.27709877490997314,
|
78 |
+
"eval_precision": 0.6584133400707428,
|
79 |
+
"eval_recall": 0.7131910235358512,
|
80 |
+
"eval_runtime": 5.6532,
|
81 |
+
"eval_samples_per_second": 445.585,
|
82 |
+
"eval_steps_per_second": 55.72,
|
83 |
+
"step": 1240
|
84 |
+
},
|
85 |
+
{
|
86 |
+
"epoch": 6.0,
|
87 |
+
"eval_accuracy": 0.9486348615611665,
|
88 |
+
"eval_f1": 0.6907651715039579,
|
89 |
+
"eval_loss": 0.2968369126319885,
|
90 |
+
"eval_precision": 0.6668364747834946,
|
91 |
+
"eval_recall": 0.7164750957854407,
|
92 |
+
"eval_runtime": 5.4549,
|
93 |
+
"eval_samples_per_second": 461.787,
|
94 |
+
"eval_steps_per_second": 57.746,
|
95 |
+
"step": 1488
|
96 |
+
},
|
97 |
+
{
|
98 |
+
"epoch": 6.048387096774194,
|
99 |
+
"grad_norm": 0.15673314034938812,
|
100 |
+
"learning_rate": 1.975806451612903e-05,
|
101 |
+
"loss": 0.0084,
|
102 |
+
"step": 1500
|
103 |
+
},
|
104 |
+
{
|
105 |
+
"epoch": 7.0,
|
106 |
+
"eval_accuracy": 0.9496936058263018,
|
107 |
+
"eval_f1": 0.6985413290113451,
|
108 |
+
"eval_loss": 0.3088673949241638,
|
109 |
+
"eval_precision": 0.6896,
|
110 |
+
"eval_recall": 0.7077175697865353,
|
111 |
+
"eval_runtime": 5.5771,
|
112 |
+
"eval_samples_per_second": 451.669,
|
113 |
+
"eval_steps_per_second": 56.481,
|
114 |
+
"step": 1736
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 8.0,
|
118 |
+
"eval_accuracy": 0.9498861047835991,
|
119 |
+
"eval_f1": 0.6946236559139785,
|
120 |
+
"eval_loss": 0.31877079606056213,
|
121 |
+
"eval_precision": 0.6825145272054939,
|
122 |
+
"eval_recall": 0.7071702244116037,
|
123 |
+
"eval_runtime": 5.3015,
|
124 |
+
"eval_samples_per_second": 475.15,
|
125 |
+
"eval_steps_per_second": 59.417,
|
126 |
+
"step": 1984
|
127 |
+
},
|
128 |
+
{
|
129 |
+
"epoch": 8.064516129032258,
|
130 |
+
"grad_norm": 0.22283445298671722,
|
131 |
+
"learning_rate": 9.67741935483871e-06,
|
132 |
+
"loss": 0.0042,
|
133 |
+
"step": 2000
|
134 |
+
},
|
135 |
+
{
|
136 |
+
"epoch": 9.0,
|
137 |
+
"eval_accuracy": 0.9494529821296801,
|
138 |
+
"eval_f1": 0.6979722518676629,
|
139 |
+
"eval_loss": 0.3295721411705017,
|
140 |
+
"eval_precision": 0.6808953669963561,
|
141 |
+
"eval_recall": 0.715927750410509,
|
142 |
+
"eval_runtime": 5.4512,
|
143 |
+
"eval_samples_per_second": 462.102,
|
144 |
+
"eval_steps_per_second": 57.786,
|
145 |
+
"step": 2232
|
146 |
+
},
|
147 |
+
{
|
148 |
+
"epoch": 10.0,
|
149 |
+
"eval_accuracy": 0.9498861047835991,
|
150 |
+
"eval_f1": 0.6985058697972252,
|
151 |
+
"eval_loss": 0.33284899592399597,
|
152 |
+
"eval_precision": 0.6814159292035398,
|
153 |
+
"eval_recall": 0.7164750957854407,
|
154 |
+
"eval_runtime": 5.5975,
|
155 |
+
"eval_samples_per_second": 450.025,
|
156 |
+
"eval_steps_per_second": 56.276,
|
157 |
+
"step": 2480
|
158 |
+
},
|
159 |
+
{
|
160 |
+
"epoch": 10.0,
|
161 |
+
"step": 2480,
|
162 |
+
"total_flos": 7718163558521760.0,
|
163 |
+
"train_loss": 0.03527186407196906,
|
164 |
+
"train_runtime": 706.0488,
|
165 |
+
"train_samples_per_second": 224.46,
|
166 |
+
"train_steps_per_second": 3.513
|
167 |
+
}
|
168 |
+
],
|
169 |
+
"logging_steps": 500,
|
170 |
+
"max_steps": 2480,
|
171 |
+
"num_input_tokens_seen": 0,
|
172 |
+
"num_train_epochs": 10,
|
173 |
+
"save_steps": 500,
|
174 |
+
"stateful_callbacks": {
|
175 |
+
"TrainerControl": {
|
176 |
+
"args": {
|
177 |
+
"should_epoch_stop": false,
|
178 |
+
"should_evaluate": false,
|
179 |
+
"should_log": false,
|
180 |
+
"should_save": true,
|
181 |
+
"should_training_stop": true
|
182 |
+
},
|
183 |
+
"attributes": {}
|
184 |
+
}
|
185 |
+
},
|
186 |
+
"total_flos": 7718163558521760.0,
|
187 |
+
"train_batch_size": 32,
|
188 |
+
"trial_name": null,
|
189 |
+
"trial_params": null
|
190 |
+
}
|