Initial commit
Browse files- checkpoint.pt +3 -0
- loss.tsv +11 -0
- pytorch_model.bin +3 -0
- test.tsv +0 -0
- training.log +220 -0
- weights.txt +0 -0
checkpoint.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:69c66a25142e5a518a96d679debadda50a1707b59b0c3d34a911eb2b15d5e876
|
3 |
+
size 1493635494
|
loss.tsv
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
EPOCH TIMESTAMP LEARNING_RATE TRAIN_LOSS
|
2 |
+
1 07:56:20 0.0001 0.621609109068684
|
3 |
+
2 08:00:40 0.0000 0.18528141120861272
|
4 |
+
3 08:05:00 0.0000 0.1255173663716284
|
5 |
+
4 08:09:23 0.0000 0.09261398238630186
|
6 |
+
5 08:13:45 0.0000 0.06865140495198072
|
7 |
+
6 08:18:06 0.0000 0.047839101698062526
|
8 |
+
7 08:22:26 0.0000 0.03289963716871862
|
9 |
+
8 08:26:46 0.0000 0.02101287573210864
|
10 |
+
9 08:31:06 0.0000 0.014194353463989447
|
11 |
+
10 08:35:28 0.0000 0.008811923258925587
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c36701925a8fdf2df24d5cf95903953891f833839eb2da3a312a80893a50bbc1
|
3 |
+
size 1493636295
|
test.tsv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
training.log
ADDED
@@ -0,0 +1,220 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2023-04-11 07:52:01,240 ----------------------------------------------------------------------------------------------------
|
2 |
+
2023-04-11 07:52:01,244 Model: "SequenceTagger(
|
3 |
+
(embeddings): TransformerWordEmbeddings(
|
4 |
+
(model): RobertaModel(
|
5 |
+
(embeddings): RobertaEmbeddings(
|
6 |
+
(word_embeddings): Embedding(50263, 768)
|
7 |
+
(position_embeddings): Embedding(514, 768, padding_idx=1)
|
8 |
+
(token_type_embeddings): Embedding(1, 768)
|
9 |
+
(LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
10 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
11 |
+
)
|
12 |
+
(encoder): RobertaEncoder(
|
13 |
+
(layer): ModuleList(
|
14 |
+
(0-11): 12 x RobertaLayer(
|
15 |
+
(attention): RobertaAttention(
|
16 |
+
(self): RobertaSelfAttention(
|
17 |
+
(query): Linear(in_features=768, out_features=768, bias=True)
|
18 |
+
(key): Linear(in_features=768, out_features=768, bias=True)
|
19 |
+
(value): Linear(in_features=768, out_features=768, bias=True)
|
20 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
21 |
+
)
|
22 |
+
(output): RobertaSelfOutput(
|
23 |
+
(dense): Linear(in_features=768, out_features=768, bias=True)
|
24 |
+
(LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
25 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
26 |
+
)
|
27 |
+
)
|
28 |
+
(intermediate): RobertaIntermediate(
|
29 |
+
(dense): Linear(in_features=768, out_features=3072, bias=True)
|
30 |
+
(intermediate_act_fn): GELUActivation()
|
31 |
+
)
|
32 |
+
(output): RobertaOutput(
|
33 |
+
(dense): Linear(in_features=3072, out_features=768, bias=True)
|
34 |
+
(LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
35 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
36 |
+
)
|
37 |
+
)
|
38 |
+
)
|
39 |
+
)
|
40 |
+
(pooler): RobertaPooler(
|
41 |
+
(dense): Linear(in_features=768, out_features=768, bias=True)
|
42 |
+
(activation): Tanh()
|
43 |
+
)
|
44 |
+
)
|
45 |
+
)
|
46 |
+
(locked_dropout): LockedDropout(p=0.5)
|
47 |
+
(linear): Linear(in_features=768, out_features=17, bias=True)
|
48 |
+
(loss_function): CrossEntropyLoss()
|
49 |
+
)"
|
50 |
+
2023-04-11 07:52:01,245 ----------------------------------------------------------------------------------------------------
|
51 |
+
2023-04-11 07:52:01,247 Corpus: "Corpus: 12554 train + 4549 dev + 4505 test sentences"
|
52 |
+
2023-04-11 07:52:01,248 ----------------------------------------------------------------------------------------------------
|
53 |
+
2023-04-11 07:52:01,250 Parameters:
|
54 |
+
2023-04-11 07:52:01,252 - learning_rate: "0.000050"
|
55 |
+
2023-04-11 07:52:01,253 - mini_batch_size: "4"
|
56 |
+
2023-04-11 07:52:01,254 - patience: "3"
|
57 |
+
2023-04-11 07:52:01,256 - anneal_factor: "0.5"
|
58 |
+
2023-04-11 07:52:01,257 - max_epochs: "10"
|
59 |
+
2023-04-11 07:52:01,258 - shuffle: "True"
|
60 |
+
2023-04-11 07:52:01,259 - train_with_dev: "True"
|
61 |
+
2023-04-11 07:52:01,260 - batch_growth_annealing: "False"
|
62 |
+
2023-04-11 07:52:01,262 ----------------------------------------------------------------------------------------------------
|
63 |
+
2023-04-11 07:52:01,264 Model training base path: "CREBMSP_results"
|
64 |
+
2023-04-11 07:52:01,265 ----------------------------------------------------------------------------------------------------
|
65 |
+
2023-04-11 07:52:01,266 Device: cuda
|
66 |
+
2023-04-11 07:52:01,267 ----------------------------------------------------------------------------------------------------
|
67 |
+
2023-04-11 07:52:01,269 Embeddings storage mode: none
|
68 |
+
2023-04-11 07:52:01,270 ----------------------------------------------------------------------------------------------------
|
69 |
+
2023-04-11 07:52:31,267 epoch 1 - iter 427/4276 - loss 1.87909215 - time (sec): 30.00 - samples/sec: 1446.87 - lr: 0.000005
|
70 |
+
2023-04-11 07:52:57,233 epoch 1 - iter 854/4276 - loss 1.32536726 - time (sec): 55.96 - samples/sec: 1540.07 - lr: 0.000010
|
71 |
+
2023-04-11 07:53:22,647 epoch 1 - iter 1281/4276 - loss 1.12000789 - time (sec): 81.38 - samples/sec: 1412.04 - lr: 0.000015
|
72 |
+
2023-04-11 07:53:48,118 epoch 1 - iter 1708/4276 - loss 1.00885882 - time (sec): 106.85 - samples/sec: 1268.03 - lr: 0.000020
|
73 |
+
2023-04-11 07:54:13,232 epoch 1 - iter 2135/4276 - loss 0.90793861 - time (sec): 131.96 - samples/sec: 1192.25 - lr: 0.000025
|
74 |
+
2023-04-11 07:54:38,606 epoch 1 - iter 2562/4276 - loss 0.83160292 - time (sec): 157.33 - samples/sec: 1137.84 - lr: 0.000030
|
75 |
+
2023-04-11 07:55:03,961 epoch 1 - iter 2989/4276 - loss 0.76685321 - time (sec): 182.69 - samples/sec: 1097.97 - lr: 0.000035
|
76 |
+
2023-04-11 07:55:29,860 epoch 1 - iter 3416/4276 - loss 0.68896532 - time (sec): 208.59 - samples/sec: 1129.83 - lr: 0.000040
|
77 |
+
2023-04-11 07:55:55,140 epoch 1 - iter 3843/4276 - loss 0.64980627 - time (sec): 233.87 - samples/sec: 1106.99 - lr: 0.000045
|
78 |
+
2023-04-11 07:56:20,160 epoch 1 - iter 4270/4276 - loss 0.62203959 - time (sec): 258.89 - samples/sec: 1070.06 - lr: 0.000050
|
79 |
+
2023-04-11 07:56:20,508 ----------------------------------------------------------------------------------------------------
|
80 |
+
2023-04-11 07:56:20,510 EPOCH 1 done: loss 0.6216 - lr 0.000050
|
81 |
+
2023-04-11 07:56:22,961 ----------------------------------------------------------------------------------------------------
|
82 |
+
2023-04-11 07:56:48,475 epoch 2 - iter 427/4276 - loss 0.20826646 - time (sec): 25.51 - samples/sec: 1089.16 - lr: 0.000049
|
83 |
+
2023-04-11 07:57:14,086 epoch 2 - iter 854/4276 - loss 0.19309402 - time (sec): 51.12 - samples/sec: 1086.38 - lr: 0.000049
|
84 |
+
2023-04-11 07:57:39,771 epoch 2 - iter 1281/4276 - loss 0.19314959 - time (sec): 76.81 - samples/sec: 1082.26 - lr: 0.000048
|
85 |
+
2023-04-11 07:58:05,813 epoch 2 - iter 1708/4276 - loss 0.18982202 - time (sec): 102.85 - samples/sec: 1076.96 - lr: 0.000048
|
86 |
+
2023-04-11 07:58:31,469 epoch 2 - iter 2135/4276 - loss 0.18835936 - time (sec): 128.51 - samples/sec: 1075.89 - lr: 0.000047
|
87 |
+
2023-04-11 07:58:57,254 epoch 2 - iter 2562/4276 - loss 0.18721166 - time (sec): 154.29 - samples/sec: 1077.05 - lr: 0.000047
|
88 |
+
2023-04-11 07:59:22,930 epoch 2 - iter 2989/4276 - loss 0.18831955 - time (sec): 179.97 - samples/sec: 1077.28 - lr: 0.000046
|
89 |
+
2023-04-11 07:59:48,986 epoch 2 - iter 3416/4276 - loss 0.18784028 - time (sec): 206.02 - samples/sec: 1073.12 - lr: 0.000046
|
90 |
+
2023-04-11 08:00:14,438 epoch 2 - iter 3843/4276 - loss 0.18631720 - time (sec): 231.48 - samples/sec: 1075.27 - lr: 0.000045
|
91 |
+
2023-04-11 08:00:40,029 epoch 2 - iter 4270/4276 - loss 0.18545112 - time (sec): 257.07 - samples/sec: 1077.05 - lr: 0.000044
|
92 |
+
2023-04-11 08:00:40,402 ----------------------------------------------------------------------------------------------------
|
93 |
+
2023-04-11 08:00:40,404 EPOCH 2 done: loss 0.1853 - lr 0.000044
|
94 |
+
2023-04-11 08:00:43,081 ----------------------------------------------------------------------------------------------------
|
95 |
+
2023-04-11 08:01:08,689 epoch 3 - iter 427/4276 - loss 0.10756568 - time (sec): 25.61 - samples/sec: 1077.73 - lr: 0.000044
|
96 |
+
2023-04-11 08:01:34,223 epoch 3 - iter 854/4276 - loss 0.11256584 - time (sec): 51.14 - samples/sec: 1067.61 - lr: 0.000043
|
97 |
+
2023-04-11 08:01:59,709 epoch 3 - iter 1281/4276 - loss 0.11766577 - time (sec): 76.63 - samples/sec: 1063.30 - lr: 0.000043
|
98 |
+
2023-04-11 08:02:25,508 epoch 3 - iter 1708/4276 - loss 0.11967896 - time (sec): 102.42 - samples/sec: 1069.08 - lr: 0.000042
|
99 |
+
2023-04-11 08:02:51,126 epoch 3 - iter 2135/4276 - loss 0.12272097 - time (sec): 128.04 - samples/sec: 1068.42 - lr: 0.000042
|
100 |
+
2023-04-11 08:03:16,785 epoch 3 - iter 2562/4276 - loss 0.12613423 - time (sec): 153.70 - samples/sec: 1070.88 - lr: 0.000041
|
101 |
+
2023-04-11 08:03:42,674 epoch 3 - iter 2989/4276 - loss 0.12434777 - time (sec): 179.59 - samples/sec: 1073.70 - lr: 0.000041
|
102 |
+
2023-04-11 08:04:08,548 epoch 3 - iter 3416/4276 - loss 0.12561538 - time (sec): 205.46 - samples/sec: 1076.38 - lr: 0.000040
|
103 |
+
2023-04-11 08:04:34,388 epoch 3 - iter 3843/4276 - loss 0.12639782 - time (sec): 231.31 - samples/sec: 1077.42 - lr: 0.000039
|
104 |
+
2023-04-11 08:05:00,280 epoch 3 - iter 4270/4276 - loss 0.12565441 - time (sec): 257.20 - samples/sec: 1077.04 - lr: 0.000039
|
105 |
+
2023-04-11 08:05:00,628 ----------------------------------------------------------------------------------------------------
|
106 |
+
2023-04-11 08:05:00,630 EPOCH 3 done: loss 0.1255 - lr 0.000039
|
107 |
+
2023-04-11 08:05:03,316 ----------------------------------------------------------------------------------------------------
|
108 |
+
2023-04-11 08:05:29,064 epoch 4 - iter 427/4276 - loss 0.07937009 - time (sec): 25.75 - samples/sec: 1093.59 - lr: 0.000038
|
109 |
+
2023-04-11 08:05:55,266 epoch 4 - iter 854/4276 - loss 0.08553328 - time (sec): 51.95 - samples/sec: 1096.96 - lr: 0.000038
|
110 |
+
2023-04-11 08:06:21,370 epoch 4 - iter 1281/4276 - loss 0.08226230 - time (sec): 78.05 - samples/sec: 1077.95 - lr: 0.000037
|
111 |
+
2023-04-11 08:06:47,652 epoch 4 - iter 1708/4276 - loss 0.08759891 - time (sec): 104.33 - samples/sec: 1073.69 - lr: 0.000037
|
112 |
+
2023-04-11 08:07:13,692 epoch 4 - iter 2135/4276 - loss 0.08892818 - time (sec): 130.37 - samples/sec: 1075.03 - lr: 0.000036
|
113 |
+
2023-04-11 08:07:39,673 epoch 4 - iter 2562/4276 - loss 0.09054387 - time (sec): 156.36 - samples/sec: 1070.47 - lr: 0.000036
|
114 |
+
2023-04-11 08:08:05,603 epoch 4 - iter 2989/4276 - loss 0.09010262 - time (sec): 182.29 - samples/sec: 1068.84 - lr: 0.000035
|
115 |
+
2023-04-11 08:08:31,466 epoch 4 - iter 3416/4276 - loss 0.09103521 - time (sec): 208.15 - samples/sec: 1064.43 - lr: 0.000034
|
116 |
+
2023-04-11 08:08:57,317 epoch 4 - iter 3843/4276 - loss 0.09209534 - time (sec): 234.00 - samples/sec: 1065.67 - lr: 0.000034
|
117 |
+
2023-04-11 08:09:23,268 epoch 4 - iter 4270/4276 - loss 0.09259541 - time (sec): 259.95 - samples/sec: 1065.57 - lr: 0.000033
|
118 |
+
2023-04-11 08:09:23,618 ----------------------------------------------------------------------------------------------------
|
119 |
+
2023-04-11 08:09:23,619 EPOCH 4 done: loss 0.0926 - lr 0.000033
|
120 |
+
2023-04-11 08:09:26,348 ----------------------------------------------------------------------------------------------------
|
121 |
+
2023-04-11 08:09:52,083 epoch 5 - iter 427/4276 - loss 0.05592755 - time (sec): 25.73 - samples/sec: 1089.14 - lr: 0.000033
|
122 |
+
2023-04-11 08:10:17,950 epoch 5 - iter 854/4276 - loss 0.06527284 - time (sec): 51.60 - samples/sec: 1056.57 - lr: 0.000032
|
123 |
+
2023-04-11 08:10:43,825 epoch 5 - iter 1281/4276 - loss 0.06153976 - time (sec): 77.47 - samples/sec: 1056.26 - lr: 0.000032
|
124 |
+
2023-04-11 08:11:09,692 epoch 5 - iter 1708/4276 - loss 0.06749125 - time (sec): 103.34 - samples/sec: 1063.57 - lr: 0.000031
|
125 |
+
2023-04-11 08:11:35,614 epoch 5 - iter 2135/4276 - loss 0.06839364 - time (sec): 129.26 - samples/sec: 1068.27 - lr: 0.000031
|
126 |
+
2023-04-11 08:12:01,303 epoch 5 - iter 2562/4276 - loss 0.06963346 - time (sec): 154.95 - samples/sec: 1066.16 - lr: 0.000030
|
127 |
+
2023-04-11 08:12:27,328 epoch 5 - iter 2989/4276 - loss 0.06933764 - time (sec): 180.98 - samples/sec: 1070.11 - lr: 0.000029
|
128 |
+
2023-04-11 08:12:53,272 epoch 5 - iter 3416/4276 - loss 0.06831147 - time (sec): 206.92 - samples/sec: 1068.24 - lr: 0.000029
|
129 |
+
2023-04-11 08:13:19,128 epoch 5 - iter 3843/4276 - loss 0.06885265 - time (sec): 232.78 - samples/sec: 1069.77 - lr: 0.000028
|
130 |
+
2023-04-11 08:13:44,881 epoch 5 - iter 4270/4276 - loss 0.06861645 - time (sec): 258.53 - samples/sec: 1071.43 - lr: 0.000028
|
131 |
+
2023-04-11 08:13:45,250 ----------------------------------------------------------------------------------------------------
|
132 |
+
2023-04-11 08:13:45,251 EPOCH 5 done: loss 0.0687 - lr 0.000028
|
133 |
+
2023-04-11 08:13:47,855 ----------------------------------------------------------------------------------------------------
|
134 |
+
2023-04-11 08:14:13,715 epoch 6 - iter 427/4276 - loss 0.04965217 - time (sec): 25.86 - samples/sec: 1047.26 - lr: 0.000027
|
135 |
+
2023-04-11 08:14:39,500 epoch 6 - iter 854/4276 - loss 0.05200554 - time (sec): 51.64 - samples/sec: 1043.00 - lr: 0.000027
|
136 |
+
2023-04-11 08:15:05,494 epoch 6 - iter 1281/4276 - loss 0.04883649 - time (sec): 77.63 - samples/sec: 1053.18 - lr: 0.000026
|
137 |
+
2023-04-11 08:15:31,675 epoch 6 - iter 1708/4276 - loss 0.04860057 - time (sec): 103.82 - samples/sec: 1062.16 - lr: 0.000026
|
138 |
+
2023-04-11 08:15:57,397 epoch 6 - iter 2135/4276 - loss 0.04686293 - time (sec): 129.54 - samples/sec: 1064.28 - lr: 0.000025
|
139 |
+
2023-04-11 08:16:23,066 epoch 6 - iter 2562/4276 - loss 0.04688968 - time (sec): 155.21 - samples/sec: 1075.47 - lr: 0.000024
|
140 |
+
2023-04-11 08:16:48,784 epoch 6 - iter 2989/4276 - loss 0.04738732 - time (sec): 180.92 - samples/sec: 1076.18 - lr: 0.000024
|
141 |
+
2023-04-11 08:17:14,472 epoch 6 - iter 3416/4276 - loss 0.04857132 - time (sec): 206.61 - samples/sec: 1078.35 - lr: 0.000023
|
142 |
+
2023-04-11 08:17:40,237 epoch 6 - iter 3843/4276 - loss 0.04764392 - time (sec): 232.38 - samples/sec: 1078.25 - lr: 0.000023
|
143 |
+
2023-04-11 08:18:05,989 epoch 6 - iter 4270/4276 - loss 0.04784009 - time (sec): 258.13 - samples/sec: 1072.85 - lr: 0.000022
|
144 |
+
2023-04-11 08:18:06,327 ----------------------------------------------------------------------------------------------------
|
145 |
+
2023-04-11 08:18:06,329 EPOCH 6 done: loss 0.0478 - lr 0.000022
|
146 |
+
2023-04-11 08:18:08,965 ----------------------------------------------------------------------------------------------------
|
147 |
+
2023-04-11 08:18:34,621 epoch 7 - iter 427/4276 - loss 0.04169676 - time (sec): 25.65 - samples/sec: 1078.45 - lr: 0.000022
|
148 |
+
2023-04-11 08:19:00,288 epoch 7 - iter 854/4276 - loss 0.03889063 - time (sec): 51.32 - samples/sec: 1079.22 - lr: 0.000021
|
149 |
+
2023-04-11 08:19:25,845 epoch 7 - iter 1281/4276 - loss 0.03600230 - time (sec): 76.88 - samples/sec: 1074.59 - lr: 0.000021
|
150 |
+
2023-04-11 08:19:51,633 epoch 7 - iter 1708/4276 - loss 0.03408375 - time (sec): 102.67 - samples/sec: 1069.48 - lr: 0.000020
|
151 |
+
2023-04-11 08:20:17,371 epoch 7 - iter 2135/4276 - loss 0.03496732 - time (sec): 128.40 - samples/sec: 1071.00 - lr: 0.000019
|
152 |
+
2023-04-11 08:20:43,117 epoch 7 - iter 2562/4276 - loss 0.03456081 - time (sec): 154.15 - samples/sec: 1076.58 - lr: 0.000019
|
153 |
+
2023-04-11 08:21:08,941 epoch 7 - iter 2989/4276 - loss 0.03472130 - time (sec): 179.97 - samples/sec: 1080.88 - lr: 0.000018
|
154 |
+
2023-04-11 08:21:34,633 epoch 7 - iter 3416/4276 - loss 0.03388419 - time (sec): 205.67 - samples/sec: 1082.92 - lr: 0.000018
|
155 |
+
2023-04-11 08:22:00,268 epoch 7 - iter 3843/4276 - loss 0.03321656 - time (sec): 231.30 - samples/sec: 1079.07 - lr: 0.000017
|
156 |
+
2023-04-11 08:22:26,001 epoch 7 - iter 4270/4276 - loss 0.03294924 - time (sec): 257.03 - samples/sec: 1077.38 - lr: 0.000017
|
157 |
+
2023-04-11 08:22:26,358 ----------------------------------------------------------------------------------------------------
|
158 |
+
2023-04-11 08:22:26,359 EPOCH 7 done: loss 0.0329 - lr 0.000017
|
159 |
+
2023-04-11 08:22:28,991 ----------------------------------------------------------------------------------------------------
|
160 |
+
2023-04-11 08:22:54,759 epoch 8 - iter 427/4276 - loss 0.01991391 - time (sec): 25.77 - samples/sec: 1091.09 - lr: 0.000016
|
161 |
+
2023-04-11 08:23:20,455 epoch 8 - iter 854/4276 - loss 0.02008748 - time (sec): 51.46 - samples/sec: 1087.44 - lr: 0.000016
|
162 |
+
2023-04-11 08:23:46,301 epoch 8 - iter 1281/4276 - loss 0.02071964 - time (sec): 77.31 - samples/sec: 1091.13 - lr: 0.000015
|
163 |
+
2023-04-11 08:24:12,005 epoch 8 - iter 1708/4276 - loss 0.02060885 - time (sec): 103.01 - samples/sec: 1086.76 - lr: 0.000014
|
164 |
+
2023-04-11 08:24:37,602 epoch 8 - iter 2135/4276 - loss 0.02230171 - time (sec): 128.61 - samples/sec: 1081.19 - lr: 0.000014
|
165 |
+
2023-04-11 08:25:03,104 epoch 8 - iter 2562/4276 - loss 0.02194943 - time (sec): 154.11 - samples/sec: 1081.02 - lr: 0.000013
|
166 |
+
2023-04-11 08:25:28,792 epoch 8 - iter 2989/4276 - loss 0.02166994 - time (sec): 179.80 - samples/sec: 1081.85 - lr: 0.000013
|
167 |
+
2023-04-11 08:25:54,314 epoch 8 - iter 3416/4276 - loss 0.02079076 - time (sec): 205.32 - samples/sec: 1078.58 - lr: 0.000012
|
168 |
+
2023-04-11 08:26:19,932 epoch 8 - iter 3843/4276 - loss 0.02085187 - time (sec): 230.94 - samples/sec: 1077.62 - lr: 0.000012
|
169 |
+
2023-04-11 08:26:45,880 epoch 8 - iter 4270/4276 - loss 0.02104430 - time (sec): 256.89 - samples/sec: 1077.94 - lr: 0.000011
|
170 |
+
2023-04-11 08:26:46,229 ----------------------------------------------------------------------------------------------------
|
171 |
+
2023-04-11 08:26:46,231 EPOCH 8 done: loss 0.0210 - lr 0.000011
|
172 |
+
2023-04-11 08:26:48,847 ----------------------------------------------------------------------------------------------------
|
173 |
+
2023-04-11 08:27:14,788 epoch 9 - iter 427/4276 - loss 0.01704588 - time (sec): 25.94 - samples/sec: 1092.91 - lr: 0.000011
|
174 |
+
2023-04-11 08:27:40,564 epoch 9 - iter 854/4276 - loss 0.01373665 - time (sec): 51.72 - samples/sec: 1083.59 - lr: 0.000010
|
175 |
+
2023-04-11 08:28:06,247 epoch 9 - iter 1281/4276 - loss 0.01269875 - time (sec): 77.40 - samples/sec: 1099.73 - lr: 0.000009
|
176 |
+
2023-04-11 08:28:31,749 epoch 9 - iter 1708/4276 - loss 0.01307406 - time (sec): 102.90 - samples/sec: 1092.49 - lr: 0.000009
|
177 |
+
2023-04-11 08:28:57,340 epoch 9 - iter 2135/4276 - loss 0.01330464 - time (sec): 128.49 - samples/sec: 1083.63 - lr: 0.000008
|
178 |
+
2023-04-11 08:29:23,005 epoch 9 - iter 2562/4276 - loss 0.01323370 - time (sec): 154.16 - samples/sec: 1084.86 - lr: 0.000008
|
179 |
+
2023-04-11 08:29:48,714 epoch 9 - iter 2989/4276 - loss 0.01356354 - time (sec): 179.87 - samples/sec: 1081.40 - lr: 0.000007
|
180 |
+
2023-04-11 08:30:14,522 epoch 9 - iter 3416/4276 - loss 0.01333538 - time (sec): 205.67 - samples/sec: 1080.12 - lr: 0.000007
|
181 |
+
2023-04-11 08:30:40,139 epoch 9 - iter 3843/4276 - loss 0.01382847 - time (sec): 231.29 - samples/sec: 1076.78 - lr: 0.000006
|
182 |
+
2023-04-11 08:31:05,963 epoch 9 - iter 4270/4276 - loss 0.01417043 - time (sec): 257.11 - samples/sec: 1077.64 - lr: 0.000006
|
183 |
+
2023-04-11 08:31:06,310 ----------------------------------------------------------------------------------------------------
|
184 |
+
2023-04-11 08:31:06,312 EPOCH 9 done: loss 0.0142 - lr 0.000006
|
185 |
+
2023-04-11 08:31:08,911 ----------------------------------------------------------------------------------------------------
|
186 |
+
2023-04-11 08:31:34,627 epoch 10 - iter 427/4276 - loss 0.00788266 - time (sec): 25.71 - samples/sec: 1100.38 - lr: 0.000005
|
187 |
+
2023-04-11 08:32:00,278 epoch 10 - iter 854/4276 - loss 0.00916004 - time (sec): 51.37 - samples/sec: 1082.68 - lr: 0.000004
|
188 |
+
2023-04-11 08:32:25,952 epoch 10 - iter 1281/4276 - loss 0.00947741 - time (sec): 77.04 - samples/sec: 1084.11 - lr: 0.000004
|
189 |
+
2023-04-11 08:32:51,619 epoch 10 - iter 1708/4276 - loss 0.00922028 - time (sec): 102.71 - samples/sec: 1082.23 - lr: 0.000003
|
190 |
+
2023-04-11 08:33:17,397 epoch 10 - iter 2135/4276 - loss 0.00924503 - time (sec): 128.48 - samples/sec: 1087.21 - lr: 0.000003
|
191 |
+
2023-04-11 08:33:43,209 epoch 10 - iter 2562/4276 - loss 0.00928543 - time (sec): 154.30 - samples/sec: 1085.54 - lr: 0.000002
|
192 |
+
2023-04-11 08:34:09,247 epoch 10 - iter 2989/4276 - loss 0.00893538 - time (sec): 180.33 - samples/sec: 1082.30 - lr: 0.000002
|
193 |
+
2023-04-11 08:34:35,096 epoch 10 - iter 3416/4276 - loss 0.00939691 - time (sec): 206.18 - samples/sec: 1079.56 - lr: 0.000001
|
194 |
+
2023-04-11 08:35:01,291 epoch 10 - iter 3843/4276 - loss 0.00881917 - time (sec): 232.38 - samples/sec: 1073.84 - lr: 0.000001
|
195 |
+
2023-04-11 08:35:27,885 epoch 10 - iter 4270/4276 - loss 0.00882288 - time (sec): 258.97 - samples/sec: 1069.59 - lr: 0.000000
|
196 |
+
2023-04-11 08:35:28,233 ----------------------------------------------------------------------------------------------------
|
197 |
+
2023-04-11 08:35:28,234 EPOCH 10 done: loss 0.0088 - lr 0.000000
|
198 |
+
2023-04-11 08:35:36,527 ----------------------------------------------------------------------------------------------------
|
199 |
+
2023-04-11 08:35:36,530 Testing using last state of model ...
|
200 |
+
2023-04-11 08:36:06,557 Evaluating as a multi-label problem: False
|
201 |
+
2023-04-11 08:36:06,627 0.877 0.884 0.8805 0.7929
|
202 |
+
2023-04-11 08:36:06,629
|
203 |
+
Results:
|
204 |
+
- F-score (micro) 0.8805
|
205 |
+
- F-score (macro) 0.8612
|
206 |
+
- Accuracy 0.7929
|
207 |
+
|
208 |
+
By class:
|
209 |
+
precision recall f1-score support
|
210 |
+
|
211 |
+
PROC 0.8581 0.8811 0.8695 3364
|
212 |
+
DISO 0.8911 0.8908 0.8910 2472
|
213 |
+
CHEM 0.9091 0.9073 0.9082 1565
|
214 |
+
ANAT 0.8082 0.7468 0.7763 316
|
215 |
+
|
216 |
+
micro avg 0.8770 0.8840 0.8805 7717
|
217 |
+
macro avg 0.8666 0.8565 0.8612 7717
|
218 |
+
weighted avg 0.8770 0.8840 0.8804 7717
|
219 |
+
|
220 |
+
2023-04-11 08:36:06,629 ----------------------------------------------------------------------------------------------------
|
weights.txt
ADDED
File without changes
|