roscazo committed on
Commit 6f11da3 · 1 Parent(s): af6bf56

Initial commit

Files changed (6)
  1. checkpoint.pt +3 -0
  2. loss.tsv +11 -0
  3. pytorch_model.bin +3 -0
  4. test.tsv +0 -0
  5. training.log +220 -0
  6. weights.txt +0 -0
checkpoint.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:69c66a25142e5a518a96d679debadda50a1707b59b0c3d34a911eb2b15d5e876
+ size 1493635494
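
This is a Git LFS pointer, not the checkpoint itself: the repository tracks only the spec version, the SHA-256 object ID, and the payload size (about 1.5 GB), while the binary lives in LFS storage, so a plain git clone yields just these three lines. A minimal sketch of resolving the real file, assuming the repo is hosted on the Hugging Face Hub; the repo_id below is a hypothetical placeholder, only the filename comes from this commit:

# Sketch: fetch the LFS-backed payload behind this pointer file.
from huggingface_hub import hf_hub_download

local_path = hf_hub_download(
    repo_id="roscazo/CREBMSP",  # hypothetical; substitute the actual repo ID
    filename="checkpoint.pt",
)
print(local_path)  # local cached copy, 1,493,635,494 bytes per the pointer

Equivalently, `git lfs pull` inside a clone replaces the pointer files with their payloads.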
loss.tsv ADDED
@@ -0,0 +1,11 @@
+ EPOCH  TIMESTAMP  LEARNING_RATE  TRAIN_LOSS
+ 1      07:56:20   0.0001         0.621609109068684
+ 2      08:00:40   0.0000         0.18528141120861272
+ 3      08:05:00   0.0000         0.1255173663716284
+ 4      08:09:23   0.0000         0.09261398238630186
+ 5      08:13:45   0.0000         0.06865140495198072
+ 6      08:18:06   0.0000         0.047839101698062526
+ 7      08:22:26   0.0000         0.03289963716871862
+ 8      08:26:46   0.0000         0.02101287573210864
+ 9      08:31:06   0.0000         0.014194353463989447
+ 10     08:35:28   0.0000         0.008811923258925587
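
loss.tsv is the per-epoch summary written during training: wall-clock timestamp, learning rate, and mean train loss for each epoch (the learning-rate column is printed to four decimals, which is apparently why the decaying rate shows as 0.0000 after epoch 1). A quick sketch for inspecting the curve, assuming the file is tab-separated as the extension suggests; pandas and matplotlib are not part of this repo, just a convenient way to look at the file:

# Sketch: plot the training-loss curve recorded in loss.tsv.
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv("loss.tsv", sep="\t")  # assumes tab-separated columns
plt.plot(df["EPOCH"], df["TRAIN_LOSS"], marker="o")
plt.xlabel("epoch")
plt.ylabel("train loss")
plt.show()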
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c36701925a8fdf2df24d5cf95903953891f833839eb2da3a312a80893a50bbc1
+ size 1493636295
test.tsv ADDED
The diff for this file is too large to render.
training.log ADDED
@@ -0,0 +1,220 @@
+ 2023-04-11 07:52:01,240 ----------------------------------------------------------------------------------------------------
+ 2023-04-11 07:52:01,244 Model: "SequenceTagger(
+   (embeddings): TransformerWordEmbeddings(
+     (model): RobertaModel(
+       (embeddings): RobertaEmbeddings(
+         (word_embeddings): Embedding(50263, 768)
+         (position_embeddings): Embedding(514, 768, padding_idx=1)
+         (token_type_embeddings): Embedding(1, 768)
+         (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+         (dropout): Dropout(p=0.1, inplace=False)
+       )
+       (encoder): RobertaEncoder(
+         (layer): ModuleList(
+           (0-11): 12 x RobertaLayer(
+             (attention): RobertaAttention(
+               (self): RobertaSelfAttention(
+                 (query): Linear(in_features=768, out_features=768, bias=True)
+                 (key): Linear(in_features=768, out_features=768, bias=True)
+                 (value): Linear(in_features=768, out_features=768, bias=True)
+                 (dropout): Dropout(p=0.1, inplace=False)
+               )
+               (output): RobertaSelfOutput(
+                 (dense): Linear(in_features=768, out_features=768, bias=True)
+                 (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+                 (dropout): Dropout(p=0.1, inplace=False)
+               )
+             )
+             (intermediate): RobertaIntermediate(
+               (dense): Linear(in_features=768, out_features=3072, bias=True)
+               (intermediate_act_fn): GELUActivation()
+             )
+             (output): RobertaOutput(
+               (dense): Linear(in_features=3072, out_features=768, bias=True)
+               (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+               (dropout): Dropout(p=0.1, inplace=False)
+             )
+           )
+         )
+       )
+       (pooler): RobertaPooler(
+         (dense): Linear(in_features=768, out_features=768, bias=True)
+         (activation): Tanh()
+       )
+     )
+   )
+   (locked_dropout): LockedDropout(p=0.5)
+   (linear): Linear(in_features=768, out_features=17, bias=True)
+   (loss_function): CrossEntropyLoss()
+ )"
+ 2023-04-11 07:52:01,245 ----------------------------------------------------------------------------------------------------
+ 2023-04-11 07:52:01,247 Corpus: "Corpus: 12554 train + 4549 dev + 4505 test sentences"
+ 2023-04-11 07:52:01,248 ----------------------------------------------------------------------------------------------------
+ 2023-04-11 07:52:01,250 Parameters:
+ 2023-04-11 07:52:01,252  - learning_rate: "0.000050"
+ 2023-04-11 07:52:01,253  - mini_batch_size: "4"
+ 2023-04-11 07:52:01,254  - patience: "3"
+ 2023-04-11 07:52:01,256  - anneal_factor: "0.5"
+ 2023-04-11 07:52:01,257  - max_epochs: "10"
+ 2023-04-11 07:52:01,258  - shuffle: "True"
+ 2023-04-11 07:52:01,259  - train_with_dev: "True"
+ 2023-04-11 07:52:01,260  - batch_growth_annealing: "False"
+ 2023-04-11 07:52:01,262 ----------------------------------------------------------------------------------------------------
+ 2023-04-11 07:52:01,264 Model training base path: "CREBMSP_results"
+ 2023-04-11 07:52:01,265 ----------------------------------------------------------------------------------------------------
+ 2023-04-11 07:52:01,266 Device: cuda
+ 2023-04-11 07:52:01,267 ----------------------------------------------------------------------------------------------------
+ 2023-04-11 07:52:01,269 Embeddings storage mode: none
+ 2023-04-11 07:52:01,270 ----------------------------------------------------------------------------------------------------
+ 2023-04-11 07:52:31,267 epoch 1 - iter 427/4276 - loss 1.87909215 - time (sec): 30.00 - samples/sec: 1446.87 - lr: 0.000005
+ 2023-04-11 07:52:57,233 epoch 1 - iter 854/4276 - loss 1.32536726 - time (sec): 55.96 - samples/sec: 1540.07 - lr: 0.000010
+ 2023-04-11 07:53:22,647 epoch 1 - iter 1281/4276 - loss 1.12000789 - time (sec): 81.38 - samples/sec: 1412.04 - lr: 0.000015
+ 2023-04-11 07:53:48,118 epoch 1 - iter 1708/4276 - loss 1.00885882 - time (sec): 106.85 - samples/sec: 1268.03 - lr: 0.000020
+ 2023-04-11 07:54:13,232 epoch 1 - iter 2135/4276 - loss 0.90793861 - time (sec): 131.96 - samples/sec: 1192.25 - lr: 0.000025
+ 2023-04-11 07:54:38,606 epoch 1 - iter 2562/4276 - loss 0.83160292 - time (sec): 157.33 - samples/sec: 1137.84 - lr: 0.000030
+ 2023-04-11 07:55:03,961 epoch 1 - iter 2989/4276 - loss 0.76685321 - time (sec): 182.69 - samples/sec: 1097.97 - lr: 0.000035
+ 2023-04-11 07:55:29,860 epoch 1 - iter 3416/4276 - loss 0.68896532 - time (sec): 208.59 - samples/sec: 1129.83 - lr: 0.000040
+ 2023-04-11 07:55:55,140 epoch 1 - iter 3843/4276 - loss 0.64980627 - time (sec): 233.87 - samples/sec: 1106.99 - lr: 0.000045
+ 2023-04-11 07:56:20,160 epoch 1 - iter 4270/4276 - loss 0.62203959 - time (sec): 258.89 - samples/sec: 1070.06 - lr: 0.000050
+ 2023-04-11 07:56:20,508 ----------------------------------------------------------------------------------------------------
+ 2023-04-11 07:56:20,510 EPOCH 1 done: loss 0.6216 - lr 0.000050
+ 2023-04-11 07:56:22,961 ----------------------------------------------------------------------------------------------------
+ 2023-04-11 07:56:48,475 epoch 2 - iter 427/4276 - loss 0.20826646 - time (sec): 25.51 - samples/sec: 1089.16 - lr: 0.000049
+ 2023-04-11 07:57:14,086 epoch 2 - iter 854/4276 - loss 0.19309402 - time (sec): 51.12 - samples/sec: 1086.38 - lr: 0.000049
+ 2023-04-11 07:57:39,771 epoch 2 - iter 1281/4276 - loss 0.19314959 - time (sec): 76.81 - samples/sec: 1082.26 - lr: 0.000048
+ 2023-04-11 07:58:05,813 epoch 2 - iter 1708/4276 - loss 0.18982202 - time (sec): 102.85 - samples/sec: 1076.96 - lr: 0.000048
+ 2023-04-11 07:58:31,469 epoch 2 - iter 2135/4276 - loss 0.18835936 - time (sec): 128.51 - samples/sec: 1075.89 - lr: 0.000047
+ 2023-04-11 07:58:57,254 epoch 2 - iter 2562/4276 - loss 0.18721166 - time (sec): 154.29 - samples/sec: 1077.05 - lr: 0.000047
+ 2023-04-11 07:59:22,930 epoch 2 - iter 2989/4276 - loss 0.18831955 - time (sec): 179.97 - samples/sec: 1077.28 - lr: 0.000046
+ 2023-04-11 07:59:48,986 epoch 2 - iter 3416/4276 - loss 0.18784028 - time (sec): 206.02 - samples/sec: 1073.12 - lr: 0.000046
+ 2023-04-11 08:00:14,438 epoch 2 - iter 3843/4276 - loss 0.18631720 - time (sec): 231.48 - samples/sec: 1075.27 - lr: 0.000045
+ 2023-04-11 08:00:40,029 epoch 2 - iter 4270/4276 - loss 0.18545112 - time (sec): 257.07 - samples/sec: 1077.05 - lr: 0.000044
+ 2023-04-11 08:00:40,402 ----------------------------------------------------------------------------------------------------
+ 2023-04-11 08:00:40,404 EPOCH 2 done: loss 0.1853 - lr 0.000044
+ 2023-04-11 08:00:43,081 ----------------------------------------------------------------------------------------------------
+ 2023-04-11 08:01:08,689 epoch 3 - iter 427/4276 - loss 0.10756568 - time (sec): 25.61 - samples/sec: 1077.73 - lr: 0.000044
+ 2023-04-11 08:01:34,223 epoch 3 - iter 854/4276 - loss 0.11256584 - time (sec): 51.14 - samples/sec: 1067.61 - lr: 0.000043
+ 2023-04-11 08:01:59,709 epoch 3 - iter 1281/4276 - loss 0.11766577 - time (sec): 76.63 - samples/sec: 1063.30 - lr: 0.000043
+ 2023-04-11 08:02:25,508 epoch 3 - iter 1708/4276 - loss 0.11967896 - time (sec): 102.42 - samples/sec: 1069.08 - lr: 0.000042
+ 2023-04-11 08:02:51,126 epoch 3 - iter 2135/4276 - loss 0.12272097 - time (sec): 128.04 - samples/sec: 1068.42 - lr: 0.000042
+ 2023-04-11 08:03:16,785 epoch 3 - iter 2562/4276 - loss 0.12613423 - time (sec): 153.70 - samples/sec: 1070.88 - lr: 0.000041
+ 2023-04-11 08:03:42,674 epoch 3 - iter 2989/4276 - loss 0.12434777 - time (sec): 179.59 - samples/sec: 1073.70 - lr: 0.000041
+ 2023-04-11 08:04:08,548 epoch 3 - iter 3416/4276 - loss 0.12561538 - time (sec): 205.46 - samples/sec: 1076.38 - lr: 0.000040
+ 2023-04-11 08:04:34,388 epoch 3 - iter 3843/4276 - loss 0.12639782 - time (sec): 231.31 - samples/sec: 1077.42 - lr: 0.000039
+ 2023-04-11 08:05:00,280 epoch 3 - iter 4270/4276 - loss 0.12565441 - time (sec): 257.20 - samples/sec: 1077.04 - lr: 0.000039
+ 2023-04-11 08:05:00,628 ----------------------------------------------------------------------------------------------------
+ 2023-04-11 08:05:00,630 EPOCH 3 done: loss 0.1255 - lr 0.000039
+ 2023-04-11 08:05:03,316 ----------------------------------------------------------------------------------------------------
+ 2023-04-11 08:05:29,064 epoch 4 - iter 427/4276 - loss 0.07937009 - time (sec): 25.75 - samples/sec: 1093.59 - lr: 0.000038
+ 2023-04-11 08:05:55,266 epoch 4 - iter 854/4276 - loss 0.08553328 - time (sec): 51.95 - samples/sec: 1096.96 - lr: 0.000038
+ 2023-04-11 08:06:21,370 epoch 4 - iter 1281/4276 - loss 0.08226230 - time (sec): 78.05 - samples/sec: 1077.95 - lr: 0.000037
+ 2023-04-11 08:06:47,652 epoch 4 - iter 1708/4276 - loss 0.08759891 - time (sec): 104.33 - samples/sec: 1073.69 - lr: 0.000037
+ 2023-04-11 08:07:13,692 epoch 4 - iter 2135/4276 - loss 0.08892818 - time (sec): 130.37 - samples/sec: 1075.03 - lr: 0.000036
+ 2023-04-11 08:07:39,673 epoch 4 - iter 2562/4276 - loss 0.09054387 - time (sec): 156.36 - samples/sec: 1070.47 - lr: 0.000036
+ 2023-04-11 08:08:05,603 epoch 4 - iter 2989/4276 - loss 0.09010262 - time (sec): 182.29 - samples/sec: 1068.84 - lr: 0.000035
+ 2023-04-11 08:08:31,466 epoch 4 - iter 3416/4276 - loss 0.09103521 - time (sec): 208.15 - samples/sec: 1064.43 - lr: 0.000034
+ 2023-04-11 08:08:57,317 epoch 4 - iter 3843/4276 - loss 0.09209534 - time (sec): 234.00 - samples/sec: 1065.67 - lr: 0.000034
+ 2023-04-11 08:09:23,268 epoch 4 - iter 4270/4276 - loss 0.09259541 - time (sec): 259.95 - samples/sec: 1065.57 - lr: 0.000033
+ 2023-04-11 08:09:23,618 ----------------------------------------------------------------------------------------------------
+ 2023-04-11 08:09:23,619 EPOCH 4 done: loss 0.0926 - lr 0.000033
+ 2023-04-11 08:09:26,348 ----------------------------------------------------------------------------------------------------
+ 2023-04-11 08:09:52,083 epoch 5 - iter 427/4276 - loss 0.05592755 - time (sec): 25.73 - samples/sec: 1089.14 - lr: 0.000033
+ 2023-04-11 08:10:17,950 epoch 5 - iter 854/4276 - loss 0.06527284 - time (sec): 51.60 - samples/sec: 1056.57 - lr: 0.000032
+ 2023-04-11 08:10:43,825 epoch 5 - iter 1281/4276 - loss 0.06153976 - time (sec): 77.47 - samples/sec: 1056.26 - lr: 0.000032
+ 2023-04-11 08:11:09,692 epoch 5 - iter 1708/4276 - loss 0.06749125 - time (sec): 103.34 - samples/sec: 1063.57 - lr: 0.000031
+ 2023-04-11 08:11:35,614 epoch 5 - iter 2135/4276 - loss 0.06839364 - time (sec): 129.26 - samples/sec: 1068.27 - lr: 0.000031
+ 2023-04-11 08:12:01,303 epoch 5 - iter 2562/4276 - loss 0.06963346 - time (sec): 154.95 - samples/sec: 1066.16 - lr: 0.000030
+ 2023-04-11 08:12:27,328 epoch 5 - iter 2989/4276 - loss 0.06933764 - time (sec): 180.98 - samples/sec: 1070.11 - lr: 0.000029
+ 2023-04-11 08:12:53,272 epoch 5 - iter 3416/4276 - loss 0.06831147 - time (sec): 206.92 - samples/sec: 1068.24 - lr: 0.000029
+ 2023-04-11 08:13:19,128 epoch 5 - iter 3843/4276 - loss 0.06885265 - time (sec): 232.78 - samples/sec: 1069.77 - lr: 0.000028
+ 2023-04-11 08:13:44,881 epoch 5 - iter 4270/4276 - loss 0.06861645 - time (sec): 258.53 - samples/sec: 1071.43 - lr: 0.000028
+ 2023-04-11 08:13:45,250 ----------------------------------------------------------------------------------------------------
+ 2023-04-11 08:13:45,251 EPOCH 5 done: loss 0.0687 - lr 0.000028
+ 2023-04-11 08:13:47,855 ----------------------------------------------------------------------------------------------------
+ 2023-04-11 08:14:13,715 epoch 6 - iter 427/4276 - loss 0.04965217 - time (sec): 25.86 - samples/sec: 1047.26 - lr: 0.000027
+ 2023-04-11 08:14:39,500 epoch 6 - iter 854/4276 - loss 0.05200554 - time (sec): 51.64 - samples/sec: 1043.00 - lr: 0.000027
+ 2023-04-11 08:15:05,494 epoch 6 - iter 1281/4276 - loss 0.04883649 - time (sec): 77.63 - samples/sec: 1053.18 - lr: 0.000026
+ 2023-04-11 08:15:31,675 epoch 6 - iter 1708/4276 - loss 0.04860057 - time (sec): 103.82 - samples/sec: 1062.16 - lr: 0.000026
+ 2023-04-11 08:15:57,397 epoch 6 - iter 2135/4276 - loss 0.04686293 - time (sec): 129.54 - samples/sec: 1064.28 - lr: 0.000025
+ 2023-04-11 08:16:23,066 epoch 6 - iter 2562/4276 - loss 0.04688968 - time (sec): 155.21 - samples/sec: 1075.47 - lr: 0.000024
+ 2023-04-11 08:16:48,784 epoch 6 - iter 2989/4276 - loss 0.04738732 - time (sec): 180.92 - samples/sec: 1076.18 - lr: 0.000024
+ 2023-04-11 08:17:14,472 epoch 6 - iter 3416/4276 - loss 0.04857132 - time (sec): 206.61 - samples/sec: 1078.35 - lr: 0.000023
+ 2023-04-11 08:17:40,237 epoch 6 - iter 3843/4276 - loss 0.04764392 - time (sec): 232.38 - samples/sec: 1078.25 - lr: 0.000023
+ 2023-04-11 08:18:05,989 epoch 6 - iter 4270/4276 - loss 0.04784009 - time (sec): 258.13 - samples/sec: 1072.85 - lr: 0.000022
+ 2023-04-11 08:18:06,327 ----------------------------------------------------------------------------------------------------
+ 2023-04-11 08:18:06,329 EPOCH 6 done: loss 0.0478 - lr 0.000022
+ 2023-04-11 08:18:08,965 ----------------------------------------------------------------------------------------------------
+ 2023-04-11 08:18:34,621 epoch 7 - iter 427/4276 - loss 0.04169676 - time (sec): 25.65 - samples/sec: 1078.45 - lr: 0.000022
+ 2023-04-11 08:19:00,288 epoch 7 - iter 854/4276 - loss 0.03889063 - time (sec): 51.32 - samples/sec: 1079.22 - lr: 0.000021
+ 2023-04-11 08:19:25,845 epoch 7 - iter 1281/4276 - loss 0.03600230 - time (sec): 76.88 - samples/sec: 1074.59 - lr: 0.000021
+ 2023-04-11 08:19:51,633 epoch 7 - iter 1708/4276 - loss 0.03408375 - time (sec): 102.67 - samples/sec: 1069.48 - lr: 0.000020
+ 2023-04-11 08:20:17,371 epoch 7 - iter 2135/4276 - loss 0.03496732 - time (sec): 128.40 - samples/sec: 1071.00 - lr: 0.000019
+ 2023-04-11 08:20:43,117 epoch 7 - iter 2562/4276 - loss 0.03456081 - time (sec): 154.15 - samples/sec: 1076.58 - lr: 0.000019
+ 2023-04-11 08:21:08,941 epoch 7 - iter 2989/4276 - loss 0.03472130 - time (sec): 179.97 - samples/sec: 1080.88 - lr: 0.000018
+ 2023-04-11 08:21:34,633 epoch 7 - iter 3416/4276 - loss 0.03388419 - time (sec): 205.67 - samples/sec: 1082.92 - lr: 0.000018
+ 2023-04-11 08:22:00,268 epoch 7 - iter 3843/4276 - loss 0.03321656 - time (sec): 231.30 - samples/sec: 1079.07 - lr: 0.000017
+ 2023-04-11 08:22:26,001 epoch 7 - iter 4270/4276 - loss 0.03294924 - time (sec): 257.03 - samples/sec: 1077.38 - lr: 0.000017
+ 2023-04-11 08:22:26,358 ----------------------------------------------------------------------------------------------------
+ 2023-04-11 08:22:26,359 EPOCH 7 done: loss 0.0329 - lr 0.000017
+ 2023-04-11 08:22:28,991 ----------------------------------------------------------------------------------------------------
+ 2023-04-11 08:22:54,759 epoch 8 - iter 427/4276 - loss 0.01991391 - time (sec): 25.77 - samples/sec: 1091.09 - lr: 0.000016
+ 2023-04-11 08:23:20,455 epoch 8 - iter 854/4276 - loss 0.02008748 - time (sec): 51.46 - samples/sec: 1087.44 - lr: 0.000016
+ 2023-04-11 08:23:46,301 epoch 8 - iter 1281/4276 - loss 0.02071964 - time (sec): 77.31 - samples/sec: 1091.13 - lr: 0.000015
+ 2023-04-11 08:24:12,005 epoch 8 - iter 1708/4276 - loss 0.02060885 - time (sec): 103.01 - samples/sec: 1086.76 - lr: 0.000014
+ 2023-04-11 08:24:37,602 epoch 8 - iter 2135/4276 - loss 0.02230171 - time (sec): 128.61 - samples/sec: 1081.19 - lr: 0.000014
+ 2023-04-11 08:25:03,104 epoch 8 - iter 2562/4276 - loss 0.02194943 - time (sec): 154.11 - samples/sec: 1081.02 - lr: 0.000013
+ 2023-04-11 08:25:28,792 epoch 8 - iter 2989/4276 - loss 0.02166994 - time (sec): 179.80 - samples/sec: 1081.85 - lr: 0.000013
+ 2023-04-11 08:25:54,314 epoch 8 - iter 3416/4276 - loss 0.02079076 - time (sec): 205.32 - samples/sec: 1078.58 - lr: 0.000012
+ 2023-04-11 08:26:19,932 epoch 8 - iter 3843/4276 - loss 0.02085187 - time (sec): 230.94 - samples/sec: 1077.62 - lr: 0.000012
+ 2023-04-11 08:26:45,880 epoch 8 - iter 4270/4276 - loss 0.02104430 - time (sec): 256.89 - samples/sec: 1077.94 - lr: 0.000011
+ 2023-04-11 08:26:46,229 ----------------------------------------------------------------------------------------------------
+ 2023-04-11 08:26:46,231 EPOCH 8 done: loss 0.0210 - lr 0.000011
+ 2023-04-11 08:26:48,847 ----------------------------------------------------------------------------------------------------
+ 2023-04-11 08:27:14,788 epoch 9 - iter 427/4276 - loss 0.01704588 - time (sec): 25.94 - samples/sec: 1092.91 - lr: 0.000011
+ 2023-04-11 08:27:40,564 epoch 9 - iter 854/4276 - loss 0.01373665 - time (sec): 51.72 - samples/sec: 1083.59 - lr: 0.000010
+ 2023-04-11 08:28:06,247 epoch 9 - iter 1281/4276 - loss 0.01269875 - time (sec): 77.40 - samples/sec: 1099.73 - lr: 0.000009
+ 2023-04-11 08:28:31,749 epoch 9 - iter 1708/4276 - loss 0.01307406 - time (sec): 102.90 - samples/sec: 1092.49 - lr: 0.000009
+ 2023-04-11 08:28:57,340 epoch 9 - iter 2135/4276 - loss 0.01330464 - time (sec): 128.49 - samples/sec: 1083.63 - lr: 0.000008
+ 2023-04-11 08:29:23,005 epoch 9 - iter 2562/4276 - loss 0.01323370 - time (sec): 154.16 - samples/sec: 1084.86 - lr: 0.000008
+ 2023-04-11 08:29:48,714 epoch 9 - iter 2989/4276 - loss 0.01356354 - time (sec): 179.87 - samples/sec: 1081.40 - lr: 0.000007
+ 2023-04-11 08:30:14,522 epoch 9 - iter 3416/4276 - loss 0.01333538 - time (sec): 205.67 - samples/sec: 1080.12 - lr: 0.000007
+ 2023-04-11 08:30:40,139 epoch 9 - iter 3843/4276 - loss 0.01382847 - time (sec): 231.29 - samples/sec: 1076.78 - lr: 0.000006
+ 2023-04-11 08:31:05,963 epoch 9 - iter 4270/4276 - loss 0.01417043 - time (sec): 257.11 - samples/sec: 1077.64 - lr: 0.000006
+ 2023-04-11 08:31:06,310 ----------------------------------------------------------------------------------------------------
+ 2023-04-11 08:31:06,312 EPOCH 9 done: loss 0.0142 - lr 0.000006
+ 2023-04-11 08:31:08,911 ----------------------------------------------------------------------------------------------------
+ 2023-04-11 08:31:34,627 epoch 10 - iter 427/4276 - loss 0.00788266 - time (sec): 25.71 - samples/sec: 1100.38 - lr: 0.000005
+ 2023-04-11 08:32:00,278 epoch 10 - iter 854/4276 - loss 0.00916004 - time (sec): 51.37 - samples/sec: 1082.68 - lr: 0.000004
+ 2023-04-11 08:32:25,952 epoch 10 - iter 1281/4276 - loss 0.00947741 - time (sec): 77.04 - samples/sec: 1084.11 - lr: 0.000004
+ 2023-04-11 08:32:51,619 epoch 10 - iter 1708/4276 - loss 0.00922028 - time (sec): 102.71 - samples/sec: 1082.23 - lr: 0.000003
+ 2023-04-11 08:33:17,397 epoch 10 - iter 2135/4276 - loss 0.00924503 - time (sec): 128.48 - samples/sec: 1087.21 - lr: 0.000003
+ 2023-04-11 08:33:43,209 epoch 10 - iter 2562/4276 - loss 0.00928543 - time (sec): 154.30 - samples/sec: 1085.54 - lr: 0.000002
+ 2023-04-11 08:34:09,247 epoch 10 - iter 2989/4276 - loss 0.00893538 - time (sec): 180.33 - samples/sec: 1082.30 - lr: 0.000002
+ 2023-04-11 08:34:35,096 epoch 10 - iter 3416/4276 - loss 0.00939691 - time (sec): 206.18 - samples/sec: 1079.56 - lr: 0.000001
+ 2023-04-11 08:35:01,291 epoch 10 - iter 3843/4276 - loss 0.00881917 - time (sec): 232.38 - samples/sec: 1073.84 - lr: 0.000001
+ 2023-04-11 08:35:27,885 epoch 10 - iter 4270/4276 - loss 0.00882288 - time (sec): 258.97 - samples/sec: 1069.59 - lr: 0.000000
+ 2023-04-11 08:35:28,233 ----------------------------------------------------------------------------------------------------
+ 2023-04-11 08:35:28,234 EPOCH 10 done: loss 0.0088 - lr 0.000000
+ 2023-04-11 08:35:36,527 ----------------------------------------------------------------------------------------------------
+ 2023-04-11 08:35:36,530 Testing using last state of model ...
+ 2023-04-11 08:36:06,557 Evaluating as a multi-label problem: False
+ 2023-04-11 08:36:06,627 0.877 0.884 0.8805 0.7929
+ 2023-04-11 08:36:06,629
+ Results:
+ - F-score (micro) 0.8805
+ - F-score (macro) 0.8612
+ - Accuracy 0.7929
+
+ By class:
+               precision    recall  f1-score   support
+
+         PROC     0.8581    0.8811    0.8695      3364
+         DISO     0.8911    0.8908    0.8910      2472
+         CHEM     0.9091    0.9073    0.9082      1565
+         ANAT     0.8082    0.7468    0.7763       316
+
+    micro avg     0.8770    0.8840    0.8805      7717
+    macro avg     0.8666    0.8565    0.8612      7717
+ weighted avg     0.8770    0.8840    0.8804      7717
+
+ 2023-04-11 08:36:06,629 ----------------------------------------------------------------------------------------------------
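
The log pins down the recipe: a Flair SequenceTagger that places a LockedDropout and a single Linear(768 → 17) head directly on RoBERTa-base transformer word embeddings (no RNN, no CRF, plain cross-entropy), trained for 10 epochs at a peak learning rate of 5e-5 with mini-batches of 4 and train_with_dev enabled. Below is a sketch of an equivalent setup using Flair's classic train() API with the logged parameters; the corpus layout and the encoder checkpoint name are assumptions, since the log does not record them, and the warm-up-then-linear-decay learning-rate trace suggests the actual run used a linear schedule with warm-up (as in Flair's fine-tuning mode) rather than anneal-on-plateau.

# Sketch: a Flair training setup consistent with this log (Flair ~0.12 API).
# Corpus paths, column format, and the encoder checkpoint are hypothetical.
from flair.datasets import ColumnCorpus
from flair.embeddings import TransformerWordEmbeddings
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

corpus = ColumnCorpus("data/", {0: "text", 1: "ner"})  # hypothetical data layout
tag_dictionary = corpus.make_label_dictionary(label_type="ner")

embeddings = TransformerWordEmbeddings("some/roberta-base-checkpoint")  # assumed encoder
tagger = SequenceTagger(
    hidden_size=256,       # unused here: the log shows no RNN between embeddings and head
    embeddings=embeddings,
    tag_dictionary=tag_dictionary,
    tag_type="ner",
    use_rnn=False,         # log: Linear head applied directly to the embeddings
    use_crf=False,         # log: CrossEntropyLoss, no CRF
)

trainer = ModelTrainer(tagger, corpus)
trainer.train(
    "CREBMSP_results",     # base path from the log
    learning_rate=0.00005,
    mini_batch_size=4,
    max_epochs=10,
    patience=3,
    anneal_factor=0.5,
    train_with_dev=True,
    embeddings_storage_mode="none",
)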
weights.txt ADDED
File without changes
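
For completeness, a sketch of running inference with the committed weights once the LFS payload has been fetched; the Spanish example sentence and the "ner" label type are assumptions, inferred from the PROC/DISO/CHEM/ANAT classes in the evaluation:

# Sketch: load the committed model with Flair and tag a sentence.
from flair.data import Sentence
from flair.models import SequenceTagger

tagger = SequenceTagger.load("pytorch_model.bin")  # path to the LFS-resolved file
sentence = Sentence("El paciente presenta dolor abdominal agudo.")  # illustrative text
tagger.predict(sentence)
for span in sentence.get_spans("ner"):  # label type assumed to be "ner"
    print(span.text, span.get_label("ner").value)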