svercoutere committed on
Commit
ac43a81
1 Parent(s): 7219c5d

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ textcat_multilabel/model filter=lfs diff=lfs merge=lfs -text
37
+ transformer/model filter=lfs diff=lfs merge=lfs -text
config.cfg ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [paths]
2
+ train = "/content/spacy_textcat_train.spacy"
3
+ dev = "/content/spacy_textcat_test.spacy"
4
+ vectors = null
5
+ init_tok2vec = null
6
+
7
+ [system]
8
+ gpu_allocator = "pytorch"
9
+ seed = 0
10
+
11
+ [nlp]
12
+ lang = "nl"
13
+ pipeline = ["transformer","textcat_multilabel"]
14
+ batch_size = 128
15
+ disabled = []
16
+ before_creation = null
17
+ after_creation = null
18
+ after_pipeline_creation = null
19
+ tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
20
+ vectors = {"@vectors":"spacy.Vectors.v1"}
21
+
22
+ [components]
23
+
24
+ [components.textcat_multilabel]
25
+ factory = "textcat_multilabel"
26
+ scorer = {"@scorers":"spacy.textcat_multilabel_scorer.v2"}
27
+ threshold = 0.5
28
+
29
+ [components.textcat_multilabel.model]
30
+ @architectures = "spacy.TextCatEnsemble.v2"
31
+ nO = null
32
+
33
+ [components.textcat_multilabel.model.linear_model]
34
+ @architectures = "spacy.TextCatBOW.v3"
35
+ exclusive_classes = false
36
+ length = 262144
37
+ ngram_size = 1
38
+ no_output_layer = false
39
+ nO = null
40
+
41
+ [components.textcat_multilabel.model.tok2vec]
42
+ @architectures = "spacy-transformers.TransformerListener.v1"
43
+ grad_factor = 1.0
44
+ pooling = {"@layers":"reduce_mean.v1"}
45
+ upstream = "*"
46
+
47
+ [components.transformer]
48
+ factory = "transformer"
49
+ max_batch_items = 4096
50
+ set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"}
51
+
52
+ [components.transformer.model]
53
+ @architectures = "spacy-transformers.TransformerModel.v3"
54
+ name = "svercoutere/robbert-2023-dutch-base-abb"
55
+ mixed_precision = false
56
+
57
+ [components.transformer.model.get_spans]
58
+ @span_getters = "spacy-transformers.strided_spans.v1"
59
+ window = 128
60
+ stride = 96
61
+
62
+ [components.transformer.model.grad_scaler_config]
63
+
64
+ [components.transformer.model.tokenizer_config]
65
+ use_fast = true
66
+
67
+ [components.transformer.model.transformer_config]
68
+
69
+ [corpora]
70
+
71
+ [corpora.dev]
72
+ @readers = "spacy.Corpus.v1"
73
+ path = ${paths.dev}
74
+ max_length = 0
75
+ gold_preproc = false
76
+ limit = 0
77
+ augmenter = null
78
+
79
+ [corpora.train]
80
+ @readers = "spacy.Corpus.v1"
81
+ path = ${paths.train}
82
+ max_length = 0
83
+ gold_preproc = false
84
+ limit = 0
85
+ augmenter = null
86
+
87
+ [training]
88
+ accumulate_gradient = 3
89
+ dev_corpus = "corpora.dev"
90
+ train_corpus = "corpora.train"
91
+ seed = ${system.seed}
92
+ gpu_allocator = ${system.gpu_allocator}
93
+ dropout = 0.1
94
+ patience = 1600
95
+ max_epochs = 0
96
+ max_steps = 20000
97
+ eval_frequency = 200
98
+ frozen_components = []
99
+ annotating_components = []
100
+ before_to_disk = null
101
+ before_update = null
102
+
103
+ [training.batcher]
104
+ @batchers = "spacy.batch_by_padded.v1"
105
+ discard_oversize = true
106
+ size = 2000
107
+ buffer = 256
108
+ get_length = null
109
+
110
+ [training.logger]
111
+ @loggers = "spacy.ConsoleLogger.v1"
112
+ progress_bar = false
113
+
114
+ [training.optimizer]
115
+ @optimizers = "Adam.v1"
116
+ beta1 = 0.9
117
+ beta2 = 0.999
118
+ L2_is_weight_decay = true
119
+ L2 = 0.01
120
+ grad_clip = 1.0
121
+ use_averages = false
122
+ eps = 0.00000001
123
+
124
+ [training.optimizer.learn_rate]
125
+ @schedules = "warmup_linear.v1"
126
+ warmup_steps = 250
127
+ total_steps = 20000
128
+ initial_rate = 0.00005
129
+
130
+ [training.score_weights]
131
+ cats_score = 1.0
132
+ cats_score_desc = null
133
+ cats_micro_p = null
134
+ cats_micro_r = null
135
+ cats_micro_f = null
136
+ cats_macro_p = null
137
+ cats_macro_r = null
138
+ cats_macro_f = null
139
+ cats_macro_auc = null
140
+ cats_f_per_type = null
141
+
142
+ [pretraining]
143
+
144
+ [initialize]
145
+ vectors = ${paths.vectors}
146
+ init_tok2vec = ${paths.init_tok2vec}
147
+ vocab_data = null
148
+ lookups = null
149
+ before_init = null
150
+ after_init = null
151
+
152
+ [initialize.components]
153
+
154
+ [initialize.tokenizer]
meta.json ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "lang":"nl",
3
+ "name":"pipeline",
4
+ "version":"0.0.0",
5
+ "spacy_version":">=3.7.5,<3.8.0",
6
+ "description":"",
7
+ "author":"",
8
+ "email":"",
9
+ "url":"",
10
+ "license":"",
11
+ "spacy_git_version":"a6d0fc360",
12
+ "vectors":{
13
+ "width":0,
14
+ "vectors":0,
15
+ "keys":0,
16
+ "name":null,
17
+ "mode":"default"
18
+ },
19
+ "labels":{
20
+ "transformer":[
21
+
22
+ ],
23
+ "textcat_multilabel":[
24
+ "burgerzaken",
25
+ "stadsbestuur",
26
+ "cultuur, sport en vrije tijd",
27
+ "mobiliteit en openbare werken",
28
+ "groen en milieu",
29
+ "onderwijs en kinderopvang",
30
+ "samenleven, welzijn en gezondheid",
31
+ "werken en ondernemen",
32
+ "wonen en (ver)bouwen"
33
+ ]
34
+ },
35
+ "pipeline":[
36
+ "transformer",
37
+ "textcat_multilabel"
38
+ ],
39
+ "components":[
40
+ "transformer",
41
+ "textcat_multilabel"
42
+ ],
43
+ "disabled":[
44
+
45
+ ],
46
+ "performance":{
47
+ "cats_score":0.9719654552,
48
+ "cats_score_desc":"macro AUC",
49
+ "cats_micro_p":0.8677536232,
50
+ "cats_micro_r":0.8607367475,
51
+ "cats_micro_f":0.8642309427,
52
+ "cats_macro_p":0.7953757735,
53
+ "cats_macro_r":0.8020388718,
54
+ "cats_macro_f":0.7967031577,
55
+ "cats_macro_auc":0.9719654552,
56
+ "cats_f_per_type":{
57
+ "burgerzaken":{
58
+ "p":0.8823529412,
59
+ "r":0.8823529412,
60
+ "f":0.8823529412
61
+ },
62
+ "stadsbestuur":{
63
+ "p":0.936,
64
+ "r":0.8780487805,
65
+ "f":0.9060987415
66
+ },
67
+ "cultuur, sport en vrije tijd":{
68
+ "p":0.7671232877,
69
+ "r":0.8549618321,
70
+ "f":0.8086642599
71
+ },
72
+ "mobiliteit en openbare werken":{
73
+ "p":0.8923076923,
74
+ "r":0.8529411765,
75
+ "f":0.8721804511
76
+ },
77
+ "groen en milieu":{
78
+ "p":0.6481481481,
79
+ "r":0.7954545455,
80
+ "f":0.7142857143
81
+ },
82
+ "onderwijs en kinderopvang":{
83
+ "p":0.8947368421,
84
+ "r":0.8717948718,
85
+ "f":0.8831168831
86
+ },
87
+ "samenleven, welzijn en gezondheid":{
88
+ "p":0.6511627907,
89
+ "r":0.6666666667,
90
+ "f":0.6588235294
91
+ },
92
+ "werken en ondernemen":{
93
+ "p":0.6153846154,
94
+ "r":0.5,
95
+ "f":0.5517241379
96
+ },
97
+ "wonen en (ver)bouwen":{
98
+ "p":0.8711656442,
99
+ "r":0.9161290323,
100
+ "f":0.893081761
101
+ }
102
+ },
103
+ "transformer_loss":10.075634643,
104
+ "textcat_multilabel_loss":1.4619364219
105
+ }
106
+ }
textcat_multilabel/cfg ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "labels":[
3
+ "burgerzaken",
4
+ "stadsbestuur",
5
+ "cultuur, sport en vrije tijd",
6
+ "mobiliteit en openbare werken",
7
+ "groen en milieu",
8
+ "onderwijs en kinderopvang",
9
+ "samenleven, welzijn en gezondheid",
10
+ "werken en ondernemen",
11
+ "wonen en (ver)bouwen"
12
+ ],
13
+ "threshold":0.5
14
+ }
textcat_multilabel/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c150416d1a00c98f54bb4cb661e4dbc9aae4102d5d058d81b7678f2a24e8c001
3
+ size 16564411
tokenizer ADDED
The diff for this file is too large to render. See raw diff
 
transformer/cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "max_batch_items":4096
3
+ }
transformer/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ed2d953e347826f578226f456e8697e263d84f0fa889425ce3dfe19a6cbd5f5
3
+ size 501386812
vocab/key2row ADDED
@@ -0,0 +1 @@
 
 
1
+
vocab/lookups.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76be8b528d0075f7aae98d6fa57a6d3c83ae480a8469e668d7b0af968995ac71
3
+ size 1
vocab/strings.json ADDED
The diff for this file is too large to render. See raw diff
 
vocab/vectors ADDED
Binary file (128 Bytes). View file
 
vocab/vectors.cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "mode":"default"
3
+ }