config
Browse files- config.json +7 -7
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"activation_dropout": 0.1,
|
4 |
"activation_function": "gelu",
|
5 |
"adaptive": true,
|
@@ -18,7 +18,7 @@
|
|
18 |
"AutoModelForSequenceClassification": "modeling_lsg_bart.LSGBartForSequenceClassification"
|
19 |
},
|
20 |
"base_model_prefix": "lsg",
|
21 |
-
"block_size":
|
22 |
"bos_token_id": 0,
|
23 |
"classif_dropout": 0.1,
|
24 |
"classifier_dropout": 0.0,
|
@@ -54,19 +54,19 @@
|
|
54 |
"mask_first_token": false,
|
55 |
"max_position_embeddings": 16384,
|
56 |
"model_type": "bart",
|
57 |
-
"no_repeat_ngram_size":
|
58 |
"normalize_before": false,
|
59 |
"normalize_embedding": true,
|
60 |
-
"num_beams":
|
61 |
"num_global_tokens": 1,
|
62 |
"num_hidden_layers": 6,
|
63 |
"pad_token_id": 1,
|
64 |
"pass_global_tokens_to_decoder": true,
|
65 |
"pool_with_global": true,
|
66 |
"scale_embedding": false,
|
67 |
-
"sparse_block_size":
|
68 |
-
"sparsity_factor":
|
69 |
-
"sparsity_type": "
|
70 |
"task_specific_params": {
|
71 |
"summarization": {
|
72 |
"length_penalty": 1.0,
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "ccdv/lsg-bart-base-16384",
|
3 |
"activation_dropout": 0.1,
|
4 |
"activation_function": "gelu",
|
5 |
"adaptive": true,
|
|
|
18 |
"AutoModelForSequenceClassification": "modeling_lsg_bart.LSGBartForSequenceClassification"
|
19 |
},
|
20 |
"base_model_prefix": "lsg",
|
21 |
+
"block_size": 256,
|
22 |
"bos_token_id": 0,
|
23 |
"classif_dropout": 0.1,
|
24 |
"classifier_dropout": 0.0,
|
|
|
54 |
"mask_first_token": false,
|
55 |
"max_position_embeddings": 16384,
|
56 |
"model_type": "bart",
|
57 |
+
"no_repeat_ngram_size": null,
|
58 |
"normalize_before": false,
|
59 |
"normalize_embedding": true,
|
60 |
+
"num_beams": 5,
|
61 |
"num_global_tokens": 1,
|
62 |
"num_hidden_layers": 6,
|
63 |
"pad_token_id": 1,
|
64 |
"pass_global_tokens_to_decoder": true,
|
65 |
"pool_with_global": true,
|
66 |
"scale_embedding": false,
|
67 |
+
"sparse_block_size": 0,
|
68 |
+
"sparsity_factor": 4,
|
69 |
+
"sparsity_type": "none",
|
70 |
"task_specific_params": {
|
71 |
"summarization": {
|
72 |
"length_penalty": 1.0,
|