ccdv committed on
Commit
de6c06a
·
1 Parent(s): 003a42f
Files changed (1) hide show
  1. config.json +7 -7
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "bart_test",
3
  "activation_dropout": 0.1,
4
  "activation_function": "gelu",
5
  "adaptive": true,
@@ -18,7 +18,7 @@
18
  "AutoModelForSequenceClassification": "modeling_lsg_bart.LSGBartForSequenceClassification"
19
  },
20
  "base_model_prefix": "lsg",
21
- "block_size": 128,
22
  "bos_token_id": 0,
23
  "classif_dropout": 0.1,
24
  "classifier_dropout": 0.0,
@@ -54,19 +54,19 @@
54
  "mask_first_token": false,
55
  "max_position_embeddings": 16384,
56
  "model_type": "bart",
57
- "no_repeat_ngram_size": 3,
58
  "normalize_before": false,
59
  "normalize_embedding": true,
60
- "num_beams": 4,
61
  "num_global_tokens": 1,
62
  "num_hidden_layers": 6,
63
  "pad_token_id": 1,
64
  "pass_global_tokens_to_decoder": true,
65
  "pool_with_global": true,
66
  "scale_embedding": false,
67
- "sparse_block_size": 128,
68
- "sparsity_factor": 2,
69
- "sparsity_type": "norm",
70
  "task_specific_params": {
71
  "summarization": {
72
  "length_penalty": 1.0,
 
1
  {
2
+ "_name_or_path": "ccdv/lsg-bart-base-16384",
3
  "activation_dropout": 0.1,
4
  "activation_function": "gelu",
5
  "adaptive": true,
 
18
  "AutoModelForSequenceClassification": "modeling_lsg_bart.LSGBartForSequenceClassification"
19
  },
20
  "base_model_prefix": "lsg",
21
+ "block_size": 256,
22
  "bos_token_id": 0,
23
  "classif_dropout": 0.1,
24
  "classifier_dropout": 0.0,
 
54
  "mask_first_token": false,
55
  "max_position_embeddings": 16384,
56
  "model_type": "bart",
57
+ "no_repeat_ngram_size": null,
58
  "normalize_before": false,
59
  "normalize_embedding": true,
60
+ "num_beams": 5,
61
  "num_global_tokens": 1,
62
  "num_hidden_layers": 6,
63
  "pad_token_id": 1,
64
  "pass_global_tokens_to_decoder": true,
65
  "pool_with_global": true,
66
  "scale_embedding": false,
67
+ "sparse_block_size": 0,
68
+ "sparsity_factor": 4,
69
+ "sparsity_type": "none",
70
  "task_specific_params": {
71
  "summarization": {
72
  "length_penalty": 1.0,