guymorganb
committed on
Commit
·
58a4d44
1
Parent(s):
a198e93
Reduced the default block_size and sparse_block_size to 64 to deal with leftovers
Browse files
- modeling_lsg_bert.py +4 -4
modeling_lsg_bert.py
CHANGED
@@ -59,15 +59,15 @@ class LSGBertConfig(BertConfig):
|
|
59 |
|
60 |
def __init__(
|
61 |
self,
|
62 |
-
adaptive=True,
|
63 |
base_model_prefix="lsg",
|
64 |
-
block_size=
|
|
|
|
|
|
|
65 |
lsh_num_pre_rounds=1,
|
66 |
mask_first_token=False,
|
67 |
num_global_tokens=1,
|
68 |
pool_with_global=True,
|
69 |
-
sparse_block_size=128,
|
70 |
-
sparsity_factor=2,
|
71 |
sparsity_type="norm",
|
72 |
**kwargs
|
73 |
):
|
|
|
59 |
|
60 |
def __init__(
|
61 |
self,
|
|
|
62 |
base_model_prefix="lsg",
|
63 |
+
block_size=64,
|
64 |
+
sparse_block_size=64,
|
65 |
+
sparsity_factor=2,
|
66 |
+
adaptive=True,
|
67 |
lsh_num_pre_rounds=1,
|
68 |
mask_first_token=False,
|
69 |
num_global_tokens=1,
|
70 |
pool_with_global=True,
|
|
|
|
|
71 |
sparsity_type="norm",
|
72 |
**kwargs
|
73 |
):
|