guymorganb committed
Commit 9e948ba · 1 Parent(s): 143f3be
messing around with differing settings.

Files changed:
- config.json +7 -6
- modeling_lsg_bert.py +17 -15
config.json CHANGED

@@ -16,8 +16,14 @@
     "AutoModelForSequenceClassification": "modeling_lsg_bert.LSGBertForSequenceClassification",
     "AutoModelForTokenClassification": "modeling_lsg_bert.LSGBertForTokenClassification"
   },
-  "base_model_prefix": "lsg",
   "block_size": 128,
+  "sparse_block_size": 128,
+  "sparsity_factor": 2,
+  "base_model_prefix": "lsg",
+  "sparsity_type": "norm",
+  "is_decoder": false,
+  "pool_with_global": true,
+  "num_global_tokens": 1,
   "classifier_dropout": null,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
@@ -30,14 +36,9 @@
   "max_position_embeddings": 4096,
   "model_type": "bert",
   "num_attention_heads": 16,
-  "num_global_tokens": 1,
   "num_hidden_layers": 24,
   "pad_token_id": 0,
-  "pool_with_global": true,
   "position_embedding_type": "absolute",
-  "sparse_block_size": 128,
-  "sparsity_factor": 2,
-  "sparsity_type": "norm",
   "torch_dtype": "float32",
   "transformers_version": "4.30.2",
   "type_vocab_size": 2,
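For reference, these keys are consumed by the custom LSGBertConfig defined in modeling_lsg_bert.py (see the auto_map entries above), so checkpoints carrying this config must be loaded with trust_remote_code=True. A minimal sketch, where the repo id "user/lsg-bert-checkpoint" is a hypothetical placeholder, not this repository's actual name:

    # Sketch: loading a checkpoint that ships this config and modeling file.
    from transformers import AutoConfig, AutoModel

    repo_id = "user/lsg-bert-checkpoint"  # hypothetical placeholder id

    # trust_remote_code=True is required because the LSGBert* classes
    # live in modeling_lsg_bert.py inside the repo, not in transformers.
    config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
    print(config.block_size, config.sparse_block_size, config.sparsity_type)

    # Keyword arguments that match config attributes override the values
    # shipped in config.json at load time:
    model = AutoModel.from_pretrained(
        repo_id,
        trust_remote_code=True,
        sparsity_factor=4,  # overrides the "sparsity_factor": 2 above
    )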
modeling_lsg_bert.py CHANGED

@@ -59,16 +59,17 @@ class LSGBertConfig(BertConfig):
 
     def __init__(
         self,
-        adaptive=
+        adaptive=True,
+        is_decoder = False,
         base_model_prefix="lsg",
-        block_size=
+        block_size=128,
         lsh_num_pre_rounds=1,
-        sparse_block_size=0,
         mask_first_token=False,
-        num_global_tokens=
-        pool_with_global=
-
-
+        num_global_tokens=1,
+        pool_with_global=True,
+        sparse_block_size=128,
+        sparsity_factor=2,
+        sparsity_type="norm",
         **kwargs
     ):
         """Constructs LSGBertConfig."""
@@ -85,6 +86,7 @@ class LSGBertConfig(BertConfig):
         self.sparse_block_size = sparse_block_size
         self.sparsity_factor = sparsity_factor
         self.sparsity_type = sparsity_type
+        self.is_decoder = is_decoder
 
         if sparsity_type not in [None, "none", "norm", "lsh", "pooling", "stride", "block_stride"]:
             logger.warning(
@@ -98,20 +100,20 @@ class LSGBertConfig(BertConfig):
                 "[WARNING CONFIG]: sparsity_factor > encoder_attention_heads is not recommended for stride/block_stride sparsity"
             )
 
-
-
-
-
-
+        if self.num_global_tokens < 1:
+            logger.warning(
+                "[WARNING CONFIG]: num_global_tokens < 1 is not compatible, setting num_global_tokens=1"
+            )
+            self.num_global_tokens = 1
         elif self.num_global_tokens > 512:
             logger.warning(
                 "[WARNING CONFIG]: num_global_tokens > 512 is not allowed, setting num_global_tokens=512"
             )
             self.num_global_tokens = 512
 
-
-
-
+        if self.sparsity_factor > 0:
+            assert self.block_size % self.sparsity_factor == 0, "[ERROR CONFIG]: block_size must be divisible by sparsity_factor"
+            assert self.block_size//self.sparsity_factor >= 1, "[ERROR CONFIG]: make sure block_size >= sparsity_factor"
 
         if self.mask_first_token and not pool_with_global:
             logger.warning(
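The practical effect of the re-added guards can be checked by constructing the config directly. A minimal sketch, assuming modeling_lsg_bert.py is importable (e.g. the repository is cloned into the working directory):

    # Sketch: exercising the num_global_tokens clamp and the block_size /
    # sparsity_factor divisibility assert from the hunk above.
    from modeling_lsg_bert import LSGBertConfig

    # num_global_tokens < 1 now logs "[WARNING CONFIG]: ..." and clamps to 1.
    cfg = LSGBertConfig(num_global_tokens=0)
    assert cfg.num_global_tokens == 1

    # A block_size that is not divisible by sparsity_factor trips the new assert.
    try:
        LSGBertConfig(block_size=128, sparsity_factor=3)
    except AssertionError as err:
        print(err)  # [ERROR CONFIG]: block_size must be divisible by sparsity_factor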