guymorganb
/

e5-large-v2-4096-lsg-patched

Sentence Similarity

PyTorch

English

bert

custom_code

Model card Files Files and versions Community

guymorganb committed 17 days ago

Commit

d349c58

1 Parent(s): c6d90f3

updated readme with inference example

Browse files

Files changed (1) hide show

README.md +85 -20

README.md CHANGED Viewed

@@ -12,26 +12,6 @@ A [Local-Sparse-Global (LSG)](https://arxiv.org/abs/2210.15497) version of [intf
 Below is an example to encode queries and passages from the MS-MARCO passage ranking dataset.
-```python
-from sentence_transformers import SentenceTransformer
-model = SentenceTransformer(
-    "guymorganb/e5-large-v2-4096-lsg-patched",
-    {"trust_remote_code": True}
-)
-input_texts = [
-    'query: how much protein should a female eat',
-    'query: summit define',
-    "passage: As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or training for a marathon. Check out the chart below to see how much protein you should be eating each day.",
-    "passage: Definition of summit for English Language Learners. : 1  the highest point of a mountain : the top of a mountain. : 2  the highest level. : 3  a meeting or series of meetings between the leaders of two or more governments."
-]
-embeddings = model.encode(input_texts, normalize_embeddings=True)
-```
-or...
 ```python
 import torch
 import torch.nn.functional as F
@@ -92,6 +72,91 @@ embeddings = F.normalize(embeddings, p=2, dim=1)
 # 8) Example similarity: compare first two (queries) vs. last two (passages)
 scores = (embeddings[:2] @ embeddings[2:].T) * 100
 print("Similarity scores:\n", scores.tolist())
 ```
 @article{wang2022text,

 Below is an example to encode queries and passages from the MS-MARCO passage ranking dataset.
 ```python
 import torch
 import torch.nn.functional as F
 # 8) Example similarity: compare first two (queries) vs. last two (passages)
 scores = (embeddings[:2] @ embeddings[2:].T) * 100
 print("Similarity scores:\n", scores.tolist())
+```
+Or, to test inference across gradually increasing input lengths:
+```python
+# Modified test script
+import torch
+import torch.nn.functional as F
+from transformers import AutoTokenizer, AutoConfig, AutoModel
+import time
+# average_pool is not defined in this snippet; keep your existing average_pool function unchanged
+model_name = "guymorganb/e5-large-v2-4096-lsg-patched"
+# Load with explicit LSG settings
+config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
+config.is_decoder = False
+config.block_size = 4096          # Double the block size #############
+config.sparse_block_size = 4096   # Keep equal to block_size ##############
+config.sparsity_factor = 2
+config.sparsity_type = "norm"
+config.adaptive = True
+config.num_global_tokens = 1
+config.pool_with_global = True
+print("Config after loading:")
+for k, v in config.to_dict().items():
+    print(f"{k}: {v}")
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+model = AutoModel.from_pretrained(
+    model_name,
+    config=config,
+    trust_remote_code=True
+)
+model.eval()
+# Test with gradually increasing lengths
+test_lengths = [
+    10,     # Very short
+    64,     #
+    128,    #
+    256,    #
+    512,    #
+    1024,   #
+    2048,   #
+    3072,   #
+    4096    # Full context
+]
+for length in test_lengths:
+    test_text = f"passage: {'test ' * length }"
+    try:
+        encoding = tokenizer(
+            test_text,
+            max_length=4096,
+            padding=True,
+            # pad_to_multiple_of=4096,  # don't use unless you want a fixed size
+            truncation=True,
+            return_tensors='pt'
+        )
+        actual_length = encoding['input_ids'].size(1)
+        print(f"\nTesting length {actual_length} tokens:")
+        print(f"Input tensor shape: {encoding['input_ids'].shape}")
+        start = time.time()
+        with torch.no_grad():
+            encoding["attention_mask"] = encoding["attention_mask"].float()
+            outputs = model(**encoding, return_dict=True)
+            embeddings = average_pool(outputs.last_hidden_state, encoding["attention_mask"])
+            embeddings = F.normalize(embeddings, p=2, dim=1)
+        end = time.time()
+        print(f"Success! Processing time: {end - start:.3f} seconds")
+        print(f"Embedding shape: {embeddings.shape}")
+    except RuntimeError as e:
+        print(f"Failed at length {actual_length}")
+        print(f"Error: {str(e)}")
+        print(f"Last successful shape: {encoding['input_ids'].shape}")
+        break
+```
 ```
 @article{wang2022text,