mtesmer-iqnox committed on
Commit
7ed8d70
·
1 Parent(s): 8c0b937
Files changed (1) hide show
  1. app.py +136 -4
app.py CHANGED
@@ -1,7 +1,139 @@
 
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
1
+ from typing import List, Dict, Any
2
+
3
  import gradio as gr
4
+ import spaces
5
+ import torch
6
+ import numpy as np
7
+
8
+ # For the dense embedding
9
+ from sentence_transformers import SentenceTransformer
10
+
11
+ # For SPLADE sparse embedding
12
+ from transformers import AutoTokenizer, AutoModelForMaskedLM
13
+
14
+ # For ColBERT
15
+ from transformers import AutoModel, AutoTokenizer
16
+
17
+
18
+ ############################
19
+ # 1) Load models & tokenizers
20
+ ############################
21
+
22
# 1A) Dense embedding model (Nomic), loaded through sentence-transformers.
# trust_remote_code=True lets the checkpoint's own modelling code run.
# NOTE: the device is hard-coded to "cuda"; there is no CPU fallback here.
dense_model = SentenceTransformer(
    "nomic-ai/nomic-embed-text-v1.5",
    device="cuda",
    trust_remote_code=True,
)

# 1B) SPLADE masked-LM used to produce vocabulary-sized sparse vectors.
_SPLADE_CHECKPOINT = "naver/splade-cocondenser-ensembledistil"
sparse_tokenizer = AutoTokenizer.from_pretrained(_SPLADE_CHECKPOINT)
sparse_model = AutoModelForMaskedLM.from_pretrained(_SPLADE_CHECKPOINT)
sparse_model.to("cuda")  # inference runs on the GPU
sparse_model.eval()  # inference mode: disables dropout and similar layers

# 1C) ColBERT encoder used to produce one vector per token.
_COLBERT_CHECKPOINT = "colbert-ir/colbertv2.0"
colbert_tokenizer = AutoTokenizer.from_pretrained(_COLBERT_CHECKPOINT)
colbert_model = AutoModel.from_pretrained(_COLBERT_CHECKPOINT)
colbert_model.to("cuda")
colbert_model.eval()
41
+
42
+
43
+ ############################
44
+ # 2) Helper functions
45
+ ############################
46
+
47
# Task instruction prefixes recognised by nomic-embed-text-v1.5 (per its model card).
_NOMIC_TASK_PREFIXES = (
    "search_document: ",
    "search_query: ",
    "clustering: ",
    "classification: ",
)


def get_dense_embedding(text: str, task_prefix: str = "search_document: ") -> List[float]:
    """
    Use SentenceTransformer to get a single dense vector for ``text``.

    The Nomic model card states that every input must start with a task
    instruction prefix; encoding raw text without one degrades the embedding.
    The prefix is therefore added here unless the caller already supplied one.

    Args:
        text: The text to embed. If it already begins with one of the known
            Nomic prefixes (``search_document: ``, ``search_query: ``,
            ``clustering: ``, ``classification: ``) it is encoded unchanged.
        task_prefix: Prefix to prepend when ``text`` has none. Pass ``""`` to
            encode the text exactly as given.

    Returns:
        The embedding as a plain Python list of floats (JSON serializable).
    """
    # str.startswith accepts a tuple, so one call covers every known prefix.
    if task_prefix and not text.startswith(_NOMIC_TASK_PREFIXES):
        text = task_prefix + text

    # For a single string, encode() returns a 1-D NumPy array of shape (dim,).
    emb = dense_model.encode(text)
    return emb.tolist()
54
+
55
+
56
def get_splade_sparse_embedding(text: str) -> List[float]:
    """
    Build a SPLADE vector for ``text`` by max pooling over its tokens.

    The text is tokenized (truncated to 256 tokens), run through the SPLADE
    masked-LM, and each token's logits are transformed with
    ``log(1 + ReLU(logit))``. The maximum over the sequence dimension is then
    taken per vocabulary entry.

    Returns:
        A list with one weight per vocabulary entry (roughly 30k+ values).
        It is returned in dense form; most entries are expected to be zero.
    """
    encoded = sparse_tokenizer(
        text,
        truncation=True,
        max_length=256,
        return_tensors="pt",
    )
    # The model lives on the GPU, so its inputs have to be moved there too.
    encoded = {name: tensor.to("cuda") for name, tensor in encoded.items()}

    with torch.no_grad():
        token_logits = sparse_model(**encoded).logits  # [1, seq_len, vocab_size]

    token_logits = token_logits.squeeze(0)  # [seq_len, vocab_size]

    # Max pooling is the query-style SPLADE aggregation; document encoders
    # sometimes sum over the sequence instead.
    activations = torch.log1p(torch.relu(token_logits))
    pooled, _ = activations.max(dim=0)

    # Back to the CPU and into plain Python floats for JSON serialization.
    return pooled.cpu().numpy().tolist()
79
+
80
+
81
def get_colbert_embedding(text: str) -> List[List[float]]:
    """
    Produce one embedding per token of ``text`` with the ColBERT encoder.

    The text is tokenized (truncated to 180 tokens) and the encoder's final
    hidden state is returned for every token position.

    Returns:
        A list of length ``seq_len`` whose items are ``hidden_dim``-long lists
        of floats.

    NOTE(review): the vectors are the encoder's raw ``last_hidden_state``.
    ColBERT's projection layer and normalization are presumably not applied
    when the checkpoint is loaded through ``AutoModel`` -- confirm before
    using these for late-interaction scoring.
    """
    encoded = colbert_tokenizer(
        text,
        truncation=True,
        max_length=180,
        return_tensors="pt",
    )
    # The model lives on the GPU, so its inputs have to be moved there too.
    encoded = {name: tensor.to("cuda") for name, tensor in encoded.items()}

    with torch.no_grad():
        hidden = colbert_model(**encoded).last_hidden_state  # [1, seq_len, hidden_dim]

    per_token = hidden.squeeze(0)  # [seq_len, hidden_dim]

    # Nested Python lists so the result can be serialized as JSON.
    return per_token.cpu().numpy().tolist()
101
+
102
+
103
+ ############################
104
+ # 3) The main embedding function
105
+ ############################
106
+
107
@spaces.GPU
def embed(document: str) -> Dict[str, Any]:
    """
    Compute all three embeddings for ``document`` in one call.

    The function is wrapped with ``@spaces.GPU`` so that Hugging Face Spaces
    provides a GPU for the duration of the call.

    Returns:
        A dict with the keys ``dense_embedding``, ``sparse_embedding`` (SPLADE)
        and ``colbert_embedding``, each holding plain Python lists.
    """
    result: Dict[str, Any] = {}
    result["dense_embedding"] = get_dense_embedding(document)
    result["sparse_embedding"] = get_splade_sparse_embedding(document)
    result["colbert_embedding"] = get_colbert_embedding(document)
    return result
122
+
123
+
124
+ ############################
125
+ # 4) Gradio App
126
+ ############################
127
+
128
with gr.Blocks() as app:
    gr.Markdown("# Multi-Embedding Generator (Dense, SPLADE, ColBERT)")

    # A single text box in, a JSON viewer out.
    text_input = gr.Textbox(label="Enter text to embed")
    output = gr.JSON(label="Embeddings")

    # Submitting the text box runs embed() and shows its dict in the JSON viewer.
    text_input.submit(fn=embed, inputs=text_input, outputs=output)
136
 
137
if __name__ == "__main__":
    # Enabling the queue is optional, but it helps when several requests arrive at once.
    app.queue()
    # Listen on all interfaces at port 7860 and surface errors in the UI.
    app.launch(server_name="0.0.0.0", server_port=7860, show_error=True)