from typing import List, Dict, Any
import gradio as gr
import spaces
import torch
import numpy as np
# For the dense embedding
from sentence_transformers import SentenceTransformer
# For SPLADE sparse embeddings (MLM head) and ColBERT token-level embeddings
from transformers import AutoTokenizer, AutoModelForMaskedLM, AutoModel
############################
# 1) Load models & tokenizers
############################
# 1A) Dense embedding model (Nomic)
dense_model = SentenceTransformer(
    "nomic-ai/nomic-embed-text-v1.5",
    trust_remote_code=True,
    device="cuda"  # Force GPU if available
)
# 1B) SPLADE for sparse embeddings
# Using "naver/splade-cocondenser-ensembledistil" as an example
sparse_tokenizer = AutoTokenizer.from_pretrained("naver/splade-cocondenser-ensembledistil")
sparse_model = AutoModelForMaskedLM.from_pretrained("naver/splade-cocondenser-ensembledistil")
sparse_model.eval()
sparse_model.to("cuda") # move to GPU
# 1C) ColBERT model
colbert_tokenizer = AutoTokenizer.from_pretrained("colbert-ir/colbertv2.0")
colbert_model = AutoModel.from_pretrained("colbert-ir/colbertv2.0")
colbert_model.eval()
colbert_model.to("cuda")
############################
# 2) Helper functions
############################
def get_dense_embedding(text: str) -> List[float]:
    """
    Use SentenceTransformer to get a single dense vector.
    """
    # model.encode returns a NumPy array of shape (dim,)
    emb = dense_model.encode(text)
    return emb.tolist()  # convert to Python list for JSON serialization
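# Optional sketch (not used by the app): the nomic-embed-text-v1.5 model card documents
# task prefixes such as "search_document: " and "search_query: " on the input text.
# The helper below shows what prefixed encoding could look like; the function name and
# default prefix are assumptions about your retrieval setup, not part of the app above.
def get_dense_embedding_prefixed(text: str, prefix: str = "search_document: ") -> List[float]:
    """Dense embedding with an explicit Nomic task prefix prepended to the text."""
    emb = dense_model.encode(prefix + text)
    return emb.tolist()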
def get_splade_sparse_embedding(text: str) -> List[float]:
    """
    Compute a sparse embedding with SPLADE (max pooling over tokens).
    Returns a large vector ~ vocabulary size, e.g. 30k+ dims.
    """
    inputs = sparse_tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=256
    )
    inputs = {k: v.to("cuda") for k, v in inputs.items()}
    with torch.no_grad():
        # shape: [batch=1, seq_len, vocab_size]
        logits = sparse_model(**inputs).logits.squeeze(0)  # [seq_len, vocab_size]
    # SPLADE approach for query-like encoding (max over the sequence dimension):
    # for doc encoding, one might do sum instead of max; usage can differ.
    # We'll do max pooling: log(1 + ReLU(logits)) -> max over seq_len
    sparse_emb = torch.log1p(torch.relu(logits)).max(dim=0).values
    # Convert to CPU list
    return sparse_emb.cpu().numpy().tolist()
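# Illustrative sketch (not wired into the app): SPLADE weights live in vocabulary space,
# so you can inspect which terms a text activates by mapping the largest entries back to
# tokens with the same tokenizer. The helper name and top_k default are assumptions.
def top_splade_terms(text: str, top_k: int = 10) -> List[tuple]:
    """Return the top_k (token, weight) pairs from the SPLADE vector for `text`."""
    weights = np.array(get_splade_sparse_embedding(text))
    top_ids = weights.argsort()[::-1][:top_k]
    tokens = sparse_tokenizer.convert_ids_to_tokens(top_ids.tolist())
    return list(zip(tokens, weights[top_ids].tolist()))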
def get_colbert_embedding(text: str) -> List[List[float]]:
    """
    Generate token-level embeddings via ColBERT.
    Returns a list of [token_dim] for each token in the sequence.
    """
    inputs = colbert_tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=180
    )
    inputs = {k: v.to("cuda") for k, v in inputs.items()}
    with torch.no_grad():
        outputs = colbert_model(**inputs)
    # outputs.last_hidden_state: [1, seq_len, hidden_dim]
    emb = outputs.last_hidden_state.squeeze(0)  # shape: [seq_len, hidden_dim]
    # Convert each token embedding to a list
    return emb.cpu().numpy().tolist()
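# Illustrative sketch (not used by the app): ColBERT compares a query and a document via
# "late interaction" (MaxSim) -- for each query token, take its maximum cosine similarity
# against all document tokens, then sum over query tokens. This helper is an assumption
# about how the token-level embeddings returned above might be consumed downstream.
def maxsim_score(query_emb: List[List[float]], doc_emb: List[List[float]]) -> float:
    """Sum over query tokens of the max cosine similarity to any document token."""
    q = np.array(query_emb)
    d = np.array(doc_emb)
    # L2-normalize token vectors so the dot product is a cosine similarity
    q = q / (np.linalg.norm(q, axis=1, keepdims=True) + 1e-12)
    d = d / (np.linalg.norm(d, axis=1, keepdims=True) + 1e-12)
    sim = q @ d.T  # [query_len, doc_len] similarity matrix
    return float(sim.max(axis=1).sum())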
############################
# 3) The main embedding function
############################
@spaces.GPU
def embed(document: str) -> Dict[str, Any]:
    """
    Single function that returns dense, sparse (SPLADE), and ColBERT embeddings.
    Decorated with @spaces.GPU for ephemeral GPU usage in Hugging Face Spaces.
    """
    dense_emb = get_dense_embedding(document)
    sparse_emb = get_splade_sparse_embedding(document)
    colbert_emb = get_colbert_embedding(document)
    return {
        "dense_embedding": dense_emb,
        "sparse_embedding": sparse_emb,
        "colbert_embedding": colbert_emb
    }
############################
# 4) Gradio App
############################
with gr.Blocks() as app:
    gr.Markdown("# Multi-Embedding Generator (Dense, SPLADE, ColBERT)")
    text_input = gr.Textbox(label="Enter text to embed")
    output = gr.JSON(label="Embeddings")
    # On submit, call embed() -> returns JSON
    text_input.submit(embed, inputs=text_input, outputs=output)
if __name__ == "__main__":
    # queue() is optional but useful for concurrency
    app.queue().launch(server_name="0.0.0.0", show_error=True, server_port=7860)