Update README.md
README.md CHANGED
@@ -62,4 +62,136 @@ model-index:
      value: 68.83696915006163
    - type: mrr
      value: 79.77644651857584
---

## Introduction

This model is a downstream (reranking) fine-tune of [Qwen/Qwen2-1.5B](https://huggingface.co/Qwen/Qwen2-1.5B).
We leverage the work of the [FlagEmbedding reranker](https://github.com/FlagOpen/FlagEmbedding/tree/master/FlagEmbedding/reranker) and implement it with Qwen2-1.5B as the pretrained base model.

## Usage

```python
from typing import List, Union, Tuple, Dict

import torch
from tqdm import tqdm
import transformers
from transformers import PreTrainedModel, PreTrainedTokenizer, DataCollatorWithPadding
from transformers.models.qwen2 import Qwen2Config, Qwen2ForSequenceClassification


def preprocess(
    sources,
    tokenizer: transformers.PreTrainedTokenizer,
    max_len: int = 1024,
) -> Dict:
    # Render each (query, passage) pair through the chat template.
    input_ids, attention_masks = [], []
    for source in sources:
        messages = [
            {"role": "user", "content": "\n\n".join(source)}
        ]
        text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        model_inputs = tokenizer([text])
        input_id = model_inputs['input_ids'][0]
        attention_mask = model_inputs['attention_mask'][0]
        if len(input_id) > max_len:
            # Truncate from the middle of the passage, keeping the last
            # 5 tokens (the chat-template suffix) intact.
            diff = len(input_id) - max_len
            input_id = input_id[:-5 - diff] + input_id[-5:]
            attention_mask = attention_mask[:-5 - diff] + attention_mask[-5:]
            assert len(input_id) == max_len
        input_ids.append(input_id)
        attention_masks.append(attention_mask)

    return dict(
        input_ids=input_ids,
        attention_mask=attention_masks,
    )

class FlagRerankerCustom:
    def __init__(
        self,
        model: PreTrainedModel,
        tokenizer: PreTrainedTokenizer,
        use_fp16: bool = False,
    ) -> None:
        self.tokenizer = tokenizer
        self.model = model
        self.data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

        if torch.cuda.is_available():
            self.device = torch.device('cuda')
        elif torch.backends.mps.is_available():
            self.device = torch.device('mps')
        else:
            self.device = torch.device('cpu')
            use_fp16 = False  # fp16 inference is not supported on CPU
        if use_fp16:
            self.model.half()

        self.model = self.model.to(self.device)
        self.model.eval()

        self.num_gpus = torch.cuda.device_count()
        if self.num_gpus > 1:
            print(f"----------using {self.num_gpus}*GPUs----------")
            self.model = torch.nn.DataParallel(self.model)

    @torch.no_grad()
    def compute_score(self, sentence_pairs: Union[List[Tuple[str, str]], Tuple[str, str]],
                      batch_size: int = 64, max_length: int = 1024) -> List[float]:
        if self.num_gpus > 0:
            batch_size = batch_size * self.num_gpus

        assert isinstance(sentence_pairs, list)
        if isinstance(sentence_pairs[0], str):
            sentence_pairs = [sentence_pairs]

        all_scores = []
        for start_index in tqdm(range(0, len(sentence_pairs), batch_size),
                                desc="Compute Scores", disable=True):
            sentences_batch = sentence_pairs[start_index:start_index + batch_size]
            inputs = preprocess(sources=sentences_batch, tokenizer=self.tokenizer, max_len=max_length)
            # Convert the dict of lists into a list of per-example dicts for the collator.
            inputs = [dict(zip(inputs, t)) for t in zip(*inputs.values())]
            inputs = self.data_collator(inputs).to(self.device)
            scores = self.model(**inputs, return_dict=True).logits
            # squeeze(-1) keeps a 1-D tensor even for a single-pair batch,
            # so extend() below always receives a list.
            scores = scores.squeeze(-1)
            all_scores.extend(scores.detach().to(torch.float).cpu().numpy().tolist())

        if len(all_scores) == 1:
            return all_scores[0]
        return all_scores

tokenizer = transformers.AutoTokenizer.from_pretrained(
    "neofung/LdIR-Qwen2-reranker-1.5B-large",
    padding_side="right",
)

config = Qwen2Config.from_pretrained(
    "neofung/LdIR-Qwen2-reranker-1.5B-large",
    trust_remote_code=True,
    bf16=True,
)

model = Qwen2ForSequenceClassification.from_pretrained(
    "neofung/LdIR-Qwen2-reranker-1.5B-large",
    config=config,
    trust_remote_code=True,
)

model = FlagRerankerCustom(model=model, tokenizer=tokenizer, use_fp16=False)

pairs = [
    ['what is panda?', 'hi'],
    ['what is panda?', 'The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China.'],
]

model.compute_score(pairs)

# [-2.655318021774292, 11.7670316696167]
```
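
The returned values are raw relevance logits: higher means the passage is a better match for the query (in the example above, the panda description scores far above the unrelated "hi"). If you want scores in [0, 1] or a ranked list of passages, a minimal sketch follows; the `rerank` helper and the sigmoid normalization are illustrative additions on top of `compute_score`, not part of the model's published API:

```python
import math
from typing import List, Tuple

def rerank(reranker: FlagRerankerCustom, query: str, passages: List[str]) -> List[Tuple[str, float]]:
    # Score every (query, passage) pair in one call.
    scores = reranker.compute_score([[query, p] for p in passages])
    if isinstance(scores, float):  # compute_score unwraps single-pair results
        scores = [scores]
    # Optional: squash the unbounded logits into [0, 1] with a sigmoid.
    probs = [1.0 / (1.0 + math.exp(-s)) for s in scores]
    # Highest-scoring passages first.
    return sorted(zip(passages, probs), key=lambda item: item[1], reverse=True)

rerank(model, 'what is panda?', ['hi', 'The giant panda is a bear species endemic to China.'])
```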