---
license: apache-2.0
base_model:
- answerdotai/ModernBERT-large
datasets:
- codelion/optillm-router-dataset
---

# How to use?

This model is used in [optillm](https://github.com/codelion/optillm) to route between the various approaches based on the prompt.

To use the model with optillm you can just prepend `router` to the model name. E.g. if we set `router-gpt-4o-mini` as the model, it will use `gpt-4o-mini` as the base model.

Otherwise, refer to the code in [router-plugin](https://github.com/codelion/optillm/blob/main/optillm/plugins/router_plugin.py) to see how to use this model for classification.

This model is based on `ModernBERT-large` and is better than the previous [router model](https://huggingface.co/codelion/optillm-bert-uncased) that was based on `bert-large-uncased`.

### Router results on AIME 2024 pass@1

| Model | Score |
|-------|-----:|
| router-gpt4o-mini with codelion/optillm-modernbert-large | 13.33 |
| router-gpt4o-mini with codelion/optillm-bert-uncased | 6.67 |
| gpt4o-mini | 3.33 |

# Usage

To use the model directly you will need to use our `OptILMClassifier` class as we added additional layers to the base model. The additional `effort_encoder` is used to take into account the number of tokens a given approach consumes. Also, note the mapping of the returned index to the `APPROACHES` list as shown below.
```python
import torch
import torch.nn as nn
import torch.nn.functional as F
from huggingface_hub import hf_hub_download
from safetensors import safe_open
from safetensors.torch import load_model
from transformers import AutoConfig, AutoModel, AutoTokenizer

# Constants
MAX_LENGTH = 1024
# Order matters: the classifier's output index i corresponds to APPROACHES[i].
APPROACHES = ["none", "mcts", "bon", "moa", "rto", "z3", "self_consistency", "pvg", "rstar", "cot_reflection", "plansearch", "leap", "re2"]
BASE_MODEL = "answerdotai/ModernBERT-large"
OPTILLM_MODEL_NAME = "codelion/optillm-modernbert-large"


class OptILMClassifier(nn.Module):
    """Base encoder combined with an effort encoder and a linear classification head.

    Attribute names (`base_model`, `effort_encoder`, `classifier`) must stay as
    they are: `load_model` matches them against the keys stored in the
    safetensors checkpoint.
    """

    def __init__(self, base_model, num_labels):
        """Wrap `base_model` and add the effort encoder and classifier layers.

        Args:
            base_model: Encoder exposing `config.hidden_size` and returning an
                output with `last_hidden_state`.
            num_labels: Number of output classes (one per routing approach).
        """
        super().__init__()
        self.base_model = base_model
        # Embeds the scalar effort value into a 64-dimensional vector.
        self.effort_encoder = nn.Sequential(
            nn.Linear(1, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),
        )
        self.classifier = nn.Linear(base_model.config.hidden_size + 64, num_labels)

    def forward(self, input_ids, attention_mask, effort):
        """Return classification logits of shape (batch_size, num_labels).

        Args:
            input_ids: Token ids passed to the base model.
            attention_mask: Attention mask passed to the base model.
            effort: 1-D tensor with one effort value per batch element.
        """
        outputs = self.base_model(input_ids=input_ids, attention_mask=attention_mask)
        # Use the hidden state of the first token as the sequence representation.
        pooled_output = outputs.last_hidden_state[:, 0]  # Shape: (batch_size, hidden_size)
        effort_encoded = self.effort_encoder(effort.unsqueeze(1))  # Shape: (batch_size, 64)
        combined_input = torch.cat((pooled_output, effort_encoded), dim=1)
        logits = self.classifier(combined_input)
        return logits


def load_optillm_model():
    """Build the classifier, load the published weights and return it with its tokenizer.

    Returns:
        A `(model, tokenizer, device)` tuple; `model` has already been moved to `device`.
    """
    device = torch.device(
        "mps" if torch.backends.mps.is_available()
        else "cuda" if torch.cuda.is_available()
        else "cpu"
    )
    # Load the base model
    base_model = AutoModel.from_pretrained(BASE_MODEL)
    # Create the OptILMClassifier
    model = OptILMClassifier(base_model, num_labels=len(APPROACHES))
    model.to(device)
    # Download the safetensors file
    safetensors_path = hf_hub_download(repo_id=OPTILLM_MODEL_NAME, filename="model.safetensors")
    # Load the state dict from the safetensors file
    load_model(model, safetensors_path)

    tokenizer = AutoTokenizer.from_pretrained(OPTILLM_MODEL_NAME)
    return model, tokenizer, device


def preprocess_input(tokenizer, system_prompt, initial_query):
    """Tokenize the combined system prompt and user query.

    Returns:
        An `(input_ids, attention_mask)` pair of PyTorch tensors, padded or
        truncated to `MAX_LENGTH` tokens.
    """
    combined_input = f"{system_prompt}\n\nUser: {initial_query}"
    # Calling the tokenizer directly is the recommended replacement for the
    # legacy `encode_plus` method and accepts the same arguments.
    encoding = tokenizer(
        combined_input,
        add_special_tokens=True,
        max_length=MAX_LENGTH,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt',
    )
    return encoding['input_ids'], encoding['attention_mask']


def predict_approach(model, input_ids, attention_mask, device, effort=0.7):
    """Predict the routing approach for a single tokenized prompt.

    Expects a batch of exactly one example: the predicted index is read with
    `.item()`, which only works on a single-element tensor.

    Returns:
        A `(approach_name, confidence)` tuple, where `confidence` is the
        softmax probability of the predicted approach.
    """
    model.eval()
    with torch.no_grad():
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        effort_tensor = torch.tensor([effort], dtype=torch.float).to(device)

        logits = model(input_ids, attention_mask=attention_mask, effort=effort_tensor)
        probabilities = F.softmax(logits, dim=1)
        predicted_approach_index = torch.argmax(probabilities, dim=1).item()
        confidence = probabilities[0][predicted_approach_index].item()

    return APPROACHES[predicted_approach_index], confidence
```

You can now use the `predict_approach` function to get the predicted approach as follows:

```python
# Example inputs - replace these with your own prompt
system_prompt = "You are a helpful assistant."
initial_query = "How many positive integers less than 100 are divisible by 3 or 5?"

# Load the trained model
router_model, tokenizer, device = load_optillm_model()

# Preprocess the input
input_ids, attention_mask = preprocess_input(tokenizer, system_prompt, initial_query)

# Predict the best approach
predicted_approach, _ = predict_approach(router_model, input_ids, attention_mask, device)

print(f"Router predicted approach: {predicted_approach}")
```