update
- app.py +56 -6
- model_finetuned/config.pth +0 -3
app.py
CHANGED
@@ -34,7 +34,7 @@ title = "H2O AI Predict the LLM"
 
 description =" The objective of this [competition](https://www.kaggle.com/competitions/h2oai-predict-the-llm) was to \
 detect which out of 7 possible LLM models produced a particular response. \n\n\
-This demo is utilizing finetuned HuggingFaceH4/zephyr-7b-beta model for a multiclass classification task. \
+This demo is utilizing finetuned HuggingFaceH4/zephyr-7b-beta model for a multiclass classification task. \n\n \
 Our team's solution is [here](https://www.kaggle.com/competitions/h2oai-predict-the-llm/discussion/453728)"
 
 title = title + "\n" + description
@@ -163,11 +163,61 @@ model = CustomModel()
 ### End Load the model
 
 def do_inference(full_text):
-
-
-
-
-
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model_paths = [
+        'model_finetuned/HuggingFaceH4-zephyr-7b-beta_fold0_best.pth']
+
+    # config_path = ("/home/rashmi/Documents/kaggle/h2oai_predict_llm/src/models_exp56/config.pth")
+
+    def prepare_input(cfg, text):
+        inputs = cfg.tokenizer.encode_plus(
+            text,
+            return_tensors=None,
+            add_special_tokens=True,
+            max_length=CFG.max_len,
+            pad_to_max_length=True,
+            truncation="longest_first",
+        )
+        for k, v in inputs.items():
+            inputs[k] = torch.tensor(v, dtype=torch.long)
+        return inputs
+
+    # model = CustomModel()
+    state = torch.load(model_paths[0], map_location=torch.device("cpu"))
+    model.load_state_dict(state["model"])  # ,strict=False)
+    model.eval()
+    model.to(device)
+
+    inputs = prepare_input(CFG, full_text)
+    inputs["input_ids"] = inputs["input_ids"].reshape(1, -1).to(device)
+    inputs["attention_mask"] = inputs["attention_mask"].reshape(1, -1).to(device)
+
+    with torch.no_grad():
+        with torch.cuda.amp.autocast(
+            enabled=True, dtype=torch.float16, cache_enabled=True
+        ):
+            y_preds = model(inputs)
+    y_preds = y_preds.detach().to("cpu").numpy().astype(np.float32)
+    y_preds = torch.softmax(torch.tensor(y_preds), 1).numpy()
+
+    result = np.argmax(y_preds)
+
+    if result == 0:
+        return "0. llama2-70b-chat"
+    elif result == 1:
+        return "1. wizardLM-13b"
+    elif result == 2:
+        return "2. llama2-13b-chat"
+    elif result == 3:
+        return "3. wizardLM-70b"
+    elif result == 4:
+        return "4. llama2-7b-chat"
+    elif result == 5:
+        return "5. tinyllama-1b-chat"
+    elif result == 6:
+        return "6. mistral-7b-openorca"
+    else:
+        return "Error"
 
 
 
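Note: the new do_inference body leans on names defined earlier in app.py — the CFG config object (providing tokenizer and max_len), the model = CustomModel() instance shown in the hunk context, and the torch/numpy imports. (pad_to_max_length is a deprecated encode_plus argument; newer transformers versions spell it padding="max_length".) A minimal sketch of what CFG is assumed to look like — the class name comes from the diff, but the values below are placeholders, not the Space's actual settings:

from transformers import AutoTokenizer

class CFG:
    # Assumed fields only: do_inference reads CFG.max_len and CFG.tokenizer.
    max_len = 1024  # placeholder; the real limit is set elsewhere in app.py
    tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")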
@@ -175,7 +225,7 @@ def do_submit(question, response):
 def do_submit(question, response):
     full_text = question + " " + response
     result = do_inference(full_text)
-    return
+    return result
 
 @spaces.GPU
 def greet():
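With do_submit now returning the prediction instead of None, it can feed a Gradio output component directly. A hedged sketch of that wiring, assuming a plain two-textbox interface (the Space's real UI code is outside this diff and may differ):

import gradio as gr

# Hypothetical interface: do_submit(question, response) -> label string.
demo = gr.Interface(
    fn=do_submit,
    inputs=[gr.Textbox(label="Question"), gr.Textbox(label="LLM response")],
    outputs=gr.Textbox(label="Predicted source model"),
    title=title,
)
demo.launch()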
model_finetuned/config.pth
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7a170d96950730d29ea3f6fdc76b3beb9bc9806126ee0be945cffbc12419d2c9
-size 3356
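The three removed lines are a raw Git LFS pointer (spec version, SHA-256 oid, byte size), not the tensor data itself — checkpoints in this repo are LFS-tracked. A small hypothetical guard before torch.load, useful because an un-pulled LFS file still contains exactly this kind of text pointer:

def looks_like_lfs_pointer(path):
    # An un-pulled LFS file begins with the pointer header shown above.
    with open(path, "rb") as f:
        return f.read(40).startswith(b"version https://git-lfs.github.com/spec")

# e.g. guard the checkpoint load in do_inference:
# assert not looks_like_lfs_pointer(model_paths[0]), "run `git lfs pull` first"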