Commit
·
3916521
1
Parent(s):
70b37c4
init commit
Browse files- .gitignore +5 -0
- 00-poe-generate-mistral-reasoning.ipynb +1550 -0
- 01-poe-dataset-creation.ipynb +1421 -0
- 02-poe-token-count-exploration.ipynb +435 -0
- 03-poe-eval-sg.ipynb +1981 -0
- autotrain/autotrain.sh +28 -0
- autotrain/autotrain.yml +34 -0
- autotrain/run_autotrain.py +67 -0
- requirements.txt +11 -0
- utilities/clean_up_spaces.ipynb +180 -0
.gitignore
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.ipynb_checkpoints
|
2 |
+
autotrain/.ipynb_checkpoints
|
3 |
+
.idea
|
4 |
+
autotrain/autotrain_configs/*
|
5 |
+
legacy
|
00-poe-generate-mistral-reasoning.ipynb
ADDED
@@ -0,0 +1,1550 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"id": "a51ae982-cad6-4d26-a18a-83472879325b",
|
6 |
+
"metadata": {},
|
7 |
+
"source": [
|
8 |
+
"# Setup"
|
9 |
+
]
|
10 |
+
},
|
11 |
+
{
|
12 |
+
"cell_type": "markdown",
|
13 |
+
"id": "538e8d9b-74c8-4684-80be-623778bdaa90",
|
14 |
+
"metadata": {},
|
15 |
+
"source": [
|
16 |
+
"## Config"
|
17 |
+
]
|
18 |
+
},
|
19 |
+
{
|
20 |
+
"cell_type": "code",
|
21 |
+
"execution_count": 1,
|
22 |
+
"id": "cf368153-eb55-42f4-887f-889c4e9d7c60",
|
23 |
+
"metadata": {},
|
24 |
+
"outputs": [],
|
25 |
+
"source": [
|
26 |
+
"INPUT_DATASET = 'layoric/labeled-multiple-choice-explained'\n",
|
27 |
+
"REVISION = '536f3b8'\n",
|
28 |
+
"OUTPUT_DATASET = 'derek-thomas/labeled-multiple-choice-explained-mistral-reasoning'\n",
|
29 |
+
"NAMESPACE = 'HF-test-lab' # Use your username or wherever you have the ability to deploy inference endpoints"
|
30 |
+
]
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"cell_type": "code",
|
34 |
+
"execution_count": 2,
|
35 |
+
"id": "c5a12bab-97bf-4a1d-989b-89f50a7fc272",
|
36 |
+
"metadata": {},
|
37 |
+
"outputs": [
|
38 |
+
{
|
39 |
+
"data": {
|
40 |
+
"application/vnd.jupyter.widget-view+json": {
|
41 |
+
"model_id": "aed670be03f741699118c33316dce7ec",
|
42 |
+
"version_major": 2,
|
43 |
+
"version_minor": 0
|
44 |
+
},
|
45 |
+
"text/plain": [
|
46 |
+
"VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
|
47 |
+
]
|
48 |
+
},
|
49 |
+
"metadata": {},
|
50 |
+
"output_type": "display_data"
|
51 |
+
}
|
52 |
+
],
|
53 |
+
"source": [
|
54 |
+
"from transformers import AutoTokenizer\n",
|
55 |
+
"from huggingface_hub import get_token, login\n",
|
56 |
+
"\n",
|
57 |
+
"login()"
|
58 |
+
]
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"cell_type": "code",
|
62 |
+
"execution_count": 3,
|
63 |
+
"id": "27c57c67-5574-4412-bde4-fae218e0f87f",
|
64 |
+
"metadata": {},
|
65 |
+
"outputs": [],
|
66 |
+
"source": [
|
67 |
+
"BASE_MODEL = 'mistralai/Mistral-7B-Instruct-v0.3'\n",
|
68 |
+
"tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, token=get_token())"
|
69 |
+
]
|
70 |
+
},
|
71 |
+
{
|
72 |
+
"cell_type": "markdown",
|
73 |
+
"id": "ef2d177e-3651-4747-be3c-648be3a9b5ff",
|
74 |
+
"metadata": {},
|
75 |
+
"source": [
|
76 |
+
"# Mistral Prompt Creation"
|
77 |
+
]
|
78 |
+
},
|
79 |
+
{
|
80 |
+
"cell_type": "code",
|
81 |
+
"execution_count": 4,
|
82 |
+
"id": "447d6673-8bf5-4f56-aed5-d71bf47ac63f",
|
83 |
+
"metadata": {},
|
84 |
+
"outputs": [],
|
85 |
+
"source": [
|
86 |
+
"import pandas as pd\n",
|
87 |
+
"from datasets import load_dataset\n",
|
88 |
+
"import json"
|
89 |
+
]
|
90 |
+
},
|
91 |
+
{
|
92 |
+
"cell_type": "markdown",
|
93 |
+
"id": "08f3bf3d-cedf-4157-aba8-e1dc6d6edd08",
|
94 |
+
"metadata": {},
|
95 |
+
"source": [
|
96 |
+
"## Load and Cleanup Dataset"
|
97 |
+
]
|
98 |
+
},
|
99 |
+
{
|
100 |
+
"cell_type": "code",
|
101 |
+
"execution_count": 5,
|
102 |
+
"id": "bff8c5d2-9a80-4159-ae09-05ce70a53643",
|
103 |
+
"metadata": {},
|
104 |
+
"outputs": [],
|
105 |
+
"source": [
|
106 |
+
"# Load dataset from Hugging Face Hub\n",
|
107 |
+
"dataset = load_dataset(INPUT_DATASET, split='train', revision=REVISION)\n",
|
108 |
+
"\n",
|
109 |
+
"# Convert to pandas dataframe\n",
|
110 |
+
"df = dataset.to_pandas()"
|
111 |
+
]
|
112 |
+
},
|
113 |
+
{
|
114 |
+
"cell_type": "code",
|
115 |
+
"execution_count": 6,
|
116 |
+
"id": "3119c978-9e2e-45f3-8389-d61d6733dbdb",
|
117 |
+
"metadata": {},
|
118 |
+
"outputs": [
|
119 |
+
{
|
120 |
+
"data": {
|
121 |
+
"text/html": [
|
122 |
+
"<div>\n",
|
123 |
+
"<style scoped>\n",
|
124 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
125 |
+
" vertical-align: middle;\n",
|
126 |
+
" }\n",
|
127 |
+
"\n",
|
128 |
+
" .dataframe tbody tr th {\n",
|
129 |
+
" vertical-align: top;\n",
|
130 |
+
" }\n",
|
131 |
+
"\n",
|
132 |
+
" .dataframe thead th {\n",
|
133 |
+
" text-align: right;\n",
|
134 |
+
" }\n",
|
135 |
+
"</style>\n",
|
136 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
137 |
+
" <thead>\n",
|
138 |
+
" <tr style=\"text-align: right;\">\n",
|
139 |
+
" <th></th>\n",
|
140 |
+
" <th>formatted_question</th>\n",
|
141 |
+
" <th>combinedfact</th>\n",
|
142 |
+
" <th>answerKey</th>\n",
|
143 |
+
" <th>topic</th>\n",
|
144 |
+
" <th>__index_level_0__</th>\n",
|
145 |
+
" <th>explanation</th>\n",
|
146 |
+
" </tr>\n",
|
147 |
+
" </thead>\n",
|
148 |
+
" <tbody>\n",
|
149 |
+
" <tr>\n",
|
150 |
+
" <th>0</th>\n",
|
151 |
+
" <td>what is satellite technology used for predicti...</td>\n",
|
152 |
+
" <td>satellite technology is used for predicting wh...</td>\n",
|
153 |
+
" <td>c</td>\n",
|
154 |
+
" <td>technology</td>\n",
|
155 |
+
" <td>35972.0</td>\n",
|
156 |
+
" <td>a) Seconds and minutes: This option is incorre...</td>\n",
|
157 |
+
" </tr>\n",
|
158 |
+
" <tr>\n",
|
159 |
+
" <th>1</th>\n",
|
160 |
+
" <td>what does irradiating food do? (a) relieve pai...</td>\n",
|
161 |
+
" <td>irradiated food improves food safety.</td>\n",
|
162 |
+
" <td>c</td>\n",
|
163 |
+
" <td>food science</td>\n",
|
164 |
+
" <td>20412.0</td>\n",
|
165 |
+
" <td>(a) Relieve pain: This option is not correct b...</td>\n",
|
166 |
+
" </tr>\n",
|
167 |
+
" <tr>\n",
|
168 |
+
" <th>2</th>\n",
|
169 |
+
" <td>what protects a mammal's skin? (a) fiber folli...</td>\n",
|
170 |
+
" <td>fiber follicles protect mammal skin</td>\n",
|
171 |
+
" <td>a</td>\n",
|
172 |
+
" <td>biology</td>\n",
|
173 |
+
" <td>5779.0</td>\n",
|
174 |
+
" <td>b) Exfoliation: Exfoliation is the process of ...</td>\n",
|
175 |
+
" </tr>\n",
|
176 |
+
" <tr>\n",
|
177 |
+
" <th>3</th>\n",
|
178 |
+
" <td>what do earthworms do when a segment breaks of...</td>\n",
|
179 |
+
" <td>earthworms can regrow segments that break off</td>\n",
|
180 |
+
" <td>b</td>\n",
|
181 |
+
" <td>biology</td>\n",
|
182 |
+
" <td>4064.0</td>\n",
|
183 |
+
" <td>a) Dies: This option is not correct because ea...</td>\n",
|
184 |
+
" </tr>\n",
|
185 |
+
" <tr>\n",
|
186 |
+
" <th>4</th>\n",
|
187 |
+
" <td>lightning can be bad for what? (a) the environ...</td>\n",
|
188 |
+
" <td>lightning can be bad for the environment.</td>\n",
|
189 |
+
" <td>a</td>\n",
|
190 |
+
" <td>electricity</td>\n",
|
191 |
+
" <td>34080.0</td>\n",
|
192 |
+
" <td>b) Rainstorms: Lightning is actually a natural...</td>\n",
|
193 |
+
" </tr>\n",
|
194 |
+
" <tr>\n",
|
195 |
+
" <th>...</th>\n",
|
196 |
+
" <td>...</td>\n",
|
197 |
+
" <td>...</td>\n",
|
198 |
+
" <td>...</td>\n",
|
199 |
+
" <td>...</td>\n",
|
200 |
+
" <td>...</td>\n",
|
201 |
+
" <td>...</td>\n",
|
202 |
+
" </tr>\n",
|
203 |
+
" <tr>\n",
|
204 |
+
" <th>9093</th>\n",
|
205 |
+
" <td>organisms that can cause infection do what? (a...</td>\n",
|
206 |
+
" <td>organisms that can cause infection make humans...</td>\n",
|
207 |
+
" <td>g</td>\n",
|
208 |
+
" <td>biology</td>\n",
|
209 |
+
" <td>33485.0</td>\n",
|
210 |
+
" <td>a) Bandaging open sores is not the correct ans...</td>\n",
|
211 |
+
" </tr>\n",
|
212 |
+
" <tr>\n",
|
213 |
+
" <th>9094</th>\n",
|
214 |
+
" <td>fungi are living things that cannot make thei...</td>\n",
|
215 |
+
" <td>fungi are living things that cannot make their...</td>\n",
|
216 |
+
" <td>a</td>\n",
|
217 |
+
" <td>biology</td>\n",
|
218 |
+
" <td>12097.0</td>\n",
|
219 |
+
" <td>b) Fungi are living things that can make their...</td>\n",
|
220 |
+
" </tr>\n",
|
221 |
+
" <tr>\n",
|
222 |
+
" <th>9095</th>\n",
|
223 |
+
" <td>an overheated body can use water for: (a) meta...</td>\n",
|
224 |
+
" <td>the evaporation of water from the skin cools t...</td>\n",
|
225 |
+
" <td>g</td>\n",
|
226 |
+
" <td>biology</td>\n",
|
227 |
+
" <td>6522.0</td>\n",
|
228 |
+
" <td>a) Metabolic reaction: This option is incorrec...</td>\n",
|
229 |
+
" </tr>\n",
|
230 |
+
" <tr>\n",
|
231 |
+
" <th>9096</th>\n",
|
232 |
+
" <td>what is essential for cellular respiration for...</td>\n",
|
233 |
+
" <td>plants are essential for cellular respiration ...</td>\n",
|
234 |
+
" <td>f</td>\n",
|
235 |
+
" <td>biology</td>\n",
|
236 |
+
" <td>27144.0</td>\n",
|
237 |
+
" <td>a) Electrons are involved in cellular respirat...</td>\n",
|
238 |
+
" </tr>\n",
|
239 |
+
" <tr>\n",
|
240 |
+
" <th>9097</th>\n",
|
241 |
+
" <td>what helps insulate and protect the body? (a) ...</td>\n",
|
242 |
+
" <td>living cells in follicles help insulate and pr...</td>\n",
|
243 |
+
" <td>b</td>\n",
|
244 |
+
" <td>biology</td>\n",
|
245 |
+
" <td>18522.0</td>\n",
|
246 |
+
" <td>a) H2O: Water is essential for life, but it do...</td>\n",
|
247 |
+
" </tr>\n",
|
248 |
+
" </tbody>\n",
|
249 |
+
"</table>\n",
|
250 |
+
"<p>9098 rows × 6 columns</p>\n",
|
251 |
+
"</div>"
|
252 |
+
],
|
253 |
+
"text/plain": [
|
254 |
+
" formatted_question \\\n",
|
255 |
+
"0 what is satellite technology used for predicti... \n",
|
256 |
+
"1 what does irradiating food do? (a) relieve pai... \n",
|
257 |
+
"2 what protects a mammal's skin? (a) fiber folli... \n",
|
258 |
+
"3 what do earthworms do when a segment breaks of... \n",
|
259 |
+
"4 lightning can be bad for what? (a) the environ... \n",
|
260 |
+
"... ... \n",
|
261 |
+
"9093 organisms that can cause infection do what? (a... \n",
|
262 |
+
"9094 fungi are living things that cannot make thei... \n",
|
263 |
+
"9095 an overheated body can use water for: (a) meta... \n",
|
264 |
+
"9096 what is essential for cellular respiration for... \n",
|
265 |
+
"9097 what helps insulate and protect the body? (a) ... \n",
|
266 |
+
"\n",
|
267 |
+
" combinedfact answerKey \\\n",
|
268 |
+
"0 satellite technology is used for predicting wh... c \n",
|
269 |
+
"1 irradiated food improves food safety. c \n",
|
270 |
+
"2 fiber follicles protect mammal skin a \n",
|
271 |
+
"3 earthworms can regrow segments that break off b \n",
|
272 |
+
"4 lightning can be bad for the environment. a \n",
|
273 |
+
"... ... ... \n",
|
274 |
+
"9093 organisms that can cause infection make humans... g \n",
|
275 |
+
"9094 fungi are living things that cannot make their... a \n",
|
276 |
+
"9095 the evaporation of water from the skin cools t... g \n",
|
277 |
+
"9096 plants are essential for cellular respiration ... f \n",
|
278 |
+
"9097 living cells in follicles help insulate and pr... b \n",
|
279 |
+
"\n",
|
280 |
+
" topic __index_level_0__ \\\n",
|
281 |
+
"0 technology 35972.0 \n",
|
282 |
+
"1 food science 20412.0 \n",
|
283 |
+
"2 biology 5779.0 \n",
|
284 |
+
"3 biology 4064.0 \n",
|
285 |
+
"4 electricity 34080.0 \n",
|
286 |
+
"... ... ... \n",
|
287 |
+
"9093 biology 33485.0 \n",
|
288 |
+
"9094 biology 12097.0 \n",
|
289 |
+
"9095 biology 6522.0 \n",
|
290 |
+
"9096 biology 27144.0 \n",
|
291 |
+
"9097 biology 18522.0 \n",
|
292 |
+
"\n",
|
293 |
+
" explanation \n",
|
294 |
+
"0 a) Seconds and minutes: This option is incorre... \n",
|
295 |
+
"1 (a) Relieve pain: This option is not correct b... \n",
|
296 |
+
"2 b) Exfoliation: Exfoliation is the process of ... \n",
|
297 |
+
"3 a) Dies: This option is not correct because ea... \n",
|
298 |
+
"4 b) Rainstorms: Lightning is actually a natural... \n",
|
299 |
+
"... ... \n",
|
300 |
+
"9093 a) Bandaging open sores is not the correct ans... \n",
|
301 |
+
"9094 b) Fungi are living things that can make their... \n",
|
302 |
+
"9095 a) Metabolic reaction: This option is incorrec... \n",
|
303 |
+
"9096 a) Electrons are involved in cellular respirat... \n",
|
304 |
+
"9097 a) H2O: Water is essential for life, but it do... \n",
|
305 |
+
"\n",
|
306 |
+
"[9098 rows x 6 columns]"
|
307 |
+
]
|
308 |
+
},
|
309 |
+
"execution_count": 6,
|
310 |
+
"metadata": {},
|
311 |
+
"output_type": "execute_result"
|
312 |
+
}
|
313 |
+
],
|
314 |
+
"source": [
|
315 |
+
"df"
|
316 |
+
]
|
317 |
+
},
|
318 |
+
{
|
319 |
+
"cell_type": "code",
|
320 |
+
"execution_count": 7,
|
321 |
+
"id": "a1ec5678-13a5-4602-9d67-8406248fa414",
|
322 |
+
"metadata": {},
|
323 |
+
"outputs": [
|
324 |
+
{
|
325 |
+
"name": "stdout",
|
326 |
+
"output_type": "stream",
|
327 |
+
"text": [
|
328 |
+
"Before Cleaning: 9098 rows\n",
|
329 |
+
"After Cleaning: 8413 rows\n"
|
330 |
+
]
|
331 |
+
}
|
332 |
+
],
|
333 |
+
"source": [
|
334 |
+
"print(f\"Before Cleaning: {len(df)} rows\")\n",
|
335 |
+
"\n",
|
336 |
+
"# Drop the __index_level_0__ column if it exists\n",
|
337 |
+
"df.drop(columns=['__index_level_0__'], errors='ignore', inplace=True)\n",
|
338 |
+
"\n",
|
339 |
+
"# Ensure all values in 'formatted_question' are strings\n",
|
340 |
+
"df['formatted_question'] = df['formatted_question'].astype(str)\n",
|
341 |
+
"\n",
|
342 |
+
"# Filter on topic so that we can stratify later\n",
|
343 |
+
"df['topic'] = df['topic'].fillna('unknown')\n",
|
344 |
+
"topic_counts = df['topic'].value_counts()\n",
|
345 |
+
"valid_topics = topic_counts[topic_counts >= 5].index\n",
|
346 |
+
"df = df[df['topic'].isin(valid_topics)]\n",
|
347 |
+
"df = df[~df['topic'].isin(['unknown'])]\n",
|
348 |
+
"\n",
|
349 |
+
"df.rename(columns={\n",
|
350 |
+
" 'combinedfact': 'combined_fact',\n",
|
351 |
+
" 'answerKey': 'answer_key' \n",
|
352 |
+
"}, inplace=True)\n",
|
353 |
+
"\n",
|
354 |
+
"\n",
|
355 |
+
"# Drop the index level 0 if it exists\n",
|
356 |
+
"df.reset_index(drop=True, inplace=True)\n",
|
357 |
+
"print(f\"After Cleaning: {len(df)} rows\")"
|
358 |
+
]
|
359 |
+
},
|
360 |
+
{
|
361 |
+
"cell_type": "code",
|
362 |
+
"execution_count": 8,
|
363 |
+
"id": "71337823-91b9-4946-95cd-71810f5c5101",
|
364 |
+
"metadata": {},
|
365 |
+
"outputs": [
|
366 |
+
{
|
367 |
+
"data": {
|
368 |
+
"text/html": [
|
369 |
+
"<div>\n",
|
370 |
+
"<style scoped>\n",
|
371 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
372 |
+
" vertical-align: middle;\n",
|
373 |
+
" }\n",
|
374 |
+
"\n",
|
375 |
+
" .dataframe tbody tr th {\n",
|
376 |
+
" vertical-align: top;\n",
|
377 |
+
" }\n",
|
378 |
+
"\n",
|
379 |
+
" .dataframe thead th {\n",
|
380 |
+
" text-align: right;\n",
|
381 |
+
" }\n",
|
382 |
+
"</style>\n",
|
383 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
384 |
+
" <thead>\n",
|
385 |
+
" <tr style=\"text-align: right;\">\n",
|
386 |
+
" <th></th>\n",
|
387 |
+
" <th>formatted_question</th>\n",
|
388 |
+
" <th>combinedfact</th>\n",
|
389 |
+
" <th>answer_key</th>\n",
|
390 |
+
" <th>topic</th>\n",
|
391 |
+
" <th>explanation</th>\n",
|
392 |
+
" </tr>\n",
|
393 |
+
" </thead>\n",
|
394 |
+
" <tbody>\n",
|
395 |
+
" <tr>\n",
|
396 |
+
" <th>0</th>\n",
|
397 |
+
" <td>what is satellite technology used for predicti...</td>\n",
|
398 |
+
" <td>satellite technology is used for predicting wh...</td>\n",
|
399 |
+
" <td>c</td>\n",
|
400 |
+
" <td>technology</td>\n",
|
401 |
+
" <td>a) Seconds and minutes: This option is incorre...</td>\n",
|
402 |
+
" </tr>\n",
|
403 |
+
" <tr>\n",
|
404 |
+
" <th>1</th>\n",
|
405 |
+
" <td>what does irradiating food do? (a) relieve pai...</td>\n",
|
406 |
+
" <td>irradiated food improves food safety.</td>\n",
|
407 |
+
" <td>c</td>\n",
|
408 |
+
" <td>food science</td>\n",
|
409 |
+
" <td>(a) Relieve pain: This option is not correct b...</td>\n",
|
410 |
+
" </tr>\n",
|
411 |
+
" <tr>\n",
|
412 |
+
" <th>2</th>\n",
|
413 |
+
" <td>what protects a mammal's skin? (a) fiber folli...</td>\n",
|
414 |
+
" <td>fiber follicles protect mammal skin</td>\n",
|
415 |
+
" <td>a</td>\n",
|
416 |
+
" <td>biology</td>\n",
|
417 |
+
" <td>b) Exfoliation: Exfoliation is the process of ...</td>\n",
|
418 |
+
" </tr>\n",
|
419 |
+
" <tr>\n",
|
420 |
+
" <th>3</th>\n",
|
421 |
+
" <td>what do earthworms do when a segment breaks of...</td>\n",
|
422 |
+
" <td>earthworms can regrow segments that break off</td>\n",
|
423 |
+
" <td>b</td>\n",
|
424 |
+
" <td>biology</td>\n",
|
425 |
+
" <td>a) Dies: This option is not correct because ea...</td>\n",
|
426 |
+
" </tr>\n",
|
427 |
+
" <tr>\n",
|
428 |
+
" <th>4</th>\n",
|
429 |
+
" <td>lightning can be bad for what? (a) the environ...</td>\n",
|
430 |
+
" <td>lightning can be bad for the environment.</td>\n",
|
431 |
+
" <td>a</td>\n",
|
432 |
+
" <td>electricity</td>\n",
|
433 |
+
" <td>b) Rainstorms: Lightning is actually a natural...</td>\n",
|
434 |
+
" </tr>\n",
|
435 |
+
" <tr>\n",
|
436 |
+
" <th>...</th>\n",
|
437 |
+
" <td>...</td>\n",
|
438 |
+
" <td>...</td>\n",
|
439 |
+
" <td>...</td>\n",
|
440 |
+
" <td>...</td>\n",
|
441 |
+
" <td>...</td>\n",
|
442 |
+
" </tr>\n",
|
443 |
+
" <tr>\n",
|
444 |
+
" <th>8408</th>\n",
|
445 |
+
" <td>organisms that can cause infection do what? (a...</td>\n",
|
446 |
+
" <td>organisms that can cause infection make humans...</td>\n",
|
447 |
+
" <td>g</td>\n",
|
448 |
+
" <td>biology</td>\n",
|
449 |
+
" <td>a) Bandaging open sores is not the correct ans...</td>\n",
|
450 |
+
" </tr>\n",
|
451 |
+
" <tr>\n",
|
452 |
+
" <th>8409</th>\n",
|
453 |
+
" <td>fungi are living things that cannot make thei...</td>\n",
|
454 |
+
" <td>fungi are living things that cannot make their...</td>\n",
|
455 |
+
" <td>a</td>\n",
|
456 |
+
" <td>biology</td>\n",
|
457 |
+
" <td>b) Fungi are living things that can make their...</td>\n",
|
458 |
+
" </tr>\n",
|
459 |
+
" <tr>\n",
|
460 |
+
" <th>8410</th>\n",
|
461 |
+
" <td>an overheated body can use water for: (a) meta...</td>\n",
|
462 |
+
" <td>the evaporation of water from the skin cools t...</td>\n",
|
463 |
+
" <td>g</td>\n",
|
464 |
+
" <td>biology</td>\n",
|
465 |
+
" <td>a) Metabolic reaction: This option is incorrec...</td>\n",
|
466 |
+
" </tr>\n",
|
467 |
+
" <tr>\n",
|
468 |
+
" <th>8411</th>\n",
|
469 |
+
" <td>what is essential for cellular respiration for...</td>\n",
|
470 |
+
" <td>plants are essential for cellular respiration ...</td>\n",
|
471 |
+
" <td>f</td>\n",
|
472 |
+
" <td>biology</td>\n",
|
473 |
+
" <td>a) Electrons are involved in cellular respirat...</td>\n",
|
474 |
+
" </tr>\n",
|
475 |
+
" <tr>\n",
|
476 |
+
" <th>8412</th>\n",
|
477 |
+
" <td>what helps insulate and protect the body? (a) ...</td>\n",
|
478 |
+
" <td>living cells in follicles help insulate and pr...</td>\n",
|
479 |
+
" <td>b</td>\n",
|
480 |
+
" <td>biology</td>\n",
|
481 |
+
" <td>a) H2O: Water is essential for life, but it do...</td>\n",
|
482 |
+
" </tr>\n",
|
483 |
+
" </tbody>\n",
|
484 |
+
"</table>\n",
|
485 |
+
"<p>8413 rows × 5 columns</p>\n",
|
486 |
+
"</div>"
|
487 |
+
],
|
488 |
+
"text/plain": [
|
489 |
+
" formatted_question \\\n",
|
490 |
+
"0 what is satellite technology used for predicti... \n",
|
491 |
+
"1 what does irradiating food do? (a) relieve pai... \n",
|
492 |
+
"2 what protects a mammal's skin? (a) fiber folli... \n",
|
493 |
+
"3 what do earthworms do when a segment breaks of... \n",
|
494 |
+
"4 lightning can be bad for what? (a) the environ... \n",
|
495 |
+
"... ... \n",
|
496 |
+
"8408 organisms that can cause infection do what? (a... \n",
|
497 |
+
"8409 fungi are living things that cannot make thei... \n",
|
498 |
+
"8410 an overheated body can use water for: (a) meta... \n",
|
499 |
+
"8411 what is essential for cellular respiration for... \n",
|
500 |
+
"8412 what helps insulate and protect the body? (a) ... \n",
|
501 |
+
"\n",
|
502 |
+
" combinedfact answer_key \\\n",
|
503 |
+
"0 satellite technology is used for predicting wh... c \n",
|
504 |
+
"1 irradiated food improves food safety. c \n",
|
505 |
+
"2 fiber follicles protect mammal skin a \n",
|
506 |
+
"3 earthworms can regrow segments that break off b \n",
|
507 |
+
"4 lightning can be bad for the environment. a \n",
|
508 |
+
"... ... ... \n",
|
509 |
+
"8408 organisms that can cause infection make humans... g \n",
|
510 |
+
"8409 fungi are living things that cannot make their... a \n",
|
511 |
+
"8410 the evaporation of water from the skin cools t... g \n",
|
512 |
+
"8411 plants are essential for cellular respiration ... f \n",
|
513 |
+
"8412 living cells in follicles help insulate and pr... b \n",
|
514 |
+
"\n",
|
515 |
+
" topic explanation \n",
|
516 |
+
"0 technology a) Seconds and minutes: This option is incorre... \n",
|
517 |
+
"1 food science (a) Relieve pain: This option is not correct b... \n",
|
518 |
+
"2 biology b) Exfoliation: Exfoliation is the process of ... \n",
|
519 |
+
"3 biology a) Dies: This option is not correct because ea... \n",
|
520 |
+
"4 electricity b) Rainstorms: Lightning is actually a natural... \n",
|
521 |
+
"... ... ... \n",
|
522 |
+
"8408 biology a) Bandaging open sores is not the correct ans... \n",
|
523 |
+
"8409 biology b) Fungi are living things that can make their... \n",
|
524 |
+
"8410 biology a) Metabolic reaction: This option is incorrec... \n",
|
525 |
+
"8411 biology a) Electrons are involved in cellular respirat... \n",
|
526 |
+
"8412 biology a) H2O: Water is essential for life, but it do... \n",
|
527 |
+
"\n",
|
528 |
+
"[8413 rows x 5 columns]"
|
529 |
+
]
|
530 |
+
},
|
531 |
+
"execution_count": 8,
|
532 |
+
"metadata": {},
|
533 |
+
"output_type": "execute_result"
|
534 |
+
}
|
535 |
+
],
|
536 |
+
"source": [
|
537 |
+
"df"
|
538 |
+
]
|
539 |
+
},
|
540 |
+
{
|
541 |
+
"cell_type": "markdown",
|
542 |
+
"id": "eb6efb48-1282-4539-a726-450a17fd7f12",
|
543 |
+
"metadata": {},
|
544 |
+
"source": [
|
545 |
+
"## Process and Format Questions and Answer Choices\n",
|
546 |
+
"I do some minor cleaning since the original dataset is a bit messy."
|
547 |
+
]
|
548 |
+
},
|
549 |
+
{
|
550 |
+
"cell_type": "code",
|
551 |
+
"execution_count": 9,
|
552 |
+
"id": "c3ac53c9-664a-4d8e-9820-7be788d120fa",
|
553 |
+
"metadata": {},
|
554 |
+
"outputs": [],
|
555 |
+
"source": [
|
556 |
+
"# Split before the first occurrence of \"(a)\" to separate question and answer choices\n",
|
557 |
+
"df['topic'] = df['topic'].str.capitalize()\n",
|
558 |
+
"df['question_text'] = df['formatted_question'].str.extract(r'^(.*?)(?=\\(a\\)|$)')[0]\n",
|
559 |
+
"df['answer_choices'] = df['formatted_question'].str.extract(r'(\\(a\\).*)$')[0]\n",
|
560 |
+
"\n",
|
561 |
+
"# Function to check if the text needs a question mark\n",
|
562 |
+
"def add_question_mark_if_needed(text):\n",
|
563 |
+
" text = text.strip().capitalize()\n",
|
564 |
+
" if not text.endswith('?'):\n",
|
565 |
+
" return text + '?'\n",
|
566 |
+
" return text\n",
|
567 |
+
"\n",
|
568 |
+
"# Apply the function to the 'question_text' column\n",
|
569 |
+
"df['question_text'] = df['question_text'].apply(add_question_mark_if_needed)\n",
|
570 |
+
"\n",
|
571 |
+
"# Function to capitalize each answer choice item\n",
|
572 |
+
"def capitalize_answer_choices(answer_choices):\n",
|
573 |
+
" import re\n",
|
574 |
+
" pattern = r'\\(([a-z])\\)\\s*([^\\(]+)'\n",
|
575 |
+
" matches = re.findall(pattern, answer_choices)\n",
|
576 |
+
" capitalized_choices = []\n",
|
577 |
+
" for match in matches:\n",
|
578 |
+
" label, text = match\n",
|
579 |
+
" capitalized_choices.append(f'({label}) {text.strip().capitalize()}')\n",
|
580 |
+
" return ' '.join(capitalized_choices)\n",
|
581 |
+
"\n",
|
582 |
+
"# Apply function to format answer choices\n",
|
583 |
+
"df['answer_choices'] = df['answer_choices'].apply(capitalize_answer_choices)"
|
584 |
+
]
|
585 |
+
},
|
586 |
+
{
|
587 |
+
"cell_type": "code",
|
588 |
+
"execution_count": 10,
|
589 |
+
"id": "f96ed8a7-77d2-4ee4-b3a1-7dcb30f5966e",
|
590 |
+
"metadata": {},
|
591 |
+
"outputs": [
|
592 |
+
{
|
593 |
+
"data": {
|
594 |
+
"text/html": [
|
595 |
+
"<div>\n",
|
596 |
+
"<style scoped>\n",
|
597 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
598 |
+
" vertical-align: middle;\n",
|
599 |
+
" }\n",
|
600 |
+
"\n",
|
601 |
+
" .dataframe tbody tr th {\n",
|
602 |
+
" vertical-align: top;\n",
|
603 |
+
" }\n",
|
604 |
+
"\n",
|
605 |
+
" .dataframe thead th {\n",
|
606 |
+
" text-align: right;\n",
|
607 |
+
" }\n",
|
608 |
+
"</style>\n",
|
609 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
610 |
+
" <thead>\n",
|
611 |
+
" <tr style=\"text-align: right;\">\n",
|
612 |
+
" <th></th>\n",
|
613 |
+
" <th>formatted_question</th>\n",
|
614 |
+
" <th>combinedfact</th>\n",
|
615 |
+
" <th>answer_key</th>\n",
|
616 |
+
" <th>topic</th>\n",
|
617 |
+
" <th>explanation</th>\n",
|
618 |
+
" <th>question_text</th>\n",
|
619 |
+
" <th>answer_choices</th>\n",
|
620 |
+
" </tr>\n",
|
621 |
+
" </thead>\n",
|
622 |
+
" <tbody>\n",
|
623 |
+
" <tr>\n",
|
624 |
+
" <th>0</th>\n",
|
625 |
+
" <td>what is satellite technology used for predicti...</td>\n",
|
626 |
+
" <td>satellite technology is used for predicting wh...</td>\n",
|
627 |
+
" <td>c</td>\n",
|
628 |
+
" <td>Technology</td>\n",
|
629 |
+
" <td>a) Seconds and minutes: This option is incorre...</td>\n",
|
630 |
+
" <td>What is satellite technology used for predicting?</td>\n",
|
631 |
+
" <td>(a) Seconds and minutes (b) The strength and m...</td>\n",
|
632 |
+
" </tr>\n",
|
633 |
+
" <tr>\n",
|
634 |
+
" <th>1</th>\n",
|
635 |
+
" <td>what does irradiating food do? (a) relieve pai...</td>\n",
|
636 |
+
" <td>irradiated food improves food safety.</td>\n",
|
637 |
+
" <td>c</td>\n",
|
638 |
+
" <td>Food science</td>\n",
|
639 |
+
" <td>(a) Relieve pain: This option is not correct b...</td>\n",
|
640 |
+
" <td>What does irradiating food do?</td>\n",
|
641 |
+
" <td>(a) Relieve pain (b) Enhance food's nutrients ...</td>\n",
|
642 |
+
" </tr>\n",
|
643 |
+
" <tr>\n",
|
644 |
+
" <th>2</th>\n",
|
645 |
+
" <td>what protects a mammal's skin? (a) fiber folli...</td>\n",
|
646 |
+
" <td>fiber follicles protect mammal skin</td>\n",
|
647 |
+
" <td>a</td>\n",
|
648 |
+
" <td>Biology</td>\n",
|
649 |
+
" <td>b) Exfoliation: Exfoliation is the process of ...</td>\n",
|
650 |
+
" <td>What protects a mammal's skin?</td>\n",
|
651 |
+
" <td>(a) Fiber follicles (b) Exfoliation (c) Resist...</td>\n",
|
652 |
+
" </tr>\n",
|
653 |
+
" <tr>\n",
|
654 |
+
" <th>3</th>\n",
|
655 |
+
" <td>what do earthworms do when a segment breaks of...</td>\n",
|
656 |
+
" <td>earthworms can regrow segments that break off</td>\n",
|
657 |
+
" <td>b</td>\n",
|
658 |
+
" <td>Biology</td>\n",
|
659 |
+
" <td>a) Dies: This option is not correct because ea...</td>\n",
|
660 |
+
" <td>What do earthworms do when a segment breaks off?</td>\n",
|
661 |
+
" <td>(a) Dies (b) Regrows it (c) Reproduces (d) Sed...</td>\n",
|
662 |
+
" </tr>\n",
|
663 |
+
" <tr>\n",
|
664 |
+
" <th>4</th>\n",
|
665 |
+
" <td>lightning can be bad for what? (a) the environ...</td>\n",
|
666 |
+
" <td>lightning can be bad for the environment.</td>\n",
|
667 |
+
" <td>a</td>\n",
|
668 |
+
" <td>Electricity</td>\n",
|
669 |
+
" <td>b) Rainstorms: Lightning is actually a natural...</td>\n",
|
670 |
+
" <td>Lightning can be bad for what?</td>\n",
|
671 |
+
" <td>(a) The environment (b) Rainstorms (c) Destruc...</td>\n",
|
672 |
+
" </tr>\n",
|
673 |
+
" <tr>\n",
|
674 |
+
" <th>...</th>\n",
|
675 |
+
" <td>...</td>\n",
|
676 |
+
" <td>...</td>\n",
|
677 |
+
" <td>...</td>\n",
|
678 |
+
" <td>...</td>\n",
|
679 |
+
" <td>...</td>\n",
|
680 |
+
" <td>...</td>\n",
|
681 |
+
" <td>...</td>\n",
|
682 |
+
" </tr>\n",
|
683 |
+
" <tr>\n",
|
684 |
+
" <th>8408</th>\n",
|
685 |
+
" <td>organisms that can cause infection do what? (a...</td>\n",
|
686 |
+
" <td>organisms that can cause infection make humans...</td>\n",
|
687 |
+
" <td>g</td>\n",
|
688 |
+
" <td>Biology</td>\n",
|
689 |
+
" <td>a) Bandaging open sores is not the correct ans...</td>\n",
|
690 |
+
" <td>Organisms that can cause infection do what?</td>\n",
|
691 |
+
" <td>(a) Bandage open sores (b) Keep flesh clean (c...</td>\n",
|
692 |
+
" </tr>\n",
|
693 |
+
" <tr>\n",
|
694 |
+
" <th>8409</th>\n",
|
695 |
+
" <td>fungi are living things that cannot make thei...</td>\n",
|
696 |
+
" <td>fungi are living things that cannot make their...</td>\n",
|
697 |
+
" <td>a</td>\n",
|
698 |
+
" <td>Biology</td>\n",
|
699 |
+
" <td>b) Fungi are living things that can make their...</td>\n",
|
700 |
+
" <td>Fungi are living things that cannot make their...</td>\n",
|
701 |
+
" <td>(a) Food (b) Cells (c) Energy (d) Fruits (e) H...</td>\n",
|
702 |
+
" </tr>\n",
|
703 |
+
" <tr>\n",
|
704 |
+
" <th>8410</th>\n",
|
705 |
+
" <td>an overheated body can use water for: (a) meta...</td>\n",
|
706 |
+
" <td>the evaporation of water from the skin cools t...</td>\n",
|
707 |
+
" <td>g</td>\n",
|
708 |
+
" <td>Biology</td>\n",
|
709 |
+
" <td>a) Metabolic reaction: This option is incorrec...</td>\n",
|
710 |
+
" <td>An overheated body can use water for:?</td>\n",
|
711 |
+
" <td>(a) Metabolic reaction (b) Dehydrating (c) Rai...</td>\n",
|
712 |
+
" </tr>\n",
|
713 |
+
" <tr>\n",
|
714 |
+
" <th>8411</th>\n",
|
715 |
+
" <td>what is essential for cellular respiration for...</td>\n",
|
716 |
+
" <td>plants are essential for cellular respiration ...</td>\n",
|
717 |
+
" <td>f</td>\n",
|
718 |
+
" <td>Biology</td>\n",
|
719 |
+
" <td>a) Electrons are involved in cellular respirat...</td>\n",
|
720 |
+
" <td>What is essential for cellular respiration for...</td>\n",
|
721 |
+
" <td>(a) Electron (b) Glucose (c) Energy (d) Energy...</td>\n",
|
722 |
+
" </tr>\n",
|
723 |
+
" <tr>\n",
|
724 |
+
" <th>8412</th>\n",
|
725 |
+
" <td>what helps insulate and protect the body? (a) ...</td>\n",
|
726 |
+
" <td>living cells in follicles help insulate and pr...</td>\n",
|
727 |
+
" <td>b</td>\n",
|
728 |
+
" <td>Biology</td>\n",
|
729 |
+
" <td>a) H2O: Water is essential for life, but it do...</td>\n",
|
730 |
+
" <td>What helps insulate and protect the body?</td>\n",
|
731 |
+
" <td>(a) H2o (b) Living cells in follicles (c) Laye...</td>\n",
|
732 |
+
" </tr>\n",
|
733 |
+
" </tbody>\n",
|
734 |
+
"</table>\n",
|
735 |
+
"<p>8413 rows × 7 columns</p>\n",
|
736 |
+
"</div>"
|
737 |
+
],
|
738 |
+
"text/plain": [
|
739 |
+
" formatted_question \\\n",
|
740 |
+
"0 what is satellite technology used for predicti... \n",
|
741 |
+
"1 what does irradiating food do? (a) relieve pai... \n",
|
742 |
+
"2 what protects a mammal's skin? (a) fiber folli... \n",
|
743 |
+
"3 what do earthworms do when a segment breaks of... \n",
|
744 |
+
"4 lightning can be bad for what? (a) the environ... \n",
|
745 |
+
"... ... \n",
|
746 |
+
"8408 organisms that can cause infection do what? (a... \n",
|
747 |
+
"8409 fungi are living things that cannot make thei... \n",
|
748 |
+
"8410 an overheated body can use water for: (a) meta... \n",
|
749 |
+
"8411 what is essential for cellular respiration for... \n",
|
750 |
+
"8412 what helps insulate and protect the body? (a) ... \n",
|
751 |
+
"\n",
|
752 |
+
" combinedfact answer_key \\\n",
|
753 |
+
"0 satellite technology is used for predicting wh... c \n",
|
754 |
+
"1 irradiated food improves food safety. c \n",
|
755 |
+
"2 fiber follicles protect mammal skin a \n",
|
756 |
+
"3 earthworms can regrow segments that break off b \n",
|
757 |
+
"4 lightning can be bad for the environment. a \n",
|
758 |
+
"... ... ... \n",
|
759 |
+
"8408 organisms that can cause infection make humans... g \n",
|
760 |
+
"8409 fungi are living things that cannot make their... a \n",
|
761 |
+
"8410 the evaporation of water from the skin cools t... g \n",
|
762 |
+
"8411 plants are essential for cellular respiration ... f \n",
|
763 |
+
"8412 living cells in follicles help insulate and pr... b \n",
|
764 |
+
"\n",
|
765 |
+
" topic explanation \\\n",
|
766 |
+
"0 Technology a) Seconds and minutes: This option is incorre... \n",
|
767 |
+
"1 Food science (a) Relieve pain: This option is not correct b... \n",
|
768 |
+
"2 Biology b) Exfoliation: Exfoliation is the process of ... \n",
|
769 |
+
"3 Biology a) Dies: This option is not correct because ea... \n",
|
770 |
+
"4 Electricity b) Rainstorms: Lightning is actually a natural... \n",
|
771 |
+
"... ... ... \n",
|
772 |
+
"8408 Biology a) Bandaging open sores is not the correct ans... \n",
|
773 |
+
"8409 Biology b) Fungi are living things that can make their... \n",
|
774 |
+
"8410 Biology a) Metabolic reaction: This option is incorrec... \n",
|
775 |
+
"8411 Biology a) Electrons are involved in cellular respirat... \n",
|
776 |
+
"8412 Biology a) H2O: Water is essential for life, but it do... \n",
|
777 |
+
"\n",
|
778 |
+
" question_text \\\n",
|
779 |
+
"0 What is satellite technology used for predicting? \n",
|
780 |
+
"1 What does irradiating food do? \n",
|
781 |
+
"2 What protects a mammal's skin? \n",
|
782 |
+
"3 What do earthworms do when a segment breaks off? \n",
|
783 |
+
"4 Lightning can be bad for what? \n",
|
784 |
+
"... ... \n",
|
785 |
+
"8408 Organisms that can cause infection do what? \n",
|
786 |
+
"8409 Fungi are living things that cannot make their... \n",
|
787 |
+
"8410 An overheated body can use water for:? \n",
|
788 |
+
"8411 What is essential for cellular respiration for... \n",
|
789 |
+
"8412 What helps insulate and protect the body? \n",
|
790 |
+
"\n",
|
791 |
+
" answer_choices \n",
|
792 |
+
"0 (a) Seconds and minutes (b) The strength and m... \n",
|
793 |
+
"1 (a) Relieve pain (b) Enhance food's nutrients ... \n",
|
794 |
+
"2 (a) Fiber follicles (b) Exfoliation (c) Resist... \n",
|
795 |
+
"3 (a) Dies (b) Regrows it (c) Reproduces (d) Sed... \n",
|
796 |
+
"4 (a) The environment (b) Rainstorms (c) Destruc... \n",
|
797 |
+
"... ... \n",
|
798 |
+
"8408 (a) Bandage open sores (b) Keep flesh clean (c... \n",
|
799 |
+
"8409 (a) Food (b) Cells (c) Energy (d) Fruits (e) H... \n",
|
800 |
+
"8410 (a) Metabolic reaction (b) Dehydrating (c) Rai... \n",
|
801 |
+
"8411 (a) Electron (b) Glucose (c) Energy (d) Energy... \n",
|
802 |
+
"8412 (a) H2o (b) Living cells in follicles (c) Laye... \n",
|
803 |
+
"\n",
|
804 |
+
"[8413 rows x 7 columns]"
|
805 |
+
]
|
806 |
+
},
|
807 |
+
"execution_count": 10,
|
808 |
+
"metadata": {},
|
809 |
+
"output_type": "execute_result"
|
810 |
+
}
|
811 |
+
],
|
812 |
+
"source": [
|
813 |
+
"df"
|
814 |
+
]
|
815 |
+
},
|
816 |
+
{
|
817 |
+
"cell_type": "markdown",
|
818 |
+
"id": "8d911d0a-2097-4d16-8bd8-c1527ebd20ca",
|
819 |
+
"metadata": {},
|
820 |
+
"source": [
|
821 |
+
"## Prompt Creation"
|
822 |
+
]
|
823 |
+
},
|
824 |
+
{
|
825 |
+
"cell_type": "markdown",
|
826 |
+
"id": "524aa429-fec5-4ba2-add7-e27b70d44854",
|
827 |
+
"metadata": {},
|
828 |
+
"source": [
|
829 |
+
"This is the original prompt from layoric. It looks pretty good, but we need to adapt it for Mistral.\n",
|
830 |
+
"```\n",
|
831 |
+
"{{#role 'system'~}}\n",
|
832 |
+
"You are an AI assistant that helps people find information. User will give you a question. Your task is to answer as faithfully as you can, and most importantly, provide explanation why incorrect answers are not correct. While answering think step-by-step and justify your answer.\n",
|
833 |
+
"{{~/role}}\n",
|
834 |
+
"{{#role 'user'~}}\n",
|
835 |
+
"USER:\n",
|
836 |
+
"Topic: {{topic}}\n",
|
837 |
+
"Question: {{question}}\n",
|
838 |
+
"\n",
|
839 |
+
"### Answer\n",
|
840 |
+
"The correct answer is:\n",
|
841 |
+
"{{answer_key}}). {{answer}}\n",
|
842 |
+
"\n",
|
843 |
+
"### Explanation:\n",
|
844 |
+
"Let's break it down step by step.\n",
|
845 |
+
"\n",
|
846 |
+
"1. Read the question and options carefully.\n",
|
847 |
+
"2. Identify the differences between the options.\n",
|
848 |
+
"3. Determine which options are not logical based on the difference.\n",
|
849 |
+
"4. Go through each incorrect answer providing an explanation why it is incorrect.\n",
|
850 |
+
"{{~/role}}\n",
|
851 |
+
"\n",
|
852 |
+
"{{#role 'assistant'~}}\n",
|
853 |
+
"{{~gen 'explanation'}}\n",
|
854 |
+
"{{~/role}}\n",
|
855 |
+
"```\n",
|
856 |
+
"Changes:\n",
|
857 |
+
"1. We need to move the system instructions to the user message\n",
|
858 |
+
"2. Let's rename `explanation` to `reasoning`\n",
|
859 |
+
"\n",
|
860 |
+
"We want something like this:"
|
861 |
+
]
|
862 |
+
},
|
863 |
+
{
|
864 |
+
"cell_type": "code",
|
865 |
+
"execution_count": 11,
|
866 |
+
"id": "0e6b3afc-8f53-490b-92ca-aec1216f62cc",
|
867 |
+
"metadata": {},
|
868 |
+
"outputs": [],
|
869 |
+
"source": [
|
870 |
def create_mistral_reasoning_prompt(row):
    """Build the single-turn chat prompt that asks for reasoning about a known answer.

    The instructions, the question data and the step-by-step scaffold are all
    placed in one ``user`` message.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing ``topic``,
            ``question_text``, ``answer_choices`` and ``answer_key``.

    Returns:
        A one-element list of chat messages: ``[{"role": "user", "content": ...}]``.
    """
    preamble = (
        "You are an AI assistant that helps people find information. "
        "User will give you a question and an answer. "
        "Your task is to explain your reasoning of the correct answer and "
        "provide explanation why incorrect answers are not correct. "
        "While answering, think step-by-step and justify your answer."
    )
    question_block = "\n".join(
        [
            f"Topic: {row['topic']}",
            f"Question: {row['question_text']}",
            f"Answer Choices: {row['answer_choices']}",
        ]
    )
    answer_line = f"The correct answer is: ({row['answer_key']})"
    reasoning_header = "### Reasoning:\nLet's break it down step by step."
    steps = "\n".join(
        [
            "1. Read the question and options carefully.",
            "2. Identify the differences between the options.",
            "3. Determine which options are not logical based on the difference.",
            "4. Go through each incorrect answer providing an explanation why it is incorrect.",
        ]
    )

    # Sections are separated by exactly one blank line, as in the original template.
    content = "\n\n".join(
        [preamble, question_block, answer_line, reasoning_header, steps]
    )
    return [{"role": "user", "content": content}]
|
888 |
+
]
|
889 |
+
},
|
890 |
+
{
|
891 |
+
"cell_type": "markdown",
|
892 |
+
"id": "3e759e97-c046-4fd1-a924-7618d1756588",
|
893 |
+
"metadata": {},
|
894 |
+
"source": [
|
895 |
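+
"Note that I updated the instructions slightly: the model is now told it receives both a question and its answer, and it is asked to explain the reasoning behind the correct answer instead of answering the question itself."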
|
896 |
+
]
|
897 |
+
},
|
898 |
+
{
|
899 |
+
"cell_type": "code",
|
900 |
+
"execution_count": 12,
|
901 |
+
"id": "1e27e8b4-5cb3-42bf-8b2f-d0627c1d1ae9",
|
902 |
+
"metadata": {},
|
903 |
+
"outputs": [
|
904 |
+
{
|
905 |
+
"name": "stdout",
|
906 |
+
"output_type": "stream",
|
907 |
+
"text": [
|
908 |
+
"[{'role': 'user', 'content': \"You are an AI assistant that helps people find information. User will give you a question and an answer. Your task is to explain your reasoning of the correct answer and provide explanation why incorrect answers are not correct. While answering, think step-by-step and justify your answer.\\n\\nTopic: Technology\\nQuestion: What is satellite technology used for predicting?\\nAnswer Choices: (a) Seconds and minutes (b) The strength and magnitude of an earthquake (c) What it's like outside each day (d) 70-75 degrees fahrenheit (e) Rapid changes occur (f) Dead-ends and false starts. (g) Snow, ice, and rock (h) Around 5 to 27 degrees celsius\\n\\nThe correct answer is: (c)\\n\\n### Reasoning:\\nLet's break it down step by step.\\n\\n1. Read the question and options carefully.\\n2. Identify the differences between the options.\\n3. Determine which options are not logical based on the difference.\\n4. Go through each incorrect answer providing an explanation why it is incorrect.\"}]\n",
|
909 |
+
"\n",
|
910 |
+
"---\n",
|
911 |
+
"\n",
|
912 |
+
"You are an AI assistant that helps people find information. User will give you a question and an answer. Your task is to explain your reasoning of the correct answer and provide explanation why incorrect answers are not correct. While answering, think step-by-step and justify your answer.\n",
|
913 |
+
"\n",
|
914 |
+
"Topic: Technology\n",
|
915 |
+
"Question: What is satellite technology used for predicting?\n",
|
916 |
+
"Answer Choices: (a) Seconds and minutes (b) The strength and magnitude of an earthquake (c) What it's like outside each day (d) 70-75 degrees fahrenheit (e) Rapid changes occur (f) Dead-ends and false starts. (g) Snow, ice, and rock (h) Around 5 to 27 degrees celsius\n",
|
917 |
+
"\n",
|
918 |
+
"The correct answer is: (c)\n",
|
919 |
+
"\n",
|
920 |
+
"### Reasoning:\n",
|
921 |
+
"Let's break it down step by step.\n",
|
922 |
+
"\n",
|
923 |
+
"1. Read the question and options carefully.\n",
|
924 |
+
"2. Identify the differences between the options.\n",
|
925 |
+
"3. Determine which options are not logical based on the difference.\n",
|
926 |
+
"4. Go through each incorrect answer providing an explanation why it is incorrect.\n"
|
927 |
+
]
|
928 |
+
}
|
929 |
+
],
|
930 |
+
"source": [
|
931 |
+
"df['mistral_reasoning_prompt'] = df.apply(create_mistral_reasoning_prompt, axis=1)\n",
|
932 |
+
"print(df['mistral_reasoning_prompt'].iloc[0])\n",
|
933 |
+
"print('\\n---\\n')\n",
|
934 |
+
"print(df['mistral_reasoning_prompt'].iloc[0][0]['content'])"
|
935 |
+
]
|
936 |
+
},
|
937 |
+
{
|
938 |
+
"cell_type": "code",
|
939 |
+
"execution_count": 13,
|
940 |
+
"id": "1f9c248e-fef9-4194-a567-0ee10be7184d",
|
941 |
+
"metadata": {},
|
942 |
+
"outputs": [
|
943 |
+
{
|
944 |
+
"data": {
|
945 |
+
"image/png": "",
|
946 |
+
"text/plain": [
|
947 |
+
"<Figure size 640x480 with 1 Axes>"
|
948 |
+
]
|
949 |
+
},
|
950 |
+
"metadata": {},
|
951 |
+
"output_type": "display_data"
|
952 |
+
},
|
953 |
+
{
|
954 |
+
"name": "stdout",
|
955 |
+
"output_type": "stream",
|
956 |
+
"text": [
|
957 |
+
"313 193\n"
|
958 |
+
]
|
959 |
+
}
|
960 |
+
],
|
961 |
+
"source": [
|
962 |
+
"import matplotlib.pyplot as plt\n",
|
963 |
+
"\n",
|
964 |
+
"token_dist = df['mistral_reasoning_prompt'].apply(lambda x: len(tokenizer.apply_chat_template(x)))\n",
|
965 |
+
"\n",
|
966 |
+
"# Plot the histogram of token counts\n",
|
967 |
+
"plt.hist(token_dist, bins=20, edgecolor='black')\n",
|
968 |
+
"plt.title(\"Histogram of Token Counts\")\n",
|
969 |
+
"plt.xlabel(\"Token Count\")\n",
|
970 |
+
"plt.ylabel(\"Frequency\")\n",
|
971 |
+
"plt.show()\n",
|
972 |
+
"print(max(token_dist), min(token_dist))"
|
973 |
+
]
|
974 |
+
},
|
975 |
+
{
|
976 |
+
"cell_type": "markdown",
|
977 |
+
"id": "5a040685-4e01-4b78-9a04-3c4a4342f661",
|
978 |
+
"metadata": {},
|
979 |
+
"source": [
|
980 |
+
"# Deployment"
|
981 |
+
]
|
982 |
+
},
|
983 |
+
{
|
984 |
+
"cell_type": "code",
|
985 |
+
"execution_count": 14,
|
986 |
+
"id": "8fa1a1a3-5bf3-463d-9ad2-f4a78c07388d",
|
987 |
+
"metadata": {},
|
988 |
+
"outputs": [],
|
989 |
+
"source": [
|
990 |
+
"from huggingface_hub import create_inference_endpoint\n",
|
991 |
+
"from huggingface_hub import get_inference_endpoint\n",
|
992 |
+
"\n",
|
993 |
+
"\n",
|
994 |
def get_my_endpoint():
    """Return the ``poe-reasoning`` inference endpoint, creating it if it does not exist.

    The endpoint is looked up by name in ``NAMESPACE``. Only when the lookup
    fails with HTTP 404 is a new endpoint created from the
    text-generation-inference image configured below. Any other lookup
    failure is re-raised instead of triggering a second deployment under the
    same name. The function then blocks on ``endpoint.wait()`` before
    reporting readiness.

    Returns:
        The ``InferenceEndpoint`` object after ``wait()`` has returned.

    Raises:
        HfHubHTTPError: If the lookup fails for a reason other than "not found".
    """
    # Local import: keeps this cell self-contained without touching the
    # notebook's existing top-level imports.
    from huggingface_hub.utils import HfHubHTTPError

    name = "poe-reasoning"
    namespace = NAMESPACE
    try:
        endpoint = get_inference_endpoint(name, namespace=namespace)
    except HfHubHTTPError as err:
        # Only a missing endpoint justifies creating one; auth or server
        # errors must surface to the caller.
        if getattr(err.response, "status_code", None) != 404:
            raise

        # Custom Docker image details
        custom_image = {
            "health_route": "/health",
            "url": "ghcr.io/huggingface/text-generation-inference:2.4.1",  # This is the min version
            "env": {
                # "LORA_ADAPTERS": "derek-thomas/autotrain-mistral-v03-prompt-experiment-mc-FA-sg,derek-thomas/autotrain-mistral-v03-prompt-experiment-mc-FA-R-sg,derek-thomas/autotrain-mistral-v03-prompt-experiment-mc-FA-sg",  # Add adapters here
                "MAX_BATCH_PREFILL_TOKENS": "8192",
                "MAX_INPUT_TOKENS": "320",  # Set according to your needs
                "MAX_TOTAL_TOKENS": "2000",  # Set according to your needs
                "DISABLE_CUSTOM_KERNELS": 'false',
                "MODEL_ID": "/repository"
            },
        }

        secrets = {
            "HF_TOKEN": get_token()
        }

        # Creating the inference endpoint
        endpoint = create_inference_endpoint(
            name=name,
            namespace=namespace,
            repository='mistralai/Mistral-7B-Instruct-v0.3',
            framework="pytorch",
            accelerator="gpu",
            instance_size="x1",
            instance_type="nvidia-l4",
            region="us-east-1",
            vendor="aws",
            min_replica=4,
            max_replica=4,
            task="text-generation",
            custom_image=custom_image,
            secrets=secrets
        )

    # Wait first, then announce: the message must not appear while a freshly
    # created endpoint is still deploying.
    endpoint.wait()
    print("Your model is ready to use!")
    return endpoint
|
1041 |
+
]
|
1042 |
+
},
|
1043 |
+
{
|
1044 |
+
"cell_type": "code",
|
1045 |
+
"execution_count": 15,
|
1046 |
+
"id": "0f2e2a06-6210-4cca-a96e-25fd2938d73d",
|
1047 |
+
"metadata": {},
|
1048 |
+
"outputs": [
|
1049 |
+
{
|
1050 |
+
"name": "stdout",
|
1051 |
+
"output_type": "stream",
|
1052 |
+
"text": [
|
1053 |
+
"Your model is ready to use!\n"
|
1054 |
+
]
|
1055 |
+
}
|
1056 |
+
],
|
1057 |
+
"source": [
|
1058 |
+
"endpoint = get_my_endpoint()"
|
1059 |
+
]
|
1060 |
+
},
|
1061 |
+
{
|
1062 |
+
"cell_type": "code",
|
1063 |
+
"execution_count": 16,
|
1064 |
+
"id": "b3d76aa7-c97f-4e2d-9400-93b13f22b846",
|
1065 |
+
"metadata": {},
|
1066 |
+
"outputs": [
|
1067 |
+
{
|
1068 |
+
"data": {
|
1069 |
+
"text/plain": [
|
1070 |
+
"'(a) Seconds and minutes: Satellite technology is not used for predicting seconds or minutes with precision. While atomic clocks on satellites help keep time for GPS, they are not used for predicting seconds or minutes in the general sense.\\n\\n(b) The strength and magnitude of an earthquake: While satellite technology can detect earthquakes based on changes in ground motion, it is not used to predict earthquakes accurately. The movements of tectonic plates are not predictable with sufficient certainty to allow for accurate earthquake prediction.\\n\\n(d) 70-75 degrees fahrenheit: Satellite technology is not used to predict temperature in this specific range. Satellites can help monitor and predict temperature variations globally, but they cannot target a specific temperature range, especially one as narrow as 70-75°F.\\n\\n(e) Rapid changes occur: Satellite technology is not applicable to predict rapid changes in general, as they may refer to various phenomena, not just those related to satellite-monitored data.\\n\\n(f) Dead-ends and false starts: Satellite technology does not provide information on dead-ends and false starts. These concepts are related to navigation or decision-making processes, not satellite observations.\\n\\n(h) Around 5 to 27 degrees celsius: Just as with the Fahrenheit example, satellites are not used to predict a specific temperature range like this. Satellite technology can help predict temperature variations, but it does not target a specific range of temperatures.\\n\\nNow, we are left with option (c) \"What it\\'s like outside each day.\"\\n\\nSatellite technology is extensively used in monitoring and predicting weather patterns and providing us with up-to-date information about our weather conditions. This includes cloud cover, rainfall, temperature, and wind speeds. In this way, we can understand what it\\'s like outside on any given day.'"
|
1071 |
+
]
|
1072 |
+
},
|
1073 |
+
"execution_count": 16,
|
1074 |
+
"metadata": {},
|
1075 |
+
"output_type": "execute_result"
|
1076 |
+
}
|
1077 |
+
],
|
1078 |
+
"source": [
|
1079 |
+
"response = endpoint.client.chat_completion(df['mistral_reasoning_prompt'].iloc[0], max_tokens=1650)\n",
|
1080 |
+
"response.choices[0].message.content"
|
1081 |
+
]
|
1082 |
+
},
|
1083 |
+
{
|
1084 |
+
"cell_type": "code",
|
1085 |
+
"execution_count": 17,
|
1086 |
+
"id": "7d3f0277-8b3a-4171-bcb5-b8bda5d5f242",
|
1087 |
+
"metadata": {},
|
1088 |
+
"outputs": [
|
1089 |
+
{
|
1090 |
+
"data": {
|
1091 |
+
"application/vnd.jupyter.widget-view+json": {
|
1092 |
+
"model_id": "8e2aee49e93d4df492e32706b9a0b6f4",
|
1093 |
+
"version_major": 2,
|
1094 |
+
"version_minor": 0
|
1095 |
+
},
|
1096 |
+
"text/plain": [
|
1097 |
+
"Processing Prompts: 0%| | 0/8413 [00:00<?, ?it/s]"
|
1098 |
+
]
|
1099 |
+
},
|
1100 |
+
"metadata": {},
|
1101 |
+
"output_type": "display_data"
|
1102 |
+
}
|
1103 |
+
],
|
1104 |
+
"source": [
|
1105 |
+
"import nest_asyncio\n",
|
1106 |
+
"import asyncio\n",
|
1107 |
+
"import pandas as pd\n",
|
1108 |
+
"from concurrent.futures import ThreadPoolExecutor\n",
|
1109 |
+
"from tqdm.notebook import tqdm\n",
|
1110 |
+
"\n",
|
1111 |
+
"# Assuming 'endpoint' and 'df' are already defined\n",
|
1112 |
+
"\n",
|
1113 |
+
"nest_asyncio.apply()\n",
|
1114 |
+
"\n",
|
1115 |
async def async_chat_completion(prompt, max_tokens=1650):
    """Send one chat prompt to the endpoint's async client and return the reply text.

    Args:
        prompt: Chat messages to send (list of role/content dicts).
        max_tokens: Generation limit forwarded to ``chat_completion``.

    Returns:
        The message content of the first choice in the response.
    """
    completion = await endpoint.async_client.chat_completion(
        prompt,
        max_tokens=max_tokens,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
|
1118 |
+
"\n",
|
1119 |
async def generate_mistral_reasoning(prompts, max_tokens=1650, num_workers=64):
    """Generate one reasoning per prompt, returned in the same order as ``prompts``.

    Each prompt is sent through ``async_chat_completion`` on a worker thread
    (each with its own short-lived event loop), with at most ``num_workers``
    threads active at a time. A progress bar advances as requests finish.

    Args:
        prompts: Sequence of chat prompts (each a list of message dicts).
        max_tokens: Generation limit forwarded to every request.
        num_workers: Size of the thread pool.

    Returns:
        A list of completions where ``result[i]`` belongs to ``prompts[i]``.
        Callers assign this list positionally to a DataFrame column, so it
        must not be in completion order.

    Raises:
        Exception: Whatever the first failing request raised.
    """
    loop = asyncio.get_event_loop()
    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        tasks = [
            loop.run_in_executor(
                executor,
                # Bind the prompt as a default to avoid the late-binding closure pitfall.
                lambda p=prompt: asyncio.run(async_chat_completion(p, max_tokens)),
            )
            for prompt in prompts
        ]
        with tqdm(total=len(tasks), desc="Processing Prompts") as pbar:
            # as_completed is used only to drive the progress bar; its
            # completion-order results are deliberately not collected.
            for finished in asyncio.as_completed(tasks):
                await finished
                pbar.update(1)
    # Every future is done here; reading them in submission order keeps each
    # reasoning aligned with the prompt (and DataFrame row) it came from.
    return [task.result() for task in tasks]
|
1129 |
+
"\n",
|
1130 |
+
"prompts = df['mistral_reasoning_prompt'].tolist()\n",
|
1131 |
+
"mistral_reasonings = await generate_mistral_reasoning(prompts)\n",
|
1132 |
+
"df['mistral_reasoning'] = mistral_reasonings\n"
|
1133 |
+
]
|
1134 |
+
},
|
1135 |
+
{
|
1136 |
+
"cell_type": "code",
|
1137 |
+
"execution_count": 18,
|
1138 |
+
"id": "573008db-78aa-4ca9-992b-3bdebcfd5fc9",
|
1139 |
+
"metadata": {},
|
1140 |
+
"outputs": [
|
1141 |
+
{
|
1142 |
+
"data": {
|
1143 |
+
"text/plain": [
|
1144 |
+
"InferenceEndpoint(name='poe-reasoning', namespace='HF-test-lab', repository='mistralai/Mistral-7B-Instruct-v0.3', status='paused', url=None)"
|
1145 |
+
]
|
1146 |
+
},
|
1147 |
+
"execution_count": 18,
|
1148 |
+
"metadata": {},
|
1149 |
+
"output_type": "execute_result"
|
1150 |
+
}
|
1151 |
+
],
|
1152 |
+
"source": [
|
1153 |
+
"endpoint.pause()"
|
1154 |
+
]
|
1155 |
+
},
|
1156 |
+
{
|
1157 |
+
"cell_type": "code",
|
1158 |
+
"execution_count": 19,
|
1159 |
+
"id": "0a31bb96-66cd-4f03-83b4-25b64c58a591",
|
1160 |
+
"metadata": {},
|
1161 |
+
"outputs": [
|
1162 |
+
{
|
1163 |
+
"data": {
|
1164 |
+
"text/plain": [
|
1165 |
+
"0 Incorrect answers and explanations:\\n\\n1. Elec...\n",
|
1166 |
+
"1 Sure, let's examine each answer and justify wh...\n",
|
1167 |
+
"2 Sure, let's go through each of the provided an...\n",
|
1168 |
+
"3 1. Reading the question carefully, we can see ...\n",
|
1169 |
+
"4 1. Food: While essential for the growth and he...\n",
|
1170 |
+
" ... \n",
|
1171 |
+
"8408 1. Read the question and options carefully: Th...\n",
|
1172 |
+
"8409 1. Read the question and options carefully: Th...\n",
|
1173 |
+
"8410 1. Read the question and options carefully: Th...\n",
|
1174 |
+
"8411 1. First, let's read the question and options ...\n",
|
1175 |
+
"8412 1. Read the question and options carefully: Th...\n",
|
1176 |
+
"Name: mistral_reasoning, Length: 8413, dtype: object"
|
1177 |
+
]
|
1178 |
+
},
|
1179 |
+
"execution_count": 19,
|
1180 |
+
"metadata": {},
|
1181 |
+
"output_type": "execute_result"
|
1182 |
+
}
|
1183 |
+
],
|
1184 |
+
"source": [
|
1185 |
+
"df.mistral_reasoning"
|
1186 |
+
]
|
1187 |
+
},
|
1188 |
+
{
|
1189 |
+
"cell_type": "code",
|
1190 |
+
"execution_count": 25,
|
1191 |
+
"id": "27542a33-187e-4b3f-aa20-4177f778312d",
|
1192 |
+
"metadata": {},
|
1193 |
+
"outputs": [
|
1194 |
+
{
|
1195 |
+
"data": {
|
1196 |
+
"text/html": [
|
1197 |
+
"<div>\n",
|
1198 |
+
"<style scoped>\n",
|
1199 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
1200 |
+
" vertical-align: middle;\n",
|
1201 |
+
" }\n",
|
1202 |
+
"\n",
|
1203 |
+
" .dataframe tbody tr th {\n",
|
1204 |
+
" vertical-align: top;\n",
|
1205 |
+
" }\n",
|
1206 |
+
"\n",
|
1207 |
+
" .dataframe thead th {\n",
|
1208 |
+
" text-align: right;\n",
|
1209 |
+
" }\n",
|
1210 |
+
"</style>\n",
|
1211 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
1212 |
+
" <thead>\n",
|
1213 |
+
" <tr style=\"text-align: right;\">\n",
|
1214 |
+
" <th></th>\n",
|
1215 |
+
" <th>formatted_question</th>\n",
|
1216 |
+
" <th>combined_fact</th>\n",
|
1217 |
+
" <th>answer_key</th>\n",
|
1218 |
+
" <th>topic</th>\n",
|
1219 |
+
" <th>explanation</th>\n",
|
1220 |
+
" <th>question_text</th>\n",
|
1221 |
+
" <th>answer_choices</th>\n",
|
1222 |
+
" <th>mistral_reasoning_prompt</th>\n",
|
1223 |
+
" <th>mistral_reasoning</th>\n",
|
1224 |
+
" </tr>\n",
|
1225 |
+
" </thead>\n",
|
1226 |
+
" <tbody>\n",
|
1227 |
+
" <tr>\n",
|
1228 |
+
" <th>0</th>\n",
|
1229 |
+
" <td>what is satellite technology used for predicti...</td>\n",
|
1230 |
+
" <td>satellite technology is used for predicting wh...</td>\n",
|
1231 |
+
" <td>c</td>\n",
|
1232 |
+
" <td>Technology</td>\n",
|
1233 |
+
" <td>a) Seconds and minutes: This option is incorre...</td>\n",
|
1234 |
+
" <td>What is satellite technology used for predicting?</td>\n",
|
1235 |
+
" <td>(a) Seconds and minutes (b) The strength and m...</td>\n",
|
1236 |
+
" <td>[{'role': 'user', 'content': 'You are an AI as...</td>\n",
|
1237 |
+
" <td>Incorrect answers and explanations:\\n\\n1. Elec...</td>\n",
|
1238 |
+
" </tr>\n",
|
1239 |
+
" <tr>\n",
|
1240 |
+
" <th>1</th>\n",
|
1241 |
+
" <td>what does irradiating food do? (a) relieve pai...</td>\n",
|
1242 |
+
" <td>irradiated food improves food safety.</td>\n",
|
1243 |
+
" <td>c</td>\n",
|
1244 |
+
" <td>Food science</td>\n",
|
1245 |
+
" <td>(a) Relieve pain: This option is not correct b...</td>\n",
|
1246 |
+
" <td>What does irradiating food do?</td>\n",
|
1247 |
+
" <td>(a) Relieve pain (b) Enhance food's nutrients ...</td>\n",
|
1248 |
+
" <td>[{'role': 'user', 'content': 'You are an AI as...</td>\n",
|
1249 |
+
" <td>Sure, let's examine each answer and justify wh...</td>\n",
|
1250 |
+
" </tr>\n",
|
1251 |
+
" <tr>\n",
|
1252 |
+
" <th>2</th>\n",
|
1253 |
+
" <td>what protects a mammal's skin? (a) fiber folli...</td>\n",
|
1254 |
+
" <td>fiber follicles protect mammal skin</td>\n",
|
1255 |
+
" <td>a</td>\n",
|
1256 |
+
" <td>Biology</td>\n",
|
1257 |
+
" <td>b) Exfoliation: Exfoliation is the process of ...</td>\n",
|
1258 |
+
" <td>What protects a mammal's skin?</td>\n",
|
1259 |
+
" <td>(a) Fiber follicles (b) Exfoliation (c) Resist...</td>\n",
|
1260 |
+
" <td>[{'role': 'user', 'content': 'You are an AI as...</td>\n",
|
1261 |
+
" <td>Sure, let's go through each of the provided an...</td>\n",
|
1262 |
+
" </tr>\n",
|
1263 |
+
" <tr>\n",
|
1264 |
+
" <th>3</th>\n",
|
1265 |
+
" <td>what do earthworms do when a segment breaks of...</td>\n",
|
1266 |
+
" <td>earthworms can regrow segments that break off</td>\n",
|
1267 |
+
" <td>b</td>\n",
|
1268 |
+
" <td>Biology</td>\n",
|
1269 |
+
" <td>a) Dies: This option is not correct because ea...</td>\n",
|
1270 |
+
" <td>What do earthworms do when a segment breaks off?</td>\n",
|
1271 |
+
" <td>(a) Dies (b) Regrows it (c) Reproduces (d) Sed...</td>\n",
|
1272 |
+
" <td>[{'role': 'user', 'content': 'You are an AI as...</td>\n",
|
1273 |
+
" <td>1. Reading the question carefully, we can see ...</td>\n",
|
1274 |
+
" </tr>\n",
|
1275 |
+
" <tr>\n",
|
1276 |
+
" <th>4</th>\n",
|
1277 |
+
" <td>lightning can be bad for what? (a) the environ...</td>\n",
|
1278 |
+
" <td>lightning can be bad for the environment.</td>\n",
|
1279 |
+
" <td>a</td>\n",
|
1280 |
+
" <td>Electricity</td>\n",
|
1281 |
+
" <td>b) Rainstorms: Lightning is actually a natural...</td>\n",
|
1282 |
+
" <td>Lightning can be bad for what?</td>\n",
|
1283 |
+
" <td>(a) The environment (b) Rainstorms (c) Destruc...</td>\n",
|
1284 |
+
" <td>[{'role': 'user', 'content': 'You are an AI as...</td>\n",
|
1285 |
+
" <td>1. Food: While essential for the growth and he...</td>\n",
|
1286 |
+
" </tr>\n",
|
1287 |
+
" <tr>\n",
|
1288 |
+
" <th>...</th>\n",
|
1289 |
+
" <td>...</td>\n",
|
1290 |
+
" <td>...</td>\n",
|
1291 |
+
" <td>...</td>\n",
|
1292 |
+
" <td>...</td>\n",
|
1293 |
+
" <td>...</td>\n",
|
1294 |
+
" <td>...</td>\n",
|
1295 |
+
" <td>...</td>\n",
|
1296 |
+
" <td>...</td>\n",
|
1297 |
+
" <td>...</td>\n",
|
1298 |
+
" </tr>\n",
|
1299 |
+
" <tr>\n",
|
1300 |
+
" <th>8408</th>\n",
|
1301 |
+
" <td>organisms that can cause infection do what? (a...</td>\n",
|
1302 |
+
" <td>organisms that can cause infection make humans...</td>\n",
|
1303 |
+
" <td>g</td>\n",
|
1304 |
+
" <td>Biology</td>\n",
|
1305 |
+
" <td>a) Bandaging open sores is not the correct ans...</td>\n",
|
1306 |
+
" <td>Organisms that can cause infection do what?</td>\n",
|
1307 |
+
" <td>(a) Bandage open sores (b) Keep flesh clean (c...</td>\n",
|
1308 |
+
" <td>[{'role': 'user', 'content': 'You are an AI as...</td>\n",
|
1309 |
+
" <td>1. Read the question and options carefully: Th...</td>\n",
|
1310 |
+
" </tr>\n",
|
1311 |
+
" <tr>\n",
|
1312 |
+
" <th>8409</th>\n",
|
1313 |
+
" <td>fungi are living things that cannot make thei...</td>\n",
|
1314 |
+
" <td>fungi are living things that cannot make their...</td>\n",
|
1315 |
+
" <td>a</td>\n",
|
1316 |
+
" <td>Biology</td>\n",
|
1317 |
+
" <td>b) Fungi are living things that can make their...</td>\n",
|
1318 |
+
" <td>Fungi are living things that cannot make their...</td>\n",
|
1319 |
+
" <td>(a) Food (b) Cells (c) Energy (d) Fruits (e) H...</td>\n",
|
1320 |
+
" <td>[{'role': 'user', 'content': 'You are an AI as...</td>\n",
|
1321 |
+
" <td>1. Read the question and options carefully: Th...</td>\n",
|
1322 |
+
" </tr>\n",
|
1323 |
+
" <tr>\n",
|
1324 |
+
" <th>8410</th>\n",
|
1325 |
+
" <td>an overheated body can use water for: (a) meta...</td>\n",
|
1326 |
+
" <td>the evaporation of water from the skin cools t...</td>\n",
|
1327 |
+
" <td>g</td>\n",
|
1328 |
+
" <td>Biology</td>\n",
|
1329 |
+
" <td>a) Metabolic reaction: This option is incorrec...</td>\n",
|
1330 |
+
" <td>An overheated body can use water for:?</td>\n",
|
1331 |
+
" <td>(a) Metabolic reaction (b) Dehydrating (c) Rai...</td>\n",
|
1332 |
+
" <td>[{'role': 'user', 'content': 'You are an AI as...</td>\n",
|
1333 |
+
" <td>1. Read the question and options carefully: Th...</td>\n",
|
1334 |
+
" </tr>\n",
|
1335 |
+
" <tr>\n",
|
1336 |
+
" <th>8411</th>\n",
|
1337 |
+
" <td>what is essential for cellular respiration for...</td>\n",
|
1338 |
+
" <td>plants are essential for cellular respiration ...</td>\n",
|
1339 |
+
" <td>f</td>\n",
|
1340 |
+
" <td>Biology</td>\n",
|
1341 |
+
" <td>a) Electrons are involved in cellular respirat...</td>\n",
|
1342 |
+
" <td>What is essential for cellular respiration for...</td>\n",
|
1343 |
+
" <td>(a) Electron (b) Glucose (c) Energy (d) Energy...</td>\n",
|
1344 |
+
" <td>[{'role': 'user', 'content': 'You are an AI as...</td>\n",
|
1345 |
+
" <td>1. First, let's read the question and options ...</td>\n",
|
1346 |
+
" </tr>\n",
|
1347 |
+
" <tr>\n",
|
1348 |
+
" <th>8412</th>\n",
|
1349 |
+
" <td>what helps insulate and protect the body? (a) ...</td>\n",
|
1350 |
+
" <td>living cells in follicles help insulate and pr...</td>\n",
|
1351 |
+
" <td>b</td>\n",
|
1352 |
+
" <td>Biology</td>\n",
|
1353 |
+
" <td>a) H2O: Water is essential for life, but it do...</td>\n",
|
1354 |
+
" <td>What helps insulate and protect the body?</td>\n",
|
1355 |
+
" <td>(a) H2o (b) Living cells in follicles (c) Laye...</td>\n",
|
1356 |
+
" <td>[{'role': 'user', 'content': 'You are an AI as...</td>\n",
|
1357 |
+
" <td>1. Read the question and options carefully: Th...</td>\n",
|
1358 |
+
" </tr>\n",
|
1359 |
+
" </tbody>\n",
|
1360 |
+
"</table>\n",
|
1361 |
+
"<p>8413 rows × 9 columns</p>\n",
|
1362 |
+
"</div>"
|
1363 |
+
],
|
1364 |
+
"text/plain": [
|
1365 |
+
" formatted_question \\\n",
|
1366 |
+
"0 what is satellite technology used for predicti... \n",
|
1367 |
+
"1 what does irradiating food do? (a) relieve pai... \n",
|
1368 |
+
"2 what protects a mammal's skin? (a) fiber folli... \n",
|
1369 |
+
"3 what do earthworms do when a segment breaks of... \n",
|
1370 |
+
"4 lightning can be bad for what? (a) the environ... \n",
|
1371 |
+
"... ... \n",
|
1372 |
+
"8408 organisms that can cause infection do what? (a... \n",
|
1373 |
+
"8409 fungi are living things that cannot make thei... \n",
|
1374 |
+
"8410 an overheated body can use water for: (a) meta... \n",
|
1375 |
+
"8411 what is essential for cellular respiration for... \n",
|
1376 |
+
"8412 what helps insulate and protect the body? (a) ... \n",
|
1377 |
+
"\n",
|
1378 |
+
" combined_fact answer_key \\\n",
|
1379 |
+
"0 satellite technology is used for predicting wh... c \n",
|
1380 |
+
"1 irradiated food improves food safety. c \n",
|
1381 |
+
"2 fiber follicles protect mammal skin a \n",
|
1382 |
+
"3 earthworms can regrow segments that break off b \n",
|
1383 |
+
"4 lightning can be bad for the environment. a \n",
|
1384 |
+
"... ... ... \n",
|
1385 |
+
"8408 organisms that can cause infection make humans... g \n",
|
1386 |
+
"8409 fungi are living things that cannot make their... a \n",
|
1387 |
+
"8410 the evaporation of water from the skin cools t... g \n",
|
1388 |
+
"8411 plants are essential for cellular respiration ... f \n",
|
1389 |
+
"8412 living cells in follicles help insulate and pr... b \n",
|
1390 |
+
"\n",
|
1391 |
+
" topic explanation \\\n",
|
1392 |
+
"0 Technology a) Seconds and minutes: This option is incorre... \n",
|
1393 |
+
"1 Food science (a) Relieve pain: This option is not correct b... \n",
|
1394 |
+
"2 Biology b) Exfoliation: Exfoliation is the process of ... \n",
|
1395 |
+
"3 Biology a) Dies: This option is not correct because ea... \n",
|
1396 |
+
"4 Electricity b) Rainstorms: Lightning is actually a natural... \n",
|
1397 |
+
"... ... ... \n",
|
1398 |
+
"8408 Biology a) Bandaging open sores is not the correct ans... \n",
|
1399 |
+
"8409 Biology b) Fungi are living things that can make their... \n",
|
1400 |
+
"8410 Biology a) Metabolic reaction: This option is incorrec... \n",
|
1401 |
+
"8411 Biology a) Electrons are involved in cellular respirat... \n",
|
1402 |
+
"8412 Biology a) H2O: Water is essential for life, but it do... \n",
|
1403 |
+
"\n",
|
1404 |
+
" question_text \\\n",
|
1405 |
+
"0 What is satellite technology used for predicting? \n",
|
1406 |
+
"1 What does irradiating food do? \n",
|
1407 |
+
"2 What protects a mammal's skin? \n",
|
1408 |
+
"3 What do earthworms do when a segment breaks off? \n",
|
1409 |
+
"4 Lightning can be bad for what? \n",
|
1410 |
+
"... ... \n",
|
1411 |
+
"8408 Organisms that can cause infection do what? \n",
|
1412 |
+
"8409 Fungi are living things that cannot make their... \n",
|
1413 |
+
"8410 An overheated body can use water for:? \n",
|
1414 |
+
"8411 What is essential for cellular respiration for... \n",
|
1415 |
+
"8412 What helps insulate and protect the body? \n",
|
1416 |
+
"\n",
|
1417 |
+
" answer_choices \\\n",
|
1418 |
+
"0 (a) Seconds and minutes (b) The strength and m... \n",
|
1419 |
+
"1 (a) Relieve pain (b) Enhance food's nutrients ... \n",
|
1420 |
+
"2 (a) Fiber follicles (b) Exfoliation (c) Resist... \n",
|
1421 |
+
"3 (a) Dies (b) Regrows it (c) Reproduces (d) Sed... \n",
|
1422 |
+
"4 (a) The environment (b) Rainstorms (c) Destruc... \n",
|
1423 |
+
"... ... \n",
|
1424 |
+
"8408 (a) Bandage open sores (b) Keep flesh clean (c... \n",
|
1425 |
+
"8409 (a) Food (b) Cells (c) Energy (d) Fruits (e) H... \n",
|
1426 |
+
"8410 (a) Metabolic reaction (b) Dehydrating (c) Rai... \n",
|
1427 |
+
"8411 (a) Electron (b) Glucose (c) Energy (d) Energy... \n",
|
1428 |
+
"8412 (a) H2o (b) Living cells in follicles (c) Laye... \n",
|
1429 |
+
"\n",
|
1430 |
+
" mistral_reasoning_prompt \\\n",
|
1431 |
+
"0 [{'role': 'user', 'content': 'You are an AI as... \n",
|
1432 |
+
"1 [{'role': 'user', 'content': 'You are an AI as... \n",
|
1433 |
+
"2 [{'role': 'user', 'content': 'You are an AI as... \n",
|
1434 |
+
"3 [{'role': 'user', 'content': 'You are an AI as... \n",
|
1435 |
+
"4 [{'role': 'user', 'content': 'You are an AI as... \n",
|
1436 |
+
"... ... \n",
|
1437 |
+
"8408 [{'role': 'user', 'content': 'You are an AI as... \n",
|
1438 |
+
"8409 [{'role': 'user', 'content': 'You are an AI as... \n",
|
1439 |
+
"8410 [{'role': 'user', 'content': 'You are an AI as... \n",
|
1440 |
+
"8411 [{'role': 'user', 'content': 'You are an AI as... \n",
|
1441 |
+
"8412 [{'role': 'user', 'content': 'You are an AI as... \n",
|
1442 |
+
"\n",
|
1443 |
+
" mistral_reasoning \n",
|
1444 |
+
"0 Incorrect answers and explanations:\\n\\n1. Elec... \n",
|
1445 |
+
"1 Sure, let's examine each answer and justify wh... \n",
|
1446 |
+
"2 Sure, let's go through each of the provided an... \n",
|
1447 |
+
"3 1. Reading the question carefully, we can see ... \n",
|
1448 |
+
"4 1. Food: While essential for the growth and he... \n",
|
1449 |
+
"... ... \n",
|
1450 |
+
"8408 1. Read the question and options carefully: Th... \n",
|
1451 |
+
"8409 1. Read the question and options carefully: Th... \n",
|
1452 |
+
"8410 1. Read the question and options carefully: Th... \n",
|
1453 |
+
"8411 1. First, let's read the question and options ... \n",
|
1454 |
+
"8412 1. Read the question and options carefully: Th... \n",
|
1455 |
+
"\n",
|
1456 |
+
"[8413 rows x 9 columns]"
|
1457 |
+
]
|
1458 |
+
},
|
1459 |
+
"execution_count": 25,
|
1460 |
+
"metadata": {},
|
1461 |
+
"output_type": "execute_result"
|
1462 |
+
}
|
1463 |
+
],
|
1464 |
+
"source": [
|
1465 |
+
"df"
|
1466 |
+
]
|
1467 |
+
},
|
1468 |
+
{
|
1469 |
+
"cell_type": "code",
|
1470 |
+
"execution_count": 26,
|
1471 |
+
"id": "55bc6940-07e4-402f-9c56-fb830a35290f",
|
1472 |
+
"metadata": {},
|
1473 |
+
"outputs": [
|
1474 |
+
{
|
1475 |
+
"data": {
|
1476 |
+
"application/vnd.jupyter.widget-view+json": {
|
1477 |
+
"model_id": "b9047ef7088b4eaa83916017c17c0bbc",
|
1478 |
+
"version_major": 2,
|
1479 |
+
"version_minor": 0
|
1480 |
+
},
|
1481 |
+
"text/plain": [
|
1482 |
+
"Uploading the dataset shards: 0%| | 0/1 [00:00<?, ?it/s]"
|
1483 |
+
]
|
1484 |
+
},
|
1485 |
+
"metadata": {},
|
1486 |
+
"output_type": "display_data"
|
1487 |
+
},
|
1488 |
+
{
|
1489 |
+
"data": {
|
1490 |
+
"application/vnd.jupyter.widget-view+json": {
|
1491 |
+
"model_id": "2bc167b9d94d4b8588f71c5f7f82722e",
|
1492 |
+
"version_major": 2,
|
1493 |
+
"version_minor": 0
|
1494 |
+
},
|
1495 |
+
"text/plain": [
|
1496 |
+
"Creating parquet from Arrow format: 0%| | 0/9 [00:00<?, ?ba/s]"
|
1497 |
+
]
|
1498 |
+
},
|
1499 |
+
"metadata": {},
|
1500 |
+
"output_type": "display_data"
|
1501 |
+
},
|
1502 |
+
{
|
1503 |
+
"data": {
|
1504 |
+
"text/plain": [
|
1505 |
+
"CommitInfo(commit_url='https://huggingface.co/datasets/derek-thomas/labeled-multiple-choice-explained-mistral-reasoning/commit/f6554f7b3e0671558956eed6a42453b281b6f7e8', commit_message='Upload dataset', commit_description='', oid='f6554f7b3e0671558956eed6a42453b281b6f7e8', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/derek-thomas/labeled-multiple-choice-explained-mistral-reasoning', endpoint='https://huggingface.co', repo_type='dataset', repo_id='derek-thomas/labeled-multiple-choice-explained-mistral-reasoning'), pr_revision=None, pr_num=None)"
|
1506 |
+
]
|
1507 |
+
},
|
1508 |
+
"execution_count": 26,
|
1509 |
+
"metadata": {},
|
1510 |
+
"output_type": "execute_result"
|
1511 |
+
}
|
1512 |
+
],
|
1513 |
+
"source": [
|
1514 |
+
"from datasets import Dataset\n",
|
1515 |
+
"\n",
|
1516 |
+
"dataset = Dataset.from_pandas(df)\n",
|
1517 |
+
"dataset.push_to_hub(OUTPUT_DATASET)"
|
1518 |
+
]
|
1519 |
+
},
|
1520 |
+
{
|
1521 |
+
"cell_type": "code",
|
1522 |
+
"execution_count": null,
|
1523 |
+
"id": "24857480-8ecc-4dc9-a5b0-af6debca162d",
|
1524 |
+
"metadata": {},
|
1525 |
+
"outputs": [],
|
1526 |
+
"source": []
|
1527 |
+
}
|
1528 |
+
],
|
1529 |
+
"metadata": {
|
1530 |
+
"kernelspec": {
|
1531 |
+
"display_name": "Python 3 (ipykernel)",
|
1532 |
+
"language": "python",
|
1533 |
+
"name": "python3"
|
1534 |
+
},
|
1535 |
+
"language_info": {
|
1536 |
+
"codemirror_mode": {
|
1537 |
+
"name": "ipython",
|
1538 |
+
"version": 3
|
1539 |
+
},
|
1540 |
+
"file_extension": ".py",
|
1541 |
+
"mimetype": "text/x-python",
|
1542 |
+
"name": "python",
|
1543 |
+
"nbconvert_exporter": "python",
|
1544 |
+
"pygments_lexer": "ipython3",
|
1545 |
+
"version": "3.11.10"
|
1546 |
+
}
|
1547 |
+
},
|
1548 |
+
"nbformat": 4,
|
1549 |
+
"nbformat_minor": 5
|
1550 |
+
}
|
01-poe-dataset-creation.ipynb
ADDED
@@ -0,0 +1,1421 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"id": "68e9310f-109d-4f30-b263-d1e6c058ee80",
|
6 |
+
"metadata": {},
|
7 |
+
"source": [
|
8 |
+
"# Setup"
|
9 |
+
]
|
10 |
+
},
|
11 |
+
{
|
12 |
+
"cell_type": "code",
|
13 |
+
"execution_count": 1,
|
14 |
+
"id": "6805b3b5-782b-437c-82b3-9392abb5a599",
|
15 |
+
"metadata": {
|
16 |
+
"tags": []
|
17 |
+
},
|
18 |
+
"outputs": [],
|
19 |
+
"source": [
|
20 |
+
"# %pip install -q -r requirements.txt"
|
21 |
+
]
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"cell_type": "markdown",
|
25 |
+
"id": "94f0fcdd-1653-440e-8ebc-9c33d931163a",
|
26 |
+
"metadata": {},
|
27 |
+
"source": [
|
28 |
+
"## Config"
|
29 |
+
]
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"cell_type": "code",
|
33 |
+
"execution_count": 2,
|
34 |
+
"id": "5d0bd22f-293e-4c15-9dfe-8070553f42b5",
|
35 |
+
"metadata": {
|
36 |
+
"tags": []
|
37 |
+
},
|
38 |
+
"outputs": [],
|
39 |
+
"source": [
|
40 |
+
"INPUT_DATASET = 'derek-thomas/labeled-multiple-choice-explained-mistral-reasoning'\n",
|
41 |
+
"REVISION = '536f3b8'\n",
|
42 |
+
"OUTPUT_DATASET = 'derek-thomas/labeled-multiple-choice-explained-mistral-tokenized'"
|
43 |
+
]
|
44 |
+
},
|
45 |
+
{
|
46 |
+
"cell_type": "code",
|
47 |
+
"execution_count": 3,
|
48 |
+
"id": "a1fc7a29-6b60-446d-b708-012f897de6a9",
|
49 |
+
"metadata": {},
|
50 |
+
"outputs": [
|
51 |
+
{
|
52 |
+
"data": {
|
53 |
+
"application/vnd.jupyter.widget-view+json": {
|
54 |
+
"model_id": "85ca07b896804801b5ccd629341e965c",
|
55 |
+
"version_major": 2,
|
56 |
+
"version_minor": 0
|
57 |
+
},
|
58 |
+
"text/plain": [
|
59 |
+
"VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
|
60 |
+
]
|
61 |
+
},
|
62 |
+
"metadata": {},
|
63 |
+
"output_type": "display_data"
|
64 |
+
}
|
65 |
+
],
|
66 |
+
"source": [
|
67 |
+
"from transformers import AutoTokenizer\n",
|
68 |
+
"from huggingface_hub import get_token, login\n",
|
69 |
+
"\n",
|
70 |
+
"login()"
|
71 |
+
]
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"cell_type": "code",
|
75 |
+
"execution_count": 4,
|
76 |
+
"id": "f17edf80-7318-4b65-ae60-7433a12ad8cb",
|
77 |
+
"metadata": {},
|
78 |
+
"outputs": [],
|
79 |
+
"source": [
|
80 |
+
"BASE_MODEL = 'mistralai/Mistral-7B-Instruct-v0.3'\n",
|
81 |
+
"tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, token=get_token())"
|
82 |
+
]
|
83 |
+
},
|
84 |
+
{
|
85 |
+
"cell_type": "markdown",
|
86 |
+
"id": "df697715-3cff-4177-bfa0-348a76d00a73",
|
87 |
+
"metadata": {},
|
88 |
+
"source": [
|
89 |
+
"# Prompt Experiment\n",
|
90 |
+
"## Goal\n",
|
91 |
+
"I want to explore a few scenarios for prompt fine-tuning. Lets consider a scenario where we have the following available:\n",
|
92 |
+
"- (Q) Question\n",
|
93 |
+
"- (AC) Answer Choices\n",
|
94 |
+
"- (R) Reasoning\n",
|
95 |
+
"- (FA) Final Answer\n",
|
96 |
+
"\n",
|
97 |
+
"How would performance differ if we tried changing the order?\n",
|
98 |
+
"\n",
|
99 |
+
"Scenario 1: `Q - AC - R - FA`\n",
|
100 |
+
"This is the most natural, we want the model to generate reasoning before the final answer. Based on how decoding works, this will give the most information before selecting the Final Answer.\n",
|
101 |
+
"\n",
|
102 |
+
"Scenario 2: `Q - AC - FA - R`\n",
|
103 |
+
"This is quite awkward. Why would we put our reasoning last? Its faster. Could the model be trained in such a way to \"know\" the reasoning before responding with it? If so we could save a lot of tokens. Im skeptical, but its worth testing.\n",
|
104 |
+
"\n",
|
105 |
+
"Scenario 3: `Q - AC - FA`\n",
|
106 |
+
"This is our fine-tuning control.\n",
|
107 |
+
"\n",
|
108 |
+
"Scenario 4: Base\n",
|
109 |
+
"This is our un-fine-tuned control.\n",
|
110 |
+
"\n",
|
111 |
+
"In each of these scenarios I will build prompts with strucutred generation to fine-tune with. I noticed some difficulty in a first pass with getting consistent response formats, but thats out of scope, so structured generation can help a lot here.\n",
|
112 |
+
"\n",
|
113 |
+
"Datasets wont store complex structures like lists of dicts of different types (needed for structured generation, so its easiest if I tokenize. Ill be using Mistral, so Ill skip the system prompt. Its simple enough to come back and change this for a different model in this notebook.\n",
|
114 |
+
"\n",
|
115 |
+
"## Implementation\n",
|
116 |
+
"To explore this goal, we will start with [layoric/labeled-multiple-choice-explained](https://huggingface.co/datasets/layoric/labeled-multiple-choice-explained) as our dataset. It has explanations already provided by GPT-3.5-turbo. Given that these explanations are a bit different than what mistral would do, it might be useful if we generate some from mistral as well. Based on [this notebook](./poe-generate-mistral-reasoning.ipynb) we have been able to generate mistral reasoning in this refined dataset [derek-thomas/labeled-multiple-choice-explained-mistral-reasoning](https://huggingface.co/datasets/derek-thomas/labeled-multiple-choice-explained-mistral-reasoning).\n",
|
117 |
+
"\n",
|
118 |
+
"In this notebook we will format our data such that we can try each experiment and then we will push it to my repo: [derek-thomas/labeled-multiple-choice-explained](https://huggingface.co/datasets/derek-thomas/labeled-multiple-choice-explained)."
|
119 |
+
]
|
120 |
+
},
|
121 |
+
{
|
122 |
+
"cell_type": "markdown",
|
123 |
+
"id": "13cc5c85-0642-4348-8167-60b5ed5d37d5",
|
124 |
+
"metadata": {},
|
125 |
+
"source": [
|
126 |
+
"## Imports"
|
127 |
+
]
|
128 |
+
},
|
129 |
+
{
|
130 |
+
"cell_type": "code",
|
131 |
+
"execution_count": 5,
|
132 |
+
"id": "7925796f-6b90-4c43-8c22-e3906cfbdf3b",
|
133 |
+
"metadata": {
|
134 |
+
"tags": []
|
135 |
+
},
|
136 |
+
"outputs": [],
|
137 |
+
"source": [
|
138 |
+
"import pandas as pd\n",
|
139 |
+
"from datasets import load_dataset\n",
|
140 |
+
"import json"
|
141 |
+
]
|
142 |
+
},
|
143 |
+
{
|
144 |
+
"cell_type": "markdown",
|
145 |
+
"id": "c5750868-8442-4d0b-9259-4e7e490793bb",
|
146 |
+
"metadata": {},
|
147 |
+
"source": [
|
148 |
+
"## Load and Preprocess the Dataset"
|
149 |
+
]
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"cell_type": "code",
|
153 |
+
"execution_count": 6,
|
154 |
+
"id": "d8d7a40a-57af-45b9-bcea-8ac4c3ec0e61",
|
155 |
+
"metadata": {
|
156 |
+
"tags": []
|
157 |
+
},
|
158 |
+
"outputs": [
|
159 |
+
{
|
160 |
+
"name": "stdout",
|
161 |
+
"output_type": "stream",
|
162 |
+
"text": [
|
163 |
+
"Before Cleaning: 8413 rows\n"
|
164 |
+
]
|
165 |
+
},
|
166 |
+
{
|
167 |
+
"data": {
|
168 |
+
"text/html": [
|
169 |
+
"<div>\n",
|
170 |
+
"<style scoped>\n",
|
171 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
172 |
+
" vertical-align: middle;\n",
|
173 |
+
" }\n",
|
174 |
+
"\n",
|
175 |
+
" .dataframe tbody tr th {\n",
|
176 |
+
" vertical-align: top;\n",
|
177 |
+
" }\n",
|
178 |
+
"\n",
|
179 |
+
" .dataframe thead th {\n",
|
180 |
+
" text-align: right;\n",
|
181 |
+
" }\n",
|
182 |
+
"</style>\n",
|
183 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
184 |
+
" <thead>\n",
|
185 |
+
" <tr style=\"text-align: right;\">\n",
|
186 |
+
" <th></th>\n",
|
187 |
+
" <th>formatted_question</th>\n",
|
188 |
+
" <th>combined_fact</th>\n",
|
189 |
+
" <th>answer_key</th>\n",
|
190 |
+
" <th>topic</th>\n",
|
191 |
+
" <th>gpt3_5_reasoning</th>\n",
|
192 |
+
" <th>question_text</th>\n",
|
193 |
+
" <th>answer_choices</th>\n",
|
194 |
+
" <th>mistral_reasoning</th>\n",
|
195 |
+
" </tr>\n",
|
196 |
+
" </thead>\n",
|
197 |
+
" <tbody>\n",
|
198 |
+
" <tr>\n",
|
199 |
+
" <th>0</th>\n",
|
200 |
+
" <td>what is satellite technology used for predicti...</td>\n",
|
201 |
+
" <td>satellite technology is used for predicting wh...</td>\n",
|
202 |
+
" <td>c</td>\n",
|
203 |
+
" <td>Technology</td>\n",
|
204 |
+
" <td>a) Seconds and minutes: This option is incorre...</td>\n",
|
205 |
+
" <td>What is satellite technology used for predicting?</td>\n",
|
206 |
+
" <td>(a) Seconds and minutes (b) The strength and m...</td>\n",
|
207 |
+
" <td>Incorrect answers and explanations:\\n\\n1. Elec...</td>\n",
|
208 |
+
" </tr>\n",
|
209 |
+
" <tr>\n",
|
210 |
+
" <th>1</th>\n",
|
211 |
+
" <td>what does irradiating food do? (a) relieve pai...</td>\n",
|
212 |
+
" <td>irradiated food improves food safety.</td>\n",
|
213 |
+
" <td>c</td>\n",
|
214 |
+
" <td>Food science</td>\n",
|
215 |
+
" <td>(a) Relieve pain: This option is not correct b...</td>\n",
|
216 |
+
" <td>What does irradiating food do?</td>\n",
|
217 |
+
" <td>(a) Relieve pain (b) Enhance food's nutrients ...</td>\n",
|
218 |
+
" <td>Sure, let's examine each answer and justify wh...</td>\n",
|
219 |
+
" </tr>\n",
|
220 |
+
" <tr>\n",
|
221 |
+
" <th>2</th>\n",
|
222 |
+
" <td>what protects a mammal's skin? (a) fiber folli...</td>\n",
|
223 |
+
" <td>fiber follicles protect mammal skin</td>\n",
|
224 |
+
" <td>a</td>\n",
|
225 |
+
" <td>Biology</td>\n",
|
226 |
+
" <td>b) Exfoliation: Exfoliation is the process of ...</td>\n",
|
227 |
+
" <td>What protects a mammal's skin?</td>\n",
|
228 |
+
" <td>(a) Fiber follicles (b) Exfoliation (c) Resist...</td>\n",
|
229 |
+
" <td>Sure, let's go through each of the provided an...</td>\n",
|
230 |
+
" </tr>\n",
|
231 |
+
" <tr>\n",
|
232 |
+
" <th>3</th>\n",
|
233 |
+
" <td>what do earthworms do when a segment breaks of...</td>\n",
|
234 |
+
" <td>earthworms can regrow segments that break off</td>\n",
|
235 |
+
" <td>b</td>\n",
|
236 |
+
" <td>Biology</td>\n",
|
237 |
+
" <td>a) Dies: This option is not correct because ea...</td>\n",
|
238 |
+
" <td>What do earthworms do when a segment breaks off?</td>\n",
|
239 |
+
" <td>(a) Dies (b) Regrows it (c) Reproduces (d) Sed...</td>\n",
|
240 |
+
" <td>1. Reading the question carefully, we can see ...</td>\n",
|
241 |
+
" </tr>\n",
|
242 |
+
" <tr>\n",
|
243 |
+
" <th>4</th>\n",
|
244 |
+
" <td>lightning can be bad for what? (a) the environ...</td>\n",
|
245 |
+
" <td>lightning can be bad for the environment.</td>\n",
|
246 |
+
" <td>a</td>\n",
|
247 |
+
" <td>Electricity</td>\n",
|
248 |
+
" <td>b) Rainstorms: Lightning is actually a natural...</td>\n",
|
249 |
+
" <td>Lightning can be bad for what?</td>\n",
|
250 |
+
" <td>(a) The environment (b) Rainstorms (c) Destruc...</td>\n",
|
251 |
+
" <td>1. Food: While essential for the growth and he...</td>\n",
|
252 |
+
" </tr>\n",
|
253 |
+
" <tr>\n",
|
254 |
+
" <th>...</th>\n",
|
255 |
+
" <td>...</td>\n",
|
256 |
+
" <td>...</td>\n",
|
257 |
+
" <td>...</td>\n",
|
258 |
+
" <td>...</td>\n",
|
259 |
+
" <td>...</td>\n",
|
260 |
+
" <td>...</td>\n",
|
261 |
+
" <td>...</td>\n",
|
262 |
+
" <td>...</td>\n",
|
263 |
+
" </tr>\n",
|
264 |
+
" <tr>\n",
|
265 |
+
" <th>8408</th>\n",
|
266 |
+
" <td>organisms that can cause infection do what? (a...</td>\n",
|
267 |
+
" <td>organisms that can cause infection make humans...</td>\n",
|
268 |
+
" <td>g</td>\n",
|
269 |
+
" <td>Biology</td>\n",
|
270 |
+
" <td>a) Bandaging open sores is not the correct ans...</td>\n",
|
271 |
+
" <td>Organisms that can cause infection do what?</td>\n",
|
272 |
+
" <td>(a) Bandage open sores (b) Keep flesh clean (c...</td>\n",
|
273 |
+
" <td>1. Read the question and options carefully: Th...</td>\n",
|
274 |
+
" </tr>\n",
|
275 |
+
" <tr>\n",
|
276 |
+
" <th>8409</th>\n",
|
277 |
+
" <td>fungi are living things that cannot make thei...</td>\n",
|
278 |
+
" <td>fungi are living things that cannot make their...</td>\n",
|
279 |
+
" <td>a</td>\n",
|
280 |
+
" <td>Biology</td>\n",
|
281 |
+
" <td>b) Fungi are living things that can make their...</td>\n",
|
282 |
+
" <td>Fungi are living things that cannot make their...</td>\n",
|
283 |
+
" <td>(a) Food (b) Cells (c) Energy (d) Fruits (e) H...</td>\n",
|
284 |
+
" <td>1. Read the question and options carefully: Th...</td>\n",
|
285 |
+
" </tr>\n",
|
286 |
+
" <tr>\n",
|
287 |
+
" <th>8410</th>\n",
|
288 |
+
" <td>an overheated body can use water for: (a) meta...</td>\n",
|
289 |
+
" <td>the evaporation of water from the skin cools t...</td>\n",
|
290 |
+
" <td>g</td>\n",
|
291 |
+
" <td>Biology</td>\n",
|
292 |
+
" <td>a) Metabolic reaction: This option is incorrec...</td>\n",
|
293 |
+
" <td>An overheated body can use water for:?</td>\n",
|
294 |
+
" <td>(a) Metabolic reaction (b) Dehydrating (c) Rai...</td>\n",
|
295 |
+
" <td>1. Read the question and options carefully: Th...</td>\n",
|
296 |
+
" </tr>\n",
|
297 |
+
" <tr>\n",
|
298 |
+
" <th>8411</th>\n",
|
299 |
+
" <td>what is essential for cellular respiration for...</td>\n",
|
300 |
+
" <td>plants are essential for cellular respiration ...</td>\n",
|
301 |
+
" <td>f</td>\n",
|
302 |
+
" <td>Biology</td>\n",
|
303 |
+
" <td>a) Electrons are involved in cellular respirat...</td>\n",
|
304 |
+
" <td>What is essential for cellular respiration for...</td>\n",
|
305 |
+
" <td>(a) Electron (b) Glucose (c) Energy (d) Energy...</td>\n",
|
306 |
+
" <td>1. First, let's read the question and options ...</td>\n",
|
307 |
+
" </tr>\n",
|
308 |
+
" <tr>\n",
|
309 |
+
" <th>8412</th>\n",
|
310 |
+
" <td>what helps insulate and protect the body? (a) ...</td>\n",
|
311 |
+
" <td>living cells in follicles help insulate and pr...</td>\n",
|
312 |
+
" <td>b</td>\n",
|
313 |
+
" <td>Biology</td>\n",
|
314 |
+
" <td>a) H2O: Water is essential for life, but it do...</td>\n",
|
315 |
+
" <td>What helps insulate and protect the body?</td>\n",
|
316 |
+
" <td>(a) H2o (b) Living cells in follicles (c) Laye...</td>\n",
|
317 |
+
" <td>1. Read the question and options carefully: Th...</td>\n",
|
318 |
+
" </tr>\n",
|
319 |
+
" </tbody>\n",
|
320 |
+
"</table>\n",
|
321 |
+
"<p>8413 rows × 8 columns</p>\n",
|
322 |
+
"</div>"
|
323 |
+
],
|
324 |
+
"text/plain": [
|
325 |
+
" formatted_question \\\n",
|
326 |
+
"0 what is satellite technology used for predicti... \n",
|
327 |
+
"1 what does irradiating food do? (a) relieve pai... \n",
|
328 |
+
"2 what protects a mammal's skin? (a) fiber folli... \n",
|
329 |
+
"3 what do earthworms do when a segment breaks of... \n",
|
330 |
+
"4 lightning can be bad for what? (a) the environ... \n",
|
331 |
+
"... ... \n",
|
332 |
+
"8408 organisms that can cause infection do what? (a... \n",
|
333 |
+
"8409 fungi are living things that cannot make thei... \n",
|
334 |
+
"8410 an overheated body can use water for: (a) meta... \n",
|
335 |
+
"8411 what is essential for cellular respiration for... \n",
|
336 |
+
"8412 what helps insulate and protect the body? (a) ... \n",
|
337 |
+
"\n",
|
338 |
+
" combined_fact answer_key \\\n",
|
339 |
+
"0 satellite technology is used for predicting wh... c \n",
|
340 |
+
"1 irradiated food improves food safety. c \n",
|
341 |
+
"2 fiber follicles protect mammal skin a \n",
|
342 |
+
"3 earthworms can regrow segments that break off b \n",
|
343 |
+
"4 lightning can be bad for the environment. a \n",
|
344 |
+
"... ... ... \n",
|
345 |
+
"8408 organisms that can cause infection make humans... g \n",
|
346 |
+
"8409 fungi are living things that cannot make their... a \n",
|
347 |
+
"8410 the evaporation of water from the skin cools t... g \n",
|
348 |
+
"8411 plants are essential for cellular respiration ... f \n",
|
349 |
+
"8412 living cells in follicles help insulate and pr... b \n",
|
350 |
+
"\n",
|
351 |
+
" topic gpt3_5_reasoning \\\n",
|
352 |
+
"0 Technology a) Seconds and minutes: This option is incorre... \n",
|
353 |
+
"1 Food science (a) Relieve pain: This option is not correct b... \n",
|
354 |
+
"2 Biology b) Exfoliation: Exfoliation is the process of ... \n",
|
355 |
+
"3 Biology a) Dies: This option is not correct because ea... \n",
|
356 |
+
"4 Electricity b) Rainstorms: Lightning is actually a natural... \n",
|
357 |
+
"... ... ... \n",
|
358 |
+
"8408 Biology a) Bandaging open sores is not the correct ans... \n",
|
359 |
+
"8409 Biology b) Fungi are living things that can make their... \n",
|
360 |
+
"8410 Biology a) Metabolic reaction: This option is incorrec... \n",
|
361 |
+
"8411 Biology a) Electrons are involved in cellular respirat... \n",
|
362 |
+
"8412 Biology a) H2O: Water is essential for life, but it do... \n",
|
363 |
+
"\n",
|
364 |
+
" question_text \\\n",
|
365 |
+
"0 What is satellite technology used for predicting? \n",
|
366 |
+
"1 What does irradiating food do? \n",
|
367 |
+
"2 What protects a mammal's skin? \n",
|
368 |
+
"3 What do earthworms do when a segment breaks off? \n",
|
369 |
+
"4 Lightning can be bad for what? \n",
|
370 |
+
"... ... \n",
|
371 |
+
"8408 Organisms that can cause infection do what? \n",
|
372 |
+
"8409 Fungi are living things that cannot make their... \n",
|
373 |
+
"8410 An overheated body can use water for:? \n",
|
374 |
+
"8411 What is essential for cellular respiration for... \n",
|
375 |
+
"8412 What helps insulate and protect the body? \n",
|
376 |
+
"\n",
|
377 |
+
" answer_choices \\\n",
|
378 |
+
"0 (a) Seconds and minutes (b) The strength and m... \n",
|
379 |
+
"1 (a) Relieve pain (b) Enhance food's nutrients ... \n",
|
380 |
+
"2 (a) Fiber follicles (b) Exfoliation (c) Resist... \n",
|
381 |
+
"3 (a) Dies (b) Regrows it (c) Reproduces (d) Sed... \n",
|
382 |
+
"4 (a) The environment (b) Rainstorms (c) Destruc... \n",
|
383 |
+
"... ... \n",
|
384 |
+
"8408 (a) Bandage open sores (b) Keep flesh clean (c... \n",
|
385 |
+
"8409 (a) Food (b) Cells (c) Energy (d) Fruits (e) H... \n",
|
386 |
+
"8410 (a) Metabolic reaction (b) Dehydrating (c) Rai... \n",
|
387 |
+
"8411 (a) Electron (b) Glucose (c) Energy (d) Energy... \n",
|
388 |
+
"8412 (a) H2o (b) Living cells in follicles (c) Laye... \n",
|
389 |
+
"\n",
|
390 |
+
" mistral_reasoning \n",
|
391 |
+
"0 Incorrect answers and explanations:\\n\\n1. Elec... \n",
|
392 |
+
"1 Sure, let's examine each answer and justify wh... \n",
|
393 |
+
"2 Sure, let's go through each of the provided an... \n",
|
394 |
+
"3 1. Reading the question carefully, we can see ... \n",
|
395 |
+
"4 1. Food: While essential for the growth and he... \n",
|
396 |
+
"... ... \n",
|
397 |
+
"8408 1. Read the question and options carefully: Th... \n",
|
398 |
+
"8409 1. Read the question and options carefully: Th... \n",
|
399 |
+
"8410 1. Read the question and options carefully: Th... \n",
|
400 |
+
"8411 1. First, let's read the question and options ... \n",
|
401 |
+
"8412 1. Read the question and options carefully: Th... \n",
|
402 |
+
"\n",
|
403 |
+
"[8413 rows x 8 columns]"
|
404 |
+
]
|
405 |
+
},
|
406 |
+
"execution_count": 6,
|
407 |
+
"metadata": {},
|
408 |
+
"output_type": "execute_result"
|
409 |
+
}
|
410 |
+
],
|
411 |
+
"source": [
|
412 |
+
"# Load dataset from Hugging Face Hub\n",
|
413 |
+
"dataset = load_dataset(INPUT_DATASET, split='train')\n",
|
414 |
+
"\n",
|
415 |
+
"# Convert to pandas dataframe\n",
|
416 |
+
"df = dataset.to_pandas()\n",
|
417 |
+
"print(f\"Before Cleaning: {len(df)} rows\")\n",
|
418 |
+
"\n",
|
419 |
+
"# Drop the mistral_reasoning_prompt column if it exists\n",
|
420 |
+
"df.drop(columns=['mistral_reasoning_prompt'], errors='ignore', inplace=True)\n",
|
421 |
+
"\n",
|
422 |
+
"# Rename the 'explanation' column to 'gpt3_5_reasoning'\n",
|
423 |
+
"df.rename(columns={\n",
|
424 |
+
" 'explanation': 'gpt3_5_reasoning',\n",
|
425 |
+
"}, inplace=True)\n",
|
426 |
+
"df"
|
427 |
+
]
|
428 |
+
},
|
429 |
+
{
|
430 |
+
"cell_type": "markdown",
|
431 |
+
"id": "2511bc04-f611-4dc7-b3ed-e477907b0200",
|
432 |
+
"metadata": {},
|
433 |
+
"source": [
|
434 |
+
"## Create Prompts from Processed Data"
|
435 |
+
]
|
436 |
+
},
|
437 |
+
{
|
438 |
+
"cell_type": "markdown",
|
439 |
+
"id": "d124c7cf-a369-46a9-94db-069894145959",
|
440 |
+
"metadata": {},
|
441 |
+
"source": [
|
442 |
+
"We need to convert each sample into a format similar to the one below for each of the scenarios.\n",
|
443 |
+
"\n",
|
444 |
+
"```\n",
|
445 |
+
"[\n",
|
446 |
+
" {\"content\": user_content, \"role\": \"user\"},\n",
|
447 |
+
" {\"content\": assistant_response, \"role\": \"assistant\"}\n",
|
448 |
+
"]\n",
|
449 |
+
"```\n",
|
450 |
+
"\n",
|
451 |
+
"We should include a helpful system_prompt with a general trivia prefix, and a suffix that contains instructions that fit each scenario.\n",
|
452 |
+
"The `user_content` will have the Question and answer choices.\n",
|
453 |
+
"The `assistant_response` should reflect the scenario. "
|
454 |
+
]
|
455 |
+
},
|
456 |
+
{
|
457 |
+
"cell_type": "code",
|
458 |
+
"execution_count": 7,
|
459 |
+
"id": "1c6554a6-4717-4bf0-ae51-102630d40fd7",
|
460 |
+
"metadata": {
|
461 |
+
"tags": []
|
462 |
+
},
|
463 |
+
"outputs": [],
|
464 |
+
"source": [
|
465 |
+
"df['user_prompt'] = df.apply(lambda row: f\"Question: {row['question_text']}\\nAnswer Choices: {row['answer_choices']}\", axis=1)"
|
466 |
+
]
|
467 |
+
},
|
468 |
+
{
|
469 |
+
"cell_type": "markdown",
|
470 |
+
"id": "1391b7e1-2462-41e3-b2f0-5a8f6c95859b",
|
471 |
+
"metadata": {},
|
472 |
+
"source": [
|
473 |
+
"Here we need to create the structure of our conversation. Each system prompt should reflect the instructions we want, so we can start with a prefix and add in the specifics for each scenario."
|
474 |
+
]
|
475 |
+
},
|
476 |
+
{
|
477 |
+
"cell_type": "markdown",
|
478 |
+
"id": "2c817304-9ecc-43d9-83b9-5bb8dd662797",
|
479 |
+
"metadata": {},
|
480 |
+
"source": [
|
481 |
+
"### Reasoning Final Answer Structured Generation"
|
482 |
+
]
|
483 |
+
},
|
484 |
+
{
|
485 |
+
"cell_type": "code",
|
486 |
+
"execution_count": 8,
|
487 |
+
"id": "878727dc-4801-4376-be26-1cc601cb5f92",
|
488 |
+
"metadata": {},
|
489 |
+
"outputs": [
|
490 |
+
{
|
491 |
+
"name": "stdout",
|
492 |
+
"output_type": "stream",
|
493 |
+
"text": [
|
494 |
+
"<s>[INST] Answer the Question and include your Reasoning and the Final Answer in a json like: {\"Reasoning: \"...\", \"Final Answer\": \"x\"} where x is a letter that corresponds to the answer choice which is a letter between a and h.\n",
|
495 |
+
"Question: What is satellite technology used for predicting?\n",
|
496 |
+
"Answer Choices: (a) Seconds and minutes (b) The strength and magnitude of an earthquake (c) What it's like outside each day (d) 70-75 degrees fahrenheit (e) Rapid changes occur (f) Dead-ends and false starts. (g) Snow, ice, and rock (h) Around 5 to 27 degrees celsius[/INST] {'Reasoning': \"a) Seconds and minutes: This option is incorrect because satellite technology is not used for predicting time intervals. Satellite technology is used for various purposes such as communication, navigation, and weather forecasting, but it is not used for predicting time intervals.\\n\\nb) The strength and magnitude of an earthquake: This option is incorrect because satellite technology is not used for predicting earthquakes. Earthquake prediction is a complex process that involves seismology and other scientific methods, but satellite technology is not one of them.\\n\\nd) 70-75 degrees Fahrenheit: This option is incorrect because satellite technology is not used for predicting specific temperature ranges. While satellite technology can provide temperature data, it is not used for predicting specific temperature ranges.\\n\\ne) Rapid changes occur: This option is too vague and does not provide enough information to determine whether it is correct or not. Satellite technology can be used to monitor changes in various environmental factors, but it is not used specifically for predicting rapid changes.\\n\\nf) Dead-ends and false starts: This option is incorrect because it is not related to satellite technology or any type of prediction.\\n\\ng) Snow, ice, and rock: This option is incorrect because it is too specific and does not cover the broad range of predictions that satellite technology can be used for. 
While satellite technology can be used to monitor snow, ice, and rock formations, it is not used exclusively for this purpose.\\n\\nh) Around 5 to 27 degrees Celsius: This option is incorrect because it is too specific and does not cover the broad range of temperature predictions that satellite technology can be used for. While satellite technology can provide temperature data, it is not used exclusively for predicting temperatures within a specific range.\\n\\nTherefore, the correct answer is c) what it's like outside each day, as satellite technology is commonly used for weather forecasting and predicting daily weather conditions.\", 'Final Answer': 'c'}</s>\n",
|
497 |
+
"\n",
|
498 |
+
"\n",
|
499 |
+
"\n",
|
500 |
+
"<s>[INST] Answer the Question and include your Reasoning and the Final Answer in a json like: {\"Reasoning: \"...\", \"Final Answer\": \"x\"} where x is a letter that corresponds to the answer choice which is a letter between a and h.\n",
|
501 |
+
"Question: What is satellite technology used for predicting?\n",
|
502 |
+
"Answer Choices: (a) Seconds and minutes (b) The strength and magnitude of an earthquake (c) What it's like outside each day (d) 70-75 degrees fahrenheit (e) Rapid changes occur (f) Dead-ends and false starts. (g) Snow, ice, and rock (h) Around 5 to 27 degrees celsius[/INST] {'Reasoning': 'Incorrect answers and explanations:\\n\\n1. Electrical energy: Cells are not visible with electrical energy. Cells are physical structures and need to be observed visually, not through electrical means.\\n\\n2. By indirect light or Bright lights: Cells are so small that they are not visible to the naked eye under normal lighting conditions. Indirect light or bright lights are not a viable method to observe cells.\\n\\n3. Camera lenses: Camera lenses are used to focus and capture images, not to observe live objects. In order to observe cells, they must be viewed through a microscope which has a lens.\\n\\n4. Colors: Because cells are transparent, they do not have any specific color. They can appear as white or translucent when observed under a microscope.\\n\\n5. Telescope: A telescope is used for observing distant stars, galaxies, and other celestial objects, not for observing cells which are much smaller and earthbound.\\n\\n6. Rays or beams: Cells cannot be observed using rays or beams. Observation of cells is usually done via light microscopy.\\n\\n7. None of the above do not provide the means to observe cells, as they are not the tools or methods designed for cellular observation.\\n\\nThe correct answer (d) A microscope is the tool that allows us to see cells due to its magnifying capabilities making the cells visible to the naked eye effectively.', 'Final Answer': 'c'}</s>\n"
|
503 |
+
]
|
504 |
+
}
|
505 |
+
],
|
506 |
+
"source": [
|
507 |
+
"# Define system prompt\n",
|
508 |
+
"system_prompt_RFA = 'Answer the Question and include your Reasoning and the Final Answer in a json like: {\"Reasoning: \"...\", \"Final Answer\": \"x\"} where x is a letter that corresponds to the answer choice which is a letter between a and h.'\n",
|
509 |
+
"\n",
|
510 |
+
"df['assistant_prompt_RFA_gpt3_5'] = df.apply(lambda row: {\"Reasoning\": row[\"gpt3_5_reasoning\"].strip(), \"Final Answer\": row[\"answer_key\"]}, axis=1)\n",
|
511 |
+
"df['assistant_prompt_RFA_mistral'] = df.apply(lambda row: {\"Reasoning\": row[\"mistral_reasoning\"].strip(), \"Final Answer\": row[\"answer_key\"]}, axis=1)\n",
|
512 |
+
"\n",
|
513 |
+
"# Step 1: Create user prompt for both gpt3_5 and mistral\n",
|
514 |
+
"df['user_prompt_RFA'] = df.apply(lambda row: {\n",
|
515 |
+
" \"content\": system_prompt_RFA + '\\n' + row['user_prompt'],\n",
|
516 |
+
" \"role\": \"user\"\n",
|
517 |
+
"}, axis=1)\n",
|
518 |
+
"\n",
|
519 |
+
"# Step 2: Create the conversation_RFA_gpt3_5 and conversation_RFA_mistral columns using user_prompt_RFA\n",
|
520 |
+
"df['conversation_RFA_gpt3_5'] = df.apply(lambda row: tokenizer.apply_chat_template([\n",
|
521 |
+
" row['user_prompt_RFA'], # Use the precomputed user prompt\n",
|
522 |
+
" {\"content\": row['assistant_prompt_RFA_gpt3_5'], \"role\": \"assistant\"}\n",
|
523 |
+
"], tokenize=False), axis=1)\n",
|
524 |
+
"\n",
|
525 |
+
"df['conversation_RFA_mistral'] = df.apply(lambda row: tokenizer.apply_chat_template([\n",
|
526 |
+
" row['user_prompt_RFA'], # Use the precomputed user prompt\n",
|
527 |
+
" {\"content\": row['assistant_prompt_RFA_mistral'], \"role\": \"assistant\"}\n",
|
528 |
+
"], tokenize=False), axis=1)\n",
|
529 |
+
"\n",
|
530 |
+
"df['user_prompt_RFA'] = df['user_prompt_RFA'].apply(lambda row: tokenizer.apply_chat_template([row], tokenize=False))\n",
|
531 |
+
"\n",
|
532 |
+
"df.drop(['assistant_prompt_RFA_gpt3_5', 'assistant_prompt_RFA_mistral'], inplace=True, axis=1)\n",
|
533 |
+
"\n",
|
534 |
+
"# Example output\n",
|
535 |
+
"gpt3_5_example = df['conversation_RFA_gpt3_5'].iloc[0]\n",
|
536 |
+
"mistral_example = df['conversation_RFA_mistral'].iloc[0]\n",
|
537 |
+
"\n",
|
538 |
+
"print(gpt3_5_example)\n",
|
539 |
+
"print('\\n\\n')\n",
|
540 |
+
"print(mistral_example)"
|
541 |
+
]
|
542 |
+
},
|
543 |
+
{
|
544 |
+
"cell_type": "markdown",
|
545 |
+
"id": "a1150ae9-cc22-44c6-8a36-0e3d7c16111b",
|
546 |
+
"metadata": {},
|
547 |
+
"source": [
|
548 |
+
"### Final Answer Reasoning Structured Generation"
|
549 |
+
]
|
550 |
+
},
|
551 |
+
{
|
552 |
+
"cell_type": "code",
|
553 |
+
"execution_count": 9,
|
554 |
+
"id": "c0234242-7389-4c28-ae85-78a82498d0ac",
|
555 |
+
"metadata": {
|
556 |
+
"tags": []
|
557 |
+
},
|
558 |
+
"outputs": [
|
559 |
+
{
|
560 |
+
"name": "stdout",
|
561 |
+
"output_type": "stream",
|
562 |
+
"text": [
|
563 |
+
"<s>[INST] Answer the Question and include your Final Answer and the Reasoning in a json like: {\"Final Answer\": \"x\", \"Reasoning: \"...\"} where x is a letter that corresponds to the answer choice which is a letter between a and h.\n",
|
564 |
+
"Question: What is satellite technology used for predicting?\n",
|
565 |
+
"Answer Choices: (a) Seconds and minutes (b) The strength and magnitude of an earthquake (c) What it's like outside each day (d) 70-75 degrees fahrenheit (e) Rapid changes occur (f) Dead-ends and false starts. (g) Snow, ice, and rock (h) Around 5 to 27 degrees celsius[/INST] {'Final Answer': 'c', 'Reasoning': \"a) Seconds and minutes: This option is incorrect because satellite technology is not used for predicting time intervals. Satellite technology is used for various purposes such as communication, navigation, and weather forecasting, but it is not used for predicting time intervals.\\n\\nb) The strength and magnitude of an earthquake: This option is incorrect because satellite technology is not used for predicting earthquakes. Earthquake prediction is a complex process that involves seismology and other scientific methods, but satellite technology is not one of them.\\n\\nd) 70-75 degrees Fahrenheit: This option is incorrect because satellite technology is not used for predicting specific temperature ranges. While satellite technology can provide temperature data, it is not used for predicting specific temperature ranges.\\n\\ne) Rapid changes occur: This option is too vague and does not provide enough information to determine whether it is correct or not. Satellite technology can be used to monitor changes in various environmental factors, but it is not used specifically for predicting rapid changes.\\n\\nf) Dead-ends and false starts: This option is incorrect because it is not related to satellite technology or any type of prediction.\\n\\ng) Snow, ice, and rock: This option is incorrect because it is too specific and does not cover the broad range of predictions that satellite technology can be used for. 
While satellite technology can be used to monitor snow, ice, and rock formations, it is not used exclusively for this purpose.\\n\\nh) Around 5 to 27 degrees Celsius: This option is incorrect because it is too specific and does not cover the broad range of temperature predictions that satellite technology can be used for. While satellite technology can provide temperature data, it is not used exclusively for predicting temperatures within a specific range.\\n\\nTherefore, the correct answer is c) what it's like outside each day, as satellite technology is commonly used for weather forecasting and predicting daily weather conditions.\"}</s>\n",
|
566 |
+
"\n",
|
567 |
+
"\n",
|
568 |
+
"\n",
|
569 |
+
"<s>[INST] Answer the Question and include your Final Answer and the Reasoning in a json like: {\"Final Answer\": \"x\", \"Reasoning: \"...\"} where x is a letter that corresponds to the answer choice which is a letter between a and h.\n",
|
570 |
+
"Question: What is satellite technology used for predicting?\n",
|
571 |
+
"Answer Choices: (a) Seconds and minutes (b) The strength and magnitude of an earthquake (c) What it's like outside each day (d) 70-75 degrees fahrenheit (e) Rapid changes occur (f) Dead-ends and false starts. (g) Snow, ice, and rock (h) Around 5 to 27 degrees celsius[/INST] {'Final Answer': 'c', 'Reasoning': 'Incorrect answers and explanations:\\n\\n1. Electrical energy: Cells are not visible with electrical energy. Cells are physical structures and need to be observed visually, not through electrical means.\\n\\n2. By indirect light or Bright lights: Cells are so small that they are not visible to the naked eye under normal lighting conditions. Indirect light or bright lights are not a viable method to observe cells.\\n\\n3. Camera lenses: Camera lenses are used to focus and capture images, not to observe live objects. In order to observe cells, they must be viewed through a microscope which has a lens.\\n\\n4. Colors: Because cells are transparent, they do not have any specific color. They can appear as white or translucent when observed under a microscope.\\n\\n5. Telescope: A telescope is used for observing distant stars, galaxies, and other celestial objects, not for observing cells which are much smaller and earthbound.\\n\\n6. Rays or beams: Cells cannot be observed using rays or beams. Observation of cells is usually done via light microscopy.\\n\\n7. None of the above do not provide the means to observe cells, as they are not the tools or methods designed for cellular observation.\\n\\nThe correct answer (d) A microscope is the tool that allows us to see cells due to its magnifying capabilities making the cells visible to the naked eye effectively.'}</s>\n"
|
572 |
+
]
|
573 |
+
}
|
574 |
+
],
|
575 |
+
"source": [
|
576 |
+
"system_prompt_FAR = 'Answer the Question and include your Final Answer and the Reasoning in a json like: {\"Final Answer\": \"x\", \"Reasoning: \"...\"} where x is a letter that corresponds to the answer choice which is a letter between a and h.'\n",
|
577 |
+
"\n",
|
578 |
+
"df['assistant_prompt_FAR_gpt3_5'] = df.apply(lambda row: {\"Final Answer\": row[\"answer_key\"], \"Reasoning\": row[\"gpt3_5_reasoning\"].strip()}, axis=1)\n",
|
579 |
+
"df['assistant_prompt_FAR_mistral'] = df.apply(lambda row: {\"Final Answer\": row[\"answer_key\"], \"Reasoning\": row[\"mistral_reasoning\"].strip()}, axis=1)\n",
|
580 |
+
"\n",
|
581 |
+
"# Step 1: Create user_prompt_FAR column\n",
|
582 |
+
"df['user_prompt_FAR'] = df.apply(lambda row: {\n",
|
583 |
+
" \"content\": system_prompt_FAR + '\\n' + row['user_prompt'],\n",
|
584 |
+
" \"role\": \"user\"\n",
|
585 |
+
"}, axis=1)\n",
|
586 |
+
"\n",
|
587 |
+
"# Step 2: Create the conversation_FAR_gpt3_5 and conversation_FAR_mistral columns using user_prompt_FAR\n",
|
588 |
+
"df['conversation_FAR_gpt3_5'] = df.apply(lambda row: tokenizer.apply_chat_template([\n",
|
589 |
+
" row['user_prompt_FAR'], # Use the precomputed user prompt\n",
|
590 |
+
" {\"content\": row['assistant_prompt_FAR_gpt3_5'], \"role\": \"assistant\"}\n",
|
591 |
+
"], tokenize=False), axis=1)\n",
|
592 |
+
"\n",
|
593 |
+
"df['conversation_FAR_mistral'] = df.apply(lambda row: tokenizer.apply_chat_template([\n",
|
594 |
+
" row['user_prompt_FAR'], # Use the precomputed user prompt\n",
|
595 |
+
" {\"content\": row['assistant_prompt_FAR_mistral'], \"role\": \"assistant\"}\n",
|
596 |
+
"], tokenize=False), axis=1)\n",
|
597 |
+
"\n",
|
598 |
+
"df['user_prompt_FAR'] = df['user_prompt_FAR'].apply(lambda row: tokenizer.apply_chat_template([row], tokenize=False))\n",
|
599 |
+
"\n",
|
600 |
+
"df.drop(['assistant_prompt_FAR_gpt3_5', 'assistant_prompt_FAR_mistral'], inplace=True, axis=1)\n",
|
601 |
+
"\n",
|
602 |
+
"# Example output\n",
|
603 |
+
"gpt3_5_example = df['conversation_FAR_gpt3_5'].iloc[0]\n",
|
604 |
+
"mistral_example = df['conversation_FAR_mistral'].iloc[0]\n",
|
605 |
+
"\n",
|
606 |
+
"print(gpt3_5_example)\n",
|
607 |
+
"print('\\n\\n')\n",
|
608 |
+
"print(mistral_example)"
|
609 |
+
]
|
610 |
+
},
|
611 |
+
{
|
612 |
+
"cell_type": "markdown",
|
613 |
+
"id": "2c9f0619-1467-4b4e-ad69-b5ca2c4e58a7",
|
614 |
+
"metadata": {},
|
615 |
+
"source": [
|
616 |
+
"### Final Answer Structured Generation"
|
617 |
+
]
|
618 |
+
},
|
619 |
+
{
|
620 |
+
"cell_type": "code",
|
621 |
+
"execution_count": 10,
|
622 |
+
"id": "64dd3601-a40e-478d-97a5-5728005e5787",
|
623 |
+
"metadata": {
|
624 |
+
"tags": []
|
625 |
+
},
|
626 |
+
"outputs": [
|
627 |
+
{
|
628 |
+
"name": "stdout",
|
629 |
+
"output_type": "stream",
|
630 |
+
"text": [
|
631 |
+
"<s>[INST] Answer the Question and include your Final Answer in a json like: {\"Final Answer\": \"x\"} where x is a letter that corresponds to the answer choice which is a letter between a and h.\n",
|
632 |
+
"Question: What is satellite technology used for predicting?\n",
|
633 |
+
"Answer Choices: (a) Seconds and minutes (b) The strength and magnitude of an earthquake (c) What it's like outside each day (d) 70-75 degrees fahrenheit (e) Rapid changes occur (f) Dead-ends and false starts. (g) Snow, ice, and rock (h) Around 5 to 27 degrees celsius[/INST] {'Final Answer': 'c'}</s>\n"
|
634 |
+
]
|
635 |
+
}
|
636 |
+
],
|
637 |
+
"source": [
|
638 |
+
"system_prompt_FA = 'Answer the Question and include your Final Answer in a json like: {\"Final Answer\": \"x\"} where x is a letter that corresponds to the answer choice which is a letter between a and h.'\n",
|
639 |
+
"df['assistant_prompt_FA'] = df.apply(lambda row: {\"Final Answer\": row[\"answer_key\"]}, axis=1)\n",
|
640 |
+
"\n",
|
641 |
+
"# Step 1: Create user_prompt_FA column\n",
|
642 |
+
"df['user_prompt_FA'] = df.apply(lambda row: {\n",
|
643 |
+
" \"content\": system_prompt_FA + '\\n' + row['user_prompt'],\n",
|
644 |
+
" \"role\": \"user\"\n",
|
645 |
+
"}, axis=1)\n",
|
646 |
+
"\n",
|
647 |
+
"# Step 2: Create conversation_FA column using user_prompt_FA\n",
|
648 |
+
"df['conversation_FA'] = df.apply(lambda row: tokenizer.apply_chat_template([\n",
|
649 |
+
" row['user_prompt_FA'], # Use the precomputed user prompt\n",
|
650 |
+
" {\"content\": row['assistant_prompt_FA'], \"role\": \"assistant\"}\n",
|
651 |
+
" # {\"content\": json.dumps(row['assistant_prompt_FA']), \"role\": \"assistant\"}\n",
|
652 |
+
"], tokenize=False), axis=1)\n",
|
653 |
+
"\n",
|
654 |
+
"\n",
|
655 |
+
"df['user_prompt_FA'] = df['user_prompt_FA'].apply(lambda row: tokenizer.apply_chat_template([row], tokenize=False))\n",
|
656 |
+
"\n",
|
657 |
+
"df.drop(['assistant_prompt_FA'], inplace=True, axis=1)\n",
|
658 |
+
"\n",
|
659 |
+
"\n",
|
660 |
+
"# Example output\n",
|
661 |
+
"example = df['conversation_FA'].iloc[0]\n",
|
662 |
+
"\n",
|
663 |
+
"print(example)"
|
664 |
+
]
|
665 |
+
},
|
666 |
+
{
|
667 |
+
"cell_type": "markdown",
|
668 |
+
"id": "ad51b99b-cf67-43d8-833c-5ec57d9d4613",
|
669 |
+
"metadata": {},
|
670 |
+
"source": [
|
671 |
+
"### Cleanup"
|
672 |
+
]
|
673 |
+
},
|
674 |
+
{
|
675 |
+
"cell_type": "code",
|
676 |
+
"execution_count": 11,
|
677 |
+
"id": "69a687d5-35ab-4abb-8bb8-f975fa7be3f7",
|
678 |
+
"metadata": {
|
679 |
+
"tags": []
|
680 |
+
},
|
681 |
+
"outputs": [
|
682 |
+
{
|
683 |
+
"data": {
|
684 |
+
"text/plain": [
|
685 |
+
"Index(['formatted_question', 'combined_fact', 'answer_key', 'topic',\n",
|
686 |
+
" 'gpt3_5_reasoning', 'question_text', 'answer_choices',\n",
|
687 |
+
" 'mistral_reasoning', 'user_prompt', 'user_prompt_RFA',\n",
|
688 |
+
" 'conversation_RFA_gpt3_5', 'conversation_RFA_mistral',\n",
|
689 |
+
" 'user_prompt_FAR', 'conversation_FAR_gpt3_5',\n",
|
690 |
+
" 'conversation_FAR_mistral', 'user_prompt_FA', 'conversation_FA'],\n",
|
691 |
+
" dtype='object')"
|
692 |
+
]
|
693 |
+
},
|
694 |
+
"execution_count": 11,
|
695 |
+
"metadata": {},
|
696 |
+
"output_type": "execute_result"
|
697 |
+
}
|
698 |
+
],
|
699 |
+
"source": [
|
700 |
+
"df.columns"
|
701 |
+
]
|
702 |
+
},
|
703 |
+
{
|
704 |
+
"cell_type": "code",
|
705 |
+
"execution_count": 12,
|
706 |
+
"id": "6ec3b6b5-a359-4da8-98c8-4dae75b8a2d4",
|
707 |
+
"metadata": {
|
708 |
+
"tags": []
|
709 |
+
},
|
710 |
+
"outputs": [],
|
711 |
+
"source": [
|
712 |
+
"df = df[['topic', 'question_text', 'answer_key', 'gpt3_5_reasoning', 'mistral_reasoning', 'answer_choices', 'user_prompt', \n",
|
713 |
+
" 'user_prompt_RFA', 'conversation_RFA_gpt3_5', 'conversation_RFA_mistral',\n",
|
714 |
+
" 'user_prompt_FAR', 'conversation_FAR_gpt3_5', 'conversation_FAR_mistral',\n",
|
715 |
+
" 'user_prompt_FA', 'conversation_FA']]"
|
716 |
+
]
|
717 |
+
},
|
718 |
+
{
|
719 |
+
"cell_type": "code",
|
720 |
+
"execution_count": 13,
|
721 |
+
"id": "f9f3a5a8-e4ea-4ed9-9fc0-fdd1a19b1c92",
|
722 |
+
"metadata": {
|
723 |
+
"tags": []
|
724 |
+
},
|
725 |
+
"outputs": [
|
726 |
+
{
|
727 |
+
"data": {
|
728 |
+
"text/html": [
|
729 |
+
"<div>\n",
|
730 |
+
"<style scoped>\n",
|
731 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
732 |
+
" vertical-align: middle;\n",
|
733 |
+
" }\n",
|
734 |
+
"\n",
|
735 |
+
" .dataframe tbody tr th {\n",
|
736 |
+
" vertical-align: top;\n",
|
737 |
+
" }\n",
|
738 |
+
"\n",
|
739 |
+
" .dataframe thead th {\n",
|
740 |
+
" text-align: right;\n",
|
741 |
+
" }\n",
|
742 |
+
"</style>\n",
|
743 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
744 |
+
" <thead>\n",
|
745 |
+
" <tr style=\"text-align: right;\">\n",
|
746 |
+
" <th></th>\n",
|
747 |
+
" <th>topic</th>\n",
|
748 |
+
" <th>question_text</th>\n",
|
749 |
+
" <th>answer_key</th>\n",
|
750 |
+
" <th>gpt3_5_reasoning</th>\n",
|
751 |
+
" <th>mistral_reasoning</th>\n",
|
752 |
+
" <th>answer_choices</th>\n",
|
753 |
+
" <th>user_prompt</th>\n",
|
754 |
+
" <th>user_prompt_RFA</th>\n",
|
755 |
+
" <th>conversation_RFA_gpt3_5</th>\n",
|
756 |
+
" <th>conversation_RFA_mistral</th>\n",
|
757 |
+
" <th>user_prompt_FAR</th>\n",
|
758 |
+
" <th>conversation_FAR_gpt3_5</th>\n",
|
759 |
+
" <th>conversation_FAR_mistral</th>\n",
|
760 |
+
" <th>user_prompt_FA</th>\n",
|
761 |
+
" <th>conversation_FA</th>\n",
|
762 |
+
" </tr>\n",
|
763 |
+
" </thead>\n",
|
764 |
+
" <tbody>\n",
|
765 |
+
" <tr>\n",
|
766 |
+
" <th>0</th>\n",
|
767 |
+
" <td>Technology</td>\n",
|
768 |
+
" <td>What is satellite technology used for predicting?</td>\n",
|
769 |
+
" <td>c</td>\n",
|
770 |
+
" <td>a) Seconds and minutes: This option is incorre...</td>\n",
|
771 |
+
" <td>Incorrect answers and explanations:\\n\\n1. Elec...</td>\n",
|
772 |
+
" <td>(a) Seconds and minutes (b) The strength and m...</td>\n",
|
773 |
+
" <td>Question: What is satellite technology used fo...</td>\n",
|
774 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
775 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
776 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
777 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
778 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
779 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
780 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
781 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
782 |
+
" </tr>\n",
|
783 |
+
" <tr>\n",
|
784 |
+
" <th>1</th>\n",
|
785 |
+
" <td>Food science</td>\n",
|
786 |
+
" <td>What does irradiating food do?</td>\n",
|
787 |
+
" <td>c</td>\n",
|
788 |
+
" <td>(a) Relieve pain: This option is not correct b...</td>\n",
|
789 |
+
" <td>Sure, let's examine each answer and justify wh...</td>\n",
|
790 |
+
" <td>(a) Relieve pain (b) Enhance food's nutrients ...</td>\n",
|
791 |
+
" <td>Question: What does irradiating food do?\\nAnsw...</td>\n",
|
792 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
793 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
794 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
795 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
796 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
797 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
798 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
799 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
800 |
+
" </tr>\n",
|
801 |
+
" <tr>\n",
|
802 |
+
" <th>2</th>\n",
|
803 |
+
" <td>Biology</td>\n",
|
804 |
+
" <td>What protects a mammal's skin?</td>\n",
|
805 |
+
" <td>a</td>\n",
|
806 |
+
" <td>b) Exfoliation: Exfoliation is the process of ...</td>\n",
|
807 |
+
" <td>Sure, let's go through each of the provided an...</td>\n",
|
808 |
+
" <td>(a) Fiber follicles (b) Exfoliation (c) Resist...</td>\n",
|
809 |
+
" <td>Question: What protects a mammal's skin?\\nAnsw...</td>\n",
|
810 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
811 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
812 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
813 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
814 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
815 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
816 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
817 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
818 |
+
" </tr>\n",
|
819 |
+
" <tr>\n",
|
820 |
+
" <th>3</th>\n",
|
821 |
+
" <td>Biology</td>\n",
|
822 |
+
" <td>What do earthworms do when a segment breaks off?</td>\n",
|
823 |
+
" <td>b</td>\n",
|
824 |
+
" <td>a) Dies: This option is not correct because ea...</td>\n",
|
825 |
+
" <td>1. Reading the question carefully, we can see ...</td>\n",
|
826 |
+
" <td>(a) Dies (b) Regrows it (c) Reproduces (d) Sed...</td>\n",
|
827 |
+
" <td>Question: What do earthworms do when a segment...</td>\n",
|
828 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
829 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
830 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
831 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
832 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
833 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
834 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
835 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
836 |
+
" </tr>\n",
|
837 |
+
" <tr>\n",
|
838 |
+
" <th>4</th>\n",
|
839 |
+
" <td>Electricity</td>\n",
|
840 |
+
" <td>Lightning can be bad for what?</td>\n",
|
841 |
+
" <td>a</td>\n",
|
842 |
+
" <td>b) Rainstorms: Lightning is actually a natural...</td>\n",
|
843 |
+
" <td>1. Food: While essential for the growth and he...</td>\n",
|
844 |
+
" <td>(a) The environment (b) Rainstorms (c) Destruc...</td>\n",
|
845 |
+
" <td>Question: Lightning can be bad for what?\\nAnsw...</td>\n",
|
846 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
847 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
848 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
849 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
850 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
851 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
852 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
853 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
854 |
+
" </tr>\n",
|
855 |
+
" <tr>\n",
|
856 |
+
" <th>...</th>\n",
|
857 |
+
" <td>...</td>\n",
|
858 |
+
" <td>...</td>\n",
|
859 |
+
" <td>...</td>\n",
|
860 |
+
" <td>...</td>\n",
|
861 |
+
" <td>...</td>\n",
|
862 |
+
" <td>...</td>\n",
|
863 |
+
" <td>...</td>\n",
|
864 |
+
" <td>...</td>\n",
|
865 |
+
" <td>...</td>\n",
|
866 |
+
" <td>...</td>\n",
|
867 |
+
" <td>...</td>\n",
|
868 |
+
" <td>...</td>\n",
|
869 |
+
" <td>...</td>\n",
|
870 |
+
" <td>...</td>\n",
|
871 |
+
" <td>...</td>\n",
|
872 |
+
" </tr>\n",
|
873 |
+
" <tr>\n",
|
874 |
+
" <th>8408</th>\n",
|
875 |
+
" <td>Biology</td>\n",
|
876 |
+
" <td>Organisms that can cause infection do what?</td>\n",
|
877 |
+
" <td>g</td>\n",
|
878 |
+
" <td>a) Bandaging open sores is not the correct ans...</td>\n",
|
879 |
+
" <td>1. Read the question and options carefully: Th...</td>\n",
|
880 |
+
" <td>(a) Bandage open sores (b) Keep flesh clean (c...</td>\n",
|
881 |
+
" <td>Question: Organisms that can cause infection d...</td>\n",
|
882 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
883 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
884 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
885 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
886 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
887 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
888 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
889 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
890 |
+
" </tr>\n",
|
891 |
+
" <tr>\n",
|
892 |
+
" <th>8409</th>\n",
|
893 |
+
" <td>Biology</td>\n",
|
894 |
+
" <td>Fungi are living things that cannot make their...</td>\n",
|
895 |
+
" <td>a</td>\n",
|
896 |
+
" <td>b) Fungi are living things that can make their...</td>\n",
|
897 |
+
" <td>1. Read the question and options carefully: Th...</td>\n",
|
898 |
+
" <td>(a) Food (b) Cells (c) Energy (d) Fruits (e) H...</td>\n",
|
899 |
+
" <td>Question: Fungi are living things that cannot ...</td>\n",
|
900 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
901 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
902 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
903 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
904 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
905 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
906 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
907 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
908 |
+
" </tr>\n",
|
909 |
+
" <tr>\n",
|
910 |
+
" <th>8410</th>\n",
|
911 |
+
" <td>Biology</td>\n",
|
912 |
+
" <td>An overheated body can use water for:?</td>\n",
|
913 |
+
" <td>g</td>\n",
|
914 |
+
" <td>a) Metabolic reaction: This option is incorrec...</td>\n",
|
915 |
+
" <td>1. Read the question and options carefully: Th...</td>\n",
|
916 |
+
" <td>(a) Metabolic reaction (b) Dehydrating (c) Rai...</td>\n",
|
917 |
+
" <td>Question: An overheated body can use water for...</td>\n",
|
918 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
919 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
920 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
921 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
922 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
923 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
924 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
925 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
926 |
+
" </tr>\n",
|
927 |
+
" <tr>\n",
|
928 |
+
" <th>8411</th>\n",
|
929 |
+
" <td>Biology</td>\n",
|
930 |
+
" <td>What is essential for cellular respiration for...</td>\n",
|
931 |
+
" <td>f</td>\n",
|
932 |
+
" <td>a) Electrons are involved in cellular respirat...</td>\n",
|
933 |
+
" <td>1. First, let's read the question and options ...</td>\n",
|
934 |
+
" <td>(a) Electron (b) Glucose (c) Energy (d) Energy...</td>\n",
|
935 |
+
" <td>Question: What is essential for cellular respi...</td>\n",
|
936 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
937 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
938 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
939 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
940 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
941 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
942 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
943 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
944 |
+
" </tr>\n",
|
945 |
+
" <tr>\n",
|
946 |
+
" <th>8412</th>\n",
|
947 |
+
" <td>Biology</td>\n",
|
948 |
+
" <td>What helps insulate and protect the body?</td>\n",
|
949 |
+
" <td>b</td>\n",
|
950 |
+
" <td>a) H2O: Water is essential for life, but it do...</td>\n",
|
951 |
+
" <td>1. Read the question and options carefully: Th...</td>\n",
|
952 |
+
" <td>(a) H2o (b) Living cells in follicles (c) Laye...</td>\n",
|
953 |
+
" <td>Question: What helps insulate and protect the ...</td>\n",
|
954 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
955 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
956 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
957 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
958 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
959 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
960 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
961 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
962 |
+
" </tr>\n",
|
963 |
+
" </tbody>\n",
|
964 |
+
"</table>\n",
|
965 |
+
"<p>8413 rows × 15 columns</p>\n",
|
966 |
+
"</div>"
|
967 |
+
],
|
968 |
+
"text/plain": [
|
969 |
+
" topic question_text \\\n",
|
970 |
+
"0 Technology What is satellite technology used for predicting? \n",
|
971 |
+
"1 Food science What does irradiating food do? \n",
|
972 |
+
"2 Biology What protects a mammal's skin? \n",
|
973 |
+
"3 Biology What do earthworms do when a segment breaks off? \n",
|
974 |
+
"4 Electricity Lightning can be bad for what? \n",
|
975 |
+
"... ... ... \n",
|
976 |
+
"8408 Biology Organisms that can cause infection do what? \n",
|
977 |
+
"8409 Biology Fungi are living things that cannot make their... \n",
|
978 |
+
"8410 Biology An overheated body can use water for:? \n",
|
979 |
+
"8411 Biology What is essential for cellular respiration for... \n",
|
980 |
+
"8412 Biology What helps insulate and protect the body? \n",
|
981 |
+
"\n",
|
982 |
+
" answer_key gpt3_5_reasoning \\\n",
|
983 |
+
"0 c a) Seconds and minutes: This option is incorre... \n",
|
984 |
+
"1 c (a) Relieve pain: This option is not correct b... \n",
|
985 |
+
"2 a b) Exfoliation: Exfoliation is the process of ... \n",
|
986 |
+
"3 b a) Dies: This option is not correct because ea... \n",
|
987 |
+
"4 a b) Rainstorms: Lightning is actually a natural... \n",
|
988 |
+
"... ... ... \n",
|
989 |
+
"8408 g a) Bandaging open sores is not the correct ans... \n",
|
990 |
+
"8409 a b) Fungi are living things that can make their... \n",
|
991 |
+
"8410 g a) Metabolic reaction: This option is incorrec... \n",
|
992 |
+
"8411 f a) Electrons are involved in cellular respirat... \n",
|
993 |
+
"8412 b a) H2O: Water is essential for life, but it do... \n",
|
994 |
+
"\n",
|
995 |
+
" mistral_reasoning \\\n",
|
996 |
+
"0 Incorrect answers and explanations:\\n\\n1. Elec... \n",
|
997 |
+
"1 Sure, let's examine each answer and justify wh... \n",
|
998 |
+
"2 Sure, let's go through each of the provided an... \n",
|
999 |
+
"3 1. Reading the question carefully, we can see ... \n",
|
1000 |
+
"4 1. Food: While essential for the growth and he... \n",
|
1001 |
+
"... ... \n",
|
1002 |
+
"8408 1. Read the question and options carefully: Th... \n",
|
1003 |
+
"8409 1. Read the question and options carefully: Th... \n",
|
1004 |
+
"8410 1. Read the question and options carefully: Th... \n",
|
1005 |
+
"8411 1. First, let's read the question and options ... \n",
|
1006 |
+
"8412 1. Read the question and options carefully: Th... \n",
|
1007 |
+
"\n",
|
1008 |
+
" answer_choices \\\n",
|
1009 |
+
"0 (a) Seconds and minutes (b) The strength and m... \n",
|
1010 |
+
"1 (a) Relieve pain (b) Enhance food's nutrients ... \n",
|
1011 |
+
"2 (a) Fiber follicles (b) Exfoliation (c) Resist... \n",
|
1012 |
+
"3 (a) Dies (b) Regrows it (c) Reproduces (d) Sed... \n",
|
1013 |
+
"4 (a) The environment (b) Rainstorms (c) Destruc... \n",
|
1014 |
+
"... ... \n",
|
1015 |
+
"8408 (a) Bandage open sores (b) Keep flesh clean (c... \n",
|
1016 |
+
"8409 (a) Food (b) Cells (c) Energy (d) Fruits (e) H... \n",
|
1017 |
+
"8410 (a) Metabolic reaction (b) Dehydrating (c) Rai... \n",
|
1018 |
+
"8411 (a) Electron (b) Glucose (c) Energy (d) Energy... \n",
|
1019 |
+
"8412 (a) H2o (b) Living cells in follicles (c) Laye... \n",
|
1020 |
+
"\n",
|
1021 |
+
" user_prompt \\\n",
|
1022 |
+
"0 Question: What is satellite technology used fo... \n",
|
1023 |
+
"1 Question: What does irradiating food do?\\nAnsw... \n",
|
1024 |
+
"2 Question: What protects a mammal's skin?\\nAnsw... \n",
|
1025 |
+
"3 Question: What do earthworms do when a segment... \n",
|
1026 |
+
"4 Question: Lightning can be bad for what?\\nAnsw... \n",
|
1027 |
+
"... ... \n",
|
1028 |
+
"8408 Question: Organisms that can cause infection d... \n",
|
1029 |
+
"8409 Question: Fungi are living things that cannot ... \n",
|
1030 |
+
"8410 Question: An overheated body can use water for... \n",
|
1031 |
+
"8411 Question: What is essential for cellular respi... \n",
|
1032 |
+
"8412 Question: What helps insulate and protect the ... \n",
|
1033 |
+
"\n",
|
1034 |
+
" user_prompt_RFA \\\n",
|
1035 |
+
"0 <s>[INST] Answer the Question and include your... \n",
|
1036 |
+
"1 <s>[INST] Answer the Question and include your... \n",
|
1037 |
+
"2 <s>[INST] Answer the Question and include your... \n",
|
1038 |
+
"3 <s>[INST] Answer the Question and include your... \n",
|
1039 |
+
"4 <s>[INST] Answer the Question and include your... \n",
|
1040 |
+
"... ... \n",
|
1041 |
+
"8408 <s>[INST] Answer the Question and include your... \n",
|
1042 |
+
"8409 <s>[INST] Answer the Question and include your... \n",
|
1043 |
+
"8410 <s>[INST] Answer the Question and include your... \n",
|
1044 |
+
"8411 <s>[INST] Answer the Question and include your... \n",
|
1045 |
+
"8412 <s>[INST] Answer the Question and include your... \n",
|
1046 |
+
"\n",
|
1047 |
+
" conversation_RFA_gpt3_5 \\\n",
|
1048 |
+
"0 <s>[INST] Answer the Question and include your... \n",
|
1049 |
+
"1 <s>[INST] Answer the Question and include your... \n",
|
1050 |
+
"2 <s>[INST] Answer the Question and include your... \n",
|
1051 |
+
"3 <s>[INST] Answer the Question and include your... \n",
|
1052 |
+
"4 <s>[INST] Answer the Question and include your... \n",
|
1053 |
+
"... ... \n",
|
1054 |
+
"8408 <s>[INST] Answer the Question and include your... \n",
|
1055 |
+
"8409 <s>[INST] Answer the Question and include your... \n",
|
1056 |
+
"8410 <s>[INST] Answer the Question and include your... \n",
|
1057 |
+
"8411 <s>[INST] Answer the Question and include your... \n",
|
1058 |
+
"8412 <s>[INST] Answer the Question and include your... \n",
|
1059 |
+
"\n",
|
1060 |
+
" conversation_RFA_mistral \\\n",
|
1061 |
+
"0 <s>[INST] Answer the Question and include your... \n",
|
1062 |
+
"1 <s>[INST] Answer the Question and include your... \n",
|
1063 |
+
"2 <s>[INST] Answer the Question and include your... \n",
|
1064 |
+
"3 <s>[INST] Answer the Question and include your... \n",
|
1065 |
+
"4 <s>[INST] Answer the Question and include your... \n",
|
1066 |
+
"... ... \n",
|
1067 |
+
"8408 <s>[INST] Answer the Question and include your... \n",
|
1068 |
+
"8409 <s>[INST] Answer the Question and include your... \n",
|
1069 |
+
"8410 <s>[INST] Answer the Question and include your... \n",
|
1070 |
+
"8411 <s>[INST] Answer the Question and include your... \n",
|
1071 |
+
"8412 <s>[INST] Answer the Question and include your... \n",
|
1072 |
+
"\n",
|
1073 |
+
" user_prompt_FAR \\\n",
|
1074 |
+
"0 <s>[INST] Answer the Question and include your... \n",
|
1075 |
+
"1 <s>[INST] Answer the Question and include your... \n",
|
1076 |
+
"2 <s>[INST] Answer the Question and include your... \n",
|
1077 |
+
"3 <s>[INST] Answer the Question and include your... \n",
|
1078 |
+
"4 <s>[INST] Answer the Question and include your... \n",
|
1079 |
+
"... ... \n",
|
1080 |
+
"8408 <s>[INST] Answer the Question and include your... \n",
|
1081 |
+
"8409 <s>[INST] Answer the Question and include your... \n",
|
1082 |
+
"8410 <s>[INST] Answer the Question and include your... \n",
|
1083 |
+
"8411 <s>[INST] Answer the Question and include your... \n",
|
1084 |
+
"8412 <s>[INST] Answer the Question and include your... \n",
|
1085 |
+
"\n",
|
1086 |
+
" conversation_FAR_gpt3_5 \\\n",
|
1087 |
+
"0 <s>[INST] Answer the Question and include your... \n",
|
1088 |
+
"1 <s>[INST] Answer the Question and include your... \n",
|
1089 |
+
"2 <s>[INST] Answer the Question and include your... \n",
|
1090 |
+
"3 <s>[INST] Answer the Question and include your... \n",
|
1091 |
+
"4 <s>[INST] Answer the Question and include your... \n",
|
1092 |
+
"... ... \n",
|
1093 |
+
"8408 <s>[INST] Answer the Question and include your... \n",
|
1094 |
+
"8409 <s>[INST] Answer the Question and include your... \n",
|
1095 |
+
"8410 <s>[INST] Answer the Question and include your... \n",
|
1096 |
+
"8411 <s>[INST] Answer the Question and include your... \n",
|
1097 |
+
"8412 <s>[INST] Answer the Question and include your... \n",
|
1098 |
+
"\n",
|
1099 |
+
" conversation_FAR_mistral \\\n",
|
1100 |
+
"0 <s>[INST] Answer the Question and include your... \n",
|
1101 |
+
"1 <s>[INST] Answer the Question and include your... \n",
|
1102 |
+
"2 <s>[INST] Answer the Question and include your... \n",
|
1103 |
+
"3 <s>[INST] Answer the Question and include your... \n",
|
1104 |
+
"4 <s>[INST] Answer the Question and include your... \n",
|
1105 |
+
"... ... \n",
|
1106 |
+
"8408 <s>[INST] Answer the Question and include your... \n",
|
1107 |
+
"8409 <s>[INST] Answer the Question and include your... \n",
|
1108 |
+
"8410 <s>[INST] Answer the Question and include your... \n",
|
1109 |
+
"8411 <s>[INST] Answer the Question and include your... \n",
|
1110 |
+
"8412 <s>[INST] Answer the Question and include your... \n",
|
1111 |
+
"\n",
|
1112 |
+
" user_prompt_FA \\\n",
|
1113 |
+
"0 <s>[INST] Answer the Question and include your... \n",
|
1114 |
+
"1 <s>[INST] Answer the Question and include your... \n",
|
1115 |
+
"2 <s>[INST] Answer the Question and include your... \n",
|
1116 |
+
"3 <s>[INST] Answer the Question and include your... \n",
|
1117 |
+
"4 <s>[INST] Answer the Question and include your... \n",
|
1118 |
+
"... ... \n",
|
1119 |
+
"8408 <s>[INST] Answer the Question and include your... \n",
|
1120 |
+
"8409 <s>[INST] Answer the Question and include your... \n",
|
1121 |
+
"8410 <s>[INST] Answer the Question and include your... \n",
|
1122 |
+
"8411 <s>[INST] Answer the Question and include your... \n",
|
1123 |
+
"8412 <s>[INST] Answer the Question and include your... \n",
|
1124 |
+
"\n",
|
1125 |
+
" conversation_FA \n",
|
1126 |
+
"0 <s>[INST] Answer the Question and include your... \n",
|
1127 |
+
"1 <s>[INST] Answer the Question and include your... \n",
|
1128 |
+
"2 <s>[INST] Answer the Question and include your... \n",
|
1129 |
+
"3 <s>[INST] Answer the Question and include your... \n",
|
1130 |
+
"4 <s>[INST] Answer the Question and include your... \n",
|
1131 |
+
"... ... \n",
|
1132 |
+
"8408 <s>[INST] Answer the Question and include your... \n",
|
1133 |
+
"8409 <s>[INST] Answer the Question and include your... \n",
|
1134 |
+
"8410 <s>[INST] Answer the Question and include your... \n",
|
1135 |
+
"8411 <s>[INST] Answer the Question and include your... \n",
|
1136 |
+
"8412 <s>[INST] Answer the Question and include your... \n",
|
1137 |
+
"\n",
|
1138 |
+
"[8413 rows x 15 columns]"
|
1139 |
+
]
|
1140 |
+
},
|
1141 |
+
"execution_count": 13,
|
1142 |
+
"metadata": {},
|
1143 |
+
"output_type": "execute_result"
|
1144 |
+
}
|
1145 |
+
],
|
1146 |
+
"source": [
|
1147 |
+
"df"
|
1148 |
+
]
|
1149 |
+
},
|
1150 |
+
{
|
1151 |
+
"cell_type": "markdown",
|
1152 |
+
"id": "9fe52e77-0985-49d4-964e-7a17372d7007",
|
1153 |
+
"metadata": {},
|
1154 |
+
"source": [
|
1155 |
+
"## Explore Prompts\n",
|
1156 |
+
"Gradio can be really useful for quick inline apps. Here I want to make sure everything is as I expect.\n",
|
1157 |
+
"\n",
|
1158 |
+
"While the above print statements helped me see the format, the gradio app helps me explore a large volume of output easily. \n",
|
1159 |
+
"\n",
|
1160 |
+
"Note: It's tricky, as I can't easily render newlines in strings, so be careful!"
|
1161 |
+
]
|
1162 |
+
},
|
1163 |
+
{
|
1164 |
+
"cell_type": "code",
|
1165 |
+
"execution_count": 14,
|
1166 |
+
"id": "a50d9d6c-18e6-476d-9a40-ed7a3f699477",
|
1167 |
+
"metadata": {
|
1168 |
+
"tags": []
|
1169 |
+
},
|
1170 |
+
"outputs": [
|
1171 |
+
{
|
1172 |
+
"name": "stdout",
|
1173 |
+
"output_type": "stream",
|
1174 |
+
"text": [
|
1175 |
+
"* Running on local URL: http://127.0.0.1:7861\n",
|
1176 |
+
"\n",
|
1177 |
+
"To create a public link, set `share=True` in `launch()`.\n"
|
1178 |
+
]
|
1179 |
+
},
|
1180 |
+
{
|
1181 |
+
"data": {
|
1182 |
+
"text/html": [
|
1183 |
+
"<div><iframe src=\"http://127.0.0.1:7861/\" width=\"100%\" height=\"840\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
1184 |
+
],
|
1185 |
+
"text/plain": [
|
1186 |
+
"<IPython.core.display.HTML object>"
|
1187 |
+
]
|
1188 |
+
},
|
1189 |
+
"metadata": {},
|
1190 |
+
"output_type": "display_data"
|
1191 |
+
},
|
1192 |
+
{
|
1193 |
+
"data": {
|
1194 |
+
"text/plain": []
|
1195 |
+
},
|
1196 |
+
"execution_count": 14,
|
1197 |
+
"metadata": {},
|
1198 |
+
"output_type": "execute_result"
|
1199 |
+
}
|
1200 |
+
],
|
1201 |
+
"source": [
|
1202 |
+
"import json\n",
|
1203 |
+
"import gradio as gr\n",
|
1204 |
+
"\n",
|
1205 |
+
"# Gradio app to browse prompts with left and right buttons\n",
|
1206 |
+
"# index = 0\n",
|
1207 |
+
"\n",
|
1208 |
+
"# Functions to handle prompts\n",
|
1209 |
+
"def get_prompt(index, prompt_type):\n",
|
1210 |
+
" return df.iloc[index][prompt_type]\n",
|
1211 |
+
"\n",
|
1212 |
+
"def next_prompt(index, prompt_type):\n",
|
1213 |
+
" if index < len(df) - 1:\n",
|
1214 |
+
" index += 1\n",
|
1215 |
+
" return index, get_prompt(index, prompt_type)\n",
|
1216 |
+
"\n",
|
1217 |
+
"def previous_prompt(index, prompt_type):\n",
|
1218 |
+
" if index > 0:\n",
|
1219 |
+
" index -= 1\n",
|
1220 |
+
" return index, get_prompt(index, prompt_type)\n",
|
1221 |
+
"\n",
|
1222 |
+
"# Gradio App\n",
|
1223 |
+
"with gr.Blocks() as demo:\n",
|
1224 |
+
" gr.Markdown(\"# Prompt Browser\")\n",
|
1225 |
+
" with gr.Row():\n",
|
1226 |
+
" prompt_type_dropdown = gr.Dropdown(\n",
|
1227 |
+
" choices=['conversation_RFA_gpt3_5', 'conversation_RFA_mistral', 'conversation_FAR_gpt3_5', 'conversation_FAR_mistral', 'conversation_FA_gpt3_5', 'conversation_FA_mistral'],\n",
|
1228 |
+
" value='conversation_RFA_gpt3_5',\n",
|
1229 |
+
" label=\"Select Prompt Type\"\n",
|
1230 |
+
" )\n",
|
1231 |
+
" index_display = gr.Textbox(\"0\", label=\"Index\", interactive=False)\n",
|
1232 |
+
"\n",
|
1233 |
+
" prompt_display = gr.Textbox(value=df.iloc[0]['conversation_RFA_gpt3_5'], label=\"Prompt\")\n",
|
1234 |
+
" \n",
|
1235 |
+
" with gr.Row():\n",
|
1236 |
+
" prev_button = gr.Button(\"⬅️ Previous\")\n",
|
1237 |
+
" next_button = gr.Button(\"Next ➡️\")\n",
|
1238 |
+
" \n",
|
1239 |
+
" # State to hold the current index\n",
|
1240 |
+
" index_state = gr.State(value=0)\n",
|
1241 |
+
"\n",
|
1242 |
+
" # Button click events\n",
|
1243 |
+
" prev_button.click(\n",
|
1244 |
+
" fn=previous_prompt,\n",
|
1245 |
+
" inputs=[index_state, prompt_type_dropdown],\n",
|
1246 |
+
" outputs=[index_state, prompt_display]\n",
|
1247 |
+
" )\n",
|
1248 |
+
" next_button.click(\n",
|
1249 |
+
" fn=next_prompt,\n",
|
1250 |
+
" inputs=[index_state, prompt_type_dropdown],\n",
|
1251 |
+
" outputs=[index_state, prompt_display]\n",
|
1252 |
+
" )\n",
|
1253 |
+
"\n",
|
1254 |
+
" # Dropdown change event\n",
|
1255 |
+
" prompt_type_dropdown.change(\n",
|
1256 |
+
" fn=lambda index, prompt_type: get_prompt(index, prompt_type),\n",
|
1257 |
+
" inputs=[index_state, prompt_type_dropdown],\n",
|
1258 |
+
" outputs=prompt_display\n",
|
1259 |
+
" )\n",
|
1260 |
+
"\n",
|
1261 |
+
" # Update index display\n",
|
1262 |
+
" index_state.change(\n",
|
1263 |
+
" fn=lambda index: str(index),\n",
|
1264 |
+
" inputs=index_state,\n",
|
1265 |
+
" outputs=index_display\n",
|
1266 |
+
" )\n",
|
1267 |
+
"\n",
|
1268 |
+
"# Launch the app\n",
|
1269 |
+
"demo.launch(height=840)"
|
1270 |
+
]
|
1271 |
+
},
|
1272 |
+
{
|
1273 |
+
"cell_type": "markdown",
|
1274 |
+
"id": "3838daf0-8a3a-4513-ad2b-3589d60dfa3d",
|
1275 |
+
"metadata": {},
|
1276 |
+
"source": [
|
1277 |
+
"## Push Dataset to the Hub\n",
|
1278 |
+
"It's useful to have a train/test split, so we create one, convert each split to a `Dataset`, and push to the Hub. We also want to stratify on `'topic'`."
|
1279 |
+
]
|
1280 |
+
},
|
1281 |
+
{
|
1282 |
+
"cell_type": "code",
|
1283 |
+
"execution_count": 15,
|
1284 |
+
"id": "25f62e9b-09f8-4912-94fd-0ded680614b2",
|
1285 |
+
"metadata": {
|
1286 |
+
"tags": []
|
1287 |
+
},
|
1288 |
+
"outputs": [],
|
1289 |
+
"source": [
|
1290 |
+
"from datasets import Dataset, DatasetDict\n",
|
1291 |
+
"from sklearn.model_selection import train_test_split\n",
|
1292 |
+
"\n",
|
1293 |
+
"# Split into train (80%) and test (20%) sets, stratified on topic\n",
|
1294 |
+
"train_df, test_df = train_test_split(df, test_size=0.2, stratify=df['topic'], random_state=42)\n",
|
1295 |
+
"\n",
|
1296 |
+
"# Reset index to avoid index column in the Hugging Face Dataset\n",
|
1297 |
+
"train_df.reset_index(drop=True, inplace=True)\n",
|
1298 |
+
"test_df.reset_index(drop=True, inplace=True)\n",
|
1299 |
+
"\n",
|
1300 |
+
"# Convert each DataFrame to a Dataset object\n",
|
1301 |
+
"train_dataset = Dataset.from_pandas(train_df)\n",
|
1302 |
+
"test_dataset = Dataset.from_pandas(test_df)\n",
|
1303 |
+
"\n",
|
1304 |
+
"# Create a DatasetDict with the train and test datasets\n",
|
1305 |
+
"dataset_dict = DatasetDict({\n",
|
1306 |
+
" 'train': train_dataset,\n",
|
1307 |
+
" 'test': test_dataset\n",
|
1308 |
+
"})"
|
1309 |
+
]
|
1310 |
+
},
|
1311 |
+
{
|
1312 |
+
"cell_type": "code",
|
1313 |
+
"execution_count": 16,
|
1314 |
+
"id": "18a206c5-0e40-46b3-8dfb-20000789b6b5",
|
1315 |
+
"metadata": {
|
1316 |
+
"tags": []
|
1317 |
+
},
|
1318 |
+
"outputs": [
|
1319 |
+
{
|
1320 |
+
"data": {
|
1321 |
+
"application/vnd.jupyter.widget-view+json": {
|
1322 |
+
"model_id": "0e40274a780c4da0868362339d48e6de",
|
1323 |
+
"version_major": 2,
|
1324 |
+
"version_minor": 0
|
1325 |
+
},
|
1326 |
+
"text/plain": [
|
1327 |
+
"Uploading the dataset shards: 0%| | 0/1 [00:00<?, ?it/s]"
|
1328 |
+
]
|
1329 |
+
},
|
1330 |
+
"metadata": {},
|
1331 |
+
"output_type": "display_data"
|
1332 |
+
},
|
1333 |
+
{
|
1334 |
+
"data": {
|
1335 |
+
"application/vnd.jupyter.widget-view+json": {
|
1336 |
+
"model_id": "d8ed56081baa4b4a98af22b3d904a231",
|
1337 |
+
"version_major": 2,
|
1338 |
+
"version_minor": 0
|
1339 |
+
},
|
1340 |
+
"text/plain": [
|
1341 |
+
"Creating parquet from Arrow format: 0%| | 0/7 [00:00<?, ?ba/s]"
|
1342 |
+
]
|
1343 |
+
},
|
1344 |
+
"metadata": {},
|
1345 |
+
"output_type": "display_data"
|
1346 |
+
},
|
1347 |
+
{
|
1348 |
+
"data": {
|
1349 |
+
"application/vnd.jupyter.widget-view+json": {
|
1350 |
+
"model_id": "c6b2c92b5aaa4943ab8b086f58455440",
|
1351 |
+
"version_major": 2,
|
1352 |
+
"version_minor": 0
|
1353 |
+
},
|
1354 |
+
"text/plain": [
|
1355 |
+
"Uploading the dataset shards: 0%| | 0/1 [00:00<?, ?it/s]"
|
1356 |
+
]
|
1357 |
+
},
|
1358 |
+
"metadata": {},
|
1359 |
+
"output_type": "display_data"
|
1360 |
+
},
|
1361 |
+
{
|
1362 |
+
"data": {
|
1363 |
+
"application/vnd.jupyter.widget-view+json": {
|
1364 |
+
"model_id": "f4a05ea3069c4f60bca73b488f8221d5",
|
1365 |
+
"version_major": 2,
|
1366 |
+
"version_minor": 0
|
1367 |
+
},
|
1368 |
+
"text/plain": [
|
1369 |
+
"Creating parquet from Arrow format: 0%| | 0/2 [00:00<?, ?ba/s]"
|
1370 |
+
]
|
1371 |
+
},
|
1372 |
+
"metadata": {},
|
1373 |
+
"output_type": "display_data"
|
1374 |
+
},
|
1375 |
+
{
|
1376 |
+
"data": {
|
1377 |
+
"text/plain": [
|
1378 |
+
"CommitInfo(commit_url='https://huggingface.co/datasets/derek-thomas/labeled-multiple-choice-explained-mistral-tokenized/commit/f96a8487961dcfe6077df67b5351c041a4523eb1', commit_message='Upload dataset', commit_description='', oid='f96a8487961dcfe6077df67b5351c041a4523eb1', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/derek-thomas/labeled-multiple-choice-explained-mistral-tokenized', endpoint='https://huggingface.co', repo_type='dataset', repo_id='derek-thomas/labeled-multiple-choice-explained-mistral-tokenized'), pr_revision=None, pr_num=None)"
|
1379 |
+
]
|
1380 |
+
},
|
1381 |
+
"execution_count": 16,
|
1382 |
+
"metadata": {},
|
1383 |
+
"output_type": "execute_result"
|
1384 |
+
}
|
1385 |
+
],
|
1386 |
+
"source": [
|
1387 |
+
"# Push the dataset to the Hugging Face Hub\n",
|
1388 |
+
"dataset_dict.push_to_hub(OUTPUT_DATASET)"
|
1389 |
+
]
|
1390 |
+
},
|
1391 |
+
{
|
1392 |
+
"cell_type": "code",
|
1393 |
+
"execution_count": null,
|
1394 |
+
"id": "14c279ea-4f3b-40bd-9795-edd5ea3694f7",
|
1395 |
+
"metadata": {},
|
1396 |
+
"outputs": [],
|
1397 |
+
"source": []
|
1398 |
+
}
|
1399 |
+
],
|
1400 |
+
"metadata": {
|
1401 |
+
"kernelspec": {
|
1402 |
+
"display_name": "Python 3 (ipykernel)",
|
1403 |
+
"language": "python",
|
1404 |
+
"name": "python3"
|
1405 |
+
},
|
1406 |
+
"language_info": {
|
1407 |
+
"codemirror_mode": {
|
1408 |
+
"name": "ipython",
|
1409 |
+
"version": 3
|
1410 |
+
},
|
1411 |
+
"file_extension": ".py",
|
1412 |
+
"mimetype": "text/x-python",
|
1413 |
+
"name": "python",
|
1414 |
+
"nbconvert_exporter": "python",
|
1415 |
+
"pygments_lexer": "ipython3",
|
1416 |
+
"version": "3.11.10"
|
1417 |
+
}
|
1418 |
+
},
|
1419 |
+
"nbformat": 4,
|
1420 |
+
"nbformat_minor": 5
|
1421 |
+
}
|
02-poe-token-count-exploration.ipynb
ADDED
@@ -0,0 +1,435 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"id": "b3fc8862-0c2b-45f3-badf-e591c7b8f891",
|
6 |
+
"metadata": {},
|
7 |
+
"source": [
|
8 |
+
"# Token Count Exploration\n",
|
9 |
+
"It would be really useful for deployment to know our input/output expectations. We know that our output is quite verbose relative to the input, since the explanations are long. With a model like `mistralai/Mistral-7B-Instruct-v0.3`, I'd expect that our real output with explanations will be shorter. That's perfect, since our training data will give us a reliable upper bound, which is great to prevent truncation.\n",
|
10 |
+
"\n",
|
11 |
+
"Let's figure out how to split input and output tokens, and then we can build a histogram."
|
12 |
+
]
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"cell_type": "markdown",
|
16 |
+
"id": "3a501f2f-ba98-4c0f-aa30-f4768bd80dcb",
|
17 |
+
"metadata": {},
|
18 |
+
"source": [
|
19 |
+
"## Config"
|
20 |
+
]
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"cell_type": "code",
|
24 |
+
"execution_count": 1,
|
25 |
+
"id": "5d0bd22f-293e-4c15-9dfe-8070553f42b5",
|
26 |
+
"metadata": {
|
27 |
+
"tags": []
|
28 |
+
},
|
29 |
+
"outputs": [],
|
30 |
+
"source": [
|
31 |
+
"INPUT_DATASET = 'derek-thomas/labeled-multiple-choice-explained-mistral-tokenized'\n",
|
32 |
+
"BASE_MODEL = 'mistralai/Mistral-7B-Instruct-v0.3'"
|
33 |
+
]
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"cell_type": "markdown",
|
37 |
+
"id": "c1c3b00c-17bf-4b00-9ee7-d10c598c53e9",
|
38 |
+
"metadata": {},
|
39 |
+
"source": [
|
40 |
+
"## Setup"
|
41 |
+
]
|
42 |
+
},
|
43 |
+
{
|
44 |
+
"cell_type": "code",
|
45 |
+
"execution_count": 2,
|
46 |
+
"id": "af2330f3-403c-401c-8028-46ae4971546e",
|
47 |
+
"metadata": {},
|
48 |
+
"outputs": [
|
49 |
+
{
|
50 |
+
"data": {
|
51 |
+
"application/vnd.jupyter.widget-view+json": {
|
52 |
+
"model_id": "d675da3076694064ba0c69ed97f938f8",
|
53 |
+
"version_major": 2,
|
54 |
+
"version_minor": 0
|
55 |
+
},
|
56 |
+
"text/plain": [
|
57 |
+
"VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
|
58 |
+
]
|
59 |
+
},
|
60 |
+
"metadata": {},
|
61 |
+
"output_type": "display_data"
|
62 |
+
}
|
63 |
+
],
|
64 |
+
"source": [
|
65 |
+
"from huggingface_hub import login, get_token\n",
|
66 |
+
"login()"
|
67 |
+
]
|
68 |
+
},
|
69 |
+
{
|
70 |
+
"cell_type": "code",
|
71 |
+
"execution_count": 3,
|
72 |
+
"id": "a9e2d29c-1f8e-4a70-839f-f61ae396d6f6",
|
73 |
+
"metadata": {},
|
74 |
+
"outputs": [
|
75 |
+
{
|
76 |
+
"data": {
|
77 |
+
"application/vnd.jupyter.widget-view+json": {
|
78 |
+
"model_id": "dd06a12730fa4af1b863273c333c6a4c",
|
79 |
+
"version_major": 2,
|
80 |
+
"version_minor": 0
|
81 |
+
},
|
82 |
+
"text/plain": [
|
83 |
+
"README.md: 0%| | 0.00/1.18k [00:00<?, ?B/s]"
|
84 |
+
]
|
85 |
+
},
|
86 |
+
"metadata": {},
|
87 |
+
"output_type": "display_data"
|
88 |
+
},
|
89 |
+
{
|
90 |
+
"data": {
|
91 |
+
"application/vnd.jupyter.widget-view+json": {
|
92 |
+
"model_id": "0ab7651927dc407f83c819dffc2c6cf1",
|
93 |
+
"version_major": 2,
|
94 |
+
"version_minor": 0
|
95 |
+
},
|
96 |
+
"text/plain": [
|
97 |
+
"train-00000-of-00001.parquet: 0%| | 0.00/40.5M [00:00<?, ?B/s]"
|
98 |
+
]
|
99 |
+
},
|
100 |
+
"metadata": {},
|
101 |
+
"output_type": "display_data"
|
102 |
+
},
|
103 |
+
{
|
104 |
+
"data": {
|
105 |
+
"application/vnd.jupyter.widget-view+json": {
|
106 |
+
"model_id": "93eec90b188d4b4b862cba87fbc65f26",
|
107 |
+
"version_major": 2,
|
108 |
+
"version_minor": 0
|
109 |
+
},
|
110 |
+
"text/plain": [
|
111 |
+
"test-00000-of-00001.parquet: 0%| | 0.00/10.1M [00:00<?, ?B/s]"
|
112 |
+
]
|
113 |
+
},
|
114 |
+
"metadata": {},
|
115 |
+
"output_type": "display_data"
|
116 |
+
},
|
117 |
+
{
|
118 |
+
"data": {
|
119 |
+
"application/vnd.jupyter.widget-view+json": {
|
120 |
+
"model_id": "21a9ed213b2c4da49f6171206102499d",
|
121 |
+
"version_major": 2,
|
122 |
+
"version_minor": 0
|
123 |
+
},
|
124 |
+
"text/plain": [
|
125 |
+
"Generating train split: 0%| | 0/6730 [00:00<?, ? examples/s]"
|
126 |
+
]
|
127 |
+
},
|
128 |
+
"metadata": {},
|
129 |
+
"output_type": "display_data"
|
130 |
+
},
|
131 |
+
{
|
132 |
+
"data": {
|
133 |
+
"application/vnd.jupyter.widget-view+json": {
|
134 |
+
"model_id": "52a994708c2a466f8ed72a2fd881aa77",
|
135 |
+
"version_major": 2,
|
136 |
+
"version_minor": 0
|
137 |
+
},
|
138 |
+
"text/plain": [
|
139 |
+
"Generating test split: 0%| | 0/1683 [00:00<?, ? examples/s]"
|
140 |
+
]
|
141 |
+
},
|
142 |
+
"metadata": {},
|
143 |
+
"output_type": "display_data"
|
144 |
+
}
|
145 |
+
],
|
146 |
+
"source": [
|
147 |
+
"from transformers import AutoTokenizer\n",
|
148 |
+
"from datasets import load_dataset\n",
|
149 |
+
"\n",
|
150 |
+
"tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, token=get_token())\n",
|
151 |
+
"\n",
|
152 |
+
"dataset = load_dataset(INPUT_DATASET, split='train')\n",
|
153 |
+
"df = dataset.to_pandas()"
|
154 |
+
]
|
155 |
+
},
|
156 |
+
{
|
157 |
+
"cell_type": "markdown",
|
158 |
+
"id": "f3a644bf-b532-4f30-bd6b-c637dfde5fb2",
|
159 |
+
"metadata": {},
|
160 |
+
"source": [
|
161 |
+
"## Exploration"
|
162 |
+
]
|
163 |
+
},
|
164 |
+
{
|
165 |
+
"cell_type": "code",
|
166 |
+
"execution_count": 5,
|
167 |
+
"id": "bf5b3e0c-2b7f-42f3-852d-7039c530ed86",
|
168 |
+
"metadata": {},
|
169 |
+
"outputs": [
|
170 |
+
{
|
171 |
+
"data": {
|
172 |
+
"text/plain": [
|
173 |
+
"'<s>[INST] Answer the Question and include your Reasoning and the Final Answer in a json like: {\"Reasoning: \"...\", \"Final Answer\": \"x\"} where x is a letter that corresponds to the answer choice which is a letter between a and h.\\nQuestion: What can genetic material have?\\nAnswer Choices: (a) Resistance (b) Mutations (c) Clorophyll (d) Nucleotide (e) Symmetry (f) Allow growth (g) Contamination (h) Warmth[/INST] {\\'Reasoning\\': \\'a) Resistance: Genetic material can carry genes that provide resistance to certain diseases or environmental factors, but this is not a characteristic of genetic material itself. Therefore, this option is incorrect.\\\\n\\\\nc) Chlorophyll: Chlorophyll is a pigment found in plants that is responsible for photosynthesis. It is not a characteristic of genetic material. Therefore, this option is incorrect.\\\\n\\\\nd) Nucleotide: Nucleotides are the building blocks of DNA and RNA, which are types of genetic material. However, this option is too broad and does not fully answer the question. Therefore, this option is incorrect.\\\\n\\\\ne) Symmetry: Symmetry is a characteristic of physical objects and organisms, but it is not a characteristic of genetic material. Therefore, this option is incorrect.\\\\n\\\\nf) Allow growth: Genetic material provides the instructions for the growth and development of organisms, but it is not a characteristic of genetic material itself. Therefore, this option is incorrect.\\\\n\\\\ng) Contamination: Contamination is the presence of unwanted substances or impurities, and it is not a characteristic of genetic material. Therefore, this option is incorrect.\\\\n\\\\nh) Warmth: Warmth is a physical property of objects and is not related to genetic material. Therefore, this option is incorrect.\\\\n\\\\nIn conclusion, the only option that correctly describes a characteristic of genetic material is b) mutations. 
Genetic material can have mutations, which are changes in the DNA sequence that can lead to genetic variation and evolution.\\', \\'Final Answer\\': \\'b\\'}</s>'"
|
174 |
+
]
|
175 |
+
},
|
176 |
+
"execution_count": 5,
|
177 |
+
"metadata": {},
|
178 |
+
"output_type": "execute_result"
|
179 |
+
}
|
180 |
+
],
|
181 |
+
"source": [
|
182 |
+
"df['conversation_RFA_sg_gpt3_5'].iloc[0]"
|
183 |
+
]
|
184 |
+
},
|
185 |
+
{
|
186 |
+
"cell_type": "code",
|
187 |
+
"execution_count": 6,
|
188 |
+
"id": "0dc985d7-32e3-413f-8640-55829da19838",
|
189 |
+
"metadata": {},
|
190 |
+
"outputs": [
|
191 |
+
{
|
192 |
+
"data": {
|
193 |
+
"text/plain": [
|
194 |
+
"[4]"
|
195 |
+
]
|
196 |
+
},
|
197 |
+
"execution_count": 6,
|
198 |
+
"metadata": {},
|
199 |
+
"output_type": "execute_result"
|
200 |
+
}
|
201 |
+
],
|
202 |
+
"source": [
|
203 |
+
"tokenizer.encode('[/INST]', add_special_tokens=False)"
|
204 |
+
]
|
205 |
+
},
|
206 |
+
{
|
207 |
+
"cell_type": "code",
|
208 |
+
"execution_count": 8,
|
209 |
+
"id": "bc9b3856-7652-483c-8dbc-2b9bdc85f9d7",
|
210 |
+
"metadata": {},
|
211 |
+
"outputs": [
|
212 |
+
{
|
213 |
+
"name": "stdout",
|
214 |
+
"output_type": "stream",
|
215 |
+
"text": [
|
216 |
+
"[1, 3, 27075, 1040, 23246, 1072, 3792, 1342, 2066, 2180, 1056, 1072, 1040, 10990, 27075, 1065, 1032, 8379, 1505, 29515, 10598, 20569, 1056, 29515, 1113, 1869, 1316, 1113, 18268, 27075, 2032, 1113, 29512, 18163, 1738, 2086, 1117, 1032, 6266, 1137, 17303, 1066, 1040, 5140, 5550, 1458, 1117, 1032, 6266, 2212, 1032, 1072, 1063, 29491, 781, 25762, 29515, 2592, 1309, 20637, 4156, 1274, 29572, 781, 3588, 17749, 26173, 1982, 29515, 1093, 29476, 29499, 2760, 5400, 1093, 29494, 29499, 17737, 1465, 1093, 29485, 29499, 2134, 1039, 3894, 20298, 1093, 29483, 29499, 1186, 2253, 1059, 1090, 1315, 1093, 29474, 29499, 13124, 17409, 1093, 29490, 29499, 26780, 6825, 1093, 29489, 29499, 3767, 26682, 1093, 29484, 29499, 1162, 2553, 1130, 4, 12780, 20569, 1056, 2637, 1232, 29476, 29499, 2760, 5400, 29515, 7010, 11130, 4156, 1309, 7864, 24971, 1137, 3852, 13336, 1066, 3320, 19025, 1210, 13275, 9380, 29493, 1330, 1224, 1117, 1227, 1032, 18613, 1070, 20637, 4156, 4605, 29491, 9237, 29493, 1224, 4319, 1117, 17158, 6691, 29479, 29524, 15538, 29499, 1457, 6406, 3894, 20298, 29515, 1457, 6406, 3894, 20298, 1117, 1032, 19726, 1234, 2187, 1065, 10691, 1137, 1117, 8100, 1122, 9654, 29492, 1216, 22305, 29491, 1429, 1117, 1227, 1032, 18613, 1070, 20637, 4156, 29491, 9237, 29493, 1224, 4319, 1117, 17158, 6691, 29479, 29524, 1060, 29499, 1186, 2253, 1059, 1090, 1315, 29515, 1186, 2253, 1059, 1090, 2694, 1228, 1040, 4435, 10246, 1070, 16775, 1072, 1167, 4152, 29493, 1458, 1228, 5282, 1070, 20637, 4156, 29491, 3761, 29493, 1224, 4319, 1117, 2136, 6609, 1072, 2003, 1227, 6662, 5140, 1040, 3764, 29491, 9237, 29493, 1224, 4319, 1117, 17158, 6691, 29479, 29524, 1253, 29499, 13124, 17409, 29515, 13124, 17409, 1117, 1032, 18613, 1070, 6045, 7465, 1072, 2938, 11589, 29493, 1330, 1146, 1117, 1227, 1032, 18613, 1070, 20637, 4156, 29491, 9237, 29493, 1224, 4319, 1117, 17158, 6691, 29479, 29524, 24412, 29499, 26780, 6825, 29515, 7010, 11130, 4156, 6080, 1040, 12150, 1122, 1040, 6825, 1072, 4867, 1070, 2938, 
11589, 29493, 1330, 1146, 1117, 1227, 1032, 18613, 1070, 20637, 4156, 4605, 29491, 9237, 29493, 1224, 4319, 1117, 17158, 6691, 29479, 29524, 1585, 29499, 3767, 26682, 29515, 3767, 26682, 1117, 1040, 7471, 1070, 13460, 8034, 1851, 9500, 1210, 3592, 1092, 1986, 29493, 1072, 1146, 1117, 1227, 1032, 18613, 1070, 20637, 4156, 29491, 9237, 29493, 1224, 4319, 1117, 17158, 6691, 29479, 29524, 25779, 29499, 1162, 2553, 1130, 29515, 1162, 2553, 1130, 1117, 1032, 6045, 4089, 1070, 7465, 1072, 1117, 1227, 5970, 1066, 20637, 4156, 29491, 9237, 29493, 1224, 4319, 1117, 17158, 6691, 29479, 29524, 29479, 1425, 13654, 29493, 1040, 1633, 4319, 1137, 13510, 14734, 1032, 18613, 1070, 20637, 4156, 1117, 1055, 29499, 5316, 1465, 29491, 7010, 11130, 4156, 1309, 1274, 5316, 1465, 29493, 1458, 1228, 5203, 1065, 1040, 16775, 8536, 1137, 1309, 2504, 1066, 20637, 19191, 1072, 10963, 13775, 1232, 18268, 27075, 2637, 1232, 29494, 15259, 2]\n"
|
217 |
+
]
|
218 |
+
},
|
219 |
+
{
|
220 |
+
"data": {
|
221 |
+
"text/plain": [
|
222 |
+
"[4]"
|
223 |
+
]
|
224 |
+
},
|
225 |
+
"execution_count": 8,
|
226 |
+
"metadata": {},
|
227 |
+
"output_type": "execute_result"
|
228 |
+
}
|
229 |
+
],
|
230 |
+
"source": [
|
231 |
+
"print(tokenizer.encode(df['conversation_RFA_sg_gpt3_5'].iloc[0], add_special_tokens=False))\n",
|
232 |
+
"tokenizer.encode('[/INST]', add_special_tokens=False)"
|
233 |
+
]
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"cell_type": "markdown",
|
237 |
+
"id": "677e792e-a85f-448c-ab36-ed0aec84ca8e",
|
238 |
+
"metadata": {},
|
239 |
+
"source": [
|
240 |
+
"Great, we can see that there is a special token `[/INST]` that we will want to split on. We can count the tokens before and including `[/INST]` and that should be our input tokens, and the tokens after will be our output tokens.\n",
|
241 |
+
"\n",
|
242 |
+
"Let's count those for each row in `conversation_RFA` and build a histogram of the results. `conversation_RFA` should be a good max since it's just a reshuffle or superset of the other columns."
|
243 |
+
]
|
244 |
+
},
|
245 |
+
{
|
246 |
+
"cell_type": "code",
|
247 |
+
"execution_count": 20,
|
248 |
+
"id": "3c8cd920-4d58-4b1d-b172-098c35dcdfbf",
|
249 |
+
"metadata": {
|
250 |
+
"scrolled": true
|
251 |
+
},
|
252 |
+
"outputs": [],
|
253 |
+
"source": [
|
254 |
+
"import pandas as pd\n",
|
255 |
+
"from datasets import load_dataset\n",
|
256 |
+
"from transformers import AutoTokenizer\n",
|
257 |
+
"\n",
|
258 |
+
"# Load the dataset and convert it to a DataFrame\n",
|
259 |
+
"dataset = load_dataset(INPUT_DATASET, split='test')\n",
|
260 |
+
"df = dataset.to_pandas()\n",
|
261 |
+
"\n",
|
262 |
+
"df_token_gpt3_5 = df[['conversation_RFA_sg_gpt3_5']].copy()\n",
|
263 |
+
"df_token_gpt3_5['tokens_gpt3_5'] = df['conversation_RFA_sg_gpt3_5'].apply(lambda x: tokenizer.encode(x))\n",
|
264 |
+
"\n",
|
265 |
+
"df_token_mistral = df[['conversation_RFA_sg_mistral']].copy()\n",
|
266 |
+
"df_token_mistral['tokens_mistral'] = df['conversation_RFA_sg_mistral'].apply(lambda x: tokenizer.encode(x))\n",
|
267 |
+
"\n",
|
268 |
+
"def split_and_measure(lst):\n",
|
269 |
+
" if 4 in lst:\n",
|
270 |
+
" index_of_4 = lst.index(4)\n",
|
271 |
+
" length_before = index_of_4 + 1 # Including 4\n",
|
272 |
+
" length_after = len(lst) - length_before\n",
|
273 |
+
" return length_before, length_after\n",
|
274 |
+
" else:\n",
|
275 |
+
" return None, len(lst) # If 4 is not present\n",
|
276 |
+
"\n",
|
277 |
+
"df_token_gpt3_5[['input_tokens', 'output_tokens']] = df_token_gpt3_5['tokens_gpt3_5'].apply(split_and_measure).apply(pd.Series)\n",
|
278 |
+
"df_token_mistral[['input_tokens', 'output_tokens']] = df_token_mistral['tokens_mistral'].apply(split_and_measure).apply(pd.Series)"
|
279 |
+
]
|
280 |
+
},
|
281 |
+
{
|
282 |
+
"cell_type": "code",
|
283 |
+
"execution_count": 22,
|
284 |
+
"id": "9b23b7a3-5448-4b2e-9253-5d1b66ef1e0a",
|
285 |
+
"metadata": {},
|
286 |
+
"outputs": [
|
287 |
+
{
|
288 |
+
"data": {
|
289 |
+
"image/png": "",
|
290 |
+
"text/plain": [
|
291 |
+
"<Figure size 1000x600 with 1 Axes>"
|
292 |
+
]
|
293 |
+
},
|
294 |
+
"metadata": {},
|
295 |
+
"output_type": "display_data"
|
296 |
+
}
|
297 |
+
],
|
298 |
+
"source": [
|
299 |
+
"import matplotlib.pyplot as plt\n",
|
300 |
+
"\n",
|
301 |
+
"# Plot the histograms\n",
|
302 |
+
"plt.figure(figsize=(10, 6))\n",
|
303 |
+
"\n",
|
304 |
+
"# Histogram for Input Tokens\n",
|
305 |
+
"plt.hist(df_token_gpt3_5['input_tokens'], bins=10, alpha=0.6, label='Input Tokens')\n",
|
306 |
+
"\n",
|
307 |
+
"# Histogram for Output Tokens\n",
|
308 |
+
"plt.hist(df_token_gpt3_5['output_tokens'], bins=10, alpha=0.6, label='Output Tokens')\n",
|
309 |
+
"\n",
|
310 |
+
"# Add titles and labels\n",
|
311 |
+
"plt.title(\"Token Summary\")\n",
|
312 |
+
"plt.xlabel(\"Token Count\")\n",
|
313 |
+
"plt.ylabel(\"Frequency\")\n",
|
314 |
+
"plt.legend()\n",
|
315 |
+
"\n",
|
316 |
+
"# Show the plot\n",
|
317 |
+
"plt.show()\n"
|
318 |
+
]
|
319 |
+
},
|
320 |
+
{
|
321 |
+
"cell_type": "code",
|
322 |
+
"execution_count": 23,
|
323 |
+
"id": "9d81d486-bafd-454b-9a44-934ec111ad4d",
|
324 |
+
"metadata": {},
|
325 |
+
"outputs": [
|
326 |
+
{
|
327 |
+
"name": "stdout",
|
328 |
+
"output_type": "stream",
|
329 |
+
"text": [
|
330 |
+
"Our Max Input Tokens:\t162\n",
|
331 |
+
"Our Max Output Tokens:\t572\n"
|
332 |
+
]
|
333 |
+
}
|
334 |
+
],
|
335 |
+
"source": [
|
336 |
+
"print(f\"Our Max Input Tokens:\\t{max(df_token_gpt3_5.input_tokens)}\\nOur Max Output Tokens:\\t{max(df_token_gpt3_5.output_tokens)}\")"
|
337 |
+
]
|
338 |
+
},
|
339 |
+
{
|
340 |
+
"cell_type": "code",
|
341 |
+
"execution_count": null,
|
342 |
+
"id": "e6e235c3-75f4-48dd-b0cb-d7cc42426e69",
|
343 |
+
"metadata": {},
|
344 |
+
"outputs": [],
|
345 |
+
"source": []
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"cell_type": "code",
|
349 |
+
"execution_count": 24,
|
350 |
+
"id": "7dea222b-a974-4ff6-9e3c-07de766b76c4",
|
351 |
+
"metadata": {},
|
352 |
+
"outputs": [
|
353 |
+
{
|
354 |
+
"data": {
|
355 |
+
"image/png": "",
|
356 |
+
"text/plain": [
|
357 |
+
"<Figure size 1000x600 with 1 Axes>"
|
358 |
+
]
|
359 |
+
},
|
360 |
+
"metadata": {},
|
361 |
+
"output_type": "display_data"
|
362 |
+
}
|
363 |
+
],
|
364 |
+
"source": [
|
365 |
+
"import matplotlib.pyplot as plt\n",
|
366 |
+
"\n",
|
367 |
+
"# Plot the histograms\n",
|
368 |
+
"plt.figure(figsize=(10, 6))\n",
|
369 |
+
"\n",
|
370 |
+
"# Histogram for Input Tokens\n",
|
371 |
+
"plt.hist(df_token_mistral['input_tokens'], bins=10, alpha=0.6, label='Input Tokens')\n",
|
372 |
+
"\n",
|
373 |
+
"# Histogram for Output Tokens\n",
|
374 |
+
"plt.hist(df_token_mistral['output_tokens'], bins=10, alpha=0.6, label='Output Tokens')\n",
|
375 |
+
"\n",
|
376 |
+
"# Add titles and labels\n",
|
377 |
+
"plt.title(\"Token Summary\")\n",
|
378 |
+
"plt.xlabel(\"Token Count\")\n",
|
379 |
+
"plt.ylabel(\"Frequency\")\n",
|
380 |
+
"plt.legend()\n",
|
381 |
+
"\n",
|
382 |
+
"# Show the plot\n",
|
383 |
+
"plt.show()\n"
|
384 |
+
]
|
385 |
+
},
|
386 |
+
{
|
387 |
+
"cell_type": "code",
|
388 |
+
"execution_count": 26,
|
389 |
+
"id": "d6a78d92-2fc4-4354-8825-b17cba59eee4",
|
390 |
+
"metadata": {},
|
391 |
+
"outputs": [
|
392 |
+
{
|
393 |
+
"name": "stdout",
|
394 |
+
"output_type": "stream",
|
395 |
+
"text": [
|
396 |
+
"Our Max Input Tokens:\t162\n",
|
397 |
+
"Our Max Output Tokens:\t1148\n"
|
398 |
+
]
|
399 |
+
}
|
400 |
+
],
|
401 |
+
"source": [
|
402 |
+
"print(f\"Our Max Input Tokens:\\t{max(df_token_mistral.input_tokens)}\\nOur Max Output Tokens:\\t{max(df_token_mistral.output_tokens)}\")"
|
403 |
+
]
|
404 |
+
},
|
405 |
+
{
|
406 |
+
"cell_type": "code",
|
407 |
+
"execution_count": null,
|
408 |
+
"id": "fdfc0581-1c57-436c-8c76-9bfeab278603",
|
409 |
+
"metadata": {},
|
410 |
+
"outputs": [],
|
411 |
+
"source": []
|
412 |
+
}
|
413 |
+
],
|
414 |
+
"metadata": {
|
415 |
+
"kernelspec": {
|
416 |
+
"display_name": "Python 3 (ipykernel)",
|
417 |
+
"language": "python",
|
418 |
+
"name": "python3"
|
419 |
+
},
|
420 |
+
"language_info": {
|
421 |
+
"codemirror_mode": {
|
422 |
+
"name": "ipython",
|
423 |
+
"version": 3
|
424 |
+
},
|
425 |
+
"file_extension": ".py",
|
426 |
+
"mimetype": "text/x-python",
|
427 |
+
"name": "python",
|
428 |
+
"nbconvert_exporter": "python",
|
429 |
+
"pygments_lexer": "ipython3",
|
430 |
+
"version": "3.11.10"
|
431 |
+
}
|
432 |
+
},
|
433 |
+
"nbformat": 4,
|
434 |
+
"nbformat_minor": 5
|
435 |
+
}
|
03-poe-eval-sg.ipynb
ADDED
@@ -0,0 +1,1981 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"id": "7eb11cd7-6200-45f9-822f-45b8b392c4bd",
|
6 |
+
"metadata": {},
|
7 |
+
"source": [
|
8 |
+
"# Setup"
|
9 |
+
]
|
10 |
+
},
|
11 |
+
{
|
12 |
+
"cell_type": "markdown",
|
13 |
+
"id": "4ca90151-0b89-4870-8213-9d49cb68c555",
|
14 |
+
"metadata": {},
|
15 |
+
"source": [
|
16 |
+
"## Config\n",
|
17 |
+
"Set the token limits based on the numbers in [02-poe-token-count-exploration.ipynb](02-poe-token-count-exploration.ipynb). I like to give a little buffer in case an explanation goes over."
|
18 |
+
]
|
19 |
+
},
|
20 |
+
{
|
21 |
+
"cell_type": "code",
|
22 |
+
"execution_count": 1,
|
23 |
+
"id": "5238c6e9-9425-4ced-a16a-998e775e7342",
|
24 |
+
"metadata": {},
|
25 |
+
"outputs": [],
|
26 |
+
"source": [
|
27 |
+
"INPUT_TOKENS = 300\n",
|
28 |
+
"OUTPUT_TOKENS = 1600\n",
|
29 |
+
"\n",
|
30 |
+
"INPUT_DATASET = 'derek-thomas/labeled-multiple-choice-explained-mistral-tokenized'\n",
|
31 |
+
"OUTPUT_DATASET = 'derek-thomas/labeled-multiple-choice-explained-mistral-results'\n",
|
32 |
+
"BASE_MODEL = 'mistralai/Mistral-7B-Instruct-v0.3'"
|
33 |
+
]
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"cell_type": "markdown",
|
37 |
+
"id": "f4eca659-f11f-4d25-886a-9d7af4f38411",
|
38 |
+
"metadata": {},
|
39 |
+
"source": [
|
40 |
+
"# Setup\n",
|
41 |
+
"Here we create the pydantic models for each of our experiments. Note that because of how field names are specified in pydantic, we need to use an `alias` and `populate_by_name`. Given that our `Final Answer` is always a letter between a and h, we can use an enumeration."
|
42 |
+
]
|
43 |
+
},
|
44 |
+
{
|
45 |
+
"cell_type": "code",
|
46 |
+
"execution_count": 2,
|
47 |
+
"id": "c5367700-0e9d-435b-875a-02a73b292ade",
|
48 |
+
"metadata": {},
|
49 |
+
"outputs": [],
|
50 |
+
"source": [
|
51 |
+
"from pydantic import BaseModel, Field\n",
|
52 |
+
"from typing import List\n",
|
53 |
+
"from enum import Enum\n",
|
54 |
+
"import json\n",
|
55 |
+
"\n",
|
56 |
+
"\n",
|
57 |
+
"class FinalAnswerEnum(str, Enum):\n",
|
58 |
+
" a = \"a\"\n",
|
59 |
+
" b = \"b\"\n",
|
60 |
+
" c = \"c\"\n",
|
61 |
+
" d = \"d\"\n",
|
62 |
+
" e = \"e\"\n",
|
63 |
+
" f = \"f\"\n",
|
64 |
+
" g = \"g\"\n",
|
65 |
+
" h = \"h\"\n",
|
66 |
+
"\n",
|
67 |
+
"class RFAModel(BaseModel):\n",
|
68 |
+
" reasoning: str = Field(..., alias=\"Reasoning\")\n",
|
69 |
+
" final_answer: FinalAnswerEnum = Field(..., alias=\"Final Answer\")\n",
|
70 |
+
"\n",
|
71 |
+
" class Config:\n",
|
72 |
+
" populate_by_name = True\n",
|
73 |
+
" \n",
|
74 |
+
"class FARModel(BaseModel):\n",
|
75 |
+
" final_answer: FinalAnswerEnum = Field(..., alias=\"Final Answer\")\n",
|
76 |
+
" reasoning: str = Field(..., alias=\"Reasoning\")\n",
|
77 |
+
"\n",
|
78 |
+
" class Config:\n",
|
79 |
+
" populate_by_name = True\n",
|
80 |
+
" \n",
|
81 |
+
"class FAModel(BaseModel):\n",
|
82 |
+
" final_answer: FinalAnswerEnum = Field(..., alias=\"Final Answer\")\n",
|
83 |
+
"\n",
|
84 |
+
" class Config:\n",
|
85 |
+
" populate_by_name = True"
|
86 |
+
]
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"cell_type": "markdown",
|
90 |
+
"id": "7e0f51c0-c4f7-4299-9a24-a4a90d4a9f2a",
|
91 |
+
"metadata": {},
|
92 |
+
"source": [
|
93 |
+
"We generated lots of experiments in [derek-thomas/labeled-multiple-choice-explained-mistral-tokenized](https://huggingface.co/datasets/derek-thomas/labeled-multiple-choice-explained-mistral-tokenized/viewer?row=0). Now we will aggregate everything we need in `experiments` for convenience."
|
94 |
+
]
|
95 |
+
},
|
96 |
+
{
|
97 |
+
"cell_type": "code",
|
98 |
+
"execution_count": 3,
|
99 |
+
"id": "5d0bd22f-293e-4c15-9dfe-8070553f42b5",
|
100 |
+
"metadata": {
|
101 |
+
"tags": []
|
102 |
+
},
|
103 |
+
"outputs": [
|
104 |
+
{
|
105 |
+
"data": {
|
106 |
+
"text/plain": [
|
107 |
+
"'derek-thomas/mistral-v03-poe-RFA-mistral,derek-thomas/mistral-v03-poe-FAR-mistral,derek-thomas/mistral-v03-poe-RFA-gpt3-5,derek-thomas/mistral-v03-poe-FAR-gpt3-5,derek-thomas/mistral-v03-poe-FAR'"
|
108 |
+
]
|
109 |
+
},
|
110 |
+
"execution_count": 3,
|
111 |
+
"metadata": {},
|
112 |
+
"output_type": "execute_result"
|
113 |
+
}
|
114 |
+
],
|
115 |
+
"source": [
|
116 |
+
"\n",
|
117 |
+
"experiments = {\n",
|
118 |
+
" 'RFA-mistral': {\n",
|
119 |
+
" 'pydantic': RFAModel,\n",
|
120 |
+
" \"lora\": \"derek-thomas/mistral-v03-poe-RFA-mistral\",\n",
|
121 |
+
" \"column\": 'user_prompt_RFA',\n",
|
122 |
+
" },\n",
|
123 |
+
" 'FAR-mistral': {\n",
|
124 |
+
" 'pydantic': FARModel,\n",
|
125 |
+
" \"lora\": \"derek-thomas/mistral-v03-poe-FAR-mistral\",\n",
|
126 |
+
" \"column\": 'user_prompt_FAR',\n",
|
127 |
+
" },\n",
|
128 |
+
" 'RFA-gpt3-5': {\n",
|
129 |
+
" 'pydantic': RFAModel,\n",
|
130 |
+
" \"lora\": \"derek-thomas/mistral-v03-poe-RFA-gpt3-5\",\n",
|
131 |
+
" \"column\": 'user_prompt_RFA',\n",
|
132 |
+
" },\n",
|
133 |
+
" 'FAR-gpt3-5': {\n",
|
134 |
+
" 'pydantic': FARModel,\n",
|
135 |
+
" \"lora\": \"derek-thomas/mistral-v03-poe-FAR-gpt3-5\",\n",
|
136 |
+
" \"column\": 'user_prompt_FAR',\n",
|
137 |
+
" },\n",
|
138 |
+
" 'FA': {\n",
|
139 |
+
" 'pydantic': FAModel,\n",
|
140 |
+
" \"lora\": \"derek-thomas/mistral-v03-poe-FAR\",\n",
|
141 |
+
" \"column\": 'user_prompt_FA',\n",
|
142 |
+
" },\n",
|
143 |
+
" 'base': {\n",
|
144 |
+
" 'pydantic': FAModel,\n",
|
145 |
+
" \"lora\": None,\n",
|
146 |
+
" \"column\": 'user_prompt_FA',\n",
|
147 |
+
" },\n",
|
148 |
+
"}\n",
|
149 |
+
"\n",
|
150 |
+
"LORAS_STRING = ','.join([v['lora'] for _, v in experiments.items() if v and v.get('lora') is not None])\n",
|
151 |
+
"LORAS_STRING"
|
152 |
+
]
|
153 |
+
},
|
154 |
+
{
|
155 |
+
"cell_type": "code",
|
156 |
+
"execution_count": 4,
|
157 |
+
"id": "6f8826fb-76ea-464f-8146-262bda0b58bc",
|
158 |
+
"metadata": {
|
159 |
+
"tags": []
|
160 |
+
},
|
161 |
+
"outputs": [
|
162 |
+
{
|
163 |
+
"data": {
|
164 |
+
"application/vnd.jupyter.widget-view+json": {
|
165 |
+
"model_id": "f9a6617489af4d86be59e614e3c505c4",
|
166 |
+
"version_major": 2,
|
167 |
+
"version_minor": 0
|
168 |
+
},
|
169 |
+
"text/plain": [
|
170 |
+
"VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
|
171 |
+
]
|
172 |
+
},
|
173 |
+
"metadata": {},
|
174 |
+
"output_type": "display_data"
|
175 |
+
}
|
176 |
+
],
|
177 |
+
"source": [
|
178 |
+
"from huggingface_hub import login, get_token\n",
|
179 |
+
"\n",
|
180 |
+
"# Log in to your Hugging Face account\n",
|
181 |
+
"login() "
|
182 |
+
]
|
183 |
+
},
|
184 |
+
{
|
185 |
+
"cell_type": "code",
|
186 |
+
"execution_count": 5,
|
187 |
+
"id": "a9e2d29c-1f8e-4a70-839f-f61ae396d6f6",
|
188 |
+
"metadata": {},
|
189 |
+
"outputs": [],
|
190 |
+
"source": [
|
191 |
+
"from transformers import AutoTokenizer\n",
|
192 |
+
"\n",
|
193 |
+
"tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, token=get_token())"
|
194 |
+
]
|
195 |
+
},
|
196 |
+
{
|
197 |
+
"cell_type": "code",
|
198 |
+
"execution_count": 6,
|
199 |
+
"id": "3aa16403-88e4-4eba-a45b-3d91db9bb58d",
|
200 |
+
"metadata": {},
|
201 |
+
"outputs": [],
|
202 |
+
"source": [
|
203 |
+
"from datasets import load_dataset\n",
|
204 |
+
"\n",
|
205 |
+
"# Load dataset (test split)\n",
|
206 |
+
"dataset = load_dataset(INPUT_DATASET, split='test')\n",
|
207 |
+
"df = dataset.to_pandas()"
|
208 |
+
]
|
209 |
+
},
|
210 |
+
{
|
211 |
+
"cell_type": "markdown",
|
212 |
+
"id": "c787fbb8-beae-4bb0-860d-0ce4d0c9b9a4",
|
213 |
+
"metadata": {},
|
214 |
+
"source": [
|
215 |
+
"# Evaluation"
|
216 |
+
]
|
217 |
+
},
|
218 |
+
{
|
219 |
+
"cell_type": "markdown",
|
220 |
+
"id": "a35d4955-61e5-4143-ab92-7e9b8a49ea1b",
|
221 |
+
"metadata": {},
|
222 |
+
"source": [
|
223 |
+
"## Endpoint Configuration\n",
|
224 |
+
"I'm using 8 replicas so I can move quickly! The try/except is there in case I need to make manual changes and want to load the existing endpoint."
|
225 |
+
]
|
226 |
+
},
|
227 |
+
{
|
228 |
+
"cell_type": "code",
|
229 |
+
"execution_count": 7,
|
230 |
+
"id": "619fbe5e-e35d-430b-a617-9223ba6babd0",
|
231 |
+
"metadata": {
|
232 |
+
"scrolled": true
|
233 |
+
},
|
234 |
+
"outputs": [],
|
235 |
+
"source": [
|
236 |
+
"from huggingface_hub import create_inference_endpoint\n",
|
237 |
+
"from huggingface_hub import get_inference_endpoint\n",
|
238 |
+
"\n",
|
239 |
+
"\n",
|
240 |
+
"def get_my_endpoint():\n",
|
241 |
+
" name = f\"prompt-order-experiment\"\n",
|
242 |
+
" namespace='HF-test-lab'\n",
|
243 |
+
" try:\n",
|
244 |
+
" endpoint = get_inference_endpoint(name, namespace=namespace)\n",
|
245 |
+
" endpoint.wait()\n",
|
246 |
+
" except:\n",
|
247 |
+
" # Custom Docker image details\n",
|
248 |
+
" custom_image = {\n",
|
249 |
+
" \"health_route\": \"/health\",\n",
|
250 |
+
" \"url\": \"ghcr.io/huggingface/text-generation-inference:sha-caff779\", # Needs to be >=2.4.2 to get ordering of json outputs\n",
|
251 |
+
" \"env\": {\n",
|
252 |
+
" \"LORA_ADAPTERS\": LORAS_STRING,\n",
|
253 |
+
" \"MAX_BATCH_PREFILL_TOKENS\": str(20*INPUT_TOKENS),\n",
|
254 |
+
" \"MAX_INPUT_TOKENS\": str(INPUT_TOKENS), \n",
|
255 |
+
" \"MAX_TOTAL_TOKENS\": str(INPUT_TOKENS + OUTPUT_TOKENS), \n",
|
256 |
+
" \"DISABLE_CUSTOM_KERNELS\": 'false',\n",
|
257 |
+
" \"MODEL_ID\": \"/repository\"\n",
|
258 |
+
" },\n",
|
259 |
+
" }\n",
|
260 |
+
" \n",
|
261 |
+
" secrets = {\n",
|
262 |
+
" \"HF_TOKEN\": get_token()\n",
|
263 |
+
" }\n",
|
264 |
+
" \n",
|
265 |
+
" # Creating the inference endpoint\n",
|
266 |
+
" endpoint = create_inference_endpoint(\n",
|
267 |
+
" name=name,\n",
|
268 |
+
" namespace=namespace,\n",
|
269 |
+
" repository='mistralai/Mistral-7B-Instruct-v0.3',\n",
|
270 |
+
" framework=\"pytorch\",\n",
|
271 |
+
" accelerator=\"gpu\",\n",
|
272 |
+
" instance_size=\"x1\",\n",
|
273 |
+
" instance_type=\"nvidia-l4\",\n",
|
274 |
+
" region=\"us-east-1\",\n",
|
275 |
+
" vendor=\"aws\",\n",
|
276 |
+
" min_replica=8,\n",
|
277 |
+
" max_replica=8,\n",
|
278 |
+
" task=\"text-generation\",\n",
|
279 |
+
" custom_image=custom_image,\n",
|
280 |
+
" secrets=secrets\n",
|
281 |
+
" )\n",
|
282 |
+
" # endpoint.wait()\n",
|
283 |
+
" \n",
|
284 |
+
" print(\"Your model is ready to use!\")\n",
|
285 |
+
" endpoint.wait()\n",
|
286 |
+
" return endpoint"
|
287 |
+
]
|
288 |
+
},
|
289 |
+
{
|
290 |
+
"cell_type": "code",
|
291 |
+
"execution_count": 8,
|
292 |
+
"id": "000b907a-224d-4dbf-aa0d-e0dbee1b8787",
|
293 |
+
"metadata": {},
|
294 |
+
"outputs": [
|
295 |
+
{
|
296 |
+
"name": "stdout",
|
297 |
+
"output_type": "stream",
|
298 |
+
"text": [
|
299 |
+
"Your model is ready to use!\n",
|
300 |
+
"CPU times: user 22.3 ms, sys: 7.64 ms, total: 30 ms\n",
|
301 |
+
"Wall time: 2.07 s\n"
|
302 |
+
]
|
303 |
+
}
|
304 |
+
],
|
305 |
+
"source": [
|
306 |
+
"%%time\n",
|
307 |
+
"endpoint = get_my_endpoint()"
|
308 |
+
]
|
309 |
+
},
|
310 |
+
{
|
311 |
+
"cell_type": "markdown",
|
312 |
+
"id": "5708f348-c11e-4b66-aeff-93f5ec08ab49",
|
313 |
+
"metadata": {},
|
314 |
+
"source": [
|
315 |
+
"## Manual Evaluation\n",
|
316 |
+
"Since we haven't seen our models in use yet, it's a good time to check them out!"
|
317 |
+
]
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"cell_type": "markdown",
|
321 |
+
"id": "328dd842-eaa4-470c-b761-7c403b453321",
|
322 |
+
"metadata": {},
|
323 |
+
"source": [
|
324 |
+
"### Reasoning Final Answer\n",
|
325 |
+
"In both mistral and gpt3-5 we should see the **Reasoning** first and then the **Final Answer** in the prompt and the responses."
|
326 |
+
]
|
327 |
+
},
|
328 |
+
{
|
329 |
+
"cell_type": "code",
|
330 |
+
"execution_count": 9,
|
331 |
+
"id": "47af6191-7765-4047-bd8f-64aadb08434e",
|
332 |
+
"metadata": {},
|
333 |
+
"outputs": [
|
334 |
+
{
|
335 |
+
"data": {
|
336 |
+
"text/plain": [
|
337 |
+
"'<s>[INST] Answer the Question and include your Reasoning and the Final Answer in a json like: {\"Reasoning: \"...\", \"Final Answer\": \"x\"} where x is a letter that corresponds to the answer choice which is a letter between a and h.\\nQuestion: What are busses used for?\\nAnswer Choices: (a) Protective shelter (b) Transporting humans (c) Help other species benefit (d) Transporting airplanes (e) A backbone (f) Communication (g) Safe operation (h) Safe driving[/INST]'"
|
338 |
+
]
|
339 |
+
},
|
340 |
+
"execution_count": 9,
|
341 |
+
"metadata": {},
|
342 |
+
"output_type": "execute_result"
|
343 |
+
}
|
344 |
+
],
|
345 |
+
"source": [
|
346 |
+
"key = 'RFA-mistral'\n",
|
347 |
+
"user_prompt_RFA = df.iloc[0][experiments[key]['column']]\n",
|
348 |
+
"user_prompt_RFA"
|
349 |
+
]
|
350 |
+
},
|
351 |
+
{
|
352 |
+
"cell_type": "code",
|
353 |
+
"execution_count": 10,
|
354 |
+
"id": "1f976218-f33c-4db3-9797-3935e121e6b2",
|
355 |
+
"metadata": {
|
356 |
+
"scrolled": true
|
357 |
+
},
|
358 |
+
"outputs": [
|
359 |
+
{
|
360 |
+
"data": {
|
361 |
+
"text/plain": [
|
362 |
+
"'{\"Reasoning\": \"Busses are primarily used for transporting humans, so the correct answer is (b) Transporting humans. The other options are either incorrect (a, c, d, e, f, g, h) or not specific enough to the function of a bus (a, c, e, f, g, h).\", \"Final Answer\": \"b\"}'"
|
363 |
+
]
|
364 |
+
},
|
365 |
+
"execution_count": 10,
|
366 |
+
"metadata": {},
|
367 |
+
"output_type": "execute_result"
|
368 |
+
}
|
369 |
+
],
|
370 |
+
"source": [
|
371 |
+
"response = endpoint.client.text_generation(\n",
|
372 |
+
" prompt=user_prompt_RFA,\n",
|
373 |
+
" max_new_tokens=OUTPUT_TOKENS,\n",
|
374 |
+
" adapter_id=experiments[key]['lora'],\n",
|
375 |
+
" grammar={\"type\": \"json\", \"value\": experiments[key]['pydantic'].schema()},\n",
|
376 |
+
")\n",
|
377 |
+
"response"
|
378 |
+
]
|
379 |
+
},
|
380 |
+
{
|
381 |
+
"cell_type": "code",
|
382 |
+
"execution_count": 11,
|
383 |
+
"id": "222e33b7-0158-44f8-8848-da5318e699b4",
|
384 |
+
"metadata": {
|
385 |
+
"scrolled": true
|
386 |
+
},
|
387 |
+
"outputs": [
|
388 |
+
{
|
389 |
+
"data": {
|
390 |
+
"text/plain": [
|
391 |
+
"'{\"Reasoning\": \"Busses are primarily used for transporting humans, especially in urban areas, schools, and tourist destinations. They provide a means of public transportation, making it easier for people to travel to various locations without the need for personal vehicles. Therefore, the correct answer is (b) transporting humans.\", \"Final Answer\": \"b\"}'"
|
392 |
+
]
|
393 |
+
},
|
394 |
+
"execution_count": 11,
|
395 |
+
"metadata": {},
|
396 |
+
"output_type": "execute_result"
|
397 |
+
}
|
398 |
+
],
|
399 |
+
"source": [
|
400 |
+
"key = 'RFA-gpt3-5'\n",
|
401 |
+
"response = endpoint.client.text_generation(\n",
|
402 |
+
" prompt=user_prompt_RFA,\n",
|
403 |
+
" max_new_tokens=575,\n",
|
404 |
+
" adapter_id=experiments[key]['lora'],\n",
|
405 |
+
" grammar={\"type\": \"json\", \"value\": experiments[key]['pydantic'].schema()},\n",
|
406 |
+
")\n",
|
407 |
+
"response"
|
408 |
+
]
|
409 |
+
},
|
410 |
+
{
|
411 |
+
"cell_type": "markdown",
|
412 |
+
"id": "138fd46d-896f-4b4b-90d3-d3fd7075f149",
|
413 |
+
"metadata": {},
|
414 |
+
"source": [
|
415 |
+
"### Final Answer Reasoning \n",
|
416 |
+
"In both mistral and gpt3-5 we should see the **Final Answer** first and then the **Reasoning** in the prompt and the responses."
|
417 |
+
]
|
418 |
+
},
|
419 |
+
{
|
420 |
+
"cell_type": "code",
|
421 |
+
"execution_count": 12,
|
422 |
+
"id": "ec3e574b-f63f-4513-a6ae-335136543a8c",
|
423 |
+
"metadata": {},
|
424 |
+
"outputs": [
|
425 |
+
{
|
426 |
+
"data": {
|
427 |
+
"text/plain": [
|
428 |
+
"'<s>[INST] Answer the Question and include your Final Answer and the Reasoning in a json like: {\"Final Answer\": \"x\", \"Reasoning: \"...\"} where x is a letter that corresponds to the answer choice which is a letter between a and h.\\nQuestion: What are busses used for?\\nAnswer Choices: (a) Protective shelter (b) Transporting humans (c) Help other species benefit (d) Transporting airplanes (e) A backbone (f) Communication (g) Safe operation (h) Safe driving[/INST]'"
|
429 |
+
]
|
430 |
+
},
|
431 |
+
"execution_count": 12,
|
432 |
+
"metadata": {},
|
433 |
+
"output_type": "execute_result"
|
434 |
+
}
|
435 |
+
],
|
436 |
+
"source": [
|
437 |
+
"key = 'FAR-gpt3-5'\n",
|
438 |
+
"user_prompt_FAR = df.iloc[0][experiments[key]['column']]\n",
|
439 |
+
"user_prompt_FAR"
|
440 |
+
]
|
441 |
+
},
|
442 |
+
{
|
443 |
+
"cell_type": "code",
|
444 |
+
"execution_count": 13,
|
445 |
+
"id": "24f30a15-5ec0-4f26-b32f-b4ccb429e6f9",
|
446 |
+
"metadata": {
|
447 |
+
"scrolled": true
|
448 |
+
},
|
449 |
+
"outputs": [
|
450 |
+
{
|
451 |
+
"data": {
|
452 |
+
"text/plain": [
|
453 |
+
"'{\"Final Answer\": \"b\", \"Reasoning\": \"Busses are primarily used for transporting humans, especially in urban areas, to facilitate public transportation. They provide a means of transportation for a large number of people at once, reducing the number of vehicles on the road and helping to alleviate traffic congestion. They also serve as a protective shelter for passengers, shielding them from the elements during travel.\"}'"
|
454 |
+
]
|
455 |
+
},
|
456 |
+
"execution_count": 13,
|
457 |
+
"metadata": {},
|
458 |
+
"output_type": "execute_result"
|
459 |
+
}
|
460 |
+
],
|
461 |
+
"source": [
|
462 |
+
"response = endpoint.client.text_generation(\n",
|
463 |
+
" prompt=user_prompt_FAR,\n",
|
464 |
+
" max_new_tokens=575,\n",
|
465 |
+
" adapter_id=experiments[key]['lora'],\n",
|
466 |
+
" grammar={\"type\": \"json\", \"value\": experiments[key]['pydantic'].schema()},\n",
|
467 |
+
")\n",
|
468 |
+
"response"
|
469 |
+
]
|
470 |
+
},
|
471 |
+
{
|
472 |
+
"cell_type": "code",
|
473 |
+
"execution_count": 14,
|
474 |
+
"id": "32536844-211d-4856-983c-d5787734d420",
|
475 |
+
"metadata": {
|
476 |
+
"scrolled": true
|
477 |
+
},
|
478 |
+
"outputs": [
|
479 |
+
{
|
480 |
+
"data": {
|
481 |
+
"text/plain": [
|
482 |
+
"'{\"Final Answer\": \"b\", \"Reasoning\": \"Busses are primarily used for transporting humans. While they can provide a protective shelter and ensure safe operation, they are not used for transporting airplanes, helping other species benefit, serving as a backbone, or being used for communication.\"}'"
|
483 |
+
]
|
484 |
+
},
|
485 |
+
"execution_count": 14,
|
486 |
+
"metadata": {},
|
487 |
+
"output_type": "execute_result"
|
488 |
+
}
|
489 |
+
],
|
490 |
+
"source": [
|
491 |
+
"key = 'FAR-mistral'\n",
|
492 |
+
"response = endpoint.client.text_generation(\n",
|
493 |
+
" prompt=user_prompt_FAR,\n",
|
494 |
+
" max_new_tokens=575,\n",
|
495 |
+
" adapter_id=experiments[key]['lora'],\n",
|
496 |
+
" grammar={\"type\": \"json\", \"value\": experiments[key]['pydantic'].schema()},\n",
|
497 |
+
")\n",
|
498 |
+
"response"
|
499 |
+
]
|
500 |
+
},
|
501 |
+
{
|
502 |
+
"cell_type": "markdown",
|
503 |
+
"id": "5482cf12-880c-4112-ba87-059a03a3f466",
|
504 |
+
"metadata": {},
|
505 |
+
"source": [
|
506 |
+
"### Final Answer \n",
|
507 |
+
"Here we should just see the **Final Answer** and no **Reasoning**."
|
508 |
+
]
|
509 |
+
},
|
510 |
+
{
|
511 |
+
"cell_type": "code",
|
512 |
+
"execution_count": 15,
|
513 |
+
"id": "71a9f634-319c-40c2-8f66-18e282732320",
|
514 |
+
"metadata": {},
|
515 |
+
"outputs": [
|
516 |
+
{
|
517 |
+
"data": {
|
518 |
+
"text/plain": [
|
519 |
+
"'<s>[INST] Answer the Question and include your Final Answer in a json like: {\"Final Answer\": \"x\"} where x is a letter that corresponds to the answer choice which is a letter between a and h.\\nQuestion: What are busses used for?\\nAnswer Choices: (a) Protective shelter (b) Transporting humans (c) Help other species benefit (d) Transporting airplanes (e) A backbone (f) Communication (g) Safe operation (h) Safe driving[/INST]'"
|
520 |
+
]
|
521 |
+
},
|
522 |
+
"execution_count": 15,
|
523 |
+
"metadata": {},
|
524 |
+
"output_type": "execute_result"
|
525 |
+
}
|
526 |
+
],
|
527 |
+
"source": [
|
528 |
+
"key = 'FA'\n",
|
529 |
+
"user_prompt_FA = df.iloc[0][experiments[key]['column']]\n",
|
530 |
+
"user_prompt_FA"
|
531 |
+
]
|
532 |
+
},
|
533 |
+
{
|
534 |
+
"cell_type": "code",
|
535 |
+
"execution_count": 16,
|
536 |
+
"id": "1cded37d-b907-4f4d-9b8c-c2167c6ba213",
|
537 |
+
"metadata": {
|
538 |
+
"scrolled": true
|
539 |
+
},
|
540 |
+
"outputs": [
|
541 |
+
{
|
542 |
+
"data": {
|
543 |
+
"text/plain": [
|
544 |
+
"'{\"Final Answer\": \"b\"}'"
|
545 |
+
]
|
546 |
+
},
|
547 |
+
"execution_count": 16,
|
548 |
+
"metadata": {},
|
549 |
+
"output_type": "execute_result"
|
550 |
+
}
|
551 |
+
],
|
552 |
+
"source": [
|
553 |
+
"response = endpoint.client.text_generation(\n",
|
554 |
+
" prompt=user_prompt_FA,\n",
|
555 |
+
" max_new_tokens=575,\n",
|
556 |
+
" adapter_id=experiments[key]['lora'],\n",
|
557 |
+
")\n",
|
558 |
+
"response"
|
559 |
+
]
|
560 |
+
},
|
561 |
+
{
|
562 |
+
"cell_type": "markdown",
|
563 |
+
"id": "122b8563-3220-43ac-9e0d-ef84ebcbb9e1",
|
564 |
+
"metadata": {},
|
565 |
+
"source": [
|
566 |
+
"## Evaluation Loop\n",
|
567 |
+
"I set the batch prefill budget to 20x the input size and used 8 replicas, so I should have capacity for ~160 parallel requests. I'm only using 100 (`CONCURRENCY_LIMIT`), so it should be pretty fast."
|
568 |
+
]
|
569 |
+
},
|
570 |
+
{
|
571 |
+
"cell_type": "code",
|
572 |
+
"execution_count": 17,
|
573 |
+
"id": "fbc69ea7-e2ca-4405-82f0-5af95375ec88",
|
574 |
+
"metadata": {},
|
575 |
+
"outputs": [],
|
576 |
+
"source": [
|
577 |
+
"import nest_asyncio\n",
|
578 |
+
"import asyncio\n",
|
579 |
+
"from transformers import AutoTokenizer\n",
|
580 |
+
"from tqdm.auto import tqdm\n",
|
581 |
+
"\n",
|
582 |
+
"# Allow nested event loops in Jupyter\n",
|
583 |
+
"nest_asyncio.apply()\n",
|
584 |
+
"\n",
|
585 |
+
"\n",
|
586 |
+
"# Semaphore to limit concurrency\n",
|
587 |
+
"CONCURRENCY_LIMIT = 100 \n",
|
588 |
+
"MAX_NEW_TOKENS = OUTPUT_TOKENS\n",
|
589 |
+
"semaphore = asyncio.Semaphore(CONCURRENCY_LIMIT)\n",
|
590 |
+
"\n",
|
591 |
+
"# Progress bar\n",
|
592 |
+
"progress_bar = None # Global to allow updates from within async functions\n",
|
593 |
+
"\n",
|
594 |
+
"# Function to send asynchronous requests to the endpoint\n",
|
595 |
+
"async def fetch_response_async(async_client, prompt, lora_id, pydantic_model):\n",
|
596 |
+
" async with semaphore: # Limit the number of concurrent requests\n",
|
597 |
+
" response = await async_client.text_generation(\n",
|
598 |
+
" prompt=prompt,\n",
|
599 |
+
" max_new_tokens=MAX_NEW_TOKENS,\n",
|
600 |
+
" adapter_id=lora_id if lora_id else None,\n",
|
601 |
+
" grammar={\"type\": \"json\", \"value\": pydantic_model.schema()}\n",
|
602 |
+
" )\n",
|
603 |
+
" progress_bar.update(1) # Update the progress bar when the request is complete\n",
|
604 |
+
" return response\n",
|
605 |
+
"\n",
|
606 |
+
"# Function to process a single conversation type asynchronously\n",
|
607 |
+
"async def process_conversation_type(conversation_type, model_info, df, tokenizer, async_client):\n",
|
608 |
+
" response_column = f\"responses_{conversation_type.replace('-','_')}\"\n",
|
609 |
+
" responses = [] # Temporary list to hold responses for the current conversation type\n",
|
610 |
+
"\n",
|
611 |
+
" tasks = []\n",
|
612 |
+
" for _, item in df.iterrows():\n",
|
613 |
+
" prompt = item.get(model_info[\"column\"])\n",
|
614 |
+
" tasks.append(fetch_response_async(async_client, prompt, model_info[\"lora\"], model_info[\"pydantic\"]))\n",
|
615 |
+
"\n",
|
616 |
+
" # Wait for all tasks to complete\n",
|
617 |
+
" responses = await asyncio.gather(*tasks)\n",
|
618 |
+
"\n",
|
619 |
+
" # If responses are strings, use them directly; otherwise, extract 'generated_text'\n",
|
620 |
+
" try:\n",
|
621 |
+
" df[response_column] = [resp[\"generated_text\"] for resp in responses]\n",
|
622 |
+
" except TypeError: # Fallback in case responses are raw strings\n",
|
623 |
+
" df[response_column] = responses\n",
|
624 |
+
"\n",
|
625 |
+
"# Main function to handle all conversation types\n",
|
626 |
+
"async def main(df, models, tokenizer, async_client):\n",
|
627 |
+
" global progress_bar\n",
|
628 |
+
" total_requests = len(df) * len(models) # Total number of requests across all conversation types\n",
|
629 |
+
" progress_bar = tqdm(total=total_requests, desc=\"Processing requests\")\n",
|
630 |
+
"\n",
|
631 |
+
" tasks = []\n",
|
632 |
+
" for conversation_type, model_info in models.items():\n",
|
633 |
+
" tasks.append(process_conversation_type(conversation_type, model_info, df, tokenizer, async_client))\n",
|
634 |
+
" await asyncio.gather(*tasks)\n",
|
635 |
+
"\n",
|
636 |
+
" progress_bar.close() # Close the progress bar when done\n",
|
637 |
+
"\n",
|
638 |
+
"# Define parameters and run\n",
|
639 |
+
"# await main(df, experiments, tokenizer, endpoint.async_client)"
|
640 |
+
]
|
641 |
+
},
|
642 |
+
{
|
643 |
+
"cell_type": "markdown",
|
644 |
+
"id": "9662adde-4ea4-4254-a6be-c619ed9557c8",
|
645 |
+
"metadata": {},
|
646 |
+
"source": [
|
647 |
+
"This is the same as above but with a couple of nice features, like retries with a back-off delay, in case you run into any issues."
|
648 |
+
]
|
649 |
+
},
|
650 |
+
{
|
651 |
+
"cell_type": "code",
|
652 |
+
"execution_count": 18,
|
653 |
+
"id": "e2671c78-6cd1-4bb7-aa04-b695bfb02115",
|
654 |
+
"metadata": {},
|
655 |
+
"outputs": [
|
656 |
+
{
|
657 |
+
"data": {
|
658 |
+
"application/vnd.jupyter.widget-view+json": {
|
659 |
+
"model_id": "f29c84d7a4654e9aa6af47cfe1bbde39",
|
660 |
+
"version_major": 2,
|
661 |
+
"version_minor": 0
|
662 |
+
},
|
663 |
+
"text/plain": [
|
664 |
+
"Processing requests: 0%| | 0/10098 [00:00<?, ?it/s]"
|
665 |
+
]
|
666 |
+
},
|
667 |
+
"metadata": {},
|
668 |
+
"output_type": "display_data"
|
669 |
+
}
|
670 |
+
],
|
671 |
+
"source": [
|
672 |
+
"import nest_asyncio\n",
|
673 |
+
"import asyncio\n",
|
674 |
+
"from transformers import AutoTokenizer\n",
|
675 |
+
"from tqdm.auto import tqdm\n",
|
676 |
+
"import time\n",
|
677 |
+
"\n",
|
678 |
+
"# Allow nested event loops in Jupyter\n",
|
679 |
+
"nest_asyncio.apply()\n",
|
680 |
+
"\n",
|
681 |
+
"# Semaphore to limit concurrency\n",
|
682 |
+
"CONCURRENCY_LIMIT = 100 \n",
|
683 |
+
"MAX_NEW_TOKENS = OUTPUT_TOKENS\n",
|
684 |
+
"semaphore = asyncio.Semaphore(CONCURRENCY_LIMIT)\n",
|
685 |
+
"\n",
|
686 |
+
"# Progress bar\n",
|
687 |
+
"progress_bar = None # Global to allow updates from within async functions\n",
|
688 |
+
"\n",
|
689 |
+
"# Retry parameters\n",
|
690 |
+
"MAX_RETRIES = 3\n",
|
691 |
+
"BACKOFF_TIME = 2 # Time in seconds before retrying\n",
|
692 |
+
"\n",
|
693 |
+
"# Function to send asynchronous requests to the endpoint with retries\n",
|
694 |
+
"async def fetch_response_async(async_client, prompt, lora_id, pydantic_model):\n",
|
695 |
+
" retries = 0\n",
|
696 |
+
" while retries < MAX_RETRIES:\n",
|
697 |
+
" try:\n",
|
698 |
+
" async with semaphore: # Limit the number of concurrent requests\n",
|
699 |
+
" response = await async_client.text_generation(\n",
|
700 |
+
" prompt=prompt,\n",
|
701 |
+
" max_new_tokens=MAX_NEW_TOKENS,\n",
|
702 |
+
" adapter_id=lora_id if lora_id else None,\n",
|
703 |
+
" grammar={\"type\": \"json\", \"value\": pydantic_model.schema()}\n",
|
704 |
+
" )\n",
|
705 |
+
" progress_bar.update(1) # Update the progress bar when the request is complete\n",
|
706 |
+
" return response\n",
|
707 |
+
" except Exception as e:\n",
|
708 |
+
" retries += 1\n",
|
709 |
+
" if retries >= MAX_RETRIES:\n",
|
710 |
+
" raise e # If we've exhausted retries, re-raise the error\n",
|
711 |
+
" else:\n",
|
712 |
+
" print(f\"Error: {e}. Retrying... ({retries}/{MAX_RETRIES})\")\n",
|
713 |
+
" await asyncio.sleep(BACKOFF_TIME) # Wait before retrying\n",
|
714 |
+
"\n",
|
715 |
+
"# Function to process a single conversation type asynchronously\n",
|
716 |
+
"async def process_conversation_type(conversation_type, model_info, df, tokenizer, async_client):\n",
|
717 |
+
" response_column = f\"responses_{conversation_type.replace('-','_')}\"\n",
|
718 |
+
" responses = [] # Temporary list to hold responses for the current conversation type\n",
|
719 |
+
"\n",
|
720 |
+
" tasks = []\n",
|
721 |
+
" for _, item in df.iterrows():\n",
|
722 |
+
" prompt = item.get(model_info[\"column\"])\n",
|
723 |
+
" tasks.append(fetch_response_async(async_client, prompt, model_info[\"lora\"], model_info[\"pydantic\"]))\n",
|
724 |
+
"\n",
|
725 |
+
" # Wait for all tasks to complete\n",
|
726 |
+
" responses = await asyncio.gather(*tasks)\n",
|
727 |
+
"\n",
|
728 |
+
" # If responses are strings, use them directly; otherwise, extract 'generated_text'\n",
|
729 |
+
" try:\n",
|
730 |
+
" df[response_column] = [resp[\"generated_text\"] for resp in responses]\n",
|
731 |
+
" except TypeError: # Fallback in case responses are raw strings\n",
|
732 |
+
" df[response_column] = responses\n",
|
733 |
+
"\n",
|
734 |
+
"# Main function to handle all conversation types\n",
|
735 |
+
"async def main(df, models, tokenizer, async_client):\n",
|
736 |
+
" global progress_bar\n",
|
737 |
+
" total_requests = len(df) * len(models) # Total number of requests across all conversation types\n",
|
738 |
+
" progress_bar = tqdm(total=total_requests, desc=\"Processing requests\")\n",
|
739 |
+
"\n",
|
740 |
+
" tasks = []\n",
|
741 |
+
" for conversation_type, model_info in models.items():\n",
|
742 |
+
" tasks.append(process_conversation_type(conversation_type, model_info, df, tokenizer, async_client))\n",
|
743 |
+
" await asyncio.gather(*tasks)\n",
|
744 |
+
"\n",
|
745 |
+
" progress_bar.close() # Close the progress bar when done\n",
|
746 |
+
"\n",
|
747 |
+
"# Define parameters and run\n",
|
748 |
+
"await main(df, experiments, tokenizer, endpoint.async_client)\n"
|
749 |
+
]
|
750 |
+
},
|
751 |
+
{
|
752 |
+
"cell_type": "markdown",
|
753 |
+
"id": "d562e0b4-96ae-4259-925d-4d62b8c49641",
|
754 |
+
"metadata": {},
|
755 |
+
"source": [
|
756 |
+
"It took `00:10:43`. Not bad! That should be around `$1.14` total at `$0.80/gpu/hr`."
|
757 |
+
]
|
758 |
+
},
|
759 |
+
{
|
760 |
+
"cell_type": "code",
|
761 |
+
"execution_count": 19,
|
762 |
+
"id": "8f81466e-80fb-4915-9c68-dfbe168e052b",
|
763 |
+
"metadata": {},
|
764 |
+
"outputs": [
|
765 |
+
{
|
766 |
+
"data": {
|
767 |
+
"text/plain": [
|
768 |
+
"InferenceEndpoint(name='prompt-order-experiment', namespace='HF-test-lab', repository='mistralai/Mistral-7B-Instruct-v0.3', status='paused', url=None)"
|
769 |
+
]
|
770 |
+
},
|
771 |
+
"execution_count": 19,
|
772 |
+
"metadata": {},
|
773 |
+
"output_type": "execute_result"
|
774 |
+
}
|
775 |
+
],
|
776 |
+
"source": [
|
777 |
+
"endpoint.pause()"
|
778 |
+
]
|
779 |
+
},
|
780 |
+
{
|
781 |
+
"cell_type": "code",
|
782 |
+
"execution_count": 20,
|
783 |
+
"id": "f5a79dad-2475-4324-8a2b-77b33e9c0822",
|
784 |
+
"metadata": {},
|
785 |
+
"outputs": [],
|
786 |
+
"source": [
|
787 |
+
"df_backup = df.copy()"
|
788 |
+
]
|
789 |
+
},
|
790 |
+
{
|
791 |
+
"cell_type": "code",
|
792 |
+
"execution_count": 21,
|
793 |
+
"id": "3473d555-927a-49ab-8e15-9097ed455c48",
|
794 |
+
"metadata": {},
|
795 |
+
"outputs": [
|
796 |
+
{
|
797 |
+
"data": {
|
798 |
+
"text/html": [
|
799 |
+
"<div>\n",
|
800 |
+
"<style scoped>\n",
|
801 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
802 |
+
" vertical-align: middle;\n",
|
803 |
+
" }\n",
|
804 |
+
"\n",
|
805 |
+
" .dataframe tbody tr th {\n",
|
806 |
+
" vertical-align: top;\n",
|
807 |
+
" }\n",
|
808 |
+
"\n",
|
809 |
+
" .dataframe thead th {\n",
|
810 |
+
" text-align: right;\n",
|
811 |
+
" }\n",
|
812 |
+
"</style>\n",
|
813 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
814 |
+
" <thead>\n",
|
815 |
+
" <tr style=\"text-align: right;\">\n",
|
816 |
+
" <th></th>\n",
|
817 |
+
" <th>topic</th>\n",
|
818 |
+
" <th>question_text</th>\n",
|
819 |
+
" <th>answer_key</th>\n",
|
820 |
+
" <th>gpt3_5_reasoning</th>\n",
|
821 |
+
" <th>mistral_reasoning</th>\n",
|
822 |
+
" <th>answer_choices</th>\n",
|
823 |
+
" <th>user_prompt</th>\n",
|
824 |
+
" <th>user_prompt_RFA</th>\n",
|
825 |
+
" <th>conversation_RFA_gpt3_5</th>\n",
|
826 |
+
" <th>conversation_RFA_mistral</th>\n",
|
827 |
+
" <th>...</th>\n",
|
828 |
+
" <th>conversation_FAR_gpt3_5</th>\n",
|
829 |
+
" <th>conversation_FAR_mistral</th>\n",
|
830 |
+
" <th>user_prompt_FA</th>\n",
|
831 |
+
" <th>conversation_FA</th>\n",
|
832 |
+
" <th>responses_RFA_mistral</th>\n",
|
833 |
+
" <th>responses_FAR_mistral</th>\n",
|
834 |
+
" <th>responses_RFA_gpt3_5</th>\n",
|
835 |
+
" <th>responses_FAR_gpt3_5</th>\n",
|
836 |
+
" <th>responses_FA</th>\n",
|
837 |
+
" <th>responses_base</th>\n",
|
838 |
+
" </tr>\n",
|
839 |
+
" </thead>\n",
|
840 |
+
" <tbody>\n",
|
841 |
+
" <tr>\n",
|
842 |
+
" <th>0</th>\n",
|
843 |
+
" <td>Transportation</td>\n",
|
844 |
+
" <td>What are busses used for?</td>\n",
|
845 |
+
" <td>b</td>\n",
|
846 |
+
" <td>a) Protective shelter: This option is incorrec...</td>\n",
|
847 |
+
" <td>1. Start by reading the question carefully: \"C...</td>\n",
|
848 |
+
" <td>(a) Protective shelter (b) Transporting humans...</td>\n",
|
849 |
+
" <td>Question: What are busses used for?\\nAnswer Ch...</td>\n",
|
850 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
851 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
852 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
853 |
+
" <td>...</td>\n",
|
854 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
855 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
856 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
857 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
858 |
+
" <td>{\"Reasoning\": \"Busses are primarily used for t...</td>\n",
|
859 |
+
" <td>{\"Final Answer\": \"b\", \"Reasoning\": \"Busses are...</td>\n",
|
860 |
+
" <td>{\"Reasoning\": \"Busses are primarily used for t...</td>\n",
|
861 |
+
" <td>{\"Final Answer\": \"b\", \"Reasoning\": \"Busses are...</td>\n",
|
862 |
+
" <td>{\"Final Answer\": \"b\"}</td>\n",
|
863 |
+
" <td>{\"Final Answer\": \"b\"}</td>\n",
|
864 |
+
" </tr>\n",
|
865 |
+
" <tr>\n",
|
866 |
+
" <th>1</th>\n",
|
867 |
+
" <td>Climate change</td>\n",
|
868 |
+
" <td>Which of the following does not contribute to ...</td>\n",
|
869 |
+
" <td>g</td>\n",
|
870 |
+
" <td>a) Nucleus of a cell: This option is not relat...</td>\n",
|
871 |
+
" <td>To solve this question, let's first understand...</td>\n",
|
872 |
+
" <td>(a) Nucleus of a cell (b) Flying in a plane (c...</td>\n",
|
873 |
+
" <td>Question: Which of the following does not cont...</td>\n",
|
874 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
875 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
876 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
877 |
+
" <td>...</td>\n",
|
878 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
879 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
880 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
881 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
882 |
+
" <td>{\"Reasoning\": \"Global warming is primarily cau...</td>\n",
|
883 |
+
" <td>{\"Final Answer\": \"a\", \"Reasoning\": \"The nucleu...</td>\n",
|
884 |
+
" <td>{\"Reasoning\": \"The nucleus of a cell (option a...</td>\n",
|
885 |
+
" <td>{\"Final Answer\": \"a\", \"Reasoning\": \"The nucleu...</td>\n",
|
886 |
+
" <td>{\"Final Answer\": \"a\"}</td>\n",
|
887 |
+
" <td>{\"Final Answer\": \"a\"}</td>\n",
|
888 |
+
" </tr>\n",
|
889 |
+
" <tr>\n",
|
890 |
+
" <th>2</th>\n",
|
891 |
+
" <td>Photography</td>\n",
|
892 |
+
" <td>What uses electrical energy converted from che...</td>\n",
|
893 |
+
" <td>b</td>\n",
|
894 |
+
" <td>a) Sunlight: Sunlight is a form of energy that...</td>\n",
|
895 |
+
" <td>1. Read the question and options carefully: Th...</td>\n",
|
896 |
+
" <td>(a) Sunlight (b) Cameras (c) Cells (d) Buses (...</td>\n",
|
897 |
+
" <td>Question: What uses electrical energy converte...</td>\n",
|
898 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
899 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
900 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
901 |
+
" <td>...</td>\n",
|
902 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
903 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
904 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
905 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
906 |
+
" <td>{\"Reasoning\": \"The question asks for an organi...</td>\n",
|
907 |
+
" <td>{\"Final Answer\": \"c\", \"Reasoning\": \"The proces...</td>\n",
|
908 |
+
" <td>{\"Reasoning\": \"The process of converting chemi...</td>\n",
|
909 |
+
" <td>{\"Final Answer\": \"e\", \"Reasoning\": \"Bacteria u...</td>\n",
|
910 |
+
" <td>{\"Final Answer\": \"c\"}</td>\n",
|
911 |
+
" <td>{\"Final Answer\": \"c\"}</td>\n",
|
912 |
+
" </tr>\n",
|
913 |
+
" <tr>\n",
|
914 |
+
" <th>3</th>\n",
|
915 |
+
" <td>Microbiology</td>\n",
|
916 |
+
" <td>Bacteria causes what to be harmed?</td>\n",
|
917 |
+
" <td>a</td>\n",
|
918 |
+
" <td>Now, let's go through each option and explain ...</td>\n",
|
919 |
+
" <td>To answer this question correctly, let's follo...</td>\n",
|
920 |
+
" <td>(a) Plants (b) Electronics (c) Fossils (d) Hum...</td>\n",
|
921 |
+
" <td>Question: Bacteria causes what to be harmed?\\n...</td>\n",
|
922 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
923 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
924 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
925 |
+
" <td>...</td>\n",
|
926 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
927 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
928 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
929 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
930 |
+
" <td>{\"Reasoning\": \"Bacteria are microorganisms tha...</td>\n",
|
931 |
+
" <td>{\"Final Answer\": \"d\", \"Reasoning\": \"Bacteria c...</td>\n",
|
932 |
+
" <td>{ \"Reasoning\": \"Bacteria can cause harm to var...</td>\n",
|
933 |
+
" <td>{\"Final Answer\": \"d\", \"Reasoning\": \"Bacteria c...</td>\n",
|
934 |
+
" <td>{\"Final Answer\": \"d\"}</td>\n",
|
935 |
+
" <td>{\"Final Answer\": \"d\"}</td>\n",
|
936 |
+
" </tr>\n",
|
937 |
+
" <tr>\n",
|
938 |
+
" <th>4</th>\n",
|
939 |
+
" <td>Biology</td>\n",
|
940 |
+
" <td>Plants and snakes live _.?</td>\n",
|
941 |
+
" <td>a</td>\n",
|
942 |
+
" <td>b) Important habitats: This option is incorrec...</td>\n",
|
943 |
+
" <td>1. Read the question and options carefully: Th...</td>\n",
|
944 |
+
" <td>(a) Almost everywhere (b) Important habitats (...</td>\n",
|
945 |
+
" <td>Question: Plants and snakes live _.?\\nAnswer C...</td>\n",
|
946 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
947 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
948 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
949 |
+
" <td>...</td>\n",
|
950 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
951 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
952 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
953 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
954 |
+
" <td>{\"Reasoning\": \"The question asks about the liv...</td>\n",
|
955 |
+
" <td>{\"Final Answer\": \"a\", \"Reasoning\": \"Plants and...</td>\n",
|
956 |
+
" <td>{\"Reasoning\": \"The question asks about the rel...</td>\n",
|
957 |
+
" <td>{\"Final Answer\": \"f\", \"Reasoning\": \"Plants and...</td>\n",
|
958 |
+
" <td>{\"Final Answer\": \"g\"}</td>\n",
|
959 |
+
" <td>{\"Final Answer\": \"g\"}</td>\n",
|
960 |
+
" </tr>\n",
|
961 |
+
" <tr>\n",
|
962 |
+
" <th>...</th>\n",
|
963 |
+
" <td>...</td>\n",
|
964 |
+
" <td>...</td>\n",
|
965 |
+
" <td>...</td>\n",
|
966 |
+
" <td>...</td>\n",
|
967 |
+
" <td>...</td>\n",
|
968 |
+
" <td>...</td>\n",
|
969 |
+
" <td>...</td>\n",
|
970 |
+
" <td>...</td>\n",
|
971 |
+
" <td>...</td>\n",
|
972 |
+
" <td>...</td>\n",
|
973 |
+
" <td>...</td>\n",
|
974 |
+
" <td>...</td>\n",
|
975 |
+
" <td>...</td>\n",
|
976 |
+
" <td>...</td>\n",
|
977 |
+
" <td>...</td>\n",
|
978 |
+
" <td>...</td>\n",
|
979 |
+
" <td>...</td>\n",
|
980 |
+
" <td>...</td>\n",
|
981 |
+
" <td>...</td>\n",
|
982 |
+
" <td>...</td>\n",
|
983 |
+
" <td>...</td>\n",
|
984 |
+
" </tr>\n",
|
985 |
+
" <tr>\n",
|
986 |
+
" <th>1678</th>\n",
|
987 |
+
" <td>Biology</td>\n",
|
988 |
+
" <td>New resources required for creation can be red...</td>\n",
|
989 |
+
" <td>g</td>\n",
|
990 |
+
" <td>a) Mining: Mining involves extracting minerals...</td>\n",
|
991 |
+
" <td>1. Start by reading the question and options c...</td>\n",
|
992 |
+
" <td>(a) Mining (b) Mutations (c) Fossil fuels (d) ...</td>\n",
|
993 |
+
" <td>Question: New resources required for creation ...</td>\n",
|
994 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
995 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
996 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
997 |
+
" <td>...</td>\n",
|
998 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
999 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1000 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1001 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1002 |
+
" <td>{\"Reasoning\": \"The question asks for a way to ...</td>\n",
|
1003 |
+
" <td>{\"Final Answer\": \"g\", \"Reasoning\": \"Recycling ...</td>\n",
|
1004 |
+
" <td>{\"Reasoning\": \"Mining, fossil fuels, deforesta...</td>\n",
|
1005 |
+
" <td>{\"Final Answer\": \"g\", \"Reasoning\": \"Recycling ...</td>\n",
|
1006 |
+
" <td>{\"Final Answer\": \"g\"}</td>\n",
|
1007 |
+
" <td>{\"Final Answer\": \"g\"}</td>\n",
|
1008 |
+
" </tr>\n",
|
1009 |
+
" <tr>\n",
|
1010 |
+
" <th>1679</th>\n",
|
1011 |
+
" <td>Biology</td>\n",
|
1012 |
+
" <td>A drought dehydrates an entire what?</td>\n",
|
1013 |
+
" <td>d</td>\n",
|
1014 |
+
" <td>a) Body water: This option is incorrect becaus...</td>\n",
|
1015 |
+
" <td>(a) Watershred - This is not a scientific term...</td>\n",
|
1016 |
+
" <td>(a) Body water (b) Dried fruit (c) Bodily wate...</td>\n",
|
1017 |
+
" <td>Question: A drought dehydrates an entire what?...</td>\n",
|
1018 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1019 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1020 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1021 |
+
" <td>...</td>\n",
|
1022 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1023 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1024 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1025 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1026 |
+
" <td>{\"Reasoning\": \"The question asks for a term th...</td>\n",
|
1027 |
+
" <td>{\"Final Answer\": \"d\", \"Reasoning\": \"A drought ...</td>\n",
|
1028 |
+
" <td>{ \"Reasoning\": \"A drought is a prolonged perio...</td>\n",
|
1029 |
+
" <td>{\"Final Answer\": \"d\", \"Reasoning\": \"A drought ...</td>\n",
|
1030 |
+
" <td>{\"Final Answer\": \"d\"}</td>\n",
|
1031 |
+
" <td>{\"Final Answer\": \"d\"}</td>\n",
|
1032 |
+
" </tr>\n",
|
1033 |
+
" <tr>\n",
|
1034 |
+
" <th>1680</th>\n",
|
1035 |
+
" <td>Biology</td>\n",
|
1036 |
+
" <td>An animal requires ingestion to do what?</td>\n",
|
1037 |
+
" <td>e</td>\n",
|
1038 |
+
" <td>a) Aerobic capacity: This option is not logica...</td>\n",
|
1039 |
+
" <td>1. Read the question and options carefully: \"W...</td>\n",
|
1040 |
+
" <td>(a) Aerobic capacity (b) Die (c) Water conserv...</td>\n",
|
1041 |
+
" <td>Question: An animal requires ingestion to do w...</td>\n",
|
1042 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1043 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1044 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1045 |
+
" <td>...</td>\n",
|
1046 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1047 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1048 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1049 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1050 |
+
" <td>{\"Reasoning\": \"Ingestion is the process of tak...</td>\n",
|
1051 |
+
" <td>{\"Final Answer\": \"e\", \"Reasoning\": \"Ingestion ...</td>\n",
|
1052 |
+
" <td>{\"Reasoning\": \"Ingestion is the process of tak...</td>\n",
|
1053 |
+
" <td>{\"Final Answer\": \"e\", \"Reasoning\": \"Ingestion ...</td>\n",
|
1054 |
+
" <td>{\"Final Answer\": \"d\"}</td>\n",
|
1055 |
+
" <td>{\"Final Answer\": \"d\"}</td>\n",
|
1056 |
+
" </tr>\n",
|
1057 |
+
" <tr>\n",
|
1058 |
+
" <th>1681</th>\n",
|
1059 |
+
" <td>Biology</td>\n",
|
1060 |
+
" <td>Ultraviolet light can cause what?</td>\n",
|
1061 |
+
" <td>b</td>\n",
|
1062 |
+
" <td>a) Ultraviolet light does not cause heat energ...</td>\n",
|
1063 |
+
" <td>1. First, read the question and options carefu...</td>\n",
|
1064 |
+
" <td>(a) Heat energy (b) Skin cancer (c) Killing in...</td>\n",
|
1065 |
+
" <td>Question: Ultraviolet light can cause what?\\nA...</td>\n",
|
1066 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1067 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1068 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1069 |
+
" <td>...</td>\n",
|
1070 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1071 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1072 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1073 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1074 |
+
" <td>{\"Reasoning\": \"Ultraviolet (UV) light is a typ...</td>\n",
|
1075 |
+
" <td>{\"Final Answer\": \"b\", \"Reasoning\": \"Ultraviole...</td>\n",
|
1076 |
+
" <td>{\"Reasoning\": \"Ultraviolet (UV) light is a typ...</td>\n",
|
1077 |
+
" <td>{\"Final Answer\": \"d\", \"Reasoning\": \"Ultraviole...</td>\n",
|
1078 |
+
" <td>{\"Final Answer\": \"b\"}</td>\n",
|
1079 |
+
" <td>{\"Final Answer\": \"b\"}</td>\n",
|
1080 |
+
" </tr>\n",
|
1081 |
+
" <tr>\n",
|
1082 |
+
" <th>1682</th>\n",
|
1083 |
+
" <td>Physical activity</td>\n",
|
1084 |
+
" <td>What can increase a body's strength?</td>\n",
|
1085 |
+
" <td>c</td>\n",
|
1086 |
+
" <td>a) Four limbs: This option is not correct beca...</td>\n",
|
1087 |
+
" <td>(a) Communication: In this context, the questi...</td>\n",
|
1088 |
+
" <td>(a) Four limbs (b) Disease (c) Running (d) Bic...</td>\n",
|
1089 |
+
" <td>Question: What can increase a body's strength?...</td>\n",
|
1090 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1091 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1092 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1093 |
+
" <td>...</td>\n",
|
1094 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1095 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1096 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1097 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1098 |
+
" <td>{\"Reasoning\": \"The question asks for something...</td>\n",
|
1099 |
+
" <td>{\"Final Answer\": \"c\", \"Reasoning\": \"Running is...</td>\n",
|
1100 |
+
" <td>{\"Reasoning\": \"A body's strength is primarily ...</td>\n",
|
1101 |
+
" <td>{\"Final Answer\": \"c\", \"Reasoning\": \"Running is...</td>\n",
|
1102 |
+
" <td>{\"Final Answer\": \"c\"}</td>\n",
|
1103 |
+
" <td>{\"Final Answer\": \"c\"}</td>\n",
|
1104 |
+
" </tr>\n",
|
1105 |
+
" </tbody>\n",
|
1106 |
+
"</table>\n",
|
1107 |
+
"<p>1683 rows × 21 columns</p>\n",
|
1108 |
+
"</div>"
|
1109 |
+
],
|
1110 |
+
"text/plain": [
|
1111 |
+
" topic question_text \\\n",
|
1112 |
+
"0 Transportation What are busses used for? \n",
|
1113 |
+
"1 Climate change Which of the following does not contribute to ... \n",
|
1114 |
+
"2 Photography What uses electrical energy converted from che... \n",
|
1115 |
+
"3 Microbiology Bacteria causes what to be harmed? \n",
|
1116 |
+
"4 Biology Plants and snakes live _.? \n",
|
1117 |
+
"... ... ... \n",
|
1118 |
+
"1678 Biology New resources required for creation can be red... \n",
|
1119 |
+
"1679 Biology A drought dehydrates an entire what? \n",
|
1120 |
+
"1680 Biology An animal requires ingestion to do what? \n",
|
1121 |
+
"1681 Biology Ultraviolet light can cause what? \n",
|
1122 |
+
"1682 Physical activity What can increase a body's strength? \n",
|
1123 |
+
"\n",
|
1124 |
+
" answer_key gpt3_5_reasoning \\\n",
|
1125 |
+
"0 b a) Protective shelter: This option is incorrec... \n",
|
1126 |
+
"1 g a) Nucleus of a cell: This option is not relat... \n",
|
1127 |
+
"2 b a) Sunlight: Sunlight is a form of energy that... \n",
|
1128 |
+
"3 a Now, let's go through each option and explain ... \n",
|
1129 |
+
"4 a b) Important habitats: This option is incorrec... \n",
|
1130 |
+
"... ... ... \n",
|
1131 |
+
"1678 g a) Mining: Mining involves extracting minerals... \n",
|
1132 |
+
"1679 d a) Body water: This option is incorrect becaus... \n",
|
1133 |
+
"1680 e a) Aerobic capacity: This option is not logica... \n",
|
1134 |
+
"1681 b a) Ultraviolet light does not cause heat energ... \n",
|
1135 |
+
"1682 c a) Four limbs: This option is not correct beca... \n",
|
1136 |
+
"\n",
|
1137 |
+
" mistral_reasoning \\\n",
|
1138 |
+
"0 1. Start by reading the question carefully: \"C... \n",
|
1139 |
+
"1 To solve this question, let's first understand... \n",
|
1140 |
+
"2 1. Read the question and options carefully: Th... \n",
|
1141 |
+
"3 To answer this question correctly, let's follo... \n",
|
1142 |
+
"4 1. Read the question and options carefully: Th... \n",
|
1143 |
+
"... ... \n",
|
1144 |
+
"1678 1. Start by reading the question and options c... \n",
|
1145 |
+
"1679 (a) Watershred - This is not a scientific term... \n",
|
1146 |
+
"1680 1. Read the question and options carefully: \"W... \n",
|
1147 |
+
"1681 1. First, read the question and options carefu... \n",
|
1148 |
+
"1682 (a) Communication: In this context, the questi... \n",
|
1149 |
+
"\n",
|
1150 |
+
" answer_choices \\\n",
|
1151 |
+
"0 (a) Protective shelter (b) Transporting humans... \n",
|
1152 |
+
"1 (a) Nucleus of a cell (b) Flying in a plane (c... \n",
|
1153 |
+
"2 (a) Sunlight (b) Cameras (c) Cells (d) Buses (... \n",
|
1154 |
+
"3 (a) Plants (b) Electronics (c) Fossils (d) Hum... \n",
|
1155 |
+
"4 (a) Almost everywhere (b) Important habitats (... \n",
|
1156 |
+
"... ... \n",
|
1157 |
+
"1678 (a) Mining (b) Mutations (c) Fossil fuels (d) ... \n",
|
1158 |
+
"1679 (a) Body water (b) Dried fruit (c) Bodily wate... \n",
|
1159 |
+
"1680 (a) Aerobic capacity (b) Die (c) Water conserv... \n",
|
1160 |
+
"1681 (a) Heat energy (b) Skin cancer (c) Killing in... \n",
|
1161 |
+
"1682 (a) Four limbs (b) Disease (c) Running (d) Bic... \n",
|
1162 |
+
"\n",
|
1163 |
+
" user_prompt \\\n",
|
1164 |
+
"0 Question: What are busses used for?\\nAnswer Ch... \n",
|
1165 |
+
"1 Question: Which of the following does not cont... \n",
|
1166 |
+
"2 Question: What uses electrical energy converte... \n",
|
1167 |
+
"3 Question: Bacteria causes what to be harmed?\\n... \n",
|
1168 |
+
"4 Question: Plants and snakes live _.?\\nAnswer C... \n",
|
1169 |
+
"... ... \n",
|
1170 |
+
"1678 Question: New resources required for creation ... \n",
|
1171 |
+
"1679 Question: A drought dehydrates an entire what?... \n",
|
1172 |
+
"1680 Question: An animal requires ingestion to do w... \n",
|
1173 |
+
"1681 Question: Ultraviolet light can cause what?\\nA... \n",
|
1174 |
+
"1682 Question: What can increase a body's strength?... \n",
|
1175 |
+
"\n",
|
1176 |
+
" user_prompt_RFA \\\n",
|
1177 |
+
"0 <s>[INST] Answer the Question and include your... \n",
|
1178 |
+
"1 <s>[INST] Answer the Question and include your... \n",
|
1179 |
+
"2 <s>[INST] Answer the Question and include your... \n",
|
1180 |
+
"3 <s>[INST] Answer the Question and include your... \n",
|
1181 |
+
"4 <s>[INST] Answer the Question and include your... \n",
|
1182 |
+
"... ... \n",
|
1183 |
+
"1678 <s>[INST] Answer the Question and include your... \n",
|
1184 |
+
"1679 <s>[INST] Answer the Question and include your... \n",
|
1185 |
+
"1680 <s>[INST] Answer the Question and include your... \n",
|
1186 |
+
"1681 <s>[INST] Answer the Question and include your... \n",
|
1187 |
+
"1682 <s>[INST] Answer the Question and include your... \n",
|
1188 |
+
"\n",
|
1189 |
+
" conversation_RFA_gpt3_5 \\\n",
|
1190 |
+
"0 <s>[INST] Answer the Question and include your... \n",
|
1191 |
+
"1 <s>[INST] Answer the Question and include your... \n",
|
1192 |
+
"2 <s>[INST] Answer the Question and include your... \n",
|
1193 |
+
"3 <s>[INST] Answer the Question and include your... \n",
|
1194 |
+
"4 <s>[INST] Answer the Question and include your... \n",
|
1195 |
+
"... ... \n",
|
1196 |
+
"1678 <s>[INST] Answer the Question and include your... \n",
|
1197 |
+
"1679 <s>[INST] Answer the Question and include your... \n",
|
1198 |
+
"1680 <s>[INST] Answer the Question and include your... \n",
|
1199 |
+
"1681 <s>[INST] Answer the Question and include your... \n",
|
1200 |
+
"1682 <s>[INST] Answer the Question and include your... \n",
|
1201 |
+
"\n",
|
1202 |
+
" conversation_RFA_mistral ... \\\n",
|
1203 |
+
"0 <s>[INST] Answer the Question and include your... ... \n",
|
1204 |
+
"1 <s>[INST] Answer the Question and include your... ... \n",
|
1205 |
+
"2 <s>[INST] Answer the Question and include your... ... \n",
|
1206 |
+
"3 <s>[INST] Answer the Question and include your... ... \n",
|
1207 |
+
"4 <s>[INST] Answer the Question and include your... ... \n",
|
1208 |
+
"... ... ... \n",
|
1209 |
+
"1678 <s>[INST] Answer the Question and include your... ... \n",
|
1210 |
+
"1679 <s>[INST] Answer the Question and include your... ... \n",
|
1211 |
+
"1680 <s>[INST] Answer the Question and include your... ... \n",
|
1212 |
+
"1681 <s>[INST] Answer the Question and include your... ... \n",
|
1213 |
+
"1682 <s>[INST] Answer the Question and include your... ... \n",
|
1214 |
+
"\n",
|
1215 |
+
" conversation_FAR_gpt3_5 \\\n",
|
1216 |
+
"0 <s>[INST] Answer the Question and include your... \n",
|
1217 |
+
"1 <s>[INST] Answer the Question and include your... \n",
|
1218 |
+
"2 <s>[INST] Answer the Question and include your... \n",
|
1219 |
+
"3 <s>[INST] Answer the Question and include your... \n",
|
1220 |
+
"4 <s>[INST] Answer the Question and include your... \n",
|
1221 |
+
"... ... \n",
|
1222 |
+
"1678 <s>[INST] Answer the Question and include your... \n",
|
1223 |
+
"1679 <s>[INST] Answer the Question and include your... \n",
|
1224 |
+
"1680 <s>[INST] Answer the Question and include your... \n",
|
1225 |
+
"1681 <s>[INST] Answer the Question and include your... \n",
|
1226 |
+
"1682 <s>[INST] Answer the Question and include your... \n",
|
1227 |
+
"\n",
|
1228 |
+
" conversation_FAR_mistral \\\n",
|
1229 |
+
"0 <s>[INST] Answer the Question and include your... \n",
|
1230 |
+
"1 <s>[INST] Answer the Question and include your... \n",
|
1231 |
+
"2 <s>[INST] Answer the Question and include your... \n",
|
1232 |
+
"3 <s>[INST] Answer the Question and include your... \n",
|
1233 |
+
"4 <s>[INST] Answer the Question and include your... \n",
|
1234 |
+
"... ... \n",
|
1235 |
+
"1678 <s>[INST] Answer the Question and include your... \n",
|
1236 |
+
"1679 <s>[INST] Answer the Question and include your... \n",
|
1237 |
+
"1680 <s>[INST] Answer the Question and include your... \n",
|
1238 |
+
"1681 <s>[INST] Answer the Question and include your... \n",
|
1239 |
+
"1682 <s>[INST] Answer the Question and include your... \n",
|
1240 |
+
"\n",
|
1241 |
+
" user_prompt_FA \\\n",
|
1242 |
+
"0 <s>[INST] Answer the Question and include your... \n",
|
1243 |
+
"1 <s>[INST] Answer the Question and include your... \n",
|
1244 |
+
"2 <s>[INST] Answer the Question and include your... \n",
|
1245 |
+
"3 <s>[INST] Answer the Question and include your... \n",
|
1246 |
+
"4 <s>[INST] Answer the Question and include your... \n",
|
1247 |
+
"... ... \n",
|
1248 |
+
"1678 <s>[INST] Answer the Question and include your... \n",
|
1249 |
+
"1679 <s>[INST] Answer the Question and include your... \n",
|
1250 |
+
"1680 <s>[INST] Answer the Question and include your... \n",
|
1251 |
+
"1681 <s>[INST] Answer the Question and include your... \n",
|
1252 |
+
"1682 <s>[INST] Answer the Question and include your... \n",
|
1253 |
+
"\n",
|
1254 |
+
" conversation_FA \\\n",
|
1255 |
+
"0 <s>[INST] Answer the Question and include your... \n",
|
1256 |
+
"1 <s>[INST] Answer the Question and include your... \n",
|
1257 |
+
"2 <s>[INST] Answer the Question and include your... \n",
|
1258 |
+
"3 <s>[INST] Answer the Question and include your... \n",
|
1259 |
+
"4 <s>[INST] Answer the Question and include your... \n",
|
1260 |
+
"... ... \n",
|
1261 |
+
"1678 <s>[INST] Answer the Question and include your... \n",
|
1262 |
+
"1679 <s>[INST] Answer the Question and include your... \n",
|
1263 |
+
"1680 <s>[INST] Answer the Question and include your... \n",
|
1264 |
+
"1681 <s>[INST] Answer the Question and include your... \n",
|
1265 |
+
"1682 <s>[INST] Answer the Question and include your... \n",
|
1266 |
+
"\n",
|
1267 |
+
" responses_RFA_mistral \\\n",
|
1268 |
+
"0 {\"Reasoning\": \"Busses are primarily used for t... \n",
|
1269 |
+
"1 {\"Reasoning\": \"Global warming is primarily cau... \n",
|
1270 |
+
"2 {\"Reasoning\": \"The question asks for an organi... \n",
|
1271 |
+
"3 {\"Reasoning\": \"Bacteria are microorganisms tha... \n",
|
1272 |
+
"4 {\"Reasoning\": \"The question asks about the liv... \n",
|
1273 |
+
"... ... \n",
|
1274 |
+
"1678 {\"Reasoning\": \"The question asks for a way to ... \n",
|
1275 |
+
"1679 {\"Reasoning\": \"The question asks for a term th... \n",
|
1276 |
+
"1680 {\"Reasoning\": \"Ingestion is the process of tak... \n",
|
1277 |
+
"1681 {\"Reasoning\": \"Ultraviolet (UV) light is a typ... \n",
|
1278 |
+
"1682 {\"Reasoning\": \"The question asks for something... \n",
|
1279 |
+
"\n",
|
1280 |
+
" responses_FAR_mistral \\\n",
|
1281 |
+
"0 {\"Final Answer\": \"b\", \"Reasoning\": \"Busses are... \n",
|
1282 |
+
"1 {\"Final Answer\": \"a\", \"Reasoning\": \"The nucleu... \n",
|
1283 |
+
"2 {\"Final Answer\": \"c\", \"Reasoning\": \"The proces... \n",
|
1284 |
+
"3 {\"Final Answer\": \"d\", \"Reasoning\": \"Bacteria c... \n",
|
1285 |
+
"4 {\"Final Answer\": \"a\", \"Reasoning\": \"Plants and... \n",
|
1286 |
+
"... ... \n",
|
1287 |
+
"1678 {\"Final Answer\": \"g\", \"Reasoning\": \"Recycling ... \n",
|
1288 |
+
"1679 {\"Final Answer\": \"d\", \"Reasoning\": \"A drought ... \n",
|
1289 |
+
"1680 {\"Final Answer\": \"e\", \"Reasoning\": \"Ingestion ... \n",
|
1290 |
+
"1681 {\"Final Answer\": \"b\", \"Reasoning\": \"Ultraviole... \n",
|
1291 |
+
"1682 {\"Final Answer\": \"c\", \"Reasoning\": \"Running is... \n",
|
1292 |
+
"\n",
|
1293 |
+
" responses_RFA_gpt3_5 \\\n",
|
1294 |
+
"0 {\"Reasoning\": \"Busses are primarily used for t... \n",
|
1295 |
+
"1 {\"Reasoning\": \"The nucleus of a cell (option a... \n",
|
1296 |
+
"2 {\"Reasoning\": \"The process of converting chemi... \n",
|
1297 |
+
"3 { \"Reasoning\": \"Bacteria can cause harm to var... \n",
|
1298 |
+
"4 {\"Reasoning\": \"The question asks about the rel... \n",
|
1299 |
+
"... ... \n",
|
1300 |
+
"1678 {\"Reasoning\": \"Mining, fossil fuels, deforesta... \n",
|
1301 |
+
"1679 { \"Reasoning\": \"A drought is a prolonged perio... \n",
|
1302 |
+
"1680 {\"Reasoning\": \"Ingestion is the process of tak... \n",
|
1303 |
+
"1681 {\"Reasoning\": \"Ultraviolet (UV) light is a typ... \n",
|
1304 |
+
"1682 {\"Reasoning\": \"A body's strength is primarily ... \n",
|
1305 |
+
"\n",
|
1306 |
+
" responses_FAR_gpt3_5 \\\n",
|
1307 |
+
"0 {\"Final Answer\": \"b\", \"Reasoning\": \"Busses are... \n",
|
1308 |
+
"1 {\"Final Answer\": \"a\", \"Reasoning\": \"The nucleu... \n",
|
1309 |
+
"2 {\"Final Answer\": \"e\", \"Reasoning\": \"Bacteria u... \n",
|
1310 |
+
"3 {\"Final Answer\": \"d\", \"Reasoning\": \"Bacteria c... \n",
|
1311 |
+
"4 {\"Final Answer\": \"f\", \"Reasoning\": \"Plants and... \n",
|
1312 |
+
"... ... \n",
|
1313 |
+
"1678 {\"Final Answer\": \"g\", \"Reasoning\": \"Recycling ... \n",
|
1314 |
+
"1679 {\"Final Answer\": \"d\", \"Reasoning\": \"A drought ... \n",
|
1315 |
+
"1680 {\"Final Answer\": \"e\", \"Reasoning\": \"Ingestion ... \n",
|
1316 |
+
"1681 {\"Final Answer\": \"d\", \"Reasoning\": \"Ultraviole... \n",
|
1317 |
+
"1682 {\"Final Answer\": \"c\", \"Reasoning\": \"Running is... \n",
|
1318 |
+
"\n",
|
1319 |
+
" responses_FA responses_base \n",
|
1320 |
+
"0 {\"Final Answer\": \"b\"} {\"Final Answer\": \"b\"} \n",
|
1321 |
+
"1 {\"Final Answer\": \"a\"} {\"Final Answer\": \"a\"} \n",
|
1322 |
+
"2 {\"Final Answer\": \"c\"} {\"Final Answer\": \"c\"} \n",
|
1323 |
+
"3 {\"Final Answer\": \"d\"} {\"Final Answer\": \"d\"} \n",
|
1324 |
+
"4 {\"Final Answer\": \"g\"} {\"Final Answer\": \"g\"} \n",
|
1325 |
+
"... ... ... \n",
|
1326 |
+
"1678 {\"Final Answer\": \"g\"} {\"Final Answer\": \"g\"} \n",
|
1327 |
+
"1679 {\"Final Answer\": \"d\"} {\"Final Answer\": \"d\"} \n",
|
1328 |
+
"1680 {\"Final Answer\": \"d\"} {\"Final Answer\": \"d\"} \n",
|
1329 |
+
"1681 {\"Final Answer\": \"b\"} {\"Final Answer\": \"b\"} \n",
|
1330 |
+
"1682 {\"Final Answer\": \"c\"} {\"Final Answer\": \"c\"} \n",
|
1331 |
+
"\n",
|
1332 |
+
"[1683 rows x 21 columns]"
|
1333 |
+
]
|
1334 |
+
},
|
1335 |
+
"execution_count": 21,
|
1336 |
+
"metadata": {},
|
1337 |
+
"output_type": "execute_result"
|
1338 |
+
}
|
1339 |
+
],
|
1340 |
+
"source": [
|
1341 |
+
"df"
|
1342 |
+
]
|
1343 |
+
},
|
1344 |
+
{
|
1345 |
+
"cell_type": "code",
|
1346 |
+
"execution_count": 22,
|
1347 |
+
"id": "8619f9f5-9fe4-433e-b524-51c2b12e8d12",
|
1348 |
+
"metadata": {},
|
1349 |
+
"outputs": [],
|
1350 |
+
"source": [
|
1351 |
+
"def extract_final_answer(response):\n",
|
1352 |
+
" return json.loads(response).get(\"Final Answer\")\n",
|
1353 |
+
"\n",
|
1354 |
+
"# Create new columns for predictions\n",
|
1355 |
+
"df['predictions_base'] = df['responses_base'].apply(extract_final_answer)\n",
|
1356 |
+
"df['predictions_FA'] = df['responses_FA'].apply(extract_final_answer)\n",
|
1357 |
+
"df['predictions_RFA_mistral'] = df['responses_RFA_mistral'].apply(extract_final_answer)\n",
|
1358 |
+
"df['predictions_FAR_mistral'] = df['responses_FAR_mistral'].apply(extract_final_answer)\n",
|
1359 |
+
"df['predictions_RFA_gpt3_5'] = df['responses_RFA_gpt3_5'].apply(extract_final_answer)\n",
|
1360 |
+
"df['predictions_FAR_gpt3_5'] = df['responses_FAR_gpt3_5'].apply(extract_final_answer)\n"
|
1361 |
+
]
|
1362 |
+
},
|
1363 |
+
{
|
1364 |
+
"cell_type": "code",
|
1365 |
+
"execution_count": 23,
|
1366 |
+
"id": "938cf2a3-2fed-42a3-82ec-a56cb0ea9f37",
|
1367 |
+
"metadata": {},
|
1368 |
+
"outputs": [
|
1369 |
+
{
|
1370 |
+
"name": "stdout",
|
1371 |
+
"output_type": "stream",
|
1372 |
+
"text": [
|
1373 |
+
"Base: \t\t\t\t\t\t45.28%\n",
|
1374 |
+
"Final Answer: \t\t\t\t\t45.4%\n",
|
1375 |
+
"Reasoning and then the Final Answer (Mistral): \t53.89%\n",
|
1376 |
+
"Final Answer and then the Reasoning (Mistral): \t60.72%\n",
|
1377 |
+
"Reasoning and then the Final Answer (GPT-3.5): \t59.06%\n",
|
1378 |
+
"Final Answer and then the Reasoning (GPT-3.5): \t60.31%\n"
|
1379 |
+
]
|
1380 |
+
}
|
1381 |
+
],
|
1382 |
+
"source": [
|
1383 |
+
"from sklearn.metrics import accuracy_score\n",
|
1384 |
+
"\n",
|
1385 |
+
"print(f\"Base: \\t\\t\\t\\t\\t\\t{round(accuracy_score(y_true=df['answer_key'], y_pred=df['predictions_base']) * 100, 2)}%\")\n",
|
1386 |
+
"print(f\"Final Answer: \\t\\t\\t\\t\\t{round(accuracy_score(y_true=df['answer_key'], y_pred=df['predictions_FA']) * 100, 2)}%\")\n",
|
1387 |
+
"print(f\"Reasoning and then the Final Answer (Mistral): \\t{round(accuracy_score(y_true=df['answer_key'], y_pred=df['predictions_RFA_mistral']) * 100, 2)}%\")\n",
|
1388 |
+
"print(f\"Final Answer and then the Reasoning (Mistral): \\t{round(accuracy_score(y_true=df['answer_key'], y_pred=df['predictions_FAR_mistral']) * 100, 2)}%\")\n",
|
1389 |
+
"print(f\"Reasoning and then the Final Answer (GPT-3.5): \\t{round(accuracy_score(y_true=df['answer_key'], y_pred=df['predictions_RFA_gpt3_5']) * 100, 2)}%\")\n",
|
1390 |
+
"print(f\"Final Answer and then the Reasoning (GPT-3.5): \\t{round(accuracy_score(y_true=df['answer_key'], y_pred=df['predictions_FAR_gpt3_5']) * 100, 2)}%\")"
|
1391 |
+
]
|
1392 |
+
},
|
1393 |
+
{
|
1394 |
+
"cell_type": "code",
|
1395 |
+
"execution_count": 24,
|
1396 |
+
"id": "83aae472-513b-43c3-9ee8-64d4cda775e0",
|
1397 |
+
"metadata": {},
|
1398 |
+
"outputs": [
|
1399 |
+
{
|
1400 |
+
"data": {
|
1401 |
+
"text/html": [
|
1402 |
+
"<div>\n",
|
1403 |
+
"<style scoped>\n",
|
1404 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
1405 |
+
" vertical-align: middle;\n",
|
1406 |
+
" }\n",
|
1407 |
+
"\n",
|
1408 |
+
" .dataframe tbody tr th {\n",
|
1409 |
+
" vertical-align: top;\n",
|
1410 |
+
" }\n",
|
1411 |
+
"\n",
|
1412 |
+
" .dataframe thead th {\n",
|
1413 |
+
" text-align: right;\n",
|
1414 |
+
" }\n",
|
1415 |
+
"</style>\n",
|
1416 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
1417 |
+
" <thead>\n",
|
1418 |
+
" <tr style=\"text-align: right;\">\n",
|
1419 |
+
" <th></th>\n",
|
1420 |
+
" <th>topic</th>\n",
|
1421 |
+
" <th>question_text</th>\n",
|
1422 |
+
" <th>answer_key</th>\n",
|
1423 |
+
" <th>gpt3_5_reasoning</th>\n",
|
1424 |
+
" <th>mistral_reasoning</th>\n",
|
1425 |
+
" <th>answer_choices</th>\n",
|
1426 |
+
" <th>user_prompt</th>\n",
|
1427 |
+
" <th>user_prompt_RFA</th>\n",
|
1428 |
+
" <th>conversation_RFA_gpt3_5</th>\n",
|
1429 |
+
" <th>conversation_RFA_mistral</th>\n",
|
1430 |
+
" <th>...</th>\n",
|
1431 |
+
" <th>responses_RFA_gpt3_5</th>\n",
|
1432 |
+
" <th>responses_FAR_gpt3_5</th>\n",
|
1433 |
+
" <th>responses_FA</th>\n",
|
1434 |
+
" <th>responses_base</th>\n",
|
1435 |
+
" <th>predictions_base</th>\n",
|
1436 |
+
" <th>predictions_FA</th>\n",
|
1437 |
+
" <th>predictions_RFA_mistral</th>\n",
|
1438 |
+
" <th>predictions_FAR_mistral</th>\n",
|
1439 |
+
" <th>predictions_RFA_gpt3_5</th>\n",
|
1440 |
+
" <th>predictions_FAR_gpt3_5</th>\n",
|
1441 |
+
" </tr>\n",
|
1442 |
+
" </thead>\n",
|
1443 |
+
" <tbody>\n",
|
1444 |
+
" <tr>\n",
|
1445 |
+
" <th>0</th>\n",
|
1446 |
+
" <td>Transportation</td>\n",
|
1447 |
+
" <td>What are busses used for?</td>\n",
|
1448 |
+
" <td>b</td>\n",
|
1449 |
+
" <td>a) Protective shelter: This option is incorrec...</td>\n",
|
1450 |
+
" <td>1. Start by reading the question carefully: \"C...</td>\n",
|
1451 |
+
" <td>(a) Protective shelter (b) Transporting humans...</td>\n",
|
1452 |
+
" <td>Question: What are busses used for?\\nAnswer Ch...</td>\n",
|
1453 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1454 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1455 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1456 |
+
" <td>...</td>\n",
|
1457 |
+
" <td>{\"Reasoning\": \"Busses are primarily used for t...</td>\n",
|
1458 |
+
" <td>{\"Final Answer\": \"b\", \"Reasoning\": \"Busses are...</td>\n",
|
1459 |
+
" <td>{\"Final Answer\": \"b\"}</td>\n",
|
1460 |
+
" <td>{\"Final Answer\": \"b\"}</td>\n",
|
1461 |
+
" <td>b</td>\n",
|
1462 |
+
" <td>b</td>\n",
|
1463 |
+
" <td>b</td>\n",
|
1464 |
+
" <td>b</td>\n",
|
1465 |
+
" <td>b</td>\n",
|
1466 |
+
" <td>b</td>\n",
|
1467 |
+
" </tr>\n",
|
1468 |
+
" <tr>\n",
|
1469 |
+
" <th>1</th>\n",
|
1470 |
+
" <td>Climate change</td>\n",
|
1471 |
+
" <td>Which of the following does not contribute to ...</td>\n",
|
1472 |
+
" <td>g</td>\n",
|
1473 |
+
" <td>a) Nucleus of a cell: This option is not relat...</td>\n",
|
1474 |
+
" <td>To solve this question, let's first understand...</td>\n",
|
1475 |
+
" <td>(a) Nucleus of a cell (b) Flying in a plane (c...</td>\n",
|
1476 |
+
" <td>Question: Which of the following does not cont...</td>\n",
|
1477 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1478 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1479 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1480 |
+
" <td>...</td>\n",
|
1481 |
+
" <td>{\"Reasoning\": \"The nucleus of a cell (option a...</td>\n",
|
1482 |
+
" <td>{\"Final Answer\": \"a\", \"Reasoning\": \"The nucleu...</td>\n",
|
1483 |
+
" <td>{\"Final Answer\": \"a\"}</td>\n",
|
1484 |
+
" <td>{\"Final Answer\": \"a\"}</td>\n",
|
1485 |
+
" <td>a</td>\n",
|
1486 |
+
" <td>a</td>\n",
|
1487 |
+
" <td>g</td>\n",
|
1488 |
+
" <td>a</td>\n",
|
1489 |
+
" <td>a</td>\n",
|
1490 |
+
" <td>a</td>\n",
|
1491 |
+
" </tr>\n",
|
1492 |
+
" <tr>\n",
|
1493 |
+
" <th>2</th>\n",
|
1494 |
+
" <td>Photography</td>\n",
|
1495 |
+
" <td>What uses electrical energy converted from che...</td>\n",
|
1496 |
+
" <td>b</td>\n",
|
1497 |
+
" <td>a) Sunlight: Sunlight is a form of energy that...</td>\n",
|
1498 |
+
" <td>1. Read the question and options carefully: Th...</td>\n",
|
1499 |
+
" <td>(a) Sunlight (b) Cameras (c) Cells (d) Buses (...</td>\n",
|
1500 |
+
" <td>Question: What uses electrical energy converte...</td>\n",
|
1501 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1502 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1503 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1504 |
+
" <td>...</td>\n",
|
1505 |
+
" <td>{\"Reasoning\": \"The process of converting chemi...</td>\n",
|
1506 |
+
" <td>{\"Final Answer\": \"e\", \"Reasoning\": \"Bacteria u...</td>\n",
|
1507 |
+
" <td>{\"Final Answer\": \"c\"}</td>\n",
|
1508 |
+
" <td>{\"Final Answer\": \"c\"}</td>\n",
|
1509 |
+
" <td>c</td>\n",
|
1510 |
+
" <td>c</td>\n",
|
1511 |
+
" <td>e</td>\n",
|
1512 |
+
" <td>c</td>\n",
|
1513 |
+
" <td>c</td>\n",
|
1514 |
+
" <td>e</td>\n",
|
1515 |
+
" </tr>\n",
|
1516 |
+
" <tr>\n",
|
1517 |
+
" <th>3</th>\n",
|
1518 |
+
" <td>Microbiology</td>\n",
|
1519 |
+
" <td>Bacteria causes what to be harmed?</td>\n",
|
1520 |
+
" <td>a</td>\n",
|
1521 |
+
" <td>Now, let's go through each option and explain ...</td>\n",
|
1522 |
+
" <td>To answer this question correctly, let's follo...</td>\n",
|
1523 |
+
" <td>(a) Plants (b) Electronics (c) Fossils (d) Hum...</td>\n",
|
1524 |
+
" <td>Question: Bacteria causes what to be harmed?\\n...</td>\n",
|
1525 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1526 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1527 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1528 |
+
" <td>...</td>\n",
|
1529 |
+
" <td>{ \"Reasoning\": \"Bacteria can cause harm to var...</td>\n",
|
1530 |
+
" <td>{\"Final Answer\": \"d\", \"Reasoning\": \"Bacteria c...</td>\n",
|
1531 |
+
" <td>{\"Final Answer\": \"d\"}</td>\n",
|
1532 |
+
" <td>{\"Final Answer\": \"d\"}</td>\n",
|
1533 |
+
" <td>d</td>\n",
|
1534 |
+
" <td>d</td>\n",
|
1535 |
+
" <td>d</td>\n",
|
1536 |
+
" <td>d</td>\n",
|
1537 |
+
" <td>d</td>\n",
|
1538 |
+
" <td>d</td>\n",
|
1539 |
+
" </tr>\n",
|
1540 |
+
" <tr>\n",
|
1541 |
+
" <th>4</th>\n",
|
1542 |
+
" <td>Biology</td>\n",
|
1543 |
+
" <td>Plants and snakes live _.?</td>\n",
|
1544 |
+
" <td>a</td>\n",
|
1545 |
+
" <td>b) Important habitats: This option is incorrec...</td>\n",
|
1546 |
+
" <td>1. Read the question and options carefully: Th...</td>\n",
|
1547 |
+
" <td>(a) Almost everywhere (b) Important habitats (...</td>\n",
|
1548 |
+
" <td>Question: Plants and snakes live _.?\\nAnswer C...</td>\n",
|
1549 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1550 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1551 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1552 |
+
" <td>...</td>\n",
|
1553 |
+
" <td>{\"Reasoning\": \"The question asks about the rel...</td>\n",
|
1554 |
+
" <td>{\"Final Answer\": \"f\", \"Reasoning\": \"Plants and...</td>\n",
|
1555 |
+
" <td>{\"Final Answer\": \"g\"}</td>\n",
|
1556 |
+
" <td>{\"Final Answer\": \"g\"}</td>\n",
|
1557 |
+
" <td>g</td>\n",
|
1558 |
+
" <td>g</td>\n",
|
1559 |
+
" <td>b</td>\n",
|
1560 |
+
" <td>a</td>\n",
|
1561 |
+
" <td>f</td>\n",
|
1562 |
+
" <td>f</td>\n",
|
1563 |
+
" </tr>\n",
|
1564 |
+
" <tr>\n",
|
1565 |
+
" <th>...</th>\n",
|
1566 |
+
" <td>...</td>\n",
|
1567 |
+
" <td>...</td>\n",
|
1568 |
+
" <td>...</td>\n",
|
1569 |
+
" <td>...</td>\n",
|
1570 |
+
" <td>...</td>\n",
|
1571 |
+
" <td>...</td>\n",
|
1572 |
+
" <td>...</td>\n",
|
1573 |
+
" <td>...</td>\n",
|
1574 |
+
" <td>...</td>\n",
|
1575 |
+
" <td>...</td>\n",
|
1576 |
+
" <td>...</td>\n",
|
1577 |
+
" <td>...</td>\n",
|
1578 |
+
" <td>...</td>\n",
|
1579 |
+
" <td>...</td>\n",
|
1580 |
+
" <td>...</td>\n",
|
1581 |
+
" <td>...</td>\n",
|
1582 |
+
" <td>...</td>\n",
|
1583 |
+
" <td>...</td>\n",
|
1584 |
+
" <td>...</td>\n",
|
1585 |
+
" <td>...</td>\n",
|
1586 |
+
" <td>...</td>\n",
|
1587 |
+
" </tr>\n",
|
1588 |
+
" <tr>\n",
|
1589 |
+
" <th>1678</th>\n",
|
1590 |
+
" <td>Biology</td>\n",
|
1591 |
+
" <td>New resources required for creation can be red...</td>\n",
|
1592 |
+
" <td>g</td>\n",
|
1593 |
+
" <td>a) Mining: Mining involves extracting minerals...</td>\n",
|
1594 |
+
" <td>1. Start by reading the question and options c...</td>\n",
|
1595 |
+
" <td>(a) Mining (b) Mutations (c) Fossil fuels (d) ...</td>\n",
|
1596 |
+
" <td>Question: New resources required for creation ...</td>\n",
|
1597 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1598 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1599 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1600 |
+
" <td>...</td>\n",
|
1601 |
+
" <td>{\"Reasoning\": \"Mining, fossil fuels, deforesta...</td>\n",
|
1602 |
+
" <td>{\"Final Answer\": \"g\", \"Reasoning\": \"Recycling ...</td>\n",
|
1603 |
+
" <td>{\"Final Answer\": \"g\"}</td>\n",
|
1604 |
+
" <td>{\"Final Answer\": \"g\"}</td>\n",
|
1605 |
+
" <td>g</td>\n",
|
1606 |
+
" <td>g</td>\n",
|
1607 |
+
" <td>g</td>\n",
|
1608 |
+
" <td>g</td>\n",
|
1609 |
+
" <td>g</td>\n",
|
1610 |
+
" <td>g</td>\n",
|
1611 |
+
" </tr>\n",
|
1612 |
+
" <tr>\n",
|
1613 |
+
" <th>1679</th>\n",
|
1614 |
+
" <td>Biology</td>\n",
|
1615 |
+
" <td>A drought dehydrates an entire what?</td>\n",
|
1616 |
+
" <td>d</td>\n",
|
1617 |
+
" <td>a) Body water: This option is incorrect becaus...</td>\n",
|
1618 |
+
" <td>(a) Watershred - This is not a scientific term...</td>\n",
|
1619 |
+
" <td>(a) Body water (b) Dried fruit (c) Bodily wate...</td>\n",
|
1620 |
+
" <td>Question: A drought dehydrates an entire what?...</td>\n",
|
1621 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1622 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1623 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1624 |
+
" <td>...</td>\n",
|
1625 |
+
" <td>{ \"Reasoning\": \"A drought is a prolonged perio...</td>\n",
|
1626 |
+
" <td>{\"Final Answer\": \"d\", \"Reasoning\": \"A drought ...</td>\n",
|
1627 |
+
" <td>{\"Final Answer\": \"d\"}</td>\n",
|
1628 |
+
" <td>{\"Final Answer\": \"d\"}</td>\n",
|
1629 |
+
" <td>d</td>\n",
|
1630 |
+
" <td>d</td>\n",
|
1631 |
+
" <td>d</td>\n",
|
1632 |
+
" <td>d</td>\n",
|
1633 |
+
" <td>f</td>\n",
|
1634 |
+
" <td>d</td>\n",
|
1635 |
+
" </tr>\n",
|
1636 |
+
" <tr>\n",
|
1637 |
+
" <th>1680</th>\n",
|
1638 |
+
" <td>Biology</td>\n",
|
1639 |
+
" <td>An animal requires ingestion to do what?</td>\n",
|
1640 |
+
" <td>e</td>\n",
|
1641 |
+
" <td>a) Aerobic capacity: This option is not logica...</td>\n",
|
1642 |
+
" <td>1. Read the question and options carefully: \"W...</td>\n",
|
1643 |
+
" <td>(a) Aerobic capacity (b) Die (c) Water conserv...</td>\n",
|
1644 |
+
" <td>Question: An animal requires ingestion to do w...</td>\n",
|
1645 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1646 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1647 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1648 |
+
" <td>...</td>\n",
|
1649 |
+
" <td>{\"Reasoning\": \"Ingestion is the process of tak...</td>\n",
|
1650 |
+
" <td>{\"Final Answer\": \"e\", \"Reasoning\": \"Ingestion ...</td>\n",
|
1651 |
+
" <td>{\"Final Answer\": \"d\"}</td>\n",
|
1652 |
+
" <td>{\"Final Answer\": \"d\"}</td>\n",
|
1653 |
+
" <td>d</td>\n",
|
1654 |
+
" <td>d</td>\n",
|
1655 |
+
" <td>e</td>\n",
|
1656 |
+
" <td>e</td>\n",
|
1657 |
+
" <td>e</td>\n",
|
1658 |
+
" <td>e</td>\n",
|
1659 |
+
" </tr>\n",
|
1660 |
+
" <tr>\n",
|
1661 |
+
" <th>1681</th>\n",
|
1662 |
+
" <td>Biology</td>\n",
|
1663 |
+
" <td>Ultraviolet light can cause what?</td>\n",
|
1664 |
+
" <td>b</td>\n",
|
1665 |
+
" <td>a) Ultraviolet light does not cause heat energ...</td>\n",
|
1666 |
+
" <td>1. First, read the question and options carefu...</td>\n",
|
1667 |
+
" <td>(a) Heat energy (b) Skin cancer (c) Killing in...</td>\n",
|
1668 |
+
" <td>Question: Ultraviolet light can cause what?\\nA...</td>\n",
|
1669 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1670 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1671 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1672 |
+
" <td>...</td>\n",
|
1673 |
+
" <td>{\"Reasoning\": \"Ultraviolet (UV) light is a typ...</td>\n",
|
1674 |
+
" <td>{\"Final Answer\": \"d\", \"Reasoning\": \"Ultraviole...</td>\n",
|
1675 |
+
" <td>{\"Final Answer\": \"b\"}</td>\n",
|
1676 |
+
" <td>{\"Final Answer\": \"b\"}</td>\n",
|
1677 |
+
" <td>b</td>\n",
|
1678 |
+
" <td>b</td>\n",
|
1679 |
+
" <td>b</td>\n",
|
1680 |
+
" <td>b</td>\n",
|
1681 |
+
" <td>b</td>\n",
|
1682 |
+
" <td>d</td>\n",
|
1683 |
+
" </tr>\n",
|
1684 |
+
" <tr>\n",
|
1685 |
+
" <th>1682</th>\n",
|
1686 |
+
" <td>Physical activity</td>\n",
|
1687 |
+
" <td>What can increase a body's strength?</td>\n",
|
1688 |
+
" <td>c</td>\n",
|
1689 |
+
" <td>a) Four limbs: This option is not correct beca...</td>\n",
|
1690 |
+
" <td>(a) Communication: In this context, the questi...</td>\n",
|
1691 |
+
" <td>(a) Four limbs (b) Disease (c) Running (d) Bic...</td>\n",
|
1692 |
+
" <td>Question: What can increase a body's strength?...</td>\n",
|
1693 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1694 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1695 |
+
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1696 |
+
" <td>...</td>\n",
|
1697 |
+
" <td>{\"Reasoning\": \"A body's strength is primarily ...</td>\n",
|
1698 |
+
" <td>{\"Final Answer\": \"c\", \"Reasoning\": \"Running is...</td>\n",
|
1699 |
+
" <td>{\"Final Answer\": \"c\"}</td>\n",
|
1700 |
+
" <td>{\"Final Answer\": \"c\"}</td>\n",
|
1701 |
+
" <td>c</td>\n",
|
1702 |
+
" <td>c</td>\n",
|
1703 |
+
" <td>f</td>\n",
|
1704 |
+
" <td>c</td>\n",
|
1705 |
+
" <td>f</td>\n",
|
1706 |
+
" <td>c</td>\n",
|
1707 |
+
" </tr>\n",
|
1708 |
+
" </tbody>\n",
|
1709 |
+
"</table>\n",
|
1710 |
+
"<p>1683 rows × 27 columns</p>\n",
|
1711 |
+
"</div>"
|
1712 |
+
],
|
1713 |
+
"text/plain": [
|
1714 |
+
" topic question_text \\\n",
|
1715 |
+
"0 Transportation What are busses used for? \n",
|
1716 |
+
"1 Climate change Which of the following does not contribute to ... \n",
|
1717 |
+
"2 Photography What uses electrical energy converted from che... \n",
|
1718 |
+
"3 Microbiology Bacteria causes what to be harmed? \n",
|
1719 |
+
"4 Biology Plants and snakes live _.? \n",
|
1720 |
+
"... ... ... \n",
|
1721 |
+
"1678 Biology New resources required for creation can be red... \n",
|
1722 |
+
"1679 Biology A drought dehydrates an entire what? \n",
|
1723 |
+
"1680 Biology An animal requires ingestion to do what? \n",
|
1724 |
+
"1681 Biology Ultraviolet light can cause what? \n",
|
1725 |
+
"1682 Physical activity What can increase a body's strength? \n",
|
1726 |
+
"\n",
|
1727 |
+
" answer_key gpt3_5_reasoning \\\n",
|
1728 |
+
"0 b a) Protective shelter: This option is incorrec... \n",
|
1729 |
+
"1 g a) Nucleus of a cell: This option is not relat... \n",
|
1730 |
+
"2 b a) Sunlight: Sunlight is a form of energy that... \n",
|
1731 |
+
"3 a Now, let's go through each option and explain ... \n",
|
1732 |
+
"4 a b) Important habitats: This option is incorrec... \n",
|
1733 |
+
"... ... ... \n",
|
1734 |
+
"1678 g a) Mining: Mining involves extracting minerals... \n",
|
1735 |
+
"1679 d a) Body water: This option is incorrect becaus... \n",
|
1736 |
+
"1680 e a) Aerobic capacity: This option is not logica... \n",
|
1737 |
+
"1681 b a) Ultraviolet light does not cause heat energ... \n",
|
1738 |
+
"1682 c a) Four limbs: This option is not correct beca... \n",
|
1739 |
+
"\n",
|
1740 |
+
" mistral_reasoning \\\n",
|
1741 |
+
"0 1. Start by reading the question carefully: \"C... \n",
|
1742 |
+
"1 To solve this question, let's first understand... \n",
|
1743 |
+
"2 1. Read the question and options carefully: Th... \n",
|
1744 |
+
"3 To answer this question correctly, let's follo... \n",
|
1745 |
+
"4 1. Read the question and options carefully: Th... \n",
|
1746 |
+
"... ... \n",
|
1747 |
+
"1678 1. Start by reading the question and options c... \n",
|
1748 |
+
"1679 (a) Watershred - This is not a scientific term... \n",
|
1749 |
+
"1680 1. Read the question and options carefully: \"W... \n",
|
1750 |
+
"1681 1. First, read the question and options carefu... \n",
|
1751 |
+
"1682 (a) Communication: In this context, the questi... \n",
|
1752 |
+
"\n",
|
1753 |
+
" answer_choices \\\n",
|
1754 |
+
"0 (a) Protective shelter (b) Transporting humans... \n",
|
1755 |
+
"1 (a) Nucleus of a cell (b) Flying in a plane (c... \n",
|
1756 |
+
"2 (a) Sunlight (b) Cameras (c) Cells (d) Buses (... \n",
|
1757 |
+
"3 (a) Plants (b) Electronics (c) Fossils (d) Hum... \n",
|
1758 |
+
"4 (a) Almost everywhere (b) Important habitats (... \n",
|
1759 |
+
"... ... \n",
|
1760 |
+
"1678 (a) Mining (b) Mutations (c) Fossil fuels (d) ... \n",
|
1761 |
+
"1679 (a) Body water (b) Dried fruit (c) Bodily wate... \n",
|
1762 |
+
"1680 (a) Aerobic capacity (b) Die (c) Water conserv... \n",
|
1763 |
+
"1681 (a) Heat energy (b) Skin cancer (c) Killing in... \n",
|
1764 |
+
"1682 (a) Four limbs (b) Disease (c) Running (d) Bic... \n",
|
1765 |
+
"\n",
|
1766 |
+
" user_prompt \\\n",
|
1767 |
+
"0 Question: What are busses used for?\\nAnswer Ch... \n",
|
1768 |
+
"1 Question: Which of the following does not cont... \n",
|
1769 |
+
"2 Question: What uses electrical energy converte... \n",
|
1770 |
+
"3 Question: Bacteria causes what to be harmed?\\n... \n",
|
1771 |
+
"4 Question: Plants and snakes live _.?\\nAnswer C... \n",
|
1772 |
+
"... ... \n",
|
1773 |
+
"1678 Question: New resources required for creation ... \n",
|
1774 |
+
"1679 Question: A drought dehydrates an entire what?... \n",
|
1775 |
+
"1680 Question: An animal requires ingestion to do w... \n",
|
1776 |
+
"1681 Question: Ultraviolet light can cause what?\\nA... \n",
|
1777 |
+
"1682 Question: What can increase a body's strength?... \n",
|
1778 |
+
"\n",
|
1779 |
+
" user_prompt_RFA \\\n",
|
1780 |
+
"0 <s>[INST] Answer the Question and include your... \n",
|
1781 |
+
"1 <s>[INST] Answer the Question and include your... \n",
|
1782 |
+
"2 <s>[INST] Answer the Question and include your... \n",
|
1783 |
+
"3 <s>[INST] Answer the Question and include your... \n",
|
1784 |
+
"4 <s>[INST] Answer the Question and include your... \n",
|
1785 |
+
"... ... \n",
|
1786 |
+
"1678 <s>[INST] Answer the Question and include your... \n",
|
1787 |
+
"1679 <s>[INST] Answer the Question and include your... \n",
|
1788 |
+
"1680 <s>[INST] Answer the Question and include your... \n",
|
1789 |
+
"1681 <s>[INST] Answer the Question and include your... \n",
|
1790 |
+
"1682 <s>[INST] Answer the Question and include your... \n",
|
1791 |
+
"\n",
|
1792 |
+
" conversation_RFA_gpt3_5 \\\n",
|
1793 |
+
"0 <s>[INST] Answer the Question and include your... \n",
|
1794 |
+
"1 <s>[INST] Answer the Question and include your... \n",
|
1795 |
+
"2 <s>[INST] Answer the Question and include your... \n",
|
1796 |
+
"3 <s>[INST] Answer the Question and include your... \n",
|
1797 |
+
"4 <s>[INST] Answer the Question and include your... \n",
|
1798 |
+
"... ... \n",
|
1799 |
+
"1678 <s>[INST] Answer the Question and include your... \n",
|
1800 |
+
"1679 <s>[INST] Answer the Question and include your... \n",
|
1801 |
+
"1680 <s>[INST] Answer the Question and include your... \n",
|
1802 |
+
"1681 <s>[INST] Answer the Question and include your... \n",
|
1803 |
+
"1682 <s>[INST] Answer the Question and include your... \n",
|
1804 |
+
"\n",
|
1805 |
+
" conversation_RFA_mistral ... \\\n",
|
1806 |
+
"0 <s>[INST] Answer the Question and include your... ... \n",
|
1807 |
+
"1 <s>[INST] Answer the Question and include your... ... \n",
|
1808 |
+
"2 <s>[INST] Answer the Question and include your... ... \n",
|
1809 |
+
"3 <s>[INST] Answer the Question and include your... ... \n",
|
1810 |
+
"4 <s>[INST] Answer the Question and include your... ... \n",
|
1811 |
+
"... ... ... \n",
|
1812 |
+
"1678 <s>[INST] Answer the Question and include your... ... \n",
|
1813 |
+
"1679 <s>[INST] Answer the Question and include your... ... \n",
|
1814 |
+
"1680 <s>[INST] Answer the Question and include your... ... \n",
|
1815 |
+
"1681 <s>[INST] Answer the Question and include your... ... \n",
|
1816 |
+
"1682 <s>[INST] Answer the Question and include your... ... \n",
|
1817 |
+
"\n",
|
1818 |
+
" responses_RFA_gpt3_5 \\\n",
|
1819 |
+
"0 {\"Reasoning\": \"Busses are primarily used for t... \n",
|
1820 |
+
"1 {\"Reasoning\": \"The nucleus of a cell (option a... \n",
|
1821 |
+
"2 {\"Reasoning\": \"The process of converting chemi... \n",
|
1822 |
+
"3 { \"Reasoning\": \"Bacteria can cause harm to var... \n",
|
1823 |
+
"4 {\"Reasoning\": \"The question asks about the rel... \n",
|
1824 |
+
"... ... \n",
|
1825 |
+
"1678 {\"Reasoning\": \"Mining, fossil fuels, deforesta... \n",
|
1826 |
+
"1679 { \"Reasoning\": \"A drought is a prolonged perio... \n",
|
1827 |
+
"1680 {\"Reasoning\": \"Ingestion is the process of tak... \n",
|
1828 |
+
"1681 {\"Reasoning\": \"Ultraviolet (UV) light is a typ... \n",
|
1829 |
+
"1682 {\"Reasoning\": \"A body's strength is primarily ... \n",
|
1830 |
+
"\n",
|
1831 |
+
" responses_FAR_gpt3_5 \\\n",
|
1832 |
+
"0 {\"Final Answer\": \"b\", \"Reasoning\": \"Busses are... \n",
|
1833 |
+
"1 {\"Final Answer\": \"a\", \"Reasoning\": \"The nucleu... \n",
|
1834 |
+
"2 {\"Final Answer\": \"e\", \"Reasoning\": \"Bacteria u... \n",
|
1835 |
+
"3 {\"Final Answer\": \"d\", \"Reasoning\": \"Bacteria c... \n",
|
1836 |
+
"4 {\"Final Answer\": \"f\", \"Reasoning\": \"Plants and... \n",
|
1837 |
+
"... ... \n",
|
1838 |
+
"1678 {\"Final Answer\": \"g\", \"Reasoning\": \"Recycling ... \n",
|
1839 |
+
"1679 {\"Final Answer\": \"d\", \"Reasoning\": \"A drought ... \n",
|
1840 |
+
"1680 {\"Final Answer\": \"e\", \"Reasoning\": \"Ingestion ... \n",
|
1841 |
+
"1681 {\"Final Answer\": \"d\", \"Reasoning\": \"Ultraviole... \n",
|
1842 |
+
"1682 {\"Final Answer\": \"c\", \"Reasoning\": \"Running is... \n",
|
1843 |
+
"\n",
|
1844 |
+
" responses_FA responses_base predictions_base \\\n",
|
1845 |
+
"0 {\"Final Answer\": \"b\"} {\"Final Answer\": \"b\"} b \n",
|
1846 |
+
"1 {\"Final Answer\": \"a\"} {\"Final Answer\": \"a\"} a \n",
|
1847 |
+
"2 {\"Final Answer\": \"c\"} {\"Final Answer\": \"c\"} c \n",
|
1848 |
+
"3 {\"Final Answer\": \"d\"} {\"Final Answer\": \"d\"} d \n",
|
1849 |
+
"4 {\"Final Answer\": \"g\"} {\"Final Answer\": \"g\"} g \n",
|
1850 |
+
"... ... ... ... \n",
|
1851 |
+
"1678 {\"Final Answer\": \"g\"} {\"Final Answer\": \"g\"} g \n",
|
1852 |
+
"1679 {\"Final Answer\": \"d\"} {\"Final Answer\": \"d\"} d \n",
|
1853 |
+
"1680 {\"Final Answer\": \"d\"} {\"Final Answer\": \"d\"} d \n",
|
1854 |
+
"1681 {\"Final Answer\": \"b\"} {\"Final Answer\": \"b\"} b \n",
|
1855 |
+
"1682 {\"Final Answer\": \"c\"} {\"Final Answer\": \"c\"} c \n",
|
1856 |
+
"\n",
|
1857 |
+
" predictions_FA predictions_RFA_mistral predictions_FAR_mistral \\\n",
|
1858 |
+
"0 b b b \n",
|
1859 |
+
"1 a g a \n",
|
1860 |
+
"2 c e c \n",
|
1861 |
+
"3 d d d \n",
|
1862 |
+
"4 g b a \n",
|
1863 |
+
"... ... ... ... \n",
|
1864 |
+
"1678 g g g \n",
|
1865 |
+
"1679 d d d \n",
|
1866 |
+
"1680 d e e \n",
|
1867 |
+
"1681 b b b \n",
|
1868 |
+
"1682 c f c \n",
|
1869 |
+
"\n",
|
1870 |
+
" predictions_RFA_gpt3_5 predictions_FAR_gpt3_5 \n",
|
1871 |
+
"0 b b \n",
|
1872 |
+
"1 a a \n",
|
1873 |
+
"2 c e \n",
|
1874 |
+
"3 d d \n",
|
1875 |
+
"4 f f \n",
|
1876 |
+
"... ... ... \n",
|
1877 |
+
"1678 g g \n",
|
1878 |
+
"1679 f d \n",
|
1879 |
+
"1680 e e \n",
|
1880 |
+
"1681 b d \n",
|
1881 |
+
"1682 f c \n",
|
1882 |
+
"\n",
|
1883 |
+
"[1683 rows x 27 columns]"
|
1884 |
+
]
|
1885 |
+
},
|
1886 |
+
"execution_count": 24,
|
1887 |
+
"metadata": {},
|
1888 |
+
"output_type": "execute_result"
|
1889 |
+
}
|
1890 |
+
],
|
1891 |
+
"source": [
|
1892 |
+
"df"
|
1893 |
+
]
|
1894 |
+
},
|
1895 |
+
{
|
1896 |
+
"cell_type": "code",
|
1897 |
+
"execution_count": 26,
|
1898 |
+
"id": "45c08dd4-0b98-4e0f-b487-549f60518a4e",
|
1899 |
+
"metadata": {},
|
1900 |
+
"outputs": [
|
1901 |
+
{
|
1902 |
+
"data": {
|
1903 |
+
"application/vnd.jupyter.widget-view+json": {
|
1904 |
+
"model_id": "30c9a9f7656f4950a3fde9deaa6bf0ac",
|
1905 |
+
"version_major": 2,
|
1906 |
+
"version_minor": 0
|
1907 |
+
},
|
1908 |
+
"text/plain": [
|
1909 |
+
"Uploading the dataset shards: 0%| | 0/1 [00:00<?, ?it/s]"
|
1910 |
+
]
|
1911 |
+
},
|
1912 |
+
"metadata": {},
|
1913 |
+
"output_type": "display_data"
|
1914 |
+
},
|
1915 |
+
{
|
1916 |
+
"data": {
|
1917 |
+
"application/vnd.jupyter.widget-view+json": {
|
1918 |
+
"model_id": "bfc21892a7eb45caa097435b6026087d",
|
1919 |
+
"version_major": 2,
|
1920 |
+
"version_minor": 0
|
1921 |
+
},
|
1922 |
+
"text/plain": [
|
1923 |
+
"Creating parquet from Arrow format: 0%| | 0/2 [00:00<?, ?ba/s]"
|
1924 |
+
]
|
1925 |
+
},
|
1926 |
+
"metadata": {},
|
1927 |
+
"output_type": "display_data"
|
1928 |
+
},
|
1929 |
+
{
|
1930 |
+
"data": {
|
1931 |
+
"text/plain": [
|
1932 |
+
"CommitInfo(commit_url='https://huggingface.co/datasets/derek-thomas/labeled-multiple-choice-explained-mistral-results/commit/b2c01c5867b23afe06c0806f2886152864574946', commit_message='Upload dataset', commit_description='', oid='b2c01c5867b23afe06c0806f2886152864574946', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/derek-thomas/labeled-multiple-choice-explained-mistral-results', endpoint='https://huggingface.co', repo_type='dataset', repo_id='derek-thomas/labeled-multiple-choice-explained-mistral-results'), pr_revision=None, pr_num=None)"
|
1933 |
+
]
|
1934 |
+
},
|
1935 |
+
"execution_count": 26,
|
1936 |
+
"metadata": {},
|
1937 |
+
"output_type": "execute_result"
|
1938 |
+
}
|
1939 |
+
],
|
1940 |
+
"source": [
|
1941 |
+
"from datasets import Dataset, DatasetDict\n",
|
1942 |
+
"\n",
|
1943 |
+
"# Create dataset from df\n",
|
1944 |
+
"df.reset_index(drop=True, inplace=True)\n",
|
1945 |
+
"dataset = Dataset.from_pandas(df)\n",
|
1946 |
+
"\n",
|
1947 |
+
"# Push dataset to the hub\n",
|
1948 |
+
"dataset.push_to_hub(OUTPUT_DATASET)"
|
1949 |
+
]
|
1950 |
+
},
|
1951 |
+
{
|
1952 |
+
"cell_type": "code",
|
1953 |
+
"execution_count": null,
|
1954 |
+
"id": "45ed7bf8-54de-4f4d-aab5-b78af8f54ccc",
|
1955 |
+
"metadata": {},
|
1956 |
+
"outputs": [],
|
1957 |
+
"source": []
|
1958 |
+
}
|
1959 |
+
],
|
1960 |
+
"metadata": {
|
1961 |
+
"kernelspec": {
|
1962 |
+
"display_name": "Python 3 (ipykernel)",
|
1963 |
+
"language": "python",
|
1964 |
+
"name": "python3"
|
1965 |
+
},
|
1966 |
+
"language_info": {
|
1967 |
+
"codemirror_mode": {
|
1968 |
+
"name": "ipython",
|
1969 |
+
"version": 3
|
1970 |
+
},
|
1971 |
+
"file_extension": ".py",
|
1972 |
+
"mimetype": "text/x-python",
|
1973 |
+
"name": "python",
|
1974 |
+
"nbconvert_exporter": "python",
|
1975 |
+
"pygments_lexer": "ipython3",
|
1976 |
+
"version": "3.11.10"
|
1977 |
+
}
|
1978 |
+
},
|
1979 |
+
"nbformat": 4,
|
1980 |
+
"nbformat_minor": 5
|
1981 |
+
}
|
autotrain/autotrain.sh
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env bash
# Run `autotrain` once per experiment type.
#
# For every entry in EXPERIMENT_TYPES the config template
# ./autotrain/autotrain_template.yml is rendered into ./autotrain/autotrain.yml
# and passed to `autotrain --config`. The loop stops at the first failing run.
# Paths are relative, so the script is expected to be started from the
# repository root.

# Full set of experiments; swap with the line below to run all of them.
#EXPERIMENT_TYPES=("R-FA" "FA-R" "FA")
EXPERIMENT_TYPES=("FA")

# Quote the expansion so every array element stays a single word.
for EXPERIMENT_TYPE in "${EXPERIMENT_TYPES[@]}"; do
    echo "Running autotrain for experiment type: ${EXPERIMENT_TYPE}"

    # Export the variable to make it available to envsubst
    export EXPERIMENT_TYPE

    # Substitute ONLY ${EXPERIMENT_TYPE}. Without the explicit variable list
    # envsubst replaces every ${...} in the template, which would write the
    # value of ${HF_TOKEN} into the generated file (and into the `cat` output
    # below), or blank it out when the variable is unset.
    # NOTE(review): assumes EXPERIMENT_TYPE is the only placeholder the
    # template expects envsubst to fill - confirm against the template.
    envsubst '${EXPERIMENT_TYPE}' < ./autotrain/autotrain_template.yml > ./autotrain/autotrain.yml

    # Echo the contents of the substituted YAML file
    echo "Generated autotrain.yml:"
    cat ./autotrain/autotrain.yml
    echo "-----------------------------------"

    # Run the autotrain command and abort the whole batch on the first failure
    if ! autotrain --config ./autotrain/autotrain.yml; then
        echo "Error running autotrain for ${EXPERIMENT_TYPE}, exiting."
        exit 1
    fi
done

echo "All experiments completed successfully!"
|
autotrain/autotrain.yml
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
task: llm-sft
|
2 |
+
base_model: mistralai/Mistral-7B-Instruct-v0.3
|
3 |
+
project_name: autotrain-mistral-v03-prompt-experiment-mc-R-FA-sg
|
4 |
+
log: tensorboard
|
5 |
+
backend: spaces-l4x1
|
6 |
+
|
7 |
+
data:
|
8 |
+
path: derek-thomas/labeled-multiple-choice-explained-mistral-tokenized
|
9 |
+
train_split: train
|
10 |
+
# valid_split: val
|
11 |
+
valid_split: null
|
12 |
+
chat_template: none
|
13 |
+
column_mapping:
|
14 |
+
text_column: conversation_R_FA_sg
|
15 |
+
|
16 |
+
params:
|
17 |
+
block_size: 1024
|
18 |
+
model_max_length: 1024
|
19 |
+
epochs: 2
|
20 |
+
batch_size: 1
|
21 |
+
lr: 3e-5
|
22 |
+
peft: true
|
23 |
+
quantization: int4
|
24 |
+
target_modules: all-linear
|
25 |
+
padding: left
|
26 |
+
optimizer: adamw_torch
|
27 |
+
scheduler: linear
|
28 |
+
gradient_accumulation: 8
|
29 |
+
mixed_precision: bf16
|
30 |
+
|
31 |
+
hub:
|
32 |
+
username: derek-thomas
|
33 |
+
token: ${HF_TOKEN}
|
34 |
+
push_to_hub: true
|
autotrain/run_autotrain.py
ADDED
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Generate one AutoTrain config per prompt-format experiment and run each.

For every (project suffix, text column) pair a copy of ``config_template`` is
filled in, written as YAML into ``output_dir`` and handed to
``autotrain --config``. All experiments are attempted; the process exits with
a non-zero status if any ``autotrain`` invocation failed.
"""
import copy
import os
import subprocess
import sys

# Base config
# NOTE: "token" holds the resolved value of HF_TOKEN (or None when unset), so
# every generated YAML file contains the token in plain text.
config_template = {
    "task": "llm-sft",
    "base_model": "mistralai/Mistral-7B-Instruct-v0.3",
    "project_name": "",
    "log": "tensorboard",
    "backend": "spaces-l4x1",
    "data": {
        "path": "derek-thomas/labeled-multiple-choice-explained-mistral-tokenized",
        "train_split": "train",
        "valid_split": None,
        "chat_template": "none",
        "column_mapping": {
            "text_column": ""
        },
    },
    "params": {
        "block_size": 1024,
        "model_max_length": 1024,
        "epochs": 2,
        "batch_size": 1,
        "lr": 3e-5,
        "peft": True,
        "quantization": "int4",
        "target_modules": "all-linear",
        "padding": "left",
        "optimizer": "adamw_torch",
        "scheduler": "linear",
        "gradient_accumulation": 8,
        "mixed_precision": "bf16",
    },
    "hub": {
        "username": "derek-thomas",
        "token": os.getenv('HF_TOKEN'),
        "push_to_hub": True,
    },
}

# Suffix options (parallel lists: suffix i belongs to text column i)
project_suffixes = ["RFA-gpt3-5", "RFA-mistral", "FAR-gpt3-5", "FAR-mistral", "FA"]
text_columns = ["conversation_RFA_gpt3_5", "conversation_RFA_mistral", "conversation_FAR_gpt3_5",
                "conversation_FAR_mistral", "conversation_FA"]

# Directory to store generated configs. Anchored next to this script so the
# files land in autotrain/autotrain_configs/ (the path listed in .gitignore)
# regardless of the current working directory; the files contain the HF token.
output_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "autotrain_configs")


def build_config(project_suffix: str, text_column: str) -> dict:
    """Return a fresh config for one experiment.

    Args:
        project_suffix: Appended to ``mistral-v03-poe-`` to form the project name.
        text_column: Dataset column used as the training text.

    Returns:
        An independent copy of ``config_template`` with ``project_name`` and
        ``data.column_mapping.text_column`` filled in.
    """
    # Deep copy: a shallow ``dict.copy()`` shares the nested "data" dict, so
    # setting the text column would also overwrite it in the template.
    config = copy.deepcopy(config_template)
    config["project_name"] = f"mistral-v03-poe-{project_suffix}"
    config["data"]["column_mapping"]["text_column"] = text_column
    return config


def write_config(config: dict, config_path: str) -> None:
    """Serialize ``config`` as YAML to ``config_path``."""
    # Imported lazily so the config helpers above can be imported without
    # PyYAML being installed.
    import yaml

    with open(config_path, "w", encoding="utf-8") as f:
        yaml.dump(config, f)


def main() -> int:
    """Generate every config, run autotrain on each, and return an exit code.

    Returns:
        0 when every ``autotrain`` run succeeded, 1 otherwise.
    """
    os.makedirs(output_dir, exist_ok=True)
    failed = []

    # Generate configs and run commands
    for project_suffix, text_column in zip(project_suffixes, text_columns):
        config = build_config(project_suffix, text_column)

        # Save the config to a YAML file
        config_path = os.path.join(output_dir, f"{text_column}.yml")
        write_config(config, config_path)

        # Run the command
        print(f"Running autotrain with config: {config_path}")
        result = subprocess.run(["autotrain", "--config", config_path])

        # Keep going so the remaining experiments still run, but remember the
        # failure instead of silently discarding the return code.
        if result.returncode != 0:
            print(f"autotrain failed for {config_path} (exit code {result.returncode})")
            failed.append(config_path)

    if failed:
        print(f"{len(failed)} experiment(s) failed: {', '.join(failed)}")
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
requirements.txt
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
autotrain-advanced
|
2 |
+
wandb
|
3 |
+
huggingface_hub
|
4 |
+
datasets
|
5 |
+
pandas
|
6 |
+
gradio
|
7 |
+
scikit-learn
|
8 |
+
lighteval[tensorboardX,adapters]
|
9 |
+
nest_asyncio
|
10 |
+
plotly
|
11 |
+
ipywidgets
|
utilities/clean_up_spaces.ipynb
ADDED
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 9,
|
6 |
+
"id": "291ab1c2-0ff0-41bf-ba56-e0c5ff2cde92",
|
7 |
+
"metadata": {},
|
8 |
+
"outputs": [],
|
9 |
+
"source": [
|
10 |
+
"AUTHOR = 'your-user-name'\n",
|
11 |
+
"SEARCH_STRING = 'autotrain-autotrain-mistral'"
|
12 |
+
]
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"cell_type": "code",
|
16 |
+
"execution_count": 4,
|
17 |
+
"id": "1c68cc85-bed7-4270-8745-87f6fc7cb8d0",
|
18 |
+
"metadata": {},
|
19 |
+
"outputs": [
|
20 |
+
{
|
21 |
+
"data": {
|
22 |
+
"application/vnd.jupyter.widget-view+json": {
|
23 |
+
"model_id": "2bfbe27ca548439a860e624431f1c90d",
|
24 |
+
"version_major": 2,
|
25 |
+
"version_minor": 0
|
26 |
+
},
|
27 |
+
"text/plain": [
|
28 |
+
"VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
|
29 |
+
]
|
30 |
+
},
|
31 |
+
"metadata": {},
|
32 |
+
"output_type": "display_data"
|
33 |
+
}
|
34 |
+
],
|
35 |
+
"source": [
|
36 |
+
"from huggingface_hub import login\n",
|
37 |
+
"\n",
|
38 |
+
"login()"
|
39 |
+
]
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"cell_type": "code",
|
43 |
+
"execution_count": 6,
|
44 |
+
"id": "7f96de54-43ac-4bd4-a65f-15d4046914c7",
|
45 |
+
"metadata": {},
|
46 |
+
"outputs": [
|
47 |
+
{
|
48 |
+
"name": "stdout",
|
49 |
+
"output_type": "stream",
|
50 |
+
"text": [
|
51 |
+
"Deleting:\tderek-thomas/autotrain-autotrain-mistral-v03-prompt-experiment-mc-FA\n",
|
52 |
+
"Deleting:\tderek-thomas/autotrain-autotrain-mistral-v03-prompt-experiment-mc-FA-R\n",
|
53 |
+
"Deleting:\tderek-thomas/autotrain-autotrain-mistral-v03-prompt-experiment-mc-R-FA\n",
|
54 |
+
"Deleting:\tderek-thomas/autotrain-autotrain-mistral-v03-prompt-experiment-mc-FA-sg\n",
|
55 |
+
"Deleting:\tderek-thomas/autotrain-autotrain-mistral-v03-prompt-experiment-mc-FA-R-sg\n",
|
56 |
+
"Deleting:\tderek-thomas/autotrain-autotrain-mistral-v03-prompt-experiment-mc-R-FA-sg\n"
|
57 |
+
]
|
58 |
+
}
|
59 |
+
],
|
60 |
+
"source": [
|
61 |
+
"from huggingface_hub import list_spaces, delete_repo\n",
|
62 |
+
"\n",
|
63 |
+
"# Retrieve your spaces\n",
|
64 |
+
"spaces = list_spaces(author=AUTHOR)\n",
|
65 |
+
"\n",
|
66 |
+
"# Delete every space whose id contains SEARCH_STRING\n",
|
67 |
+
"for space in spaces:\n",
|
68 |
+
" if SEARCH_STRING in space.id:\n",
|
69 |
+
" delete_repo(repo_id=space.id, repo_type=\"space\")\n",
|
70 |
+
" print(f\"Deleting:\\t{space.id}\")\n",
|
71 |
+
"\n"
|
72 |
+
]
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"cell_type": "code",
|
76 |
+
"execution_count": 8,
|
77 |
+
"id": "a9f439dd-e8d6-44d8-b68c-eca1f6ae40ff",
|
78 |
+
"metadata": {},
|
79 |
+
"outputs": [
|
80 |
+
{
|
81 |
+
"name": "stdout",
|
82 |
+
"output_type": "stream",
|
83 |
+
"text": [
|
84 |
+
"derek-thomas/dreambooth-training-hannah\n",
|
85 |
+
"derek-thomas/dreambooth-training-hannah-v21-v2\n",
|
86 |
+
"derek-thomas/derek-thomas-dreambooth-training-hannah-v15-v3\n",
|
87 |
+
"derek-thomas/dreambooth-training-hannah-v15-v4\n",
|
88 |
+
"derek-thomas/dreambooth-training-hannah-v15-v4-1672074451\n",
|
89 |
+
"derek-thomas/dreambooth-training-hannah-v15-v5\n",
|
90 |
+
"derek-thomas/dreambooth-training-hannah-v15-v6\n",
|
91 |
+
"derek-thomas/xlm-roberta-base-finetuned-panx-de\n",
|
92 |
+
"derek-thomas/lal-parser\n",
|
93 |
+
"derek-thomas/naughtyformer\n",
|
94 |
+
"derek-thomas/ibert-quantized\n",
|
95 |
+
"derek-thomas/t5-end2end-question-generation\n",
|
96 |
+
"derek-thomas/distilhubert-finetuned-gtzan\n",
|
97 |
+
"derek-thomas/distilhubert-finetuned-gtzan-efficient\n",
|
98 |
+
"derek-thomas/Hubert_emotion-finetuned-gtzan-efficient\n",
|
99 |
+
"derek-thomas/hubert-large-ll60k-finetuned-gtzan-efficient\n",
|
100 |
+
"derek-thomas/hubert-base-ls960-finetuned-gtzan-efficient\n",
|
101 |
+
"derek-thomas/hubert-base-ls960-finetuned-gtzan-efficient-label-smoothed\n",
|
102 |
+
"derek-thomas/whisper-tiny-polyai-english\n",
|
103 |
+
"derek-thomas/speecht5_tts-finetuned_voxpopuli_lt\n",
|
104 |
+
"derek-thomas/jais-13b-chat-hf\n",
|
105 |
+
"derek-thomas/sd-class-butterflies-32\n",
|
106 |
+
"derek-thomas/speecht5_tts-finetuned_voxpopuli_hr\n",
|
107 |
+
"derek-thomas/speecht5_finetuned_voxpopuli_hr\n",
|
108 |
+
"derek-thomas/tgi-benchmark-notebooks\n",
|
109 |
+
"derek-thomas/Meta-Llama-3-8B-Instruct\n",
|
110 |
+
"derek-thomas/tgi-notebooks-optimization\n",
|
111 |
+
"derek-thomas/trained-flux\n",
|
112 |
+
"derek-thomas/hannah-dreambooth\n",
|
113 |
+
"derek-thomas/hannah-dreambooth-2\n",
|
114 |
+
"derek-thomas/hannah-book-creation\n",
|
115 |
+
"derek-thomas/hannah-dreambooth-3\n",
|
116 |
+
"derek-thomas/hannah-dreambooth-4\n",
|
117 |
+
"derek-thomas/bert-ner-openvino\n",
|
118 |
+
"derek-thomas/hannah-dreambooth-5\n",
|
119 |
+
"derek-thomas/prompt-experiment-sft-multiple-choice\n",
|
120 |
+
"derek-thomas/autotrain-mistral-v03-prompt-experiment-mc-FA\n",
|
121 |
+
"Deleting:\tderek-thomas/autotrain-mistral-v03-prompt-experiment-mc-FA\n",
|
122 |
+
"derek-thomas/autotrain-mistral-v03-prompt-experiment-mc-R-FA-sg\n",
|
123 |
+
"Deleting:\tderek-thomas/autotrain-mistral-v03-prompt-experiment-mc-R-FA-sg\n",
|
124 |
+
"derek-thomas/autotrain-mistral-v03-prompt-experiment-mc-FA-R\n",
|
125 |
+
"Deleting:\tderek-thomas/autotrain-mistral-v03-prompt-experiment-mc-FA-R\n",
|
126 |
+
"derek-thomas/autotrain-mistral-v03-prompt-experiment-mc-FA-sg\n",
|
127 |
+
"Deleting:\tderek-thomas/autotrain-mistral-v03-prompt-experiment-mc-FA-sg\n",
|
128 |
+
"derek-thomas/autotrain-mistral-v03-prompt-experiment-mc-R-FA\n",
|
129 |
+
"Deleting:\tderek-thomas/autotrain-mistral-v03-prompt-experiment-mc-R-FA\n",
|
130 |
+
"derek-thomas/autotrain-mistral-v03-prompt-experiment-mc-FA-R-sg\n",
|
131 |
+
"Deleting:\tderek-thomas/autotrain-mistral-v03-prompt-experiment-mc-FA-R-sg\n"
|
132 |
+
]
|
133 |
+
}
|
134 |
+
],
|
135 |
+
"source": [
|
136 |
+
"from huggingface_hub import list_models\n",
|
137 |
+
"\n",
|
138 |
+
"# Retrieve your models\n",
|
139 |
+
"models = list_models(author=AUTHOR)\n",
|
140 |
+
"\n",
|
141 |
+
"# Delete every model whose id contains 'autotrain-mistral'\n",
|
142 |
+
"for model in models:\n",
|
143 |
+
" # print(model.id)\n",
|
144 |
+
" if 'autotrain-mistral' in model.id:\n",
|
145 |
+
" delete_repo(repo_id=model.id, repo_type=\"model\")\n",
|
146 |
+
" print(f\"Deleting:\\t{model.id}\")\n",
|
147 |
+
"\n"
|
148 |
+
]
|
149 |
+
},
|
150 |
+
{
|
151 |
+
"cell_type": "code",
|
152 |
+
"execution_count": null,
|
153 |
+
"id": "8064d700-e880-4022-9c91-55ef60c493cd",
|
154 |
+
"metadata": {},
|
155 |
+
"outputs": [],
|
156 |
+
"source": []
|
157 |
+
}
|
158 |
+
],
|
159 |
+
"metadata": {
|
160 |
+
"kernelspec": {
|
161 |
+
"display_name": "Python 3 (ipykernel)",
|
162 |
+
"language": "python",
|
163 |
+
"name": "python3"
|
164 |
+
},
|
165 |
+
"language_info": {
|
166 |
+
"codemirror_mode": {
|
167 |
+
"name": "ipython",
|
168 |
+
"version": 3
|
169 |
+
},
|
170 |
+
"file_extension": ".py",
|
171 |
+
"mimetype": "text/x-python",
|
172 |
+
"name": "python",
|
173 |
+
"nbconvert_exporter": "python",
|
174 |
+
"pygments_lexer": "ipython3",
|
175 |
+
"version": "3.11.10"
|
176 |
+
}
|
177 |
+
},
|
178 |
+
"nbformat": 4,
|
179 |
+
"nbformat_minor": 5
|
180 |
+
}
|