feat: installation
Files changed:
- .gitignore +0 -2
- Dockerfile +60 -0
- Makefile +50 -0
- README.md +27 -4
- app.py +37 -13
- llmdataparser/__init__.py +5 -3
- llmdataparser/base_parser.py +13 -6
- llmdataparser/math_parser.py +1 -1
- llmdataparser/mmlu_parser.py +2 -2
- notebooks/demo.ipynb +0 -77
- poetry.lock +0 -0
- pyproject.toml +33 -17
.gitignore
CHANGED
@@ -8,8 +8,6 @@ build/
 dist/
 *.egg-info/
 
-# Poetry
-poetry.lock
 
 # Virtual environment
 .env/
Dockerfile
ADDED
@@ -0,0 +1,60 @@
+# Use Python 3.12 slim image as base
+FROM python:3.12-slim
+
+# Set environment variables
+ENV PYTHONUNBUFFERED=1 \
+    POETRY_VERSION=1.7.1 \
+    POETRY_HOME="/opt/poetry" \
+    POETRY_NO_INTERACTION=1 \
+    GRADIO_SERVER_NAME=0.0.0.0 \
+    GRADIO_SERVER_PORT=7860
+
+# Set working directory
+WORKDIR /app
+
+# Create cache directories for Hugging Face
+ENV HF_HOME=/home/app/.cache/huggingface
+RUN mkdir -p /home/app/.cache/huggingface
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    portaudio19-dev \
+    python3-pip \
+    gcc \
+    git \
+    && rm -rf /var/lib/apt/lists/* \
+    && apt-get clean
+
+# Install poetry
+RUN pip install poetry==${POETRY_VERSION} && \
+    poetry config virtualenvs.create false
+
+# Copy dependency files first
+COPY pyproject.toml poetry.lock ./
+
+# Install dependencies using the lock file
+RUN poetry install --no-dev --no-interaction --no-ansi
+
+# Create app user and group
+RUN groupadd -r app && useradd -r -g app app
+
+# Before switching to non-root user, create and set permissions
+RUN mkdir -p /home/app/.cache && \
+    mkdir -p /home/app/.config/matplotlib && \
+    chown -R app:app /home/app/.cache && \
+    chown -R app:app /home/app/.config
+
+# Set matplotlib config dir
+ENV MPLCONFIGDIR=/home/app/.config/matplotlib
+
+# Switch to non-root user
+USER app
+
+# Copy the rest of the application
+COPY --chown=app:app . .
+
+# Expose the port the app runs on
+EXPOSE 7860
+
+# Run the application
+CMD ["python", "app.py"]
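Note: `GRADIO_SERVER_NAME` and `GRADIO_SERVER_PORT` are the standard environment hooks Gradio consults when `launch()` is not given an explicit `server_name`/`server_port`, which is why the image can set the bind address without touching `app.py`. A minimal sketch of that resolution (illustrative, not the library's internals):

```python
# Sketch: how the image's ENV settings reach Gradio's launch().
# Gradio falls back to these variables when server_name/server_port
# are not passed explicitly.
import os

host = os.environ.get("GRADIO_SERVER_NAME", "127.0.0.1")  # "0.0.0.0" in this image
port = int(os.environ.get("GRADIO_SERVER_PORT", "7860"))
print(f"Gradio will bind to {host}:{port}")
```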
Makefile
ADDED
@@ -0,0 +1,50 @@
+# Variables
+IMAGE_NAME = llmdataparser
+CONTAINER_NAME = llmdataparser
+VERSION = latest
+
+# Build the Docker image
+build:
+	docker build -t $(IMAGE_NAME):$(VERSION) .
+
+# Run the container
+run:
+	docker run -d -p 7860:7860 --name $(CONTAINER_NAME) $(IMAGE_NAME):$(VERSION)
+
+# Stop the container
+stop:
+	docker stop $(CONTAINER_NAME)
+
+# Remove the container
+rm:
+	docker rm $(CONTAINER_NAME)
+
+# Remove the image
+rmi:
+	docker rmi $(IMAGE_NAME):$(VERSION)
+
+# Clean everything
+clean: stop rm rmi
+
+# Build and run
+up: build run
+
+# Stop and remove container
+down: stop rm
+
+# Show container logs
+logs:
+	docker logs $(CONTAINER_NAME)
+
+# Enter container shell
+shell:
+	docker exec -it $(CONTAINER_NAME) /bin/bash
+
+# Optional: command to check container status
+status:
+	docker ps -a | grep $(CONTAINER_NAME)
+
+logs-follow:
+	docker logs -f $(CONTAINER_NAME)
+
+.PHONY: build run stop rm rmi clean up down logs shell
README.md
CHANGED
@@ -1,6 +1,6 @@
 # LLMDataParser
 
-**LLMDataParser** is a Python library that provides parsers for benchmark datasets used in evaluating Large Language Models (LLMs). It offers a unified interface for loading and parsing datasets like **MMLU…
+**LLMDataParser** is a Python library that provides parsers for benchmark datasets used in evaluating Large Language Models (LLMs). It offers a unified interface for loading and parsing datasets like **MMLU**, **GSM8k**, and others, streamlining dataset preparation for LLM evaluation. The library aims to simplify the process of working with common LLM benchmark datasets through a consistent API.
 
 ## Features
 
@@ -8,6 +8,7 @@
 - **LLM-Agnostic**: Independent of any specific language model.
 - **Easy to Use**: Simple methods and built-in Python types.
 - **Extensible**: Easily add support for new datasets.
+- **Gradio**: Built-in Gradio interface for interactive dataset exploration and testing.
 
 ## Installation
 
@@ -22,7 +23,7 @@ You can install the package directly using `pip`. Even with only a `pyproject.toml`…
 cd LLMDataParser
 ```
 
-…
+1. **Install Dependencies with pip**:
 
 ```bash
 pip install .
@@ -38,7 +39,7 @@ Poetry manages the virtual environment and dependencies automatically, so you do…
 poetry install
 ```
 
-…
+1. **Activate the Virtual Environment**:
 
 ```bash
 poetry shell
@@ -46,7 +47,29 @@
 
 ## Available Parsers
 
-- **MMLUDatasetParser…
+- **MMLUDatasetParser**
+- **MMLUProDatasetParser**
+- **MMLUReduxDatasetParser**
+- **TMMLUPlusDatasetParser**
+- **GSM8KDatasetParser**
+- **MATHDatasetParser**
+- **MGSMDatasetParser**
+- **HumanEvalDatasetParser**
+- **HumanEvalDatasetPlusParser**
+- **BBHDatasetParser**
+- **MBPPDatasetParser**
+- **IFEvalDatasetParser**
+- **TWLegalDatasetParser**
+- **TMLUDatasetParser**
+
+## Adding New Dataset Parsers
+
+To add support for a new dataset, please refer to our detailed guide in [docs/adding_new_parser.md](docs/adding_new_parser.md). The guide includes:
+
+- Step-by-step instructions for creating a new parser
+- Code examples and templates
+- Best practices and common patterns
+- Testing guidelines
 
 ## License
 
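For reference, end-to-end usage of a parser (as shown by the `notebooks/demo.ipynb` removed later in this commit) amounts to the following sketch; the `- 1` fixes the off-by-one in the notebook's original `randint` call, since `randint` is inclusive on both ends:

```python
import random

from llmdataparser import ParserRegistry

print(ParserRegistry.list_parsers())  # discover registered parser names

mmlu_parser = ParserRegistry.get_parser("mmlu")
mmlu_parser.load()
mmlu_parser.parse(split_names=["dev", "test"])

parsed_data = mmlu_parser.get_parsed_data
index = random.randint(0, len(parsed_data) - 1)  # pick a random entry
print(f"Question:\n{parsed_data[index].prompt}")
print(f"Answer:\n{parsed_data[index].answer_letter}")
```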
app.py
CHANGED
@@ -1,30 +1,36 @@
 import secrets
 from functools import lru_cache
+from typing import Any
 
 import gradio as gr
 
 from llmdataparser import ParserRegistry
-from llmdataparser.base_parser import …
+from llmdataparser.base_parser import (
+    DatasetDescription,
+    DatasetParser,
+    EvaluationMetric,
+    ParseEntry,
+)
 
 
 @lru_cache(maxsize=32)
-def get_parser_instance(parser_name: str):
+def get_parser_instance(parser_name: str) -> DatasetParser[Any]:
     """Get a cached parser instance by name."""
     return ParserRegistry.get_parser(parser_name)
 
 
-def get_available_splits(parser) -> list[str] | None:
+def get_available_splits(parser: DatasetParser[Any]) -> list[str] | None:
     """Get available splits for the selected parser after loading."""
     if not hasattr(parser, "split_names") or not parser.split_names:
         return None
-    return parser.split_names
+    return list(parser.split_names)
 
 
-def get_available_tasks(parser) -> list[str]:
+def get_available_tasks(parser: DatasetParser[Any]) -> list[str]:
     """Get available tasks for the selected parser."""
     if not hasattr(parser, "task_names"):
         return ["default"]
-    return parser.task_names
+    return list(parser.task_names)
 
 
 def format_entry_attributes(entry: ParseEntry) -> str:
@@ -41,7 +47,7 @@ def format_entry_attributes(entry: ParseEntry) -> str:
 
 def load_and_parse(
     parser_name: str, task_name: str | None, split_name: str | None
-) -> tuple:
+) -> tuple[int, str, str, str, str, gr.Dropdown, str]:
     """Load and parse the dataset, return the first entry and available splits."""
     try:
         parser = get_parser_instance(parser_name)
@@ -72,7 +78,7 @@
 
     info = parser.__repr__()
     if not parsed_data:
-        return 0, "No entries found", "", "", split_dropdown, info
+        return 0, "No entries found", "", "", "", split_dropdown, info
 
     # Get the first entry
     first_entry = parsed_data[0]
@@ -92,7 +98,9 @@
         return 0, error_msg, "", "", "", [], ""
 
 
-def update_entry(parsed_data_index: int | None, parser_name: str):
+def update_entry(
+    parsed_data_index: int | None, parser_name: str
+) -> tuple[str, str, str, str]:
     """Update the displayed entry based on the selected index."""
     try:
         if not parser_name:
@@ -120,7 +128,7 @@ def update_entry(parsed_data_index: int | None, parser_name: str):
             format_entry_attributes(entry),
         )
     except Exception as e:
-        return f"Error: {str(e)}", "", ""
+        return f"Error: {str(e)}", "", "", ""
 
 
 def update_parser_options(parser_name: str) -> tuple[gr.Dropdown, gr.Dropdown, str]:
@@ -159,7 +167,7 @@ def update_parser_options(parser_name: str) -> tuple[gr.Dropdown, gr.Dropdown, str]:
     )
 
 
-def clear_parser_cache():
+def clear_parser_cache() -> None:
     """Clear the parser cache."""
     get_parser_instance.cache_clear()
 
@@ -242,7 +250,8 @@ def update_metric_details(metric_name: str, parser_name: str) -> str:
         return f"Error loading metric details: {str(e)}"
 
 
-def create_interface():
+def create_interface() -> gr.Blocks:
+    """Create and return the Gradio interface."""
     with gr.Blocks() as demo:
         gr.Markdown("# LLM Evaluation Dataset Parser")
 
@@ -377,5 +386,20 @@
 
 
 if __name__ == "__main__":
+    print("Starting Gradio interface...")  # Add debug logging
     demo = create_interface()
-    …
+    try:
+        demo.launch(
+            server_port=7860,
+            auth=None,
+            ssl_keyfile=None,
+            ssl_certfile=None,
+            show_error=True,  # Changed to True for debugging
+            share=False,
+            max_threads=40,
+        )
+    except Exception as e:
+        print(f"Error launching Gradio: {e}")  # Add error logging
+        import traceback
+
+        traceback.print_exc()
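The `@lru_cache(maxsize=32)` on `get_parser_instance` is what makes `clear_parser_cache` meaningful: the same `parser_name` yields the same cached object until `cache_clear()` runs. A self-contained sketch of that behavior, with a toy factory standing in for the registry:

```python
from functools import lru_cache


@lru_cache(maxsize=32)
def get_instance(name: str) -> object:
    # Stand-in for ParserRegistry.get_parser(name)
    return object()


a = get_instance("mmlu")
b = get_instance("mmlu")
assert a is b  # repeated lookups reuse the cached instance

get_instance.cache_clear()  # what clear_parser_cache() does
assert get_instance("mmlu") is not a  # fresh instance after clearing
```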
llmdataparser/__init__.py
CHANGED
@@ -1,5 +1,5 @@
 # llmdataparser/__init__.py
-from typing import Type
+from typing import Any, Type
 
 from .base_parser import DatasetParser
 from .bbh_parser import BBHDatasetParser
@@ -31,11 +31,13 @@ class ParserRegistry:
         cls._registry[name.lower()] = parser_class
 
     @classmethod
-    def get_parser(cls, name: str, **kwargs) -> …
+    def get_parser(cls, name: str, **kwargs: Any) -> DatasetParser[Any]:
+        """Get a parser instance by name."""
        parser_class = cls._registry.get(name.lower())
        if parser_class is None:
            raise ValueError(f"Parser '{name}' is not registered.")
-        …
+        parser: DatasetParser[Any] = parser_class(**kwargs)
+        return parser
 
     @classmethod
     def list_parsers(cls) -> list[str]:
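Since `register` stores names via `name.lower()` and `get_parser` lowercases its lookup, parser names are case-insensitive, and the method now returns a constructed instance rather than the class itself. A usage sketch (the "mmlu" registration name is taken from the demo notebook):

```python
from llmdataparser import ParserRegistry

parser = ParserRegistry.get_parser("MMLU")  # equivalent to "mmlu"
print(type(parser).__name__)

try:
    ParserRegistry.get_parser("not-a-dataset")
except ValueError as err:
    print(err)  # Parser 'not-a-dataset' is not registered.
```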
llmdataparser/base_parser.py
CHANGED
@@ -84,7 +84,7 @@ class DatasetParser(Generic[T], ABC):
     Abstract base class defining the interface for all dataset parsers.
     """
 
-    def __init__(self):
+    def __init__(self) -> None:
         self._parsed_data: list[T] = []
 
     @abstractmethod
@@ -151,7 +151,7 @@ class HuggingFaceDatasetParser(DatasetParser[T]):
     # _hidden_task_names is the list of task names that are hidden in the dataset, e.g. ["math", "physics", "chemistry"]
     _hidden_task_names: ClassVar[list[str]] = []
 
-    def __init__(self, system_prompt: str | None = None, **kwargs):
+    def __init__(self, system_prompt: str | None = None, **kwargs: Any) -> None:
         """
         Initialize a HuggingFaceDatasetParser.
 
@@ -183,7 +183,9 @@
         # If data_entry is provided and contains task information, use it
         if data_entry is not None and hasattr(self, "_get_task_from_entry"):
             try:
-                …
+                task = self._get_task_from_entry(data_entry)
+                if isinstance(task, str):  # Add type checking
+                    return task
             except (KeyError, AttributeError):
                 pass
 
@@ -207,12 +209,17 @@
     @staticmethod
     @lru_cache(maxsize=3)
     def load_dataset_cached(
-        data_source: str,
-        …
+        data_source: str,
+        task_name: str = "default",
+        trust_remote_code: bool = True,
+        **kwargs: Any,
+    ) -> datasets.Dataset:
         """
         Cached static method to load a dataset from Hugging Face.
         """
-        return datasets.load_dataset(…
+        return datasets.load_dataset(
+            data_source, task_name, trust_remote_code=trust_remote_code, **kwargs
+        )
 
     def parse(
         self,
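Because `load_dataset_cached` sits behind `lru_cache(maxsize=3)`, every argument passed to it must be hashable (so any extra `**kwargs` should be plain strings, ints, or bools), and only the three most recent argument combinations stay cached. An illustrative call (the dataset and task names here are examples, not from this commit):

```python
from llmdataparser.base_parser import HuggingFaceDatasetParser

# Identical arguments hit the cache and skip the download the second time.
ds1 = HuggingFaceDatasetParser.load_dataset_cached("cais/mmlu", task_name="abstract_algebra")
ds2 = HuggingFaceDatasetParser.load_dataset_cached("cais/mmlu", task_name="abstract_algebra")
assert ds1 is ds2  # served from lru_cache, not re-downloaded
```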
llmdataparser/math_parser.py
CHANGED
@@ -63,7 +63,7 @@ class MATHDatasetParser(HuggingFaceDatasetParser[MATHParseEntry]):
 
     def _get_task_from_entry(self, data_entry: dict[str, Any]) -> str:
         """Get the task name from the data entry or fall back to current task."""
-        entry_type = data_entry.get("type")
+        entry_type: str = data_entry.get("type", "")
         if entry_type and (entry_type in self._task_names):
             return entry_type
         return self._current_task or self._default_task
llmdataparser/mmlu_parser.py
CHANGED
@@ -99,7 +99,7 @@ class MMLUDatasetParser(HuggingFaceDatasetParser[MMLUParseEntry]):
 
     def _get_task_from_entry(self, data_entry: dict[str, Any]) -> str:
         """Get the task name from the data entry or default task name."""
-        task_name = data_entry.get("subject")
+        task_name: str = data_entry.get("subject", "")
         return task_name if task_name else (self._current_task or self._default_task)
 
     def process_entry(
@@ -574,7 +574,7 @@ class MMLUProDatasetParser(HuggingFaceDatasetParser[MMLUProParseEntry]):
     def _get_task_from_entry(self, data_entry: dict[str, Any]) -> str:
         """Get the task name from the data entry or default task name."""
         if data_entry is not None:
-            task_name = data_entry.get("category")
+            task_name: str = data_entry.get("category", "")
             if task_name:
                 return task_name
         return self._current_task or self._default_task
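This change and the `math_parser.py` one above share a pattern: `dict.get(key)` on a `dict[str, Any]` yields `Any`, which trips the `warn_return_any` mypy setting added in `pyproject.toml` below, so the parsers switch to `dict.get(key, "")` with an explicit `str` annotation and let the falsy empty string fall through to the default task. A toy sketch of the pattern (function and key names are illustrative):

```python
from typing import Any


def task_from_entry(data_entry: dict[str, Any], fallback: str = "default") -> str:
    task_name: str = data_entry.get("subject", "")
    # "" (missing or empty key) is falsy, so both cases reach the fallback.
    return task_name if task_name else fallback


print(task_from_entry({"subject": "physics"}))  # physics
print(task_from_entry({"subject": ""}))         # default
print(task_from_entry({}))                      # default
```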
notebooks/demo.ipynb
DELETED
@@ -1,77 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pprint\n",
-    "import random"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from llmdataparser import ParserRegistry\n",
-    "ParserRegistry.list_parsers()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "mmlu_parser = ParserRegistry.get_parser('mmlu')\n",
-    "mmlu_parser.load()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "mmlu_parser.parse(split_names=['dev', 'test'])\n",
-    "parsed_data = mmlu_parser.get_parsed_data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "index = random.randint(0, len(parsed_data))\n",
-    "print(f\"Question: \\n-------------------\\n {parsed_data[index].prompt}\")\n",
-    "print(\"-------------------\")\n",
-    "print(f\"Answer: \\n-------------------\\n{parsed_data[index].answer_letter}\")"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "llmdata",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.7"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
poetry.lock
ADDED
The diff for this file is too large to render.
pyproject.toml
CHANGED
@@ -1,6 +1,10 @@
+[build-system]
+requires = ["poetry-core>=1.5.0"]
+build-backend = "poetry.core.masonry.api"
+
 [tool.poetry]
 name = "llmdataparser"
-version = "…
+version = "1.0.0"
 description = "A collection of parsers for LLM benchmark datasets like MMLU, MMLU-Pro, GSM8k, and more."
 authors = ["Jeff Yang <[email protected]>"]
 license = "MIT"
@@ -16,14 +20,23 @@ classifiers = [
     "Intended Audience :: Developers"
 ]
 
+packages = [
+    { include = "llmdataparser" }
+]
+
+[tool.poetry.scripts]
+start = "llmdataparser.app:main"
+
 [tool.poetry.dependencies]
-python = ">=3.…
+python = ">=3.12"
 pandas = "^2.0.3"
 datasets = "^2.14.4"
 typing-extensions = "^4.8.0"
 ipywidgets = "^8.1.1"
 gradio = "^4.19.2"
-…
+pyyaml = "^6.0.1" # Add this for configuration handling
+tqdm = "^4.66.1" # Add this for progress bars
+numpy = "^1.24.0" # Add this for numerical operations
 
 [tool.poetry.group.dev.dependencies]
 pytest = "^7.0.0"
@@ -33,6 +46,9 @@ mypy = "^1.5.1"
 pre-commit = "^3.4.0"
 types-python-dateutil = "^2.8.19.14"
 ipykernel = "^6.7.0"
+coverage = "^7.4.1"
+pytest-cov = "^4.1.0"
+evaluate = "^0.4.0"
 
 [tool.ruff]
 line-length = 88
@@ -41,20 +57,20 @@ line-length = 88
 select = ["E", "F", "I"]
 ignore = ["E501"]
 
-[tool.ruff.format]
-quote-style = "double"
-indent-style = "space"
-skip-magic-trailing-comma = false
 
-[…
-…
-…
+[tool.isort]
+profile = "black"
+multi_line_output = 3
+line_length = 88
 
-[tool.…
-…
-…
-…
+[tool.mypy]
+python_version = "3.12"
+warn_return_any = true
+warn_unused_configs = true
+disallow_untyped_defs = true
+check_untyped_defs = true
 
-[tool.…
-…
-…
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+python_files = ["test_*.py"]
+addopts = "-ra -q --cov=llmdataparser --cov-report=term-missing"
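The new `[tool.poetry.scripts]` entry expects a `main` callable at `llmdataparser.app:main`, a module this commit does not show (only the top-level `app.py` appears here). A hypothetical entry point consistent with that declaration might look like:

```python
# Hypothetical llmdataparser/app.py matching
#   [tool.poetry.scripts] start = "llmdataparser.app:main"
# This module is assumed, not shown in the commit.


def main() -> None:
    from app import create_interface  # reuse the top-level Gradio interface

    demo = create_interface()
    demo.launch(server_port=7860, show_error=True, share=False)


if __name__ == "__main__":
    main()
```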