feat: installation
Files changed:
- .gitignore +0 -2
- Dockerfile +60 -0
- Makefile +50 -0
- README.md +27 -4
- app.py +37 -13
- llmdataparser/__init__.py +5 -3
- llmdataparser/base_parser.py +13 -6
- llmdataparser/math_parser.py +1 -1
- llmdataparser/mmlu_parser.py +2 -2
- notebooks/demo.ipynb +0 -77
- poetry.lock +0 -0
- pyproject.toml +33 -17
.gitignore
CHANGED
@@ -8,8 +8,6 @@ build/
 dist/
 *.egg-info/
 
-# Poetry
-poetry.lock
 
 # Virtual environment
 .env/
Dockerfile
ADDED
@@ -0,0 +1,60 @@
+# Use Python 3.12 slim image as base
+FROM python:3.12-slim
+
+# Set environment variables
+ENV PYTHONUNBUFFERED=1 \
+    POETRY_VERSION=1.7.1 \
+    POETRY_HOME="/opt/poetry" \
+    POETRY_NO_INTERACTION=1 \
+    GRADIO_SERVER_NAME=0.0.0.0 \
+    GRADIO_SERVER_PORT=7860
+
+# Set working directory
+WORKDIR /app
+
+# Create cache directories for Hugging Face
+ENV HF_HOME=/home/app/.cache/huggingface
+RUN mkdir -p /home/app/.cache/huggingface
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    portaudio19-dev \
+    python3-pip \
+    gcc \
+    git \
+    && rm -rf /var/lib/apt/lists/* \
+    && apt-get clean
+
+# Install poetry
+RUN pip install poetry==${POETRY_VERSION} && \
+    poetry config virtualenvs.create false
+
+# Copy dependency files first
+COPY pyproject.toml poetry.lock ./
+
+# Install dependencies using the lock file
+RUN poetry install --no-dev --no-interaction --no-ansi
+
+# Create app user and group
+RUN groupadd -r app && useradd -r -g app app
+
+# Before switching to non-root user, create and set permissions
+RUN mkdir -p /home/app/.cache && \
+    mkdir -p /home/app/.config/matplotlib && \
+    chown -R app:app /home/app/.cache && \
+    chown -R app:app /home/app/.config
+
+# Set matplotlib config dir
+ENV MPLCONFIGDIR=/home/app/.config/matplotlib
+
+# Switch to non-root user
+USER app
+
+# Copy the rest of the application
+COPY --chown=app:app . .
+
+# Expose the port the app runs on
+EXPOSE 7860
+
+# Run the application
+CMD ["python", "app.py"]
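Note: `GRADIO_SERVER_NAME` and `GRADIO_SERVER_PORT` are the standard environment hooks Gradio consults when `launch()` is not given an explicit `server_name`/`server_port`, which is why the image can set the bind address without touching `app.py`. A minimal sketch of that resolution (illustrative, not the library's internals):

```python
# Sketch: how the image's ENV settings reach Gradio's launch().
# Gradio falls back to these variables when server_name/server_port
# are not passed explicitly.
import os

host = os.environ.get("GRADIO_SERVER_NAME", "127.0.0.1")  # "0.0.0.0" in this image
port = int(os.environ.get("GRADIO_SERVER_PORT", "7860"))
print(f"Gradio will bind to {host}:{port}")
```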
Makefile
ADDED
@@ -0,0 +1,50 @@
+# Variables
+IMAGE_NAME = llmdataparser
+CONTAINER_NAME = llmdataparser
+VERSION = latest
+
+# Build the Docker image
+build:
+	docker build -t $(IMAGE_NAME):$(VERSION) .
+
+# Run the container
+run:
+	docker run -d -p 7860:7860 --name $(CONTAINER_NAME) $(IMAGE_NAME):$(VERSION)
+
+# Stop the container
+stop:
+	docker stop $(CONTAINER_NAME)
+
+# Remove the container
+rm:
+	docker rm $(CONTAINER_NAME)
+
+# Remove the image
+rmi:
+	docker rmi $(IMAGE_NAME):$(VERSION)
+
+# Clean everything
+clean: stop rm rmi
+
+# Build and run
+up: build run
+
+# Stop and remove container
+down: stop rm
+
+# Show container logs
+logs:
+	docker logs $(CONTAINER_NAME)
+
+# Enter container shell
+shell:
+	docker exec -it $(CONTAINER_NAME) /bin/bash
+
+# Optional: command to check container status
+status:
+	docker ps -a | grep $(CONTAINER_NAME)
+
+logs-follow:
+	docker logs -f $(CONTAINER_NAME)
+
+.PHONY: build run stop rm rmi clean up down logs shell
README.md
CHANGED
@@ -1,6 +1,6 @@
 # LLMDataParser
 
-**LLMDataParser** is a Python library that provides parsers for benchmark datasets used in evaluating Large Language Models (LLMs). It offers a unified interface for loading and parsing datasets like **MMLU…
+**LLMDataParser** is a Python library that provides parsers for benchmark datasets used in evaluating Large Language Models (LLMs). It offers a unified interface for loading and parsing datasets like **MMLU**, **GSM8k**, and others, streamlining dataset preparation for LLM evaluation. The library aims to simplify the process of working with common LLM benchmark datasets through a consistent API.
 
 ## Features
 
@@ -8,6 +8,7 @@
 - **LLM-Agnostic**: Independent of any specific language model.
 - **Easy to Use**: Simple methods and built-in Python types.
 - **Extensible**: Easily add support for new datasets.
+- **Gradio**: Built-in Gradio interface for interactive dataset exploration and testing.
 
 ## Installation
 
@@ -22,7 +23,7 @@ You can install the package directly using `pip`. Even with only a `pyproject.toml`…
 cd LLMDataParser
 ```
 
-…
+1. **Install Dependencies with pip**:
 
 ```bash
 pip install .
@@ -38,7 +39,7 @@ Poetry manages the virtual environment and dependencies automatically, so you do…
 poetry install
 ```
 
-…
+1. **Activate the Virtual Environment**:
 
 ```bash
 poetry shell
@@ -46,7 +47,29 @@
 
 ## Available Parsers
 
-- **MMLUDatasetParser…
+- **MMLUDatasetParser**
+- **MMLUProDatasetParser**
+- **MMLUReduxDatasetParser**
+- **TMMLUPlusDatasetParser**
+- **GSM8KDatasetParser**
+- **MATHDatasetParser**
+- **MGSMDatasetParser**
+- **HumanEvalDatasetParser**
+- **HumanEvalDatasetPlusParser**
+- **BBHDatasetParser**
+- **MBPPDatasetParser**
+- **IFEvalDatasetParser**
+- **TWLegalDatasetParser**
+- **TMLUDatasetParser**
+
+## Adding New Dataset Parsers
+
+To add support for a new dataset, please refer to our detailed guide in [docs/adding_new_parser.md](docs/adding_new_parser.md). The guide includes:
+
+- Step-by-step instructions for creating a new parser
+- Code examples and templates
+- Best practices and common patterns
+- Testing guidelines
 
 ## License
 
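For reference, end-to-end usage of a parser (as shown by the `notebooks/demo.ipynb` removed later in this commit) amounts to the following sketch; the `- 1` fixes the off-by-one in the notebook's original `randint` call, since `randint` is inclusive on both ends:

```python
import random

from llmdataparser import ParserRegistry

print(ParserRegistry.list_parsers())  # discover registered parser names

mmlu_parser = ParserRegistry.get_parser("mmlu")
mmlu_parser.load()
mmlu_parser.parse(split_names=["dev", "test"])

parsed_data = mmlu_parser.get_parsed_data
index = random.randint(0, len(parsed_data) - 1)  # pick a random entry
print(f"Question:\n{parsed_data[index].prompt}")
print(f"Answer:\n{parsed_data[index].answer_letter}")
```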
app.py
CHANGED
@@ -1,30 +1,36 @@
 import secrets
 from functools import lru_cache
+from typing import Any
 
 import gradio as gr
 
 from llmdataparser import ParserRegistry
-from llmdataparser.base_parser import …
+from llmdataparser.base_parser import (
+    DatasetDescription,
+    DatasetParser,
+    EvaluationMetric,
+    ParseEntry,
+)
 
 
 @lru_cache(maxsize=32)
-def get_parser_instance(parser_name: str):
+def get_parser_instance(parser_name: str) -> DatasetParser[Any]:
     """Get a cached parser instance by name."""
     return ParserRegistry.get_parser(parser_name)
 
 
-def get_available_splits(parser) -> list[str] | None:
+def get_available_splits(parser: DatasetParser[Any]) -> list[str] | None:
     """Get available splits for the selected parser after loading."""
     if not hasattr(parser, "split_names") or not parser.split_names:
         return None
-    return parser.split_names
+    return list(parser.split_names)
 
 
-def get_available_tasks(parser) -> list[str]:
+def get_available_tasks(parser: DatasetParser[Any]) -> list[str]:
     """Get available tasks for the selected parser."""
     if not hasattr(parser, "task_names"):
         return ["default"]
-    return parser.task_names
+    return list(parser.task_names)
 
 
 def format_entry_attributes(entry: ParseEntry) -> str:
@@ -41,7 +47,7 @@ def format_entry_attributes(entry: ParseEntry) -> str:
 
 def load_and_parse(
     parser_name: str, task_name: str | None, split_name: str | None
-) -> tuple:
+) -> tuple[int, str, str, str, str, gr.Dropdown, str]:
     """Load and parse the dataset, return the first entry and available splits."""
     try:
         parser = get_parser_instance(parser_name)
@@ -72,7 +78,7 @@
 
     info = parser.__repr__()
     if not parsed_data:
-        return 0, "No entries found", "", "", split_dropdown, info
+        return 0, "No entries found", "", "", "", split_dropdown, info
 
     # Get the first entry
     first_entry = parsed_data[0]
@@ -92,7 +98,9 @@
         return 0, error_msg, "", "", "", [], ""
 
 
-def update_entry(parsed_data_index: int | None, parser_name: str):
+def update_entry(
+    parsed_data_index: int | None, parser_name: str
+) -> tuple[str, str, str, str]:
     """Update the displayed entry based on the selected index."""
     try:
         if not parser_name:
@@ -120,7 +128,7 @@ def update_entry(parsed_data_index: int | None, parser_name: str):
             format_entry_attributes(entry),
         )
     except Exception as e:
-        return f"Error: {str(e)}", "", ""
+        return f"Error: {str(e)}", "", "", ""
 
 
 def update_parser_options(parser_name: str) -> tuple[gr.Dropdown, gr.Dropdown, str]:
@@ -159,7 +167,7 @@ def update_parser_options(parser_name: str) -> tuple[gr.Dropdown, gr.Dropdown, str]:
     )
 
 
-def clear_parser_cache():
+def clear_parser_cache() -> None:
     """Clear the parser cache."""
     get_parser_instance.cache_clear()
 
@@ -242,7 +250,8 @@ def update_metric_details(metric_name: str, parser_name: str) -> str:
         return f"Error loading metric details: {str(e)}"
 
 
-def create_interface():
+def create_interface() -> gr.Blocks:
+    """Create and return the Gradio interface."""
     with gr.Blocks() as demo:
         gr.Markdown("# LLM Evaluation Dataset Parser")
 
@@ -377,5 +386,20 @@
 
 
 if __name__ == "__main__":
+    print("Starting Gradio interface...")  # Add debug logging
     demo = create_interface()
-    …
+    try:
+        demo.launch(
+            server_port=7860,
+            auth=None,
+            ssl_keyfile=None,
+            ssl_certfile=None,
+            show_error=True,  # Changed to True for debugging
+            share=False,
+            max_threads=40,
+        )
+    except Exception as e:
+        print(f"Error launching Gradio: {e}")  # Add error logging
+        import traceback
+
+        traceback.print_exc()
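The `@lru_cache(maxsize=32)` on `get_parser_instance` is what makes `clear_parser_cache` meaningful: the same `parser_name` yields the same cached object until `cache_clear()` runs. A self-contained sketch of that behavior, with a toy factory standing in for the registry:

```python
from functools import lru_cache


@lru_cache(maxsize=32)
def get_instance(name: str) -> object:
    # Stand-in for ParserRegistry.get_parser(name)
    return object()


a = get_instance("mmlu")
b = get_instance("mmlu")
assert a is b  # repeated lookups reuse the cached instance

get_instance.cache_clear()  # what clear_parser_cache() does
assert get_instance("mmlu") is not a  # fresh instance after clearing
```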
llmdataparser/__init__.py
CHANGED
@@ -1,5 +1,5 @@
 # llmdataparser/__init__.py
-from typing import Type
+from typing import Any, Type
 
 from .base_parser import DatasetParser
 from .bbh_parser import BBHDatasetParser
@@ -31,11 +31,13 @@ class ParserRegistry:
         cls._registry[name.lower()] = parser_class
 
     @classmethod
-    def get_parser(cls, name: str, **kwargs) -> …
+    def get_parser(cls, name: str, **kwargs: Any) -> DatasetParser[Any]:
+        """Get a parser instance by name."""
        parser_class = cls._registry.get(name.lower())
        if parser_class is None:
            raise ValueError(f"Parser '{name}' is not registered.")
-        …
+        parser: DatasetParser[Any] = parser_class(**kwargs)
+        return parser
 
     @classmethod
     def list_parsers(cls) -> list[str]:
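Since `register` stores names via `name.lower()` and `get_parser` lowercases its lookup, parser names are case-insensitive, and the method now returns a constructed instance rather than the class itself. A usage sketch (the "mmlu" registration name is taken from the demo notebook):

```python
from llmdataparser import ParserRegistry

parser = ParserRegistry.get_parser("MMLU")  # equivalent to "mmlu"
print(type(parser).__name__)

try:
    ParserRegistry.get_parser("not-a-dataset")
except ValueError as err:
    print(err)  # Parser 'not-a-dataset' is not registered.
```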
llmdataparser/base_parser.py
CHANGED
@@ -84,7 +84,7 @@ class DatasetParser(Generic[T], ABC):
     Abstract base class defining the interface for all dataset parsers.
     """
 
-    def __init__(self):
+    def __init__(self) -> None:
         self._parsed_data: list[T] = []
 
     @abstractmethod
@@ -151,7 +151,7 @@ class HuggingFaceDatasetParser(DatasetParser[T]):
     # _hidden_task_names is the list of task names that are hidden in the dataset, e.g. ["math", "physics", "chemistry"]
     _hidden_task_names: ClassVar[list[str]] = []
 
-    def __init__(self, system_prompt: str | None = None, **kwargs):
+    def __init__(self, system_prompt: str | None = None, **kwargs: Any) -> None:
         """
         Initialize a HuggingFaceDatasetParser.
 
@@ -183,7 +183,9 @@
         # If data_entry is provided and contains task information, use it
         if data_entry is not None and hasattr(self, "_get_task_from_entry"):
             try:
-                …
+                task = self._get_task_from_entry(data_entry)
+                if isinstance(task, str):  # Add type checking
+                    return task
             except (KeyError, AttributeError):
                 pass
 
@@ -207,12 +209,17 @@
     @staticmethod
     @lru_cache(maxsize=3)
     def load_dataset_cached(
-        data_source: str,
-        …
+        data_source: str,
+        task_name: str = "default",
+        trust_remote_code: bool = True,
+        **kwargs: Any,
+    ) -> datasets.Dataset:
         """
         Cached static method to load a dataset from Hugging Face.
         """
-        return datasets.load_dataset(…
+        return datasets.load_dataset(
+            data_source, task_name, trust_remote_code=trust_remote_code, **kwargs
+        )
 
     def parse(
         self,
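Because `load_dataset_cached` sits behind `lru_cache(maxsize=3)`, every argument passed to it must be hashable (so any extra `**kwargs` should be plain strings, ints, or bools), and only the three most recent argument combinations stay cached. An illustrative call (the dataset and task names here are examples, not from this commit):

```python
from llmdataparser.base_parser import HuggingFaceDatasetParser

# Identical arguments hit the cache and skip the download the second time.
ds1 = HuggingFaceDatasetParser.load_dataset_cached("cais/mmlu", task_name="abstract_algebra")
ds2 = HuggingFaceDatasetParser.load_dataset_cached("cais/mmlu", task_name="abstract_algebra")
assert ds1 is ds2  # served from lru_cache, not re-downloaded
```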
llmdataparser/math_parser.py
CHANGED
@@ -63,7 +63,7 @@ class MATHDatasetParser(HuggingFaceDatasetParser[MATHParseEntry]):
 
     def _get_task_from_entry(self, data_entry: dict[str, Any]) -> str:
         """Get the task name from the data entry or fall back to current task."""
-        entry_type = data_entry.get("type")
+        entry_type: str = data_entry.get("type", "")
         if entry_type and (entry_type in self._task_names):
             return entry_type
         return self._current_task or self._default_task
llmdataparser/mmlu_parser.py
CHANGED
@@ -99,7 +99,7 @@ class MMLUDatasetParser(HuggingFaceDatasetParser[MMLUParseEntry]):
 
     def _get_task_from_entry(self, data_entry: dict[str, Any]) -> str:
         """Get the task name from the data entry or default task name."""
-        task_name = data_entry.get("subject")
+        task_name: str = data_entry.get("subject", "")
         return task_name if task_name else (self._current_task or self._default_task)
 
     def process_entry(
@@ -574,7 +574,7 @@ class MMLUProDatasetParser(HuggingFaceDatasetParser[MMLUProParseEntry]):
     def _get_task_from_entry(self, data_entry: dict[str, Any]) -> str:
         """Get the task name from the data entry or default task name."""
         if data_entry is not None:
-            task_name = data_entry.get("category")
+            task_name: str = data_entry.get("category", "")
             if task_name:
                 return task_name
         return self._current_task or self._default_task
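This change and the `math_parser.py` one above share a pattern: `dict.get(key)` on a `dict[str, Any]` yields `Any`, which trips the `warn_return_any` mypy setting added in `pyproject.toml` below, so the parsers switch to `dict.get(key, "")` with an explicit `str` annotation and let the falsy empty string fall through to the default task. A toy sketch of the pattern (function and key names are illustrative):

```python
from typing import Any


def task_from_entry(data_entry: dict[str, Any], fallback: str = "default") -> str:
    task_name: str = data_entry.get("subject", "")
    # "" (missing or empty key) is falsy, so both cases reach the fallback.
    return task_name if task_name else fallback


print(task_from_entry({"subject": "physics"}))  # physics
print(task_from_entry({"subject": ""}))         # default
print(task_from_entry({}))                      # default
```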
notebooks/demo.ipynb
DELETED
@@ -1,77 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pprint\n",
-    "import random"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from llmdataparser import ParserRegistry\n",
-    "ParserRegistry.list_parsers()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "mmlu_parser = ParserRegistry.get_parser('mmlu')\n",
-    "mmlu_parser.load()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "mmlu_parser.parse(split_names=['dev', 'test'])\n",
-    "parsed_data = mmlu_parser.get_parsed_data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "index = random.randint(0, len(parsed_data))\n",
-    "print(f\"Question: \\n-------------------\\n {parsed_data[index].prompt}\")\n",
-    "print(\"-------------------\")\n",
-    "print(f\"Answer: \\n-------------------\\n{parsed_data[index].answer_letter}\")"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "llmdata",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.7"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
poetry.lock
ADDED
The diff for this file is too large to render.
pyproject.toml
CHANGED
@@ -1,6 +1,10 @@
+[build-system]
+requires = ["poetry-core>=1.5.0"]
+build-backend = "poetry.core.masonry.api"
+
 [tool.poetry]
 name = "llmdataparser"
-version = "…
+version = "1.0.0"
 description = "A collection of parsers for LLM benchmark datasets like MMLU, MMLU-Pro, GSM8k, and more."
 authors = ["Jeff Yang <[email protected]>"]
 license = "MIT"
@@ -16,14 +20,23 @@ classifiers = [
     "Intended Audience :: Developers"
 ]
 
+packages = [
+    { include = "llmdataparser" }
+]
+
+[tool.poetry.scripts]
+start = "llmdataparser.app:main"
+
 [tool.poetry.dependencies]
-python = ">=3.…
+python = ">=3.12"
 pandas = "^2.0.3"
 datasets = "^2.14.4"
 typing-extensions = "^4.8.0"
 ipywidgets = "^8.1.1"
 gradio = "^4.19.2"
-…
+pyyaml = "^6.0.1" # Add this for configuration handling
+tqdm = "^4.66.1" # Add this for progress bars
+numpy = "^1.24.0" # Add this for numerical operations
 
 [tool.poetry.group.dev.dependencies]
 pytest = "^7.0.0"
@@ -33,6 +46,9 @@ mypy = "^1.5.1"
 pre-commit = "^3.4.0"
 types-python-dateutil = "^2.8.19.14"
 ipykernel = "^6.7.0"
+coverage = "^7.4.1"
+pytest-cov = "^4.1.0"
+evaluate = "^0.4.0"
 
 [tool.ruff]
 line-length = 88
@@ -41,20 +57,20 @@ line-length = 88
 select = ["E", "F", "I"]
 ignore = ["E501"]
 
-[tool.ruff.format]
-quote-style = "double"
-indent-style = "space"
-skip-magic-trailing-comma = false
 
-[…
-…
-…
+[tool.isort]
+profile = "black"
+multi_line_output = 3
+line_length = 88
 
-[tool.…
-…
-…
-…
+[tool.mypy]
+python_version = "3.12"
+warn_return_any = true
+warn_unused_configs = true
+disallow_untyped_defs = true
+check_untyped_defs = true
 
-[tool.…
-…
-…
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+python_files = ["test_*.py"]
+addopts = "-ra -q --cov=llmdataparser --cov-report=term-missing"
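The new `[tool.poetry.scripts]` entry expects a `main` callable at `llmdataparser.app:main`, a module this commit does not show (only the top-level `app.py` appears here). A hypothetical entry point consistent with that declaration might look like:

```python
# Hypothetical llmdataparser/app.py matching
#   [tool.poetry.scripts] start = "llmdataparser.app:main"
# This module is assumed, not shown in the commit.


def main() -> None:
    from app import create_interface  # reuse the top-level Gradio interface

    demo = create_interface()
    demo.launch(server_port=7860, show_error=True, share=False)


if __name__ == "__main__":
    main()
```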