Terry Zhuo
committed on
Commit
·
c3c5af3
1
Parent(s):
7d73426
update
Browse files- app.py +2 -2
- src/display/about.py +3 -2
- src/envs.py +1 -1
app.py
CHANGED
@@ -390,7 +390,7 @@ with main_block as demo:
|
|
390 |
gr.Markdown(
|
391 |
"""
|
392 |
**Notes:**
|
393 |
-
- For the
|
394 |
- _Hard Set_ vs _Full Set_:
|
395 |
- <u>Hard Set</u>: A subset of ~150 BigCodeBench tasks which is more user-facing and challenging.
|
396 |
- <u>Full Set</u>: The full set of 1140 BigCodeBench tasks.
|
@@ -524,7 +524,7 @@ with main_block as demo:
|
|
524 |
)
|
525 |
|
526 |
with gr.TabItem("🛠️ Code Execution (Beta)", id=5):
|
527 |
-
gr.Markdown("## Upload your sanitized JSONL file
|
528 |
|
529 |
with gr.Row():
|
530 |
jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
|
|
|
390 |
gr.Markdown(
|
391 |
"""
|
392 |
**Notes:**
|
393 |
+
- For the limited compute, we now update the Hard Set leaderboard. (**We are open to sponsorship for more compute!**)
|
394 |
- _Hard Set_ vs _Full Set_:
|
395 |
- <u>Hard Set</u>: A subset of ~150 BigCodeBench tasks which is more user-facing and challenging.
|
396 |
- <u>Full Set</u>: The full set of 1140 BigCodeBench tasks.
|
|
|
524 |
)
|
525 |
|
526 |
with gr.TabItem("🛠️ Code Execution (Beta)", id=5):
|
527 |
+
gr.Markdown("## Upload your [sanitized JSONL file](https://github.com/bigcode-project/bigcodebench?tab=readme-ov-file#code-post-processing) to evaluate\n\n### Hard Set Ground Truth Pass Rate: 100%\n### Full Set Ground Truth Pass Rate: 99.6%")
|
528 |
|
529 |
with gr.Row():
|
530 |
jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
|
src/display/about.py
CHANGED
@@ -143,6 +143,7 @@ CITATION_BUTTON_TEXT = r"""
|
|
143 |
"""
|
144 |
|
145 |
SUBMISSION_TEXT_3="""
|
146 |
-
## We welcome the community to request for new models to be added to the leaderboard.
|
147 |
-
##
|
|
|
148 |
"""
|
|
|
143 |
"""
|
144 |
|
145 |
SUBMISSION_TEXT_3="""
|
146 |
+
## We welcome the community to submit the evaluation results or request for new models to be added to the leaderboard.
|
147 |
+
## To submit the evaluation results, please send us your (1) raw generations, (2) sanitized generations, (3) execution logs, and (4) pass rate results to our [email](mailto:terry.[email protected]). We will review and add the results to the leaderboard as soon as possible.
|
148 |
+
## To request for the new model evaluation, please [file an issue](https://github.com/bigcode-project/bigcodebench/issues/new/choose) to add the model to the leaderboard or [start a discussion](https://huggingface.co/spaces/bigcode/bigcodebench-leaderboard/discussions/new) in the community 🤗
|
149 |
"""
|
src/envs.py
CHANGED
@@ -4,7 +4,7 @@ from huggingface_hub import HfApi
|
|
4 |
# clone / pull the lmeh eval data
|
5 |
HF_TOKEN = os.environ.get("HF_TOKEN", None)
|
6 |
|
7 |
-
DATA_VERSION = "v0.1.
|
8 |
|
9 |
REPO_ID = "bigcode/bigcodebench-leaderboard"
|
10 |
QUEUE_REPO = "bigcode/bigcodebench-requests"
|
|
|
4 |
# clone / pull the lmeh eval data
|
5 |
HF_TOKEN = os.environ.get("HF_TOKEN", None)
|
6 |
|
7 |
+
DATA_VERSION = "v0.1.1_hf"
|
8 |
|
9 |
REPO_ID = "bigcode/bigcodebench-leaderboard"
|
10 |
QUEUE_REPO = "bigcode/bigcodebench-requests"
|