Terry Zhuo
committed on
Commit
·
c3c5af3
1
Parent(s):
7d73426
update
Browse files- app.py +2 -2
- src/display/about.py +3 -2
- src/envs.py +1 -1
app.py
CHANGED
@@ -390,7 +390,7 @@ with main_block as demo:
|
|
390 |
gr.Markdown(
|
391 |
"""
|
392 |
**Notes:**
|
393 |
-
- For the
|
394 |
- _Hard Set_ vs _Full Set_:
|
395 |
- <u>Hard Set</u>: A subset of ~150 BigCodeBench tasks which is more user-facing and challenging.
|
396 |
- <u>Full Set</u>: The full set of 1140 BigCodeBench tasks.
|
@@ -524,7 +524,7 @@ with main_block as demo:
|
|
524 |
)
|
525 |
|
526 |
with gr.TabItem("🛠️ Code Execution (Beta)", id=5):
|
527 |
-
gr.Markdown("## Upload your sanitized JSONL file
|
528 |
|
529 |
with gr.Row():
|
530 |
jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
|
|
|
390 |
gr.Markdown(
|
391 |
"""
|
392 |
**Notes:**
|
393 |
+
- For the limited compute, we now update the Hard Set leaderboard. (**We are open to sponsorship for more compute!**)
|
394 |
- _Hard Set_ vs _Full Set_:
|
395 |
- <u>Hard Set</u>: A subset of ~150 BigCodeBench tasks which is more user-facing and challenging.
|
396 |
- <u>Full Set</u>: The full set of 1140 BigCodeBench tasks.
|
|
|
524 |
)
|
525 |
|
526 |
with gr.TabItem("🛠️ Code Execution (Beta)", id=5):
|
527 |
+
gr.Markdown("## Upload your [sanitized JSONL file](https://github.com/bigcode-project/bigcodebench?tab=readme-ov-file#code-post-processing) to evaluate\n\n### Hard Set Ground Truth Pass Rate: 100%\n### Full Set Ground Truth Pass Rate: 99.6%")
|
528 |
|
529 |
with gr.Row():
|
530 |
jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
|
src/display/about.py
CHANGED
@@ -143,6 +143,7 @@ CITATION_BUTTON_TEXT = r"""
|
|
143 |
"""
|
144 |
|
145 |
SUBMISSION_TEXT_3="""
|
146 |
-
## We welcome the community to request for new models to be added to the leaderboard.
|
147 |
-
##
|
|
|
148 |
"""
|
|
|
143 |
"""
|
144 |
|
145 |
SUBMISSION_TEXT_3="""
|
146 |
+
## We welcome the community to submit the evaluation results or request for new models to be added to the leaderboard.
|
147 |
+
## To submit the evaluation results, please send us your (1) raw generations, (2) sanitized generations, (3) execution logs, and (4) pass rate results to our [email](mailto:terry.[email protected]). We will review and add the results to the leaderboard as soon as possible.
|
148 |
+
## To request for the new model evaluation, please [file an issue](https://github.com/bigcode-project/bigcodebench/issues/new/choose) to add the model to the leaderboard or [start a discussion](https://huggingface.co/spaces/bigcode/bigcodebench-leaderboard/discussions/new) in the community 🤗
|
149 |
"""
|
src/envs.py
CHANGED
@@ -4,7 +4,7 @@ from huggingface_hub import HfApi
|
|
4 |
# clone / pull the lmeh eval data
|
5 |
HF_TOKEN = os.environ.get("HF_TOKEN", None)
|
6 |
|
7 |
-
DATA_VERSION = "v0.1.
|
8 |
|
9 |
REPO_ID = "bigcode/bigcodebench-leaderboard"
|
10 |
QUEUE_REPO = "bigcode/bigcodebench-requests"
|
|
|
4 |
# clone / pull the lmeh eval data
|
5 |
HF_TOKEN = os.environ.get("HF_TOKEN", None)
|
6 |
|
7 |
+
DATA_VERSION = "v0.1.1_hf"
|
8 |
|
9 |
REPO_ID = "bigcode/bigcodebench-leaderboard"
|
10 |
QUEUE_REPO = "bigcode/bigcodebench-requests"
|