Terry Zhuo commited on
Commit
c3c5af3
1 Parent(s): 7d73426
Files changed (3) hide show
  1. app.py +2 -2
  2. src/display/about.py +3 -2
  3. src/envs.py +1 -1
app.py CHANGED
@@ -390,7 +390,7 @@ with main_block as demo:
390
  gr.Markdown(
391
  """
392
  **Notes:**
393
- - For efficiency reasons, we only display the Hard Set leaderboard.
394
  - _Hard Set_ vs _Full Set_:
395
  - <u>Hard Set</u>: A subset of ~150 BigCodeBench tasks which is more user-facing and challenging.
396
  - <u>Full Set</u>: The full set of 1140 BigCodeBench tasks.
@@ -524,7 +524,7 @@ with main_block as demo:
524
  )
525
 
526
  with gr.TabItem("🛠️ Code Execution (Beta)", id=5):
527
- gr.Markdown("## Upload your sanitized JSONL file to evaluate (see [GitHub](https://github.com/bigcode-project/bigcodebench) for more details)\n\n### Hard Set Ground Truth Pass Rate: 100%\n### Full Set Ground Truth Pass Rate: 99.6%")
528
 
529
  with gr.Row():
530
  jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
 
390
  gr.Markdown(
391
  """
392
  **Notes:**
393
+ - Due to limited compute, we now update only the Hard Set leaderboard. (**We are open to sponsorship for more compute!**)
394
  - _Hard Set_ vs _Full Set_:
395
  - <u>Hard Set</u>: A subset of ~150 BigCodeBench tasks which is more user-facing and challenging.
396
  - <u>Full Set</u>: The full set of 1140 BigCodeBench tasks.
 
524
  )
525
 
526
  with gr.TabItem("🛠️ Code Execution (Beta)", id=5):
527
+ gr.Markdown("## Upload your [sanitized JSONL file](https://github.com/bigcode-project/bigcodebench?tab=readme-ov-file#code-post-processing) to evaluate\n\n### Hard Set Ground Truth Pass Rate: 100%\n### Full Set Ground Truth Pass Rate: 99.6%")
528
 
529
  with gr.Row():
530
  jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
src/display/about.py CHANGED
@@ -143,6 +143,7 @@ CITATION_BUTTON_TEXT = r"""
143
  """
144
 
145
  SUBMISSION_TEXT_3="""
146
- ## We welcome the community to request that new models be added to the leaderboard.
147
- ## Please [file an issue](https://github.com/bigcode-project/bigcodebench/issues/new/choose) to add the model to the leaderboard or [start a discussion](https://huggingface.co/spaces/bigcode/bigcodebench-leaderboard/discussions/new) in the community🤗
 
148
  """
 
143
  """
144
 
145
  SUBMISSION_TEXT_3="""
146
+ ## We welcome the community to submit evaluation results or request that new models be added to the leaderboard.
147
+ ## To submit evaluation results, please send your (1) raw generations, (2) sanitized generations, (3) execution logs, and (4) pass rate results to our [email](mailto:terry.[email protected]). We will review and add the results to the leaderboard as soon as possible.
148
+ ## To request evaluation of a new model, please [file an issue](https://github.com/bigcode-project/bigcodebench/issues/new/choose) to add the model to the leaderboard or [start a discussion](https://huggingface.co/spaces/bigcode/bigcodebench-leaderboard/discussions/new) in the community 🤗
149
  """
src/envs.py CHANGED
@@ -4,7 +4,7 @@ from huggingface_hub import HfApi
4
  # clone / pull the lmeh eval data
5
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
6
 
7
- DATA_VERSION = "v0.1.0_hf"
8
 
9
  REPO_ID = "bigcode/bigcodebench-leaderboard"
10
  QUEUE_REPO = "bigcode/bigcodebench-requests"
 
4
  # clone / pull the lmeh eval data
5
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
6
 
7
+ DATA_VERSION = "v0.1.1_hf"
8
 
9
  REPO_ID = "bigcode/bigcodebench-leaderboard"
10
  QUEUE_REPO = "bigcode/bigcodebench-requests"