Spaces:
Sleeping
Sleeping
# Venus Annotation System
# Author: Du Mingzhe ([email protected])
# Date: 2024-09-25
import uuid | |
import streamlit as st | |
import streamlit_ext as ste | |
from code_editor import code_editor | |
from datasets import load_dataset, Dataset | |
# Prompt template asking an LLM to write test-case helper functions for a
# problem. It is rendered with str.format(), so the only placeholders are
# {problem_description}, {canonical_solution} and {lang}; every literal brace
# in the text is doubled ({{ / }}).
#
# NOTE(review): this is not a raw string, so each "\n" inside the example
# response becomes a real line break in the rendered prompt, while "\\n"
# renders as a literal backslash-n. Kept exactly as it was.
case_generation = """
Given the problem description and the canonical solution, write these functions and return in the given JSON format. Import all necessary libraries in the code.
Problem Description:
{problem_description}
Canonical Solution:
{canonical_solution}
{{
"generate_test_case_input": "a {lang} function 'generate_test_case_input() → Tuple' that randomly generate a test case input Tuple from a reasonable test range. Wrap the test case input in a tuple.",
"serialize_input": "a {lang} function 'serialize_input(Tuple) → Str' that takes the test case input {lang} Tuple, and generates the serialized test case input string.",
"deserialize_input": "a {lang} function 'deserialize_input(Str) → Tuple' that takes the serialized test case input string, and generate the {lang} test case input Tuple.",
"serialize_output": "a {lang} function 'serialize_output(Tuple) → Str' that takes the test case output {lang} Tuple, and generates the serialized test case output string.",
"deserialize_output": "a {lang} function 'deserialize_output(Str) → Tuple' that takes the serialized test case output string, and generate the {lang} test case output Tuple.",
"entry_point": "the entry point function name of the canonical solution"
}}
Example 1:
Problem Description:
<p>Given an array of integers <code>nums</code> and an integer <code>target</code>, return <em>indices of the two numbers such that they add up to <code>target</code></em>.</p> <p>You may assume that each input would have <strong><em>exactly</em> one solution</strong>, and you may not use the <em>same</em> element twice.</p> <p>You can return the answer in any order.</p> <p> </p> <p><strong class="example">Example 1:</strong></p> <pre> <strong>Input:</strong> nums = [2,7,11,15], target = 9 <strong>Output:</strong> [0,1] <strong>Explanation:</strong> Because nums[0] + nums[1] == 9, we return [0, 1]. </pre> <p><strong class="example">Example 2:</strong></p> <pre> <strong>Input:</strong> nums = [3,2,4], target = 6 <strong>Output:</strong> [1,2] </pre> <p><strong class="example">Example 3:</strong></p> <pre> <strong>Input:</strong> nums = [3,3], target = 6 <strong>Output:</strong> [0,1] </pre> <p> </p> <p><strong>Constraints:</strong></p> <ul> <li><code>2 <= nums.length <= 10<sup>4</sup></code></li> <li><code>-10<sup>9</sup> <= nums[i] <= 10<sup>9</sup></code></li> <li><code>-10<sup>9</sup> <= target <= 10<sup>9</sup></code></li> <li><strong>Only one valid answer exists.</strong></li> </ul> <p> </p> <strong>Follow-up: </strong>Can you come up with an algorithm that is less than <code>O(n<sup>2</sup>)</code><font face="monospace"> </font>time complexity?
Canonical Solution:
class Solution:
    def twoSum(self, nums: List[int], target: int) -> List[int]:
        num_map = {{}}
        for i, num in enumerate(nums):
            complement = target - num
            if complement in num_map:
                return [num_map[complement], i]
            num_map[num] = i
Response:
{{
"generate_test_case_input": "import random\nfrom typing import List, Tuple\n\ndef generate_test_case_input() -> Tuple[List[int], int]:\n length = random.randint(2, 10000)\n nums = [random.randint(-10**9, 10**9) for _ in range(length)]\n idx1, idx2 = random.sample(range(length), 2)\n target = nums[idx1] + nums[idx2]\n return nums, target",
"serialize_input": "from typing import List, Tuple\n\ndef serialize_input(input: Tuple[List[int], int]) -> str:\n nums, target = input\n return f'{{nums}}\\n{{target}}'\n",
"deserialize_input": "from typing import List, Tuple\n\ndef deserialize_input(serialized: str) -> Tuple[List[int], int]:\n parts = serialized.strip().split('\\n')\n nums = eval(parts[0])\n target = int(parts[1])\n return nums, target\n",
"serialize_output": "from typing import List\n\ndef serialize_output(output: List[int]) -> str:\n return str(output)\n",
"deserialize_output": "from typing import List\n\ndef deserialize_output(serialized: str) -> List[int]:\n return eval(serialized)\n",
"entry_point": "twoSum"
}}
"""
st.title(":blue[Venus] Annotation System 🪐")

# Step 1: Load the problem set
# The label says "language" so it no longer duplicates the label of the
# problem selectbox further down.
language = ste.selectbox("Select a language here", ['python3', 'cpp', 'rust', 'javascript', 'golang', 'java'])
st.write(f"Ok! let's go with [{language}]")

# The datasets are cached in the session so reruns do not reload them. The
# cache is tied to the selected language: when the language changes, the
# stale entries are dropped so the new language's data is loaded.
if st.session_state.get("loaded_language") != language:
    for stale_key in ("raw_ds", "case_ds", "candidates"):
        st.session_state.pop(stale_key, None)
    st.session_state["loaded_language"] = language

my_bar = st.progress(0, text="Loading the problem set...")

my_bar.progress(10, text=f"Loading [Elfsong/Venus]-[{language}] datasets...")
if "raw_ds" not in st.session_state:
    st.session_state["raw_ds"] = load_dataset("Elfsong/Venus", language)
raw_ds = st.session_state["raw_ds"]

my_bar.progress(55, text=f"Loading [Elfsong/venus_case]-[{language}] datasets...")
if "case_ds" not in st.session_state:
    st.session_state["case_ds"] = load_dataset("Elfsong/venus_case", language)
case_ds = st.session_state["case_ds"]

my_bar.progress(90, text="Filtering out the cases that already exist...")
if "candidates" not in st.session_state:
    # Keep only problems whose question_id has no entry in the case dataset.
    case_ds_ids = set(case_ds['train']['question_id'])
    st.session_state["candidates"] = [
        row for row in raw_ds['train'] if row['question_id'] not in case_ds_ids
    ]
candidates = st.session_state["candidates"]

my_bar.progress(100, text="System Initialized Successfully 🚀")
# Step 2: Select the problem
# Map a display name ("<id>.<name> [DIFFICULTY]") to its problem record.
candidates_dict = {
    f"{candidate['question_id']}.{candidate['name']} [{str(candidate['difficulty']).upper()}]": candidate
    for candidate in candidates
}

# With nothing left to annotate there is no option to pick, and the lookup
# below would fail; stop the script run with a message instead.
if not candidates_dict:
    st.warning("No problems left to annotate for this language.")
    st.stop()

option = ste.selectbox("Select a problem here", list(candidates_dict))
example = candidates_dict[option]
tab1, tab2, tab3, tab4 = st.tabs(["Problem Description", "Canonical Solution", "Prompt","Test Cases Generator"])

# Tab 1: the problem statement, rendered as HTML.
with tab1:
    st.html(example['content'])

# Tab 2: show up to three solutions whose code contains "Solution".
with tab2:
    solutions_displayed = 0
    canonical_solutions = []
    for solution in example['rt_list']:
        if "Solution" not in solution['code']:
            continue
        st.write(f"Canonical Solution {solutions_displayed + 1}")
        st.code(solution['code'])
        canonical_solutions.append(solution['code'])
        solutions_displayed += 1
        if solutions_displayed >= 3:
            break
    if not canonical_solutions:
        st.warning("No canonical solution found for this problem.")

# Tab 3: the generation prompt built from the first displayed solution.
with tab3:
    if canonical_solutions:
        prompt = case_generation.format(
            problem_description=example['content'],
            canonical_solution=canonical_solutions[0],
            lang=language,
        )
        st.html(prompt)
    else:
        # Without a solution, indexing canonical_solutions[0] would raise
        # IndexError, so no prompt is built.
        prompt = None
        st.warning("Cannot build the prompt: no canonical solution found for this problem.")
# Tab 4: code editor where the annotator writes the helper functions, plus
# the upload of the submitted text to the Hugging Face Hub.
with tab4:
    editor_buttons = [{
        "name": "Submit",
        "feather": "Play",
        "primary": True,
        "hasText": True,
        "showWithIcon": True,
        "commands": ["submit"],
        "style": {"bottom": "0.44rem", "right": "0.4rem"},
    }]

    # Skeleton shown in the editor. The first function is named
    # generate_test_case_input to match the name the prompt asks for.
    predefined_code = "def generate_test_case_input():\n\tpass\n\ndef serialize_input():\n\tpass\n\ndef deserialize_input():\n\tpass\n\ndef serialize_output():\n\tpass\n\ndef deserialize_output():\n\tpass"
    response_dict = code_editor(predefined_code, lang="python", height=20, options={"wrap": False}, buttons=editor_buttons)

    st.write("Click 'Submit' bottom right to upload your functions.")

    if response_dict['type'] == 'submit':
        submitted_code = response_dict['text']
        if not submitted_code or not submitted_code.strip():
            # Do not upload an empty record.
            st.warning("Nothing to upload: the editor is empty.")
        else:
            qid = example['question_id']
            new_ds = Dataset.from_list([{
                "question_id": qid,
                "test_case_functions": submitted_code,
            }])
            # Each submission is pushed under its own config name:
            # "<language>-<question_id>-<uuid>".
            ds_name = str(uuid.uuid1())
            new_ds.push_to_hub("Elfsong/Venus_Anotation", f'{language}-{qid}-{ds_name}')
            st.divider()
            st.write("Thanks for your contribution! 🌟")