# Venus Annotation System
# Author: Du Mingzhe (mingzhe@nus.edu.sg)
# Date: 2024-09-25
"""Streamlit app for annotating Venus problems with test-case helper functions.

The script lets an annotator pick a language and a problem, shows the problem
description, up to three canonical solutions and a ready-made LLM prompt, and
uploads the functions typed into the code editor to the Hugging Face Hub.
"""

import uuid

import streamlit as st
import streamlit_ext as ste
from code_editor import code_editor
from datasets import load_dataset, Dataset

# Prompt template shown in the "Prompt" tab. It is filled with ``str.format``,
# so literal braces are doubled and only {problem_description},
# {canonical_solution} and {lang} are substituted.
case_generation = """
Given the problem description and the canonical solution, write these functions and return in the given JSON format. Import all neccessary libraries in the code.

Problem Description:
{problem_description}

Canonical Solution:
{canonical_solution}

{{
    "generate_test_case_input": "a {lang} function 'generate_test_case_input() → Turple' that randomly generate a test case input Turple from a reasonable test range. Wrap the test case input in a tuple.",
    "serialize_input": "a {lang} function 'serialize_input(Turple) → Str' that takes the test case input {lang} Turple, and generates the serialized test case input string.",
    "deserialize_input": "a {lang} function 'deserialize_input(Str) → Turple' that takes the serialized test case input string, and generate the {lang} test case input Turple.",
    "serialize_output": "a {lang} function 'serialize_output(Turple) → Str' that takes the test case output {lang} Turple, and generates the serialized test case output string.",
    "deserialize_output": "a {lang} function 'deserialize_output(Str) → Turple' that takes the serialized test case output string, and generate the {lang} test case output Turple.",
    "entry_point": "the entry point function name of the canonical solution"
}}

Example 1:
Problem Description:

Given an array of integers nums and an integer target, return indices of the two numbers such that they add up to target.

You may assume that each input would have exactly one solution, and you may not use the same element twice.

You can return the answer in any order.

 

Example 1:

 Input: nums = [2,7,11,15], target = 9 Output: [0,1] Explanation: Because nums[0] + nums[1] == 9, we return [0, 1]. 

Example 2:

 Input: nums = [3,2,4], target = 6 Output: [1,2] 

Example 3:

 Input: nums = [3,3], target = 6 Output: [0,1] 

 

Constraints:

 

Follow-up: Can you come up with an algorithm that is less than O(n2) time complexity?

Canonical Solution:
class Solution:
    def twoSum(self, nums: List[int], target: int) -> List[int]:
        num_map = {{}}
        for i, num in enumerate(nums):
            complement = target - num
            if complement in num_map:
                return [num_map[complement], i]
            num_map[num] = i

Response:
{{
    "generate_test_case_input": "import random\nfrom typing import List, Tuple\n\ndef generate_test_case_input() -> Tuple[List[int], int]:\n length = random.randint(2, 10000)\n nums = [random.randint(-10**9, 10**9) for _ in range(length)]\n idx1, idx2 = random.sample(range(length), 2)\n target = nums[idx1] + nums[idx2]\n return nums, target",
    "serialize_input": "from typing import List, Tuple\n\ndef serialize_input(input: Tuple[List[int], int]) -> str:\n nums, target = input\n return f'{{nums}}\\n{{target}}'\n",
    "deserialize_input": "from typing import List, Tuple\n\ndef deserialize_input(serialized: str) -> Tuple[List[int], int]:\n parts = serialized.strip().split('\\n')\n nums = eval(parts[0])\n target = int(parts[1])\n return nums, target\n",
    "serialize_output": "from typing import List\n\ndef serialize_output(output: List[int]) -> str:\n return str(output)\n",
    "deserialize_output": "from typing import List\n\ndef deserialize_output(serialized: str) -> List[int]:\n return eval(serialized)\n",
    "entry_point": "twoSum"
}}
"""

# Languages offered in the first dropdown; each is passed to ``load_dataset``
# as the dataset configuration name.
SUPPORTED_LANGUAGES = ['python3', 'cpp', 'rust', 'javascript', 'golang', 'java']

# Session-state entries that hold per-language data.
LANGUAGE_SCOPED_KEYS = ("raw_ds", "case_ds", "candidates")

# Upper bound on the number of canonical solutions listed in the second tab.
MAX_CANONICAL_SOLUTIONS = 3


def _session_cached(key, factory):
    """Return ``st.session_state[key]``, computing it with ``factory()`` once.

    Streamlit re-executes the whole script on every interaction; keeping the
    value in the session state avoids repeating the work on each rerun.
    """
    if key not in st.session_state:
        st.session_state[key] = factory()
    return st.session_state[key]


def _build_candidates(raw_ds, case_ds):
    """Return the rows of ``raw_ds['train']`` whose question_id is not in ``case_ds['train']``."""
    covered_ids = set(case_ds['train']['question_id'])
    # Iterate the rows once instead of indexing the dataset twice per row.
    return [row for row in raw_ds['train'] if row['question_id'] not in covered_ids]


st.title(":blue[Venus] Annotation System 🪐")

# Step 1: Load the problem set
language = ste.selectbox("Select a language here", SUPPORTED_LANGUAGES)
st.write(f"Ok! let's go with [{language}]")

my_bar = st.progress(0, text="Loading the problem set...")

# The cached datasets belong to one language. Drop them when the selection
# changes, otherwise the app keeps serving the first language's data and
# uploads annotations under a mismatching configuration name.
if st.session_state.get("loaded_language") != language:
    for stale_key in LANGUAGE_SCOPED_KEYS:
        if stale_key in st.session_state:
            del st.session_state[stale_key]
    st.session_state["loaded_language"] = language

my_bar.progress(10, text=f"Loading [Elfsong/Venus]-[{language}] datasets...")
raw_ds = _session_cached("raw_ds", lambda: load_dataset("Elfsong/Venus", language))

my_bar.progress(55, text=f"Loading [Elfsong/venus_case]-[{language}] datasets...")
case_ds = _session_cached("case_ds", lambda: load_dataset("Elfsong/venus_case", language))

my_bar.progress(90, text="Filtering out the cases that already exist...")
candidates = _session_cached("candidates", lambda: _build_candidates(raw_ds, case_ds))

my_bar.progress(100, text="System Initialized Successfully 🚀")

# Step 2: Select the problem
# Map the label shown in the dropdown ("<id>.<name> [<DIFFICULTY>]") to its row.
candidates_dict = {
    f"{candidate['question_id']}.{candidate['name']} [{str(candidate['difficulty']).upper()}]": candidate
    for candidate in candidates
}

if not candidates_dict:
    # Nothing to pick: the lookup below would fail on an empty selection.
    st.info(f"No problems left to annotate for [{language}].")
    st.stop()

option = ste.selectbox("Select a problem here", list(candidates_dict))
example = candidates_dict[option]

tab1, tab2, tab3, tab4 = st.tabs(["Problem Description", "Canonical Solution", "Prompt", "Test Cases Generator"])

with tab1:
    st.html(example['content'])

with tab2:
    # Only solutions whose code mentions "Solution" are treated as canonical.
    canonical_solutions = []
    for solution in example['rt_list']:
        if "Solution" not in solution['code']:
            continue
        st.write(f"Canonical Solution {len(canonical_solutions) + 1}")
        st.code(solution['code'])
        canonical_solutions.append(solution['code'])
        if len(canonical_solutions) >= MAX_CANONICAL_SOLUTIONS:
            break

with tab3:
    if canonical_solutions:
        prompt = case_generation.format(
            problem_description=example['content'],
            canonical_solution=canonical_solutions[0],
            lang=language,
        )
        st.html(prompt)
    else:
        # Without this guard an IndexError would abort the script before the
        # editor tab is rendered.
        st.warning("No canonical solution found for this problem, so the prompt cannot be generated.")

with tab4:
    editor_buttons = [{
        "name": "Submit",
        "feather": "Play",
        "primary": True,
        "hasText": True,
        "showWithIcon": True,
        "commands": ["submit"],
        "style": {"bottom": "0.44rem", "right": "0.4rem"}
    }]

    # NOTE(review): the skeleton names the generator `generate_test_cases`
    # while the prompt asks for `generate_test_case_input` - confirm which
    # name downstream consumers expect before changing either.
    predefined_code = "def generate_test_cases():\n\tpass\n\ndef serialize_input():\n\tpass\n\ndef deserialize_input():\n\tpass\n\ndef serialize_output():\n\tpass\n\ndef deserialize_output():\n\tpass"

    response_dict = code_editor(predefined_code, lang="python", height=20, options={"wrap": False}, buttons=editor_buttons)
    st.write("Click 'Submit' bottom right to upload your functions.")

    if response_dict['type'] == 'submit':
        new_ds = Dataset.from_list([{
            "question_id": example['question_id'],
            "test_case_functions": response_dict['text'],
        }])
        # Each submission is pushed as its own configuration, made unique by a UUID.
        ds_name = str(uuid.uuid1())
        qid = example['question_id']
        new_ds.push_to_hub("Elfsong/Venus_Anotation", f'{language}-{qid}-{ds_name}')
        st.divider()
        st.write("Thanks for your contribution! 🌟")