Spaces:
Sleeping
Sleeping
File size: 7,868 Bytes
36e34dd 7bdce7c 36e34dd 195e9fe 36e34dd 52124d0 909894a 52124d0 36e34dd 52124d0 36e34dd 52124d0 36e34dd 52124d0 36e34dd 52124d0 36e34dd 7bdce7c 36e34dd 0902657 36e34dd 2a013e3 36e34dd 195e9fe 36e34dd 195e9fe 36e34dd 2a013e3 36e34dd 00cb9ee 195e9fe 36e34dd f834d8e 36e34dd c566e8a f5efd0f f834d8e 36e34dd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
# Venus Annotation System
# Author: Du Mingzhe ([email protected])
# Date: 2024-09-25
import uuid
import streamlit as st
import streamlit_ext as ste
from code_editor import code_editor
from datasets import load_dataset, Dataset
# Prompt template used to ask an LLM for the test-case helper functions.
#
# Filled in with str.format(); the placeholders are {problem_description},
# {canonical_solution} and {lang}. Every literal brace is doubled ({{ / }}).
#
# The literal is a raw string on purpose: the few-shot "Response" below is
# written with JSON escapes (\n inside the code strings, \\n for a backslash-n
# that must survive into the generated code). Without the r-prefix Python
# would expand those escapes and the example would stop being valid JSON.
case_generation = r"""
Given the problem description and the canonical solution, write these functions and return in the given JSON format. Import all necessary libraries in the code.
Problem Description:
{problem_description}
Canonical Solution:
{canonical_solution}
{{
"generate_test_case_input": "a {lang} function 'generate_test_case_input() → Tuple' that randomly generate a test case input Tuple from a reasonable test range. Wrap the test case input in a tuple.",
"serialize_input": "a {lang} function 'serialize_input(Tuple) → Str' that takes the test case input {lang} Tuple, and generates the serialized test case input string.",
"deserialize_input": "a {lang} function 'deserialize_input(Str) → Tuple' that takes the serialized test case input string, and generate the {lang} test case input Tuple.",
"serialize_output": "a {lang} function 'serialize_output(Tuple) → Str' that takes the test case output {lang} Tuple, and generates the serialized test case output string.",
"deserialize_output": "a {lang} function 'deserialize_output(Str) → Tuple' that takes the serialized test case output string, and generate the {lang} test case output Tuple.",
"entry_point": "the entry point function name of the canonical solution"
}}
Example 1:
Problem Description:
<p>Given an array of integers <code>nums</code> and an integer <code>target</code>, return <em>indices of the two numbers such that they add up to <code>target</code></em>.</p> <p>You may assume that each input would have <strong><em>exactly</em> one solution</strong>, and you may not use the <em>same</em> element twice.</p> <p>You can return the answer in any order.</p> <p> </p> <p><strong class="example">Example 1:</strong></p> <pre> <strong>Input:</strong> nums = [2,7,11,15], target = 9 <strong>Output:</strong> [0,1] <strong>Explanation:</strong> Because nums[0] + nums[1] == 9, we return [0, 1]. </pre> <p><strong class="example">Example 2:</strong></p> <pre> <strong>Input:</strong> nums = [3,2,4], target = 6 <strong>Output:</strong> [1,2] </pre> <p><strong class="example">Example 3:</strong></p> <pre> <strong>Input:</strong> nums = [3,3], target = 6 <strong>Output:</strong> [0,1] </pre> <p> </p> <p><strong>Constraints:</strong></p> <ul> <li><code>2 <= nums.length <= 10<sup>4</sup></code></li> <li><code>-10<sup>9</sup> <= nums[i] <= 10<sup>9</sup></code></li> <li><code>-10<sup>9</sup> <= target <= 10<sup>9</sup></code></li> <li><strong>Only one valid answer exists.</strong></li> </ul> <p> </p> <strong>Follow-up: </strong>Can you come up with an algorithm that is less than <code>O(n<sup>2</sup>)</code><font face="monospace"> </font>time complexity?
Canonical Solution:
class Solution:
    def twoSum(self, nums: List[int], target: int) -> List[int]:
        num_map = {{}}
        for i, num in enumerate(nums):
            complement = target - num
            if complement in num_map:
                return [num_map[complement], i]
            num_map[num] = i
Response:
{{
"generate_test_case_input": "import random\nfrom typing import List, Tuple\n\ndef generate_test_case_input() -> Tuple[List[int], int]:\n length = random.randint(2, 10000)\n nums = [random.randint(-10**9, 10**9) for _ in range(length)]\n idx1, idx2 = random.sample(range(length), 2)\n target = nums[idx1] + nums[idx2]\n return nums, target",
"serialize_input": "from typing import List, Tuple\n\ndef serialize_input(input: Tuple[List[int], int]) -> str:\n nums, target = input\n return f'{{nums}}\\n{{target}}'\n",
"deserialize_input": "from typing import List, Tuple\n\ndef deserialize_input(serialized: str) -> Tuple[List[int], int]:\n parts = serialized.strip().split('\\n')\n nums = eval(parts[0])\n target = int(parts[1])\n return nums, target\n",
"serialize_output": "from typing import List\n\ndef serialize_output(output: List[int]) -> str:\n return str(output)\n",
"deserialize_output": "from typing import List\n\ndef deserialize_output(serialized: str) -> List[int]:\n return eval(serialized)\n",
"entry_point": "twoSum"
}}
"""
st.title(":blue[Venus] Annotation System 🪐")

# Step 1: Load the problem set
# The user picks a language; the raw problems and the already-annotated cases
# for that language are loaded once and kept in st.session_state so that
# Streamlit reruns do not download them again.
language = ste.selectbox("Select a language here", ['python3', 'cpp', 'rust', 'javascript', 'golang', 'java'])
st.write(f"Ok! let's go with [{language}]")

# The cached entries belong to one language only. When the selection changes,
# drop them so the datasets and the candidate list are rebuilt for the new
# language instead of silently reusing the previous one.
if st.session_state.get("loaded_language") != language:
    for stale_key in ("raw_ds", "case_ds", "candidates"):
        if stale_key in st.session_state:
            del st.session_state[stale_key]
    st.session_state["loaded_language"] = language

my_bar = st.progress(0, text="Loading the problem set...")

my_bar.progress(10, text=f"Loading [Elfsong/Venus]-[{language}] datasets...")
if "raw_ds" not in st.session_state:
    st.session_state["raw_ds"] = load_dataset("Elfsong/Venus", language)
raw_ds = st.session_state["raw_ds"]

my_bar.progress(55, text=f"Loading [Elfsong/venus_case]-[{language}] datasets...")
if "case_ds" not in st.session_state:
    st.session_state["case_ds"] = load_dataset("Elfsong/venus_case", language)
case_ds = st.session_state["case_ds"]

my_bar.progress(90, text="Filtering out the cases that already exist...")
if "candidates" not in st.session_state:
    # Keep only problems whose question_id has no entry in the case dataset.
    case_ds_ids = set(case_ds['train']['question_id'])
    st.session_state["candidates"] = [
        row for row in raw_ds['train'] if row['question_id'] not in case_ds_ids
    ]
candidates = st.session_state["candidates"]

my_bar.progress(100, text="System Initialized Successfully 🚀")
# Step 2: Select the problem
# Map a human-readable label ("<id>.<name> [<DIFFICULTY>]") to its problem row.
candidates_dict = {
    f"{candidate['question_id']}.{candidate['name']} [{str(candidate['difficulty']).upper()}]": candidate
    for candidate in candidates
}

# With nothing left to annotate the selectbox would return None and the lookup
# below would raise KeyError, so end this run with a message instead.
if not candidates_dict:
    st.info("No problems are waiting for annotation in this language.")
    st.stop()

option = ste.selectbox("Select a problem here", list(candidates_dict))
example = candidates_dict[option]
tab1, tab2, tab3, tab4 = st.tabs(["Problem Description", "Canonical Solution", "Prompt","Test Cases Generator"])

# Tab 1: the problem statement (HTML taken from the dataset row).
with tab1:
    st.html(example['content'])

# Tab 2: up to three submissions whose code mentions "Solution".
with tab2:
    canonical_solutions = [
        solution['code'] for solution in example['rt_list'] if "Solution" in solution['code']
    ][:3]
    for rank, solution_code in enumerate(canonical_solutions, start=1):
        st.write(f"Canonical Solution {rank}")
        st.code(solution_code)
    if not canonical_solutions:
        st.warning("No canonical solution is available for this problem.")

# Tab 3: the generation prompt, built from the first canonical solution.
with tab3:
    if canonical_solutions:
        prompt = case_generation.format(problem_description=example['content'], canonical_solution=canonical_solutions[0], lang=language)
        st.html(prompt)
    else:
        # Without a solution there is nothing to put in the template; indexing
        # the empty list would raise IndexError.
        st.warning("The prompt cannot be built without a canonical solution.")
# Tab 4: code editor where the annotator writes the helper functions, plus the
# upload of the submitted text to the Hub.
with tab4:
    editor_buttons = [{
        "name": "Submit",
        "feather": "Play",
        "primary": True,
        "hasText": True,
        "showWithIcon": True,
        "commands": ["submit"],
        "style": {"bottom": "0.44rem","right": "0.4rem"}
    }]

    # NOTE(review): the stub names the generator 'generate_test_cases' while the
    # prompt template asks for 'generate_test_case_input' — confirm which name
    # the downstream consumers expect before changing either.
    predefined_code = "def generate_test_cases():\n\tpass\n\ndef serialize_input():\n\tpass\n\ndef deserialize_input():\n\tpass\n\ndef serialize_output():\n\tpass\n\ndef deserialize_output():\n\tpass"
    response_dict = code_editor(predefined_code, lang="python", height=20, options={"wrap": False}, buttons=editor_buttons)
    st.write("Click 'Submit' bottom right to upload your functions.")

    if response_dict['type'] == 'submit':
        submitted_code = response_dict['text']
        if not submitted_code.strip() or submitted_code.strip() == predefined_code.strip():
            # An empty editor or the untouched stub carries no annotation, so
            # do not upload it.
            st.warning("Please fill in the functions before submitting.")
        else:
            qid = example['question_id']
            new_ds = Dataset.from_list([{
                "question_id": qid,
                "test_case_functions": submitted_code,
            }])
            # uuid4 is random; uuid1 would embed this host's MAC address and a
            # timestamp in the public config name.
            ds_name = str(uuid.uuid4())
            new_ds.push_to_hub("Elfsong/Venus_Anotation", f'{language}-{qid}-{ds_name}')
            st.divider()
            st.write("Thanks for your contribution! 🌟")
|