File size: 7,868 Bytes
36e34dd
 
 
 
 
 
7bdce7c
36e34dd
 
 
195e9fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36e34dd
 
 
 
52124d0
909894a
52124d0
36e34dd
 
52124d0
36e34dd
52124d0
36e34dd
 
52124d0
36e34dd
52124d0
36e34dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7bdce7c
36e34dd
 
0902657
36e34dd
 
 
2a013e3
36e34dd
 
195e9fe
36e34dd
 
 
 
195e9fe
36e34dd
 
 
2a013e3
36e34dd
00cb9ee
 
195e9fe
 
36e34dd
 
 
 
 
 
 
 
 
 
 
f834d8e
36e34dd
 
 
 
 
 
 
c566e8a
f5efd0f
f834d8e
36e34dd
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# Venus Annotation System
# Author: Du Mingzhe ([email protected])
# Date: 2024-09-25

import uuid
import streamlit as st
import streamlit_ext as ste
from code_editor import code_editor
from datasets import load_dataset, Dataset

# Prompt template used to ask an LLM for test-case helper functions.
# It is filled in with str.format(), which expects three placeholders:
#   {problem_description} - the problem statement,
#   {canonical_solution}  - one reference solution,
#   {lang}                - the programming language name.
# Every literal brace in the template (the JSON skeleton and the code in the
# worked "Example 1") is doubled ({{ / }}) so that str.format() emits it as a
# single brace instead of treating it as a placeholder.
# Because this is a regular (non-raw) string, "\n" inside the example response
# becomes a real newline in the prompt, while "\\n" becomes a literal "\n".
# NOTE(review): the prompt text contains the typos "neccessary" and "Turple";
# they are part of the runtime string and are intentionally left untouched here.
case_generation = """
Given the problem description and the canonical solution, write these functions and return in the given JSON format. Import all neccessary libraries in the code.

Problem Description:
{problem_description}

Canonical Solution:
{canonical_solution}

{{
	"generate_test_case_input": "a {lang} function 'generate_test_case_input() → Turple' that randomly generate a test case input Turple from a reasonable test range. Wrap the test case input in a tuple.", 
	"serialize_input": "a {lang} function 'serialize_input(Turple) → Str' that takes the test case input {lang} Turple, and generates the serialized test case input string.", 
	"deserialize_input": "a {lang} function 'deserialize_input(Str) → Turple' that takes the serialized test case input string, and generate the {lang} test case input Turple.", 
	"serialize_output": "a {lang} function 'serialize_output(Turple) → Str' that takes the test case output {lang} Turple, and generates the serialized test case output string.", 
	"deserialize_output": "a {lang} function 'deserialize_output(Str) → Turple' that takes the serialized test case output string, and generate the {lang} test case output Turple.", 
	"entry_point": "the entry point function name of the canonical solution"
}}

Example 1:
Problem Description:
<p>Given an array of integers <code>nums</code>&nbsp;and an integer <code>target</code>, return <em>indices of the two numbers such that they add up to <code>target</code></em>.</p> <p>You may assume that each input would have <strong><em>exactly</em> one solution</strong>, and you may not use the <em>same</em> element twice.</p> <p>You can return the answer in any order.</p> <p>&nbsp;</p> <p><strong class="example">Example 1:</strong></p> <pre> <strong>Input:</strong> nums = [2,7,11,15], target = 9 <strong>Output:</strong> [0,1] <strong>Explanation:</strong> Because nums[0] + nums[1] == 9, we return [0, 1]. </pre> <p><strong class="example">Example 2:</strong></p> <pre> <strong>Input:</strong> nums = [3,2,4], target = 6 <strong>Output:</strong> [1,2] </pre> <p><strong class="example">Example 3:</strong></p> <pre> <strong>Input:</strong> nums = [3,3], target = 6 <strong>Output:</strong> [0,1] </pre> <p>&nbsp;</p> <p><strong>Constraints:</strong></p> <ul> <li><code>2 &lt;= nums.length &lt;= 10<sup>4</sup></code></li> <li><code>-10<sup>9</sup> &lt;= nums[i] &lt;= 10<sup>9</sup></code></li> <li><code>-10<sup>9</sup> &lt;= target &lt;= 10<sup>9</sup></code></li> <li><strong>Only one valid answer exists.</strong></li> </ul> <p>&nbsp;</p> <strong>Follow-up:&nbsp;</strong>Can you come up with an algorithm that is less than <code>O(n<sup>2</sup>)</code><font face="monospace">&nbsp;</font>time complexity?

Canonical Solution:
class Solution:
    def twoSum(self, nums: List[int], target: int) -> List[int]:
        num_map = {{}}
        for i, num in enumerate(nums):
            complement = target - num
            if complement in num_map:
                return [num_map[complement], i]
            num_map[num] = i

Response:
{{
    "generate_test_case_input": "import random\nfrom typing import List, Tuple\n\ndef generate_test_case_input() -> Tuple[List[int], int]:\n    length = random.randint(2, 10000)\n    nums = [random.randint(-10**9, 10**9) for _ in range(length)]\n    idx1, idx2 = random.sample(range(length), 2)\n    target = nums[idx1] + nums[idx2]\n    return nums, target",
    "serialize_input": "from typing import List, Tuple\n\ndef serialize_input(input: Tuple[List[int], int]) -> str:\n    nums, target = input\n    return f'{{nums}}\\n{{target}}'\n",
    "deserialize_input": "from typing import List, Tuple\n\ndef deserialize_input(serialized: str) -> Tuple[List[int], int]:\n    parts = serialized.strip().split('\\n')\n    nums = eval(parts[0])\n    target = int(parts[1])\n    return nums, target\n",
    "serialize_output": "from typing import List\n\ndef serialize_output(output: List[int]) -> str:\n    return str(output)\n",
    "deserialize_output": "from typing import List\n\ndef deserialize_output(serialized: str) -> List[int]:\n    return eval(serialized)\n",
    "entry_point": "twoSum"
}}
"""


st.title(":blue[Venus] Annotation System 🪐")

# Step 1: Load the problem set
# This selector chooses the programming language (it is passed to
# load_dataset() below as the dataset configuration name), so it is labelled
# as a language picker rather than a problem picker.
language = ste.selectbox("Select a language here", ['python3', 'cpp', 'rust', 'javascript', 'golang', 'java'])
st.write(f"Ok! let's go with [{language}]")

# The datasets and the candidate list are cached in st.session_state so that
# Streamlit reruns do not reload them. The cache is only valid for the language
# it was built for: drop it whenever the user picks a different language,
# otherwise the first language's problems would be shown forever.
if st.session_state.get("loaded_language") != language:
    for cache_key in ("raw_ds", "case_ds", "candidates"):
        if cache_key in st.session_state:
            del st.session_state[cache_key]
    st.session_state["loaded_language"] = language

my_bar = st.progress(0, text="Loading the problem set...")

# Problems together with their solutions.
my_bar.progress(10, text=f"Loading [Elfsong/Venus]-[{language}] datasets...")
if "raw_ds" not in st.session_state:
    st.session_state["raw_ds"] = load_dataset("Elfsong/Venus", language)
raw_ds = st.session_state["raw_ds"]

# Problems that already have test cases.
my_bar.progress(55, text=f"Loading [Elfsong/venus_case]-[{language}] datasets...")
if "case_ds" not in st.session_state:
    st.session_state["case_ds"] = load_dataset("Elfsong/venus_case", language)
case_ds = st.session_state["case_ds"]

# Keep only the problems whose question_id is not yet present in the case set.
my_bar.progress(90, text="Filtering out the cases that already exist...")
if "candidates" not in st.session_state:
    case_ds_ids = set(case_ds['train']['question_id'])
    # Iterate the rows directly: each row is fetched once instead of being
    # looked up by index twice.
    st.session_state["candidates"] = [
        row for row in raw_ds['train'] if row['question_id'] not in case_ds_ids
    ]
candidates = st.session_state["candidates"]

my_bar.progress(100, text="System Initialized Successfully 🚀")
    
# Step 2: Select the problem
# Map a human-readable label ("<question_id>.<name> [<DIFFICULTY>]") to the
# corresponding problem record; the labels are the selectbox options.
candidates_dict = {
    f"{candidate['question_id']}.{candidate['name']} [{str(candidate['difficulty']).upper()}]": candidate
    for candidate in candidates
}

# With no candidates the selectbox has nothing to return and the lookup below
# would fail with a KeyError, so end this script run with a message instead.
if not candidates_dict:
    st.info("No problems are left to annotate for this language.")
    st.stop()

option = ste.selectbox("Select a problem here", list(candidates_dict))
example = candidates_dict[option]

tab1, tab2, tab3, tab4 = st.tabs(["Problem Description", "Canonical Solution", "Prompt","Test Cases Generator"])

# Tab 1: the problem statement (stored as HTML in example['content']).
with tab1:
    st.html(example['content'])

# Tab 2: show at most three solutions from example['rt_list'] whose code
# contains the text "Solution", and collect them for the prompt in tab 3.
with tab2:
    canonical_solutions = []
    for solution in example['rt_list']:
        if len(canonical_solutions) >= 3:
            break
        # NOTE(review): this substring filter presumably targets solutions
        # wrapped in a `Solution` class/impl; solutions written without that
        # identifier are skipped - confirm this is intended for every language.
        if "Solution" not in solution['code']:
            continue
        canonical_solutions.append(solution['code'])
        st.write(f"Canonical Solution {len(canonical_solutions)}")
        st.code(solution['code'])
    if not canonical_solutions:
        st.warning("No canonical solution is available for this problem.")

# Tab 3: the prompt built from the problem statement and the first collected
# canonical solution.
with tab3:
    if canonical_solutions:
        prompt = case_generation.format(problem_description=example['content'], canonical_solution=canonical_solutions[0], lang=language)
        # NOTE(review): the prompt is mostly plain text but is rendered as
        # HTML here - confirm that this is the intended presentation.
        st.html(prompt)
    else:
        # Indexing canonical_solutions[0] on an empty list would raise
        # IndexError and abort the whole page, so report the problem instead.
        prompt = None
        st.warning("Cannot build the prompt: no canonical solution is available for this problem.")
    
# Tab 4: code editor in which the annotator writes the test-case functions,
# plus the upload of the submitted code to the Hugging Face Hub.
with tab4:
    # A single "Submit" button rendered inside the editor; pressing it makes
    # the editor return a response whose 'type' is 'submit'.
    editor_buttons = [{
        "name": "Submit", 
        "feather": "Play",
        "primary": True, 
        "hasText": True, 
        "showWithIcon": True, 
        "commands": ["submit"], 
        "style": {"bottom": "0.44rem","right": "0.4rem"}
    }]
    # Skeleton shown in the editor. The first function is named
    # generate_test_case_input to match the name requested by the prompt
    # template (it used to be generate_test_cases).
    predefined_code = "def generate_test_case_input():\n\tpass\n\ndef serialize_input():\n\tpass\n\ndef deserialize_input():\n\tpass\n\ndef serialize_output():\n\tpass\n\ndef deserialize_output():\n\tpass"
    response_dict = code_editor(predefined_code, lang="python", height=20, options={"wrap": False}, buttons=editor_buttons)
    st.write("Click 'Submit' bottom right to upload your functions.")
    if response_dict['type'] == 'submit':
        submitted_code = response_dict['text']
        # The editor keeps returning its last response on later reruns (for
        # example after another problem is selected). Remember which response
        # was already uploaded so it is not pushed a second time, possibly
        # under a different question_id.
        # NOTE(review): relies on the editor response carrying an 'id' that
        # changes per submit; if it is absent, only the text is compared.
        submission_token = (response_dict.get('id'), submitted_code)

        if not submitted_code.strip() or submitted_code.strip() == predefined_code.strip():
            # Nothing useful to upload: empty editor or untouched skeleton.
            st.warning("Please fill in the functions before submitting.")
        elif st.session_state.get("last_submission_token") == submission_token:
            st.info("This submission was already uploaded. Submit again from the editor to upload a new one.")
        else:
            new_ds = Dataset.from_list([{
                "question_id": example['question_id'],
                "test_case_functions": submitted_code,
            }])

            # Each upload goes to its own dataset configuration named
            # "<language>-<question_id>-<uuid>".
            ds_name = str(uuid.uuid1())
            qid = example['question_id']
            new_ds.push_to_hub("Elfsong/Venus_Anotation", f'{language}-{qid}-{ds_name}')
            # Record the token only after a successful push so that a failed
            # upload can be retried.
            st.session_state["last_submission_token"] = submission_token
            st.divider()
            st.write("Thanks for your contribution! 🌟")