Spaces:
Sleeping
Sleeping
File size: 3,101 Bytes
36e34dd 7bdce7c 36e34dd 7bdce7c 36e34dd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
# Venus Annotation System
# Author: Du Mingzhe ([email protected])
# Date: 2024-09-25
import uuid
import streamlit as st
import streamlit_ext as ste
from code_editor import code_editor
from datasets import load_dataset, Dataset
st.title(":blue[Venus] Annotation System 🪐")
# Step 1: Load the problem set
# Both datasets and the filtered candidate list are stored in
# st.session_state and only built when their key is missing, so the
# loading and filtering below run once per session.
my_bar = st.progress(0, text="Loading the problem set...")

my_bar.progress(10, text="Loading [Elfsong/Venus] datasets...")
if "raw_ds" not in st.session_state:
    st.session_state["raw_ds"] = load_dataset("Elfsong/Venus", "python3")
raw_ds = st.session_state["raw_ds"]

my_bar.progress(55, text="Loading [Elfsong/venus_case] datasets...")
if "case_ds" not in st.session_state:
    st.session_state["case_ds"] = load_dataset("Elfsong/venus_case", "python3")
case_ds = st.session_state["case_ds"]

my_bar.progress(90, text="Filtering out the cases that already exist...")
if "candidates" not in st.session_state:
    # question_ids that already have an entry in the case dataset.
    case_ds_ids = set(case_ds['train']['question_id'])
    # Iterate the rows directly: each row is fetched once instead of being
    # indexed twice per position as `raw_ds['train'][i]`.
    st.session_state["candidates"] = [
        row for row in raw_ds['train']
        if row['question_id'] not in case_ds_ids
    ]
candidates = st.session_state["candidates"]

my_bar.progress(100, text="System Initialized Successfully 🚀")
# Step 2: Select the problem
# Map a human-readable label ("<question_id>.<name> [<DIFFICULTY>]") to the
# full candidate row so the selectbox choice can be resolved back to a row.
candidates_dict = {}
for candidate in candidates:
    difficulty = str(candidate['difficulty']).upper()
    candidate_name = f"{candidate['question_id']}.{candidate['name']} [{difficulty}]"
    candidates_dict[candidate_name] = candidate

# With nothing left to annotate there is no valid option to look up below,
# so show a message and end this script run instead of failing on the lookup.
if not candidates_dict:
    st.info("All problems already have test cases. Nothing left to annotate.")
    st.stop()

option = ste.selectbox("Select a problem here", candidates_dict.keys())
example = candidates_dict[option]
tab1, tab2, tab3 = st.tabs(["Problem Description", "Canonical Solution", "Test Cases Generator"])

# Tab 1: render the problem statement stored in the row's 'content' field.
with tab1:
    st.html(example['content'])

# Tab 2: show at most three solutions whose code mentions "Solution".
with tab2:
    max_shown = 3
    # Lazy generator: entries after the last displayed one are never inspected.
    matching_codes = (
        entry['code'] for entry in example['rt_list']
        if "Solution" in entry['code']
    )
    for rank, code in enumerate(matching_codes, start=1):
        st.write(f"Canonical Solution {rank}")
        st.code(code)
        if rank >= max_shown:
            break
# Tab 3: code editor in which the annotator writes the test-case functions
# and submits them to the Hugging Face hub.
with tab3:
    # Single "Submit" button passed to the editor; the branch below runs when
    # the editor response has type 'submit'.
    editor_buttons = [{
        "name": "Submit",
        "feather": "Play",
        "primary": True,
        "hasText": True,
        "showWithIcon": True,
        "commands": ["submit"],
        "style": {"bottom": "0.44rem", "right": "0.4rem"},
    }]

    # Skeleton the annotator is expected to fill in.
    predefined_code = (
        "def generate_test_cases():\n\tpass\n\n"
        "def serialize_input():\n\tpass\n\n"
        "def deserialize_input():\n\tpass\n\n"
        "def serialize_output():\n\tpass\n\n"
        "def deserialize_output():\n\tpass"
    )
    response_dict = code_editor(
        predefined_code,
        lang="python",
        height=20,
        options={"wrap": False},
        buttons=editor_buttons,
    )

    if response_dict['type'] == 'submit':
        submitted_code = response_dict['text']

        # NOTE(review): the editor's last response appears to persist across
        # Streamlit reruns, so without this check picking another problem
        # after a submit would push the old code again under the new
        # question_id. Presumably the response carries a unique 'id' per
        # event -- confirm against the installed streamlit-code-editor
        # version. If 'id' is absent, every submit is processed as before.
        response_id = response_dict.get('id')
        already_pushed = (
            bool(response_id)
            and st.session_state.get("last_submission_id") == response_id
        )

        stripped_code = submitted_code.strip()
        nothing_written = not stripped_code or stripped_code == predefined_code.strip()

        if already_pushed:
            # Stale response from an earlier click; it was handled already.
            pass
        elif nothing_written:
            st.warning("Please fill in the test case functions before submitting.")
        else:
            new_ds = Dataset.from_list([{
                "question_id": example['question_id'],
                "test_case_functions": submitted_code,
            }])
            # Each submission goes to its own config, named with a fresh UUID.
            ds_name = str(uuid.uuid1())
            # Repo id is kept verbatim (including its spelling).
            new_ds.push_to_hub("Elfsong/Venus_Anotation", f'python3-{ds_name}')
            st.session_state["last_submission_id"] = response_id
            st.write("Thanks for your contribution! 🌟")
|