Spaces:
Runtime error
Runtime error
import json | |
import re | |
from datetime import datetime | |
import datacards | |
from datacards import ( | |
considerations_page, | |
considerations_summary, | |
context_page, | |
context_summary, | |
curation_page, | |
curation_summary, | |
gem_page, | |
gem_summary, | |
overview_page, | |
overview_summary, | |
results_page, | |
results_summary, | |
) | |
import streamlit as st | |
##################
## streamlit
##################
# Browser-tab title, icon and layout for the whole app.
st.set_page_config(
    page_title="GEM Data Card Input Form",
    page_icon="https://avatars.githubusercontent.com/u/72612128",
    layout="wide",
    initial_sidebar_state="auto",
)

# Markdown shown at the top of the sidebar on every page.
page_description = """
# GEM Data Card Input Form
This application was designed to support the GEM v2 data hackathon.
It allows users to fill out all of the information going into the data documentation when submitting a new dataset.
Use the left sidebar to navigate:
- "**Dataset at a Glance**" shows selected information and tracks progress
- Each of the "**Section:**" pages opens a form for a specific section of the card
- Go to "**Review and Save**" when you are done to save your data card
"""

# Total number of fields across all card sections: each section module
# exposes its own N_FIELDS and the values are added together.
_N_FIELDS = sum(
    section.N_FIELDS
    for section in (
        datacards.considerations,
        datacards.context,
        datacards.curation,
        datacards.gem,
        datacards.overview,
        datacards.results,
    )
)
def _restore_save_state(saved, prefix=()):
    """Copy the leaves of a loaded card dict into ``st.session_state.save_state``.

    Nested dictionaries are walked recursively; each leaf is stored under the
    key made by joining its path of dictionary keys with ``"_"``.  A leaf is
    skipped when ``save_state`` already has a truthy ``"<key>_filled"`` entry.

    Args:
        saved: The (sub-)structure being restored; a dict or a leaf value.
        prefix: Tuple of dictionary keys leading to ``saved``.
    """
    if isinstance(saved, dict):
        for key, value in saved.items():
            _restore_save_state(value, prefix + (key,))
        return
    state_key = "_".join(prefix)
    if not st.session_state.save_state.get(state_key + "_filled", False):
        st.session_state.save_state[state_key] = saved


def main():
    """Run the app: set up session state, draw the sidebar, render a page.

    The sidebar holds the description, the navigation menu and a
    "Save or Load your work" expander that can download the current
    ``card_dict`` as JSON or restore it from an uploaded JSON file.
    """
    if "save_state" not in st.session_state:
        st.session_state.save_state = {}
    if "card_dict" not in st.session_state:
        st.session_state.card_dict = {}
    st.sidebar.markdown(page_description, unsafe_allow_html=True)
    pages = {
        "Dataset at a Glance": glance_page,
        "Section: Dataset Overview": overview_page,
        "Section: Dataset Curation": curation_page,
        "Section: Dataset in GEM": gem_page,
        "Section: Previous Results": results_page,
        "Section: Considerations for Using Data": considerations_page,
        "Section: Broader Social Context": context_page,
        "Review and Save": review_page,
    }
    app_mode = st.sidebar.radio(
        label="Navigation menu:",
        options=list(pages.keys()),
        index=0,
    )
    with st.sidebar.expander("Save or Load your work"):
        # Save intermediary state
        # Produces e.g. "01_02_2023_10_11_12", safe to embed in a file name.
        friendly_date = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
        st.download_button(
            label="Save Intermediary Card State",
            data=json.dumps(st.session_state.get("card_dict", {}), indent=2),
            file_name=f"data_card_{friendly_date}.json",
        )
        # load from file
        uploaded_file = st.file_uploader(
            label="Load Intermediary Card State",
        )
        if uploaded_file is not None:
            # json.load()/json.loads() no longer accept an ``encoding``
            # keyword (removed in Python 3.9); they detect the encoding of
            # bytes input themselves.  getvalue() returns the whole upload
            # regardless of the current read position.
            try:
                pre_card_dict = json.loads(uploaded_file.getvalue())
            except (json.JSONDecodeError, UnicodeDecodeError) as err:
                st.error(f"Could not read the uploaded card state: {err}")
                pre_card_dict = None
            if pre_card_dict is not None and not isinstance(pre_card_dict, dict):
                # The rest of the app calls .get() on card_dict.
                st.error("The uploaded card state must be a JSON object.")
                pre_card_dict = None
            if pre_card_dict is not None:
                # TODO: don't overwrite by default
                st.session_state.card_dict = pre_card_dict
                _restore_save_state(st.session_state.card_dict)
                # TODO: delete file after use
    st.markdown("#### GEM Data Card Input Form")
    pages[app_mode]()
def glance_page():
    """Render the "Dataset at a Glance" page.

    Shows a bullet list of selected answers read from
    ``st.session_state.save_state``, a count of filled fields from
    ``st.session_state.card_dict``, and the six section summaries laid out
    in two columns.
    """
    overview_hint = "*Go to `Section: Dataset Overview` to fill in*"
    curation_hint = "*Go to `Section: Dataset Curation` to fill in*"
    # (bullet title, save_state key, placeholder shown while unanswered)
    headline_fields = (
        ("Dataset Website", "overview_where_website", overview_hint),
        ("Dataset Contact", "overview_where_contact-name", overview_hint),
        ("Dataset License", "overview_languages_license", overview_hint),
        ("Multilingual Dataset", "overview_languages_is-multilingual", overview_hint),
        ("Dataset Languages", "overview_languages_language-names", overview_hint),
        ("Dataset Supported Task", "overview_languages_task", overview_hint),
        ("Communicative Goal", "overview_languages_communicative", overview_hint),
        ("Language Data Origin", "curation_language_obtained", curation_hint),
        ("Annotation Data Origin", "curation_annotations_obtained", curation_hint),
        ("Likelihood of PII", "curation_pii_has-pii", curation_hint),
    )
    with st.expander("Dataset at a Glance", expanded=True):
        answers = st.session_state.save_state
        bullets = [
            f"- **{title}**: {answers.get(state_key, hint)}\n"
            for title, state_key, hint in headline_fields
        ]
        st.markdown("".join(bullets) + "---\n")

        # Add up the sizes of the containers stored two levels below the
        # top of card_dict.
        num_fields = 0
        for section in st.session_state.get("card_dict", {}).values():
            for subsection in section.values():
                num_fields += len(subsection)
        st.markdown(f"You have currently filled out **{num_fields} of {_N_FIELDS} required fields** in the data card.")

    # Left column: overview, curation, GEM; right column: results,
    # considerations, context.
    column_contents = (
        (overview_summary, curation_summary, gem_summary),
        (results_summary, considerations_summary, context_summary),
    )
    for column, summaries in zip(st.columns(2), column_contents):
        with column:
            for render_summary in summaries:
                render_summary()
def review_page():
    """Render the "Review and Save" page.

    Asks for a dataset name, offers the current ``card_dict`` as a JSON
    download named ``<sanitised name>-<timestamp>.json`` once a usable name
    has been entered, and always displays the card contents below.
    """
    dataset_name = st.text_input(
        label="Enter dataset name here",
    )
    # Lower-case the name, blank out punctuation, then join the remaining
    # words with "_".  split() drops leading/trailing whitespace and
    # collapses every whitespace run (tabs and newlines included), so the
    # result is empty when the name holds nothing but whitespace/punctuation.
    friendly_name = "_".join(
        re.sub(r"[^\w\s]", " ", dataset_name.lower()).split()
    )
    if friendly_name:
        # Produces e.g. "01_02_2023_10_11_12", safe to embed in a file name.
        friendly_date = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
        dataset_file_name = f"{friendly_name}-{friendly_date}.json"
        st.download_button(
            label=f"Download the Dataset Card below as {dataset_file_name}",
            data=json.dumps(st.session_state.get("card_dict", {}), indent=2),
            file_name=dataset_file_name,
        )
    else:
        st.markdown("##### Enter a dataset name above to be able to download the card!")
    st.markdown("---\n")
    st.write(st.session_state.get("card_dict", {}))
    # TODO add buttons to save and download
# Run the app when this file is executed as a script.
if __name__ == "__main__":
    main()