File size: 7,056 Bytes
ac6c40f
 
57616af
ac6c40f
37b8c09
9994065
37b8c09
 
 
 
 
9994065
37b8c09
 
9994065
37b8c09
ac6c40f
 
37b8c09
 
 
57616af
 
 
37b8c09
b03f385
37b8c09
 
 
 
 
 
 
 
 
e0a15f4
 
 
 
 
 
 
 
 
 
 
 
05d58bc
9999db9
05d58bc
 
37b8c09
 
 
 
 
9994065
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37b8c09
 
57616af
 
 
37b8c09
 
 
 
 
 
 
 
 
 
 
ac6c40f
9994065
ac6c40f
57616af
 
 
 
 
 
37b8c09
57616af
9994065
57616af
37b8c09
9994065
37b8c09
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import streamlit as st

from .streamlit_utils import make_text_input

from .streamlit_utils import (
    make_multiselect,
    make_text_area,
    make_radio,
)

# Expected number of fields in each sub-section of the "gem" part of the card.
# gem_summary() compares these against the number of keys actually stored.
N_FIELDS_RATIONALE = 5
N_FIELDS_CURATION = 6
N_FIELDS_STARTING = 2

# Expected number of fields across all three sub-sections.
N_FIELDS = sum(
    (
        N_FIELDS_RATIONALE,
        N_FIELDS_CURATION,
        N_FIELDS_STARTING,
    )
)


def _gem_section(name):
    """Return the ``name`` sub-dict of the card's "gem" entry.

    Both the "gem" entry and the requested sub-dict are created (as empty
    dicts) when they are missing. The section is looked up through
    ``st.session_state`` on every call rather than cached in a local, so reads
    always see the current content of the card.
    """
    gem = st.session_state.card_dict.setdefault("gem", {})
    return gem.setdefault(name, {})


def _render_rationale():
    """Render the "Rationale" expander (keys under ``gem -> rationale``)."""
    key_pref = ["gem", "rationale"]
    with st.expander("Rationale", expanded=False):
        _gem_section("rationale")
        make_text_area(
            label="What does this dataset contribute toward better generation evaluation and why is it part of GEM?",
            key_list=key_pref + ["contribution"],
            help="Describe briefly what makes this dataset an interesting target for NLG evaluations and why it is part of GEM",
        )
        make_radio(
            label="Do other datasets for the high level task exist?",
            options=["no", "yes"],
            key_list=key_pref + ["sole-task-dataset"],
            help="for example, is this the only summarization dataset proposed in GEM",
        )
        # Membership test kept as in the original code: it is true for the
        # string "yes" and would also be true for a list containing "yes".
        if "yes" in _gem_section("rationale").get("sole-task-dataset", []):
            make_radio(
                label="Does this dataset cover other languages than other datasets for the same task?",
                options=["no", "yes"],
                key_list=key_pref + ["sole-language-task-dataset"],
                help="for example, is this the only summarization dataset proposed in GEM to have French text?",
            )
            make_text_area(
                label="What else sets this dataset apart from other similar datasets in GEM?",
                key_list=key_pref + ["distinction-description"],
                help="Describe briefly for each similar dataset (same task/languages) what sets this one apart",
            )
        else:
            # The follow-up questions are not shown; record them as "N/A".
            rationale = _gem_section("rationale")
            rationale["sole-language-task-dataset"] = "N/A"
            rationale["distinction-description"] = "N/A"

        make_text_area(
            label="What aspect of model ability can be measured with this dataset?",
            key_list=key_pref + ["model-ability"],
            help="What kind of abilities should a model exhibit that performs well on the task of this dataset (e.g., reasoning capability, morphological inflection)?",
        )


def _render_curation():
    """Render the "GEM Additional Curation" expander (keys under ``gem -> curation``)."""
    key_pref = ["gem", "curation"]
    with st.expander("GEM Additional Curation", expanded=False):
        _gem_section("curation")
        make_radio(
            label="Has the GEM version of the dataset been modified in any way (data, processing, splits) from the original curated data?",
            options=["no", "yes"],
            key_list=key_pref + ["has-additional-curation"],
        )
        # .get() instead of indexing: a missing answer is treated like "no"
        # rather than raising KeyError.
        if _gem_section("curation").get("has-additional-curation") == "yes":
            make_multiselect(
                label="What changes have been made to the original dataset?",
                options=["data points added", "data points removed", "data points modified", "annotations added", "other"],
                key_list=key_pref + ["modification-types"],
            )
            make_text_area(
                label="For each of these changes, describe them in more detail and provide the intended purpose of the modification",
                key_list=key_pref + ["modification-description"],
            )
            make_radio(
                label="Does GEM provide additional splits to the dataset?",
                options=["no", "yes"],
                key_list=key_pref + ["has-additional-splits"],
            )
            has_splits = _gem_section("curation").get("has-additional-splits") == "yes"
        else:
            # No additional curation: fill the dependent fields with defaults.
            curation = _gem_section("curation")
            curation["modification-types"] = []
            curation["modification-description"] = "N/A"
            curation["has-additional-splits"] = "no"
            has_splits = False

        if has_splits:
            make_text_area(
                label="Describe how the new splits were created",
                key_list=key_pref + ["additional-splits-description"],
            )
            make_text_area(
                label="What aspects of the model's generation capacities were the splits created to test?",
                # NOTE: the key is misspelled ("capacicites") but is kept
                # unchanged so that the stored key stays the same.
                key_list=key_pref + ["additional-splits-capacicites"],
            )
        else:
            curation = _gem_section("curation")
            curation["additional-splits-description"] = "N/A"
            curation["additional-splits-capacicites"] = "N/A"


def _render_getting_started():
    """Render the "Getting Started" expander (keys under ``gem -> starting``)."""
    key_pref = ["gem", "starting"]
    with st.expander("Getting Started", expanded=False):
        _gem_section("starting")
        make_text_area(
            label="Getting started with in-depth research on the task. Add relevant pointers to resources that researchers can consult when they want to get started digging deeper into the task.",
            key_list=key_pref + ["research-pointers"],
            help=" These can include blog posts, research papers, literature surveys, etc. You can also link to tutorials on the GEM website.",
        )
        make_text_area(
            label="Technical terms used in this card and the dataset and their definitions",
            key_list=key_pref + ["technical-terms"],
            help="Provide a brief definition of technical terms that are unique to this dataset",
        )


def gem_page():
    """Render the three "gem" expanders of the data card form.

    Ensures ``st.session_state.card_dict["gem"]`` exists, then draws the
    "Rationale", "GEM Additional Curation" and "Getting Started" expanders in
    that order. Fields whose question is hidden by an earlier answer are
    written directly into the card with a default ("N/A", ``[]`` or "no").
    """
    st.session_state.card_dict.setdefault("gem", {})
    _render_rationale()
    _render_curation()
    _render_getting_started()



def gem_summary():
    """Display a completion summary for the "gem" part of the data card.

    A field counts as filled when its key is present in the corresponding
    sub-section dict. The expander title shows the overall count; its body
    lists the overall count followed by one entry per sub-section.
    """
    gem = st.session_state.card_dict.get("gem", {})
    total_filled = sum(len(section) for section in gem.values())

    # (title shown in the summary, key inside "gem", expected number of fields)
    sub_sections = (
        ("Rationale", "rationale", N_FIELDS_RATIONALE),
        ("GEM Additional Curation", "curation", N_FIELDS_CURATION),
        ("Getting Started", "starting", N_FIELDS_STARTING),
    )

    title = f"Dataset in GEM Completion - {total_filled} of {N_FIELDS}"
    with st.expander(title, expanded=False):
        parts = [
            f"- **Overall completion:**\n  - {total_filled} of {N_FIELDS} fields\n"
        ]
        for heading, key, expected in sub_sections:
            filled = len(gem.get(key, {}))
            parts.append(
                f"- **Sub-section - {heading}:**\n  - {filled} of {expected} fields\n"
            )
        st.markdown("".join(parts))