Spaces:
Runtime error
Runtime error
shlomihod
committed on
Commit
·
8b90d81
1
Parent(s):
d537bad
improve explanations of ux parts
Browse files
app.py
CHANGED
@@ -844,11 +844,11 @@ def main():
|
|
844 |
is_multi_placeholder = len(st.session_state.input_columns) > 1
|
845 |
|
846 |
st.write(
|
847 |
-
f"To determine the inferred label, the model
|
848 |
f" {combine_labels(st.session_state.labels)}"
|
849 |
)
|
850 |
st.write(
|
851 |
-
f"The placeholder{'s' if is_multi_placeholder else ''} available for the prompt template {'are' if is_multi_placeholder else 'is'}:"
|
852 |
f" {combine_labels(f'{{{col}}}' for col in st.session_state.input_columns)}"
|
853 |
)
|
854 |
|
@@ -896,11 +896,16 @@ def main():
|
|
896 |
cols = st.columns(num_metric_cols)
|
897 |
with cols[0]:
|
898 |
st.metric("Accuracy", f"{100 * evaluation['accuracy']:.0f}%")
|
|
|
899 |
with cols[1]:
|
900 |
st.metric(
|
901 |
-
"Unknown
|
902 |
f"{100 * evaluation['unknown_proportion']:.0f}%",
|
903 |
)
|
|
|
|
|
|
|
|
|
904 |
if not balancing:
|
905 |
with cols[2]:
|
906 |
st.metric(
|
@@ -910,7 +915,13 @@ def main():
|
|
910 |
with cols[3]:
|
911 |
st.metric("MCC", f"{evaluation['mcc']:.2f}")
|
912 |
|
913 |
-
st.markdown("##
|
|
|
|
|
|
|
|
|
|
|
|
|
914 |
st.dataframe(evaluation["hit_miss"])
|
915 |
|
916 |
with st.expander("Additional Information", expanded=False):
|
|
|
844 |
is_multi_placeholder = len(st.session_state.input_columns) > 1
|
845 |
|
846 |
st.write(
|
847 |
+
f"To determine the inferred label of an input, the model should output one of the following words:"
|
848 |
f" {combine_labels(st.session_state.labels)}"
|
849 |
)
|
850 |
st.write(
|
851 |
+
f"The input placeholder{'s' if is_multi_placeholder else ''} available for the prompt template {'are' if is_multi_placeholder else 'is'}:"
|
852 |
f" {combine_labels(f'{{{col}}}' for col in st.session_state.input_columns)}"
|
853 |
)
|
854 |
|
|
|
896 |
cols = st.columns(num_metric_cols)
|
897 |
with cols[0]:
|
898 |
st.metric("Accuracy", f"{100 * evaluation['accuracy']:.0f}%")
|
899 |
+
st.caption("The percentage of correct inferences.")
|
900 |
with cols[1]:
|
901 |
st.metric(
|
902 |
+
"Unknown",
|
903 |
f"{100 * evaluation['unknown_proportion']:.0f}%",
|
904 |
)
|
905 |
+
st.caption(
|
906 |
+
"The percentage of inferences"
|
907 |
+
" that could not be determined based on the model output."
|
908 |
+
)
|
909 |
if not balancing:
|
910 |
with cols[2]:
|
911 |
st.metric(
|
|
|
915 |
with cols[3]:
|
916 |
st.metric("MCC", f"{evaluation['mcc']:.2f}")
|
917 |
|
918 |
+
st.markdown("## Detailed Evaluation")
|
919 |
+
st.caption(
|
920 |
+
"A table of all examples (input and output pairs) used to evaluate the prompt template with the model (e.g., accuracy)."
|
921 |
+
" It consists of the input placeholder values, the model *output* as-is, the *inference*, and the 'ground-truth' *annotation*."
|
922 |
+
" A hit is a correct inference (*inference* is the same as *annotation*), a miss is an incorrect inference (otherwise)."
|
923 |
+
" If the inference could not be determined based on the model output, the *inference* is 'unknown'."
|
924 |
+
)
|
925 |
st.dataframe(evaluation["hit_miss"])
|
926 |
|
927 |
with st.expander("Additional Information", expanded=False):
|