shlomihod committed on
Commit
8b90d81
·
1 Parent(s): d537bad

improve explanations of ux parts

Browse files
Files changed (1) hide show
  1. app.py +15 -4
app.py CHANGED
@@ -844,11 +844,11 @@ def main():
844
  is_multi_placeholder = len(st.session_state.input_columns) > 1
845
 
846
  st.write(
847
- f"To determine the inferred label, the model need to produce one of the following words:"
848
  f" {combine_labels(st.session_state.labels)}"
849
  )
850
  st.write(
851
- f"The placeholder{'s' if is_multi_placeholder else ''} available for the prompt template {'are' if is_multi_placeholder else 'is'}:"
852
  f" {combine_labels(f'{{{col}}}' for col in st.session_state.input_columns)}"
853
  )
854
 
@@ -896,11 +896,16 @@ def main():
896
  cols = st.columns(num_metric_cols)
897
  with cols[0]:
898
  st.metric("Accuracy", f"{100 * evaluation['accuracy']:.0f}%")
 
899
  with cols[1]:
900
  st.metric(
901
- "Unknown Proportion",
902
  f"{100 * evaluation['unknown_proportion']:.0f}%",
903
  )
 
 
 
 
904
  if not balancing:
905
  with cols[2]:
906
  st.metric(
@@ -910,7 +915,13 @@ def main():
910
  with cols[3]:
911
  st.metric("MCC", f"{evaluation['mcc']:.2f}")
912
 
913
- st.markdown("## Hits and Misses")
 
 
 
 
 
 
914
  st.dataframe(evaluation["hit_miss"])
915
 
916
  with st.expander("Additional Information", expanded=False):
 
844
  is_multi_placeholder = len(st.session_state.input_columns) > 1
845
 
846
  st.write(
847
+ f"To determine the inferred label of an input, the model should output one of the following words:"
848
  f" {combine_labels(st.session_state.labels)}"
849
  )
850
  st.write(
851
+ f"The input placeholder{'s' if is_multi_placeholder else ''} available for the prompt template {'are' if is_multi_placeholder else 'is'}:"
852
  f" {combine_labels(f'{{{col}}}' for col in st.session_state.input_columns)}"
853
  )
854
 
 
896
  cols = st.columns(num_metric_cols)
897
  with cols[0]:
898
  st.metric("Accuracy", f"{100 * evaluation['accuracy']:.0f}%")
899
+ st.caption("The percentage of correct inferences.")
900
  with cols[1]:
901
  st.metric(
902
+ "Unknown",
903
  f"{100 * evaluation['unknown_proportion']:.0f}%",
904
  )
905
+ st.caption(
906
+ "The percentage of inferences"
907
+ " that could not be determined based on the model output."
908
+ )
909
  if not balancing:
910
  with cols[2]:
911
  st.metric(
 
915
  with cols[3]:
916
  st.metric("MCC", f"{evaluation['mcc']:.2f}")
917
 
918
+ st.markdown("## Detailed Evaluation")
919
+ st.caption(
920
+ "A table of all examples (input and output pairs) used to evaluate the prompt template with the model (e.g., accuracy)."
921
+ " It consists of the input placeholder values, the model *output* as-is, the *inference*, and the 'ground-truth' *annotation*."
922
+ " A hit is a correct inference (*inference* is the same as *annotation*), a miss is an incorrect inference (otherwise)."
923
+ " If the inference could not be determined based on the model output, the *inference* is 'unknown'."
924
+ )
925
  st.dataframe(evaluation["hit_miss"])
926
 
927
  with st.expander("Additional Information", expanded=False):