user name committed on
Commit
5e86dd4
·
1 Parent(s): bfbf195

add factuality, faithfulness scores

Browse files
Files changed (2) hide show
  1. src/display/utils.py +3 -0
  2. src/populate.py +10 -1
src/display/utils.py CHANGED
@@ -74,6 +74,9 @@ auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "ma
74
  #Scores
75
  # auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg", "number", True)])
76
 
 
 
 
77
  for task in Tasks:
78
  auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
79
 
 
74
  #Scores
75
  # auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg", "number", True)])
76
 
77
+ auto_eval_column_dict.append(["Faithfulness", ColumnContent, ColumnContent("Faithfulness", "number", True)])
78
+ auto_eval_column_dict.append(["Factuality", ColumnContent, ColumnContent("Factuality", "number", True)])
79
+
80
  for task in Tasks:
81
  auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
82
 
src/populate.py CHANGED
@@ -60,11 +60,20 @@ def get_leaderboard_df(results_path: str,
60
  # if AutoEvalColumn.average.name in df:
61
  # df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
62
 
63
- df = df[cols].round(decimals=2)
 
 
 
64
 
65
  # filter out if any of the benchmarks have not been produced
66
  df = df[has_no_nan_values(df, benchmark_cols)]
67
 
 
 
 
 
 
 
68
  return raw_data, df
69
 
70
 
 
60
  # if AutoEvalColumn.average.name in df:
61
  # df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
62
 
63
+ cols_mod = copy.deepcopy(cols)
64
+ cols_mod.remove('Faithfulness')
65
+ cols_mod.remove('Factuality')
66
+ df = df[cols_mod]#.round(decimals=2)
67
 
68
  # filter out if any of the benchmarks have not been produced
69
  df = df[has_no_nan_values(df, benchmark_cols)]
70
 
71
+ Factuality_score = df[factuality_tasks].mean(axis=1)
72
+ Faithfulness_score = df[faithfulness_tasks].mean(axis=1)
73
+ df.insert(2, 'Factuality', Factuality_score)
74
+ df.insert(2, 'Faithfulness', Faithfulness_score)
75
+ df = df.round(decimals=2)
76
+
77
  return raw_data, df
78
 
79