Spaces:

holistic-ai
/

explainbility_benchmark

Sleeping

App Files Files Community

Zekun Wu committed on Jun 23, 2024

Commit

4d4a56e

1 Parent(s): a75576c

add

Browse files

Files changed (1) hide show

util/evaluator.py +7 -7

util/evaluator.py CHANGED Viewed

@@ -34,7 +34,7 @@ class evaluator:
         evaluation_prompt = f"""You are provided with a user's question and the corresponding explanation generated by
         an AI model. Your task is to evaluate the explanation based on the following five principles. Each principle
         should be scored on a scale from 0 to 1, where 0 indicates that the principle is not met at all,
-        and 1 indicates that the principle is fully satisfied. Additionally, provide a brief explanation for each score to justify your rating.
         Question:
         {question}
@@ -69,23 +69,23 @@ class evaluator:
         Example JSON format:
         {{
         "Factually Correct": {{
-            "Justification": "The explanation is mostly accurate with only minor inaccuracies.",
             "Score": 9
         }},
         "Useful": {{
-            "Justification": "The explanation is very helpful in understanding the main concept.",
             "Score": 8.5
         }},
         "Context Specific": {{
-            "Justification": "The explanation is generally relevant to the specific context but lacks some detail.",
             "Score": 8
         }},
         "User Specific": {{
-            "Justification": "The explanation is appropriate for the typical user but may be too technical for some.",
             "Score": 7.5
         }},
         "Provides Pluralism": {{
-            "Justification": "The explanation considers multiple perspectives but could include more viewpoints.",
             "Score": 7
         }}
     }}
@@ -225,7 +225,7 @@ def write_evaluation_commentary(scores):
                 comment = "Lacks diversity in viewpoints, limiting the depth of exploration into the topic."
         evaluation_details.append(
-            {'Principle': principle, 'Score': score, 'Commentary': comment, 'Justification': justification})
     return evaluation_details
 # def write_evaluation_commentary(scores):

         evaluation_prompt = f"""You are provided with a user's question and the corresponding explanation generated by
         an AI model. Your task is to evaluate the explanation based on the following five principles. Each principle
         should be scored on a scale from 0 to 1, where 0 indicates that the principle is not met at all,
+        and 1 indicates that the principle is fully satisfied. Additionally, provide a brief ten words explanation for each score to justify your rating.
         Question:
         {question}
         Example JSON format:
         {{
         "Factually Correct": {{
+            "Justification": "xxx",
             "Score": 9
         }},
         "Useful": {{
+            "Justification": "xxx",
             "Score": 8.5
         }},
         "Context Specific": {{
+            "Justification": "xxx",
             "Score": 8
         }},
         "User Specific": {{
+            "Justification": "xxx",
             "Score": 7.5
         }},
         "Provides Pluralism": {{
+            "Justification": "xxx",
             "Score": 7
         }}
     }}
                 comment = "Lacks diversity in viewpoints, limiting the depth of exploration into the topic."
         evaluation_details.append(
+            {'Principle': principle, 'Score': score, 'Justification': justification,'Commentary': comment})
     return evaluation_details
 # def write_evaluation_commentary(scores):