Zekun Wu
committed on
Commit
·
4d4a56e
1
Parent(s):
a75576c
add
Browse files
- util/evaluator.py +7 -7
util/evaluator.py
CHANGED
@@ -34,7 +34,7 @@ class evaluator:
|
|
34 |
evaluation_prompt = f"""You are provided with a user's question and the corresponding explanation generated by
|
35 |
an AI model. Your task is to evaluate the explanation based on the following five principles. Each principle
|
36 |
should be scored on a scale from 0 to 1, where 0 indicates that the principle is not met at all,
|
37 |
-
and 1 indicates that the principle is fully satisfied. Additionally, provide a brief explanation for each score to justify your rating.
|
38 |
|
39 |
Question:
|
40 |
{question}
|
@@ -69,23 +69,23 @@ class evaluator:
|
|
69 |
Example JSON format:
|
70 |
{{
|
71 |
"Factually Correct": {{
|
72 |
-
"Justification": "
|
73 |
"Score": 9
|
74 |
}},
|
75 |
"Useful": {{
|
76 |
-
"Justification": "
|
77 |
"Score": 8.5
|
78 |
}},
|
79 |
"Context Specific": {{
|
80 |
-
"Justification": "
|
81 |
"Score": 8
|
82 |
}},
|
83 |
"User Specific": {{
|
84 |
-
"Justification": "
|
85 |
"Score": 7.5
|
86 |
}},
|
87 |
"Provides Pluralism": {{
|
88 |
-
"Justification": "
|
89 |
"Score": 7
|
90 |
}}
|
91 |
}}
|
@@ -225,7 +225,7 @@ def write_evaluation_commentary(scores):
|
|
225 |
comment = "Lacks diversity in viewpoints, limiting the depth of exploration into the topic."
|
226 |
|
227 |
evaluation_details.append(
|
228 |
-
{'Principle': principle, 'Score': score, '
|
229 |
|
230 |
return evaluation_details
|
231 |
# def write_evaluation_commentary(scores):
|
|
|
34 |
evaluation_prompt = f"""You are provided with a user's question and the corresponding explanation generated by
|
35 |
an AI model. Your task is to evaluate the explanation based on the following five principles. Each principle
|
36 |
should be scored on a scale from 0 to 1, where 0 indicates that the principle is not met at all,
|
37 |
+
and 1 indicates that the principle is fully satisfied. Additionally, provide a brief ten words explanation for each score to justify your rating.
|
38 |
|
39 |
Question:
|
40 |
{question}
|
|
|
69 |
Example JSON format:
|
70 |
{{
|
71 |
"Factually Correct": {{
|
72 |
+
"Justification": "xxx",
|
73 |
"Score": 9
|
74 |
}},
|
75 |
"Useful": {{
|
76 |
+
"Justification": "xxx",
|
77 |
"Score": 8.5
|
78 |
}},
|
79 |
"Context Specific": {{
|
80 |
+
"Justification": "xxx",
|
81 |
"Score": 8
|
82 |
}},
|
83 |
"User Specific": {{
|
84 |
+
"Justification": "xxx",
|
85 |
"Score": 7.5
|
86 |
}},
|
87 |
"Provides Pluralism": {{
|
88 |
+
"Justification": "xxx",
|
89 |
"Score": 7
|
90 |
}}
|
91 |
}}
|
|
|
225 |
comment = "Lacks diversity in viewpoints, limiting the depth of exploration into the topic."
|
226 |
|
227 |
evaluation_details.append(
|
228 |
+
{'Principle': principle, 'Score': score, 'Justification': justification,'Commentary': comment})
|
229 |
|
230 |
return evaluation_details
|
231 |
# def write_evaluation_commentary(scores):
|