Spaces: Runtime error
Commit 86514f4 · Parent: 313b015 · update
app.py
CHANGED
@@ -7,7 +7,7 @@ from huggingface_hub import Repository
 
 import openai
 
-HF_TOKEN = os.environ.get("
+HF_TOKEN = os.environ.get("HF_TOKEN", None)
 API_URL = os.environ.get("API_URL")
 
 theme = gr.themes.Monochrome(
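The new line reads the token with an explicit `None` default, so the app can branch on its absence instead of failing later. A minimal sketch of the pattern, assuming only the `HF_TOKEN` and `API_URL` names from the diff:

```python
import os

# Secrets are injected as environment variables in a Space; .get() with a
# default returns None instead of raising KeyError when they are missing.
HF_TOKEN = os.environ.get("HF_TOKEN", None)
API_URL = os.environ.get("API_URL")

if not HF_TOKEN:
    # Downstream code (the Repository clone, the save path) is skipped
    # entirely when no token is configured.
    print("HF_TOKEN not set: running without dataset logging")
```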
@@ -31,8 +31,8 @@ if HF_TOKEN:
 
     repo = Repository(
         local_dir="./data/",
-        clone_from="
-
+        clone_from="Ligeng-Zhu/gpt-eval-prompts",
+        token=HF_TOKEN,
         repo_type="dataset",
     )
     repo.git_pull()
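The hunk header shows this block sits under `if HF_TOKEN:`, so cloning is conditional on the secret. A sketch of the guarded setup as it reads after the commit; only the arguments shown in the diff come from the source, the surrounding structure is inferred:

```python
from huggingface_hub import Repository  # legacy git-backed class

repo = None
if HF_TOKEN:
    # Clone the dataset repo into ./data/ (or reuse an existing checkout),
    # authenticate pushes with the token, and sync to the latest commit.
    repo = Repository(
        local_dir="./data/",
        clone_from="Ligeng-Zhu/gpt-eval-prompts",
        token=HF_TOKEN,
        repo_type="dataset",
    )
    repo.git_pull()
```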
@@ -49,22 +49,7 @@ def save_inputs_and_outputs(inputs, outputs, generate_kwargs):
         )
         f.write("\n")
     commit_url = repo.push_to_hub()
-
-def generate(
-    instruction,
-    temperature=0.9,
-    max_new_tokens=256,
-    top_p=0.95,
-    repetition_penalty=1.0,
-    do_save=True,
-):
-    output = instruction + str(temperature)
-    s = ""
-    for ch in output:
-        s += ch
-        yield s
-    return s
-
+
 example_system_prompt = [
     "You are a helpful and precise assistant for checking the quality of the answer."
 ]
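The deleted `generate` was an echo stub: it replayed the instruction plus the temperature value character by character, the generator-yield pattern that drives Gradio's streaming display. Only the tail of `save_inputs_and_outputs` is visible as context (a closing paren, `f.write("\n")`, and `push_to_hub()`), which suggests a JSONL append-then-push pattern. A hypothetical reconstruction under that assumption; the file name and the `json.dump` call are guesses:

```python
import json

def save_inputs_and_outputs(inputs, outputs, generate_kwargs):
    # Append one JSON record per call to a file inside the cloned dataset
    # repo, then commit and push it to the Hub.
    with open("./data/prompts.jsonl", "a") as f:  # hypothetical path
        json.dump(
            {"inputs": inputs, "outputs": outputs, "generate_kwargs": generate_kwargs},
            f,
        )
        f.write("\n")
    commit_url = repo.push_to_hub()
    return commit_url
```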
@@ -75,14 +60,7 @@ examples = [
     ["You are a helpful and precise assistant for checking the quality of the answer.", "[Question]\n{question}\n\n[The Start of Assistant's Answer]\n{answer}\n[The End of of Assistant's Answer]\n\nWe would like to request your feedback on the performance of the AI assistant in response to the user question displayed above.\nPlease rate the helpfulness, relevance, accuracy, level of details of their responses. Each assistant receives an overall score on a scale of 1 to 10, where a higher score indicates better overall performance.\nPlease first output a single line containing only the value indicating the scores for the Assistant. In the subsequent line, please provide a comprehensive explanation of your evaluation, avoiding any potential bias and ensuring that the order in which the responses were presented does not affect your judgment."]
 ]
 
-
-def process_example(args):
-    for x in generate(args):
-        pass
-    return x
-
-
-def gpt_eval(system_prompt, prompt, question, answer, openai_key):
+def gpt_eval(system_prompt, prompt, question, answer, openai_key, do_save=True):
     if openai_key is None or len(openai_key) <= 10:
         yield "Please enter a valid openai API key"
         return
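`process_example` existed only to drain the old `generate` generator, so it goes with it, and `gpt_eval` gains a `do_save` flag. The next hunk shows the function accumulating streamed chunks and yielding `input_str + output`; a sketch of that streaming loop, assuming the pre-1.0 `openai` client visible in the imports (the prompt formatting and the `origin_input`/`input_str` wiring are guesses):

```python
def gpt_eval(system_prompt, prompt, question, answer, openai_key, do_save=True):
    # Cheap sanity check before spending an API call.
    if openai_key is None or len(openai_key) <= 10:
        yield "Please enter a valid openai API key"
        return

    openai.api_key = openai_key
    origin_input = prompt.format(question=question, answer=answer)  # assumed
    input_str = origin_input + "\n"                                 # assumed
    output = ""

    # stream=True returns chunks with incremental "delta" payloads; re-yield
    # the running text so the Gradio textbox updates live.
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": origin_input},
        ],
        stream=True,
    )
    for chunk in response:
        content = chunk["choices"][0]["delta"].get("content", "")
        output += content
        yield input_str + output
```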
@@ -108,7 +86,13 @@ def gpt_eval(system_prompt, prompt, question, answer, openai_key):
         # print(content, end="")
         output += content
         yield input_str + output
-
+    if do_save and HF_TOKEN:
+        save_inputs_and_outputs(
+            inputs=system_prompt + "\n" + origin_input,
+            outputs=output,
+            generate_kwargs={}
+        )
+    return
 
 css = ".generating {visibility: hidden}" # + share_btn_css
 
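The new tail persists a run only when the user leaves `do_save` on and the Space actually has write access (`HF_TOKEN`), then `return`s to end the generator cleanly. A hypothetical driver showing how the flag is consumed:

```python
# Stream an evaluation without logging it to the dataset repo.
for partial in gpt_eval(
    system_prompt=example_system_prompt[0],
    prompt=examples[0][1],
    question="What is 2 + 2?",
    answer="4",
    openai_key="sk-...",  # placeholder, not a real key
    do_save=False,
):
    print(partial)
```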
@@ -175,7 +159,7 @@ with gr.Blocks(theme=theme, analytics_enabled=False, css=css) as demo:
 
         openai_model = gr.Textbox(
             value="gpt-3.5-turbo",
-            label="Model (More opions coming soon)
+            label="Model (More opions coming soon)",
         )
 
         # gr.Examples(
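The old `label=` line ends without a closing quote, i.e. a `SyntaxError` on import, which would explain the Space's "Runtime error" status; the commit terminates the string and adds the trailing comma. The corrected component in context (the `gr.Blocks` wrapper here is just scaffolding):

```python
import gradio as gr

with gr.Blocks() as demo:
    openai_model = gr.Textbox(
        value="gpt-3.5-turbo",
        label="Model (More opions coming soon)",  # label text as committed
    )
```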
data
ADDED
@@ -0,0 +1 @@
+Subproject commit 7dca8500183db713b51713e8977d00ff9842e516
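The `Subproject commit` line is a gitlink: `data` was recorded as a git submodule pointing at that exact revision rather than having its files vendored into the Space, likely because the `./data/` checkout created by `Repository` is itself a git repo.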