Spaces:

futurehouse
/

paper-qa

Running

App Files Files Community

whitead committed on Feb 10, 2023

Commit

ad7af6e

1 Parent(s): 1495711

Removed duplicate space

Browse files

Files changed (2) hide show

app.py +26 -21
space.py +0 -102

app.py CHANGED Viewed

@@ -4,39 +4,44 @@ docs = None
 def request_pathname(files):
     return [[file.name, file.name.split('/')[-1]] for file in files]
 def validate_dataset(dataset, openapi):
     docs_ready = dataset.iloc[-1, 0] != ""
     if docs_ready and type(openapi) is str and len(openapi) > 0:
         return "✨Ready✨"
     elif docs_ready:
-        return "Waiting for key..."
     elif type(openapi) is str and len(openapi) > 0:
-        return "Waiting for documents..."
     else:
-        return "Waiting for documents and key..."
 def do_ask(question, button, openapi, dataset, progress=gr.Progress()):
     global docs
     docs_ready = dataset.iloc[-1, 0] != ""
-    if button == "✨Ready✨" and openapi != "" and docs_ready:
-        import os
-        os.environ['OPENAI_API_KEY'] = openapi.strip()
-        import paperqa
-        docs = paperqa.Docs()
-        # dataset is pandas dataframe
-        for _, row in dataset.iterrows():
-            key = None
-            if ',' not in row['citation string']:
-                key = row['citation string']
-            docs.add(row['filepath'], row['citation string'], key=key)
     else:
         return ""
-    if docs is None:
-        return """**Error**: You must build the index first!"""
     progress(0, "Building Index...")
     docs._build_faiss_index()
     progress(0.25, "Querying...")
@@ -59,13 +64,13 @@ with gr.Blocks() as demo:
     ## Instructions
-    1. Enter API Key
     2. Upload your documents and modify citation strings if you want (to look prettier)
     """)
     openai_api_key = gr.Textbox(
         label="OpenAI API Key", placeholder="sk-...", type="password")
     uploaded_files = gr.File(
-        label="Your Documents Upload (PDF or txt)", file_count="multiple")
     dataset = gr.Dataframe(
         headers=["filepath", "citation string"],
         datatype=["str", "str"],
@@ -73,7 +78,7 @@ with gr.Blocks() as demo:
         interactive=True,
         label="Documents and Citations"
     )
-    buildb = gr.Textbox("Waiting for documents and key...",
                         label="Status", interactive=False, show_label=True)
     openai_api_key.change(validate_dataset, inputs=[
                           dataset, openai_api_key], outputs=[buildb])
@@ -93,5 +98,5 @@ with gr.Blocks() as demo:
     ask.click(fn=do_ask, inputs=[query, buildb,
                                  openai_api_key, dataset], outputs=[answer, context])
-demo.queue(concurrency_count=3)
-demo.launch()

 def request_pathname(files):
+    if files is None:
+        return [[]]
     return [[file.name, file.name.split('/')[-1]] for file in files]
 def validate_dataset(dataset, openapi):
+    global docs
+    print('clearing docs')
+    docs = None  # clear it out if dataset is modified
     docs_ready = dataset.iloc[-1, 0] != ""
     if docs_ready and type(openapi) is str and len(openapi) > 0:
         return "✨Ready✨"
     elif docs_ready:
+        return "⚠️Waiting for key..."
     elif type(openapi) is str and len(openapi) > 0:
+        return "⚠️Waiting for documents..."
     else:
+        return "⚠️Waiting for documents and key..."
 def do_ask(question, button, openapi, dataset, progress=gr.Progress()):
     global docs
     docs_ready = dataset.iloc[-1, 0] != ""
+    if button == "✨Ready✨" and type(openapi) is str and len(openapi) > 0 and docs_ready:
+        print('are docs ready?', docs)
+        if docs is None:  # don't want to rebuild index if it's already built
+            import os
+            os.environ['OPENAI_API_KEY'] = openapi.strip()
+            import paperqa
+            docs = paperqa.Docs()
+            # dataset is pandas dataframe
+            for _, row in dataset.iterrows():
+                key = None
+                if ',' not in row['citation string']:
+                    key = row['citation string']
+                docs.add(row['filepath'], row['citation string'], key=key)
     else:
         return ""
     progress(0, "Building Index...")
     docs._build_faiss_index()
     progress(0.25, "Querying...")
     ## Instructions
+    1. Enter API Key ([What is that?](https://openai.com/api/))
     2. Upload your documents and modify citation strings if you want (to look prettier)
     """)
     openai_api_key = gr.Textbox(
         label="OpenAI API Key", placeholder="sk-...", type="password")
     uploaded_files = gr.File(
+        label="Your Documents Upload (PDF or txt)", file_count="multiple", )
     dataset = gr.Dataframe(
         headers=["filepath", "citation string"],
         datatype=["str", "str"],
         interactive=True,
         label="Documents and Citations"
     )
+    buildb = gr.Textbox("⚠️Waiting for documents and key...",
                         label="Status", interactive=False, show_label=True)
     openai_api_key.change(validate_dataset, inputs=[
                           dataset, openai_api_key], outputs=[buildb])
     ask.click(fn=do_ask, inputs=[query, buildb,
                                  openai_api_key, dataset], outputs=[answer, context])
+demo.queue(concurrency_count=20)
+demo.launch(show_error=True)

space.py DELETED Viewed

@@ -1,102 +0,0 @@
-import gradio as gr
-docs = None
-def request_pathname(files):
-    if files is None:
-        return [[]]
-    return [[file.name, file.name.split('/')[-1]] for file in files]
-def validate_dataset(dataset, openapi):
-    global docs
-    print('clearing docs')
-    docs = None  # clear it out if dataset is modified
-    docs_ready = dataset.iloc[-1, 0] != ""
-    if docs_ready and type(openapi) is str and len(openapi) > 0:
-        return "✨Ready✨"
-    elif docs_ready:
-        return "⚠️Waiting for key..."
-    elif type(openapi) is str and len(openapi) > 0:
-        return "⚠️Waiting for documents..."
-    else:
-        return "⚠️Waiting for documents and key..."
-def do_ask(question, button, openapi, dataset, progress=gr.Progress()):
-    global docs
-    docs_ready = dataset.iloc[-1, 0] != ""
-    if button == "✨Ready✨" and type(openapi) is str and len(openapi) > 0 and docs_ready:
-        print('are docs ready?', docs)
-        if docs is None:  # don't want to rebuild index if it's already built
-            import os
-            os.environ['OPENAI_API_KEY'] = openapi.strip()
-            import paperqa
-            docs = paperqa.Docs()
-            # dataset is pandas dataframe
-            for _, row in dataset.iterrows():
-                key = None
-                if ',' not in row['citation string']:
-                    key = row['citation string']
-                docs.add(row['filepath'], row['citation string'], key=key)
-    else:
-        return ""
-    progress(0, "Building Index...")
-    docs._build_faiss_index()
-    progress(0.25, "Querying...")
-    result = docs.query(question)
-    progress(1.0, "Done!")
-    return result.formatted_answer, result.context
-with gr.Blocks() as demo:
-    gr.Markdown("""
-    # Document Question and Answer
-    This tool will enable asking questions of your uploaded text or PDF documents.
-    It uses OpenAI's GPT models and thus you must enter your API key below. This
-    tool is under active development and currently uses many tokens - up to 10,000
-    for a single query. That is $0.10-0.20 per query, so please be careful!
-    * [PaperQA](https://github.com/whitead/paper-qa) is the code used to build this tool.
-    * [langchain](https://github.com/hwchase17/langchain) is the main library this tool utilizes.
-    ## Instructions
-    1. Enter API Key ([What is that?](https://openai.com/api/))
-    2. Upload your documents and modify citation strings if you want (to look prettier)
-    """)
-    openai_api_key = gr.Textbox(
-        label="OpenAI API Key", placeholder="sk-...", type="password")
-    uploaded_files = gr.File(
-        label="Your Documents Upload (PDF or txt)", file_count="multiple", )
-    dataset = gr.Dataframe(
-        headers=["filepath", "citation string"],
-        datatype=["str", "str"],
-        col_count=(2, "fixed"),
-        interactive=True,
-        label="Documents and Citations"
-    )
-    buildb = gr.Textbox("⚠️Waiting for documents and key...",
-                        label="Status", interactive=False, show_label=True)
-    openai_api_key.change(validate_dataset, inputs=[
-                          dataset, openai_api_key], outputs=[buildb])
-    dataset.change(validate_dataset, inputs=[
-                   dataset, openai_api_key], outputs=[buildb])
-    uploaded_files.change(request_pathname, inputs=[
-                          uploaded_files], outputs=[dataset])
-    query = gr.Textbox(
-        placeholder="Enter your question here...", label="Question")
-    ask = gr.Button("Ask Question")
-    gr.Markdown("## Answer")
-    answer = gr.Markdown(label="Answer")
-    with gr.Accordion("Context", open=False):
-        gr.Markdown(
-            "### Context\n\nThe following context was used to generate the answer:")
-        context = gr.Markdown(label="Context")
-    ask.click(fn=do_ask, inputs=[query, buildb,
-                                 openai_api_key, dataset], outputs=[answer, context])
-demo.queue(concurrency_count=20)
-demo.launch(show_error=True)