Spaces:
Runtime error
Runtime error
ffreemt
committed on
Commit
·
ab343b8
1
Parent(s):
21d3b25
Update predict_api
Browse files
README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
---
|
2 |
-
title: mpt
|
3 |
emoji: 🔥
|
4 |
colorFrom: purple
|
5 |
colorTo: red
|
@@ -8,6 +8,6 @@ sdk_version: 3.35.2
|
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
11 |
-
NB: Need a CPU
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: mpt-30b-ggml-chat
|
3 |
emoji: 🔥
|
4 |
colorFrom: purple
|
5 |
colorTo: red
|
|
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
11 |
+
NB: Need a CPU UPGRADE (32GB RAM) instance to run on a huggingface space or 19GB+ disk, 22GB+ RAM at a minimum
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
CHANGED
@@ -31,6 +31,7 @@ def predict0(prompt, bot):
|
|
31 |
print(assistant_prefix, end=" ", flush=True)
|
32 |
|
33 |
response = ""
|
|
|
34 |
for word in generator:
|
35 |
print(word, end="", flush=True)
|
36 |
response += word
|
@@ -46,6 +47,30 @@ def predict0(prompt, bot):
|
|
46 |
|
47 |
return prompt, bot
|
48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
50 |
def download_mpt_quant(destination_folder: str, repo_id: str, model_filename: str):
|
51 |
local_path = os.path.abspath(destination_folder)
|
@@ -445,6 +470,18 @@ with gr.Blocks(
|
|
445 |
# AttributeError: 'Blocks' object has no attribute 'run_forever'
|
446 |
# block.run_forever(lambda: ns.response, None, [buff], every=1)
|
447 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
448 |
# concurrency_count=5, max_size=20
|
449 |
# max_size=36, concurrency_count=14
|
450 |
block.queue(concurrency_count=5, max_size=20).launch(debug=True)
|
|
|
31 |
print(assistant_prefix, end=" ", flush=True)
|
32 |
|
33 |
response = ""
|
34 |
+
buff.update(value="diggin...")
|
35 |
for word in generator:
|
36 |
print(word, end="", flush=True)
|
37 |
response += word
|
|
|
47 |
|
48 |
return prompt, bot
|
49 |
|
50 |
+
def predict_api(prompt):
|
51 |
+
logger.debug(f"{prompt=}")
|
52 |
+
ns.response = ""
|
53 |
+
try:
|
54 |
+
user_prompt = prompt
|
55 |
+
generator = generate(llm, generation_config, system_prompt, user_prompt.strip())
|
56 |
+
print(assistant_prefix, end=" ", flush=True)
|
57 |
+
|
58 |
+
response = ""
|
59 |
+
buff.update(value="diggin...")
|
60 |
+
for word in generator:
|
61 |
+
print(word, end="", flush=True)
|
62 |
+
response += word
|
63 |
+
ns.response = response
|
64 |
+
buff.update(value=response)
|
65 |
+
print("")
|
66 |
+
logger.debug(f"{response=}")
|
67 |
+
except Exception as exc:
|
68 |
+
logger.error(exc)
|
69 |
+
response = f"{exc=}"
|
70 |
+
# bot = {"inputs": [response]}
|
71 |
+
# bot = [(prompt, response)]
|
72 |
+
|
73 |
+
return response
|
74 |
|
75 |
def download_mpt_quant(destination_folder: str, repo_id: str, model_filename: str):
|
76 |
local_path = os.path.abspath(destination_folder)
|
|
|
470 |
# AttributeError: 'Blocks' object has no attribute 'run_forever'
|
471 |
# block.run_forever(lambda: ns.response, None, [buff], every=1)
|
472 |
|
473 |
+
with gr.Accordion("For Chat/Translation API", open=False, visible=False):
|
474 |
+
input_text = gr.Text()
|
475 |
+
api_btn = gr.Button("Go", variant="primary")
|
476 |
+
out_text = gr.Text()
|
477 |
+
api_btn.click(
|
478 |
+
predict_api,
|
479 |
+
input_text,
|
480 |
+
out_text,
|
481 |
+
# show_progress="full",
|
482 |
+
api_name="api",
|
483 |
+
)
|
484 |
+
|
485 |
# concurrency_count=5, max_size=20
|
486 |
# max_size=36, concurrency_count=14
|
487 |
block.queue(concurrency_count=5, max_size=20).launch(debug=True)
|