Spaces:
Runtime error
Runtime error
ffreemt
committed on
Commit
·
ab343b8
1
Parent(s):
21d3b25
Update predict_api
Browse files
README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
---
|
2 |
-
title: mpt
|
3 |
emoji: 🔥
|
4 |
colorFrom: purple
|
5 |
colorTo: red
|
@@ -8,6 +8,6 @@ sdk_version: 3.35.2
|
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
11 |
-
NB: Need a CPU
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: mpt-30b-ggml-chat
|
3 |
emoji: 🔥
|
4 |
colorFrom: purple
|
5 |
colorTo: red
|
|
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
11 |
+
NB: Need a CPU UPGRADE (32GB RAM) instance to run on a huggingface space or 19GB+ disk, 22GB+ RAM at a minimum
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
CHANGED
@@ -31,6 +31,7 @@ def predict0(prompt, bot):
|
|
31 |
print(assistant_prefix, end=" ", flush=True)
|
32 |
|
33 |
response = ""
|
|
|
34 |
for word in generator:
|
35 |
print(word, end="", flush=True)
|
36 |
response += word
|
@@ -46,6 +47,30 @@ def predict0(prompt, bot):
|
|
46 |
|
47 |
return prompt, bot
|
48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
50 |
def download_mpt_quant(destination_folder: str, repo_id: str, model_filename: str):
|
51 |
local_path = os.path.abspath(destination_folder)
|
@@ -445,6 +470,18 @@ with gr.Blocks(
|
|
445 |
# AttributeError: 'Blocks' object has no attribute 'run_forever'
|
446 |
# block.run_forever(lambda: ns.response, None, [buff], every=1)
|
447 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
448 |
# concurrency_count=5, max_size=20
|
449 |
# max_size=36, concurrency_count=14
|
450 |
block.queue(concurrency_count=5, max_size=20).launch(debug=True)
|
|
|
31 |
print(assistant_prefix, end=" ", flush=True)
|
32 |
|
33 |
response = ""
|
34 |
+
buff.update(value="diggin...")
|
35 |
for word in generator:
|
36 |
print(word, end="", flush=True)
|
37 |
response += word
|
|
|
47 |
|
48 |
return prompt, bot
|
49 |
|
50 |
+
def predict_api(prompt):
|
51 |
+
logger.debug(f"{prompt=}")
|
52 |
+
ns.response = ""
|
53 |
+
try:
|
54 |
+
user_prompt = prompt
|
55 |
+
generator = generate(llm, generation_config, system_prompt, user_prompt.strip())
|
56 |
+
print(assistant_prefix, end=" ", flush=True)
|
57 |
+
|
58 |
+
response = ""
|
59 |
+
buff.update(value="diggin...")
|
60 |
+
for word in generator:
|
61 |
+
print(word, end="", flush=True)
|
62 |
+
response += word
|
63 |
+
ns.response = response
|
64 |
+
buff.update(value=response)
|
65 |
+
print("")
|
66 |
+
logger.debug(f"{response=}")
|
67 |
+
except Exception as exc:
|
68 |
+
logger.error(exc)
|
69 |
+
response = f"{exc=}"
|
70 |
+
# bot = {"inputs": [response]}
|
71 |
+
# bot = [(prompt, response)]
|
72 |
+
|
73 |
+
return response
|
74 |
|
75 |
def download_mpt_quant(destination_folder: str, repo_id: str, model_filename: str):
|
76 |
local_path = os.path.abspath(destination_folder)
|
|
|
470 |
# AttributeError: 'Blocks' object has no attribute 'run_forever'
|
471 |
# block.run_forever(lambda: ns.response, None, [buff], every=1)
|
472 |
|
473 |
+
with gr.Accordion("For Chat/Translation API", open=False, visible=False):
|
474 |
+
input_text = gr.Text()
|
475 |
+
api_btn = gr.Button("Go", variant="primary")
|
476 |
+
out_text = gr.Text()
|
477 |
+
api_btn.click(
|
478 |
+
predict_api,
|
479 |
+
input_text,
|
480 |
+
out_text,
|
481 |
+
# show_progress="full",
|
482 |
+
api_name="api",
|
483 |
+
)
|
484 |
+
|
485 |
# concurrency_count=5, max_size=20
|
486 |
# max_size=36, concurrency_count=14
|
487 |
block.queue(concurrency_count=5, max_size=20).launch(debug=True)
|