ffreemt committed · Commit 61dfec7 · Parent: 2327177

Update filename_list
.flake8 DELETED
@@ -1,21 +0,0 @@
-[flake8]
-ignore =
-    # E203 whitespace before ':'
-    E203
-    D203
-    # line too long
-    E501
-per-file-ignores =
-    # imported but unused
-    # __init__.py: F401
-    test_*.py: F401
-exclude =
-    .git
-    __pycache__
-    docs/source/conf.py
-    old
-    build
-    dist
-    .venv
-    pad*.py app-.py
-max-complexity = 25
app.py CHANGED
@@ -17,8 +17,12 @@ from ctransformers import AutoModelForCausalLM
 from huggingface_hub import hf_hub_download
 from loguru import logger
 
-URL = …
+filename_list = ["Wizard-Vicuna-7B-Uncensored.ggmlv3.q2_K.bin", "Wizard-Vicuna-7B-Uncensored.ggmlv3.q3_K_L.bin", "Wizard-Vicuna-7B-Uncensored.ggmlv3.q3_K_M.bin", "Wizard-Vicuna-7B-Uncensored.ggmlv3.q3_K_S.bin", "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_0.bin", "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_1.bin", "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin", "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_S.bin", "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_0.bin", "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_1.bin", "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_K_M.bin", "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_K_S.bin", "Wizard-Vicuna-7B-Uncensored.ggmlv3.q6_K.bin", "Wizard-Vicuna-7B-Uncensored.ggmlv3.q8_0.bin"]
+
+URL = "https://huggingface.co/TheBloke/Wizard-Vicuna-7B-Uncensored-GGML/raw/main/Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin"  # 4.05G
 MODEL_FILENAME = Path(URL).name
+MODEL_FILENAME = filename_list[0]
+
 REPO_ID = "/".join(urlparse(URL).path.strip('/').split('/')[:2])  # TheBloke/Wizard-Vicuna-7B-Uncensored-GGML
 
 DESTINATION_FOLDER = "models"
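For context, a minimal sketch (not the Space's exact code) of how these constants fit together, reusing the expressions from the hunk above; the `hf_hub_download` call matches the import shown, though its exact use elsewhere in app.py is not part of this diff:

```python
# Sketch: derive REPO_ID and MODEL_FILENAME from URL, then fetch the weights.
from pathlib import Path
from urllib.parse import urlparse

from huggingface_hub import hf_hub_download

URL = "https://huggingface.co/TheBloke/Wizard-Vicuna-7B-Uncensored-GGML/raw/main/Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin"

# The first two segments of the URL path form the Hub repo id.
REPO_ID = "/".join(urlparse(URL).path.strip("/").split("/")[:2])
assert REPO_ID == "TheBloke/Wizard-Vicuna-7B-Uncensored-GGML"

# The last path segment is the model file name.
MODEL_FILENAME = Path(URL).name

# Download the quantized weights (served from cache on repeat calls).
model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
print(model_path)
```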
@@ -212,7 +216,7 @@ class GenerationConfig:
 
 def format_prompt(system_prompt: str, user_prompt: str):
     """Format prompt based on: https://huggingface.co/spaces/mosaicml/mpt-30b-chat/blob/main/app.py."""
-    # TODO
+    # TODO: fix prompts
 
     system_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
     user_prompt = f"<|im_start|>user\n{user_prompt}<|im_end|>\n"
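The hunk only shows the first two pieces of the ChatML-style prompt; here is a plausible completion of `format_prompt`, where the assistant tail is an assumption modeled on the mpt-30b-chat app cited in the docstring:

```python
# Sketch of the full prompt assembly; the assistant tail is assumed, not
# taken from this diff.
def format_prompt(system_prompt: str, user_prompt: str) -> str:
    """Format prompt based on: https://huggingface.co/spaces/mosaicml/mpt-30b-chat/blob/main/app.py."""
    system_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
    user_prompt = f"<|im_start|>user\n{user_prompt}<|im_end|>\n"
    assistant_prompt = "<|im_start|>assistant\n"  # model completes from here
    return f"{system_prompt}{user_prompt}{assistant_prompt}"


print(format_prompt("You are a helpful assistant.", "Hello!"))
```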
@@ -273,7 +277,7 @@ LLM = AutoModelForCausalLM.from_pretrained(
     # "TheBloke/WizardCoder-15B-1.0-GGML",
     REPO_ID,  # DESTINATION_FOLDER,  # model_path_or_repo_id: str required
     model_file=_,
-    model_type="llama",  # "starcoder", AutoConfig.from_pretrained(…
+    model_type="llama",  # "starcoder", AutoConfig.from_pretrained(REPO_ID)
     threads=cpu_count,
 )
 
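A self-contained sketch of the `from_pretrained` call this hunk touches; `cpu_count` is computed elsewhere in app.py, so deriving it from `os` here is an assumption, and the explicit file name stands in for the `_` variable in the diff:

```python
# Sketch: load a GGML checkpoint from the Hub with ctransformers.
import os

from ctransformers import AutoModelForCausalLM

cpu_count = os.cpu_count() or 1

llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Wizard-Vicuna-7B-Uncensored-GGML",  # REPO_ID in the diff
    model_file="Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin",
    model_type="llama",  # the GGML loader must be told the architecture family
    threads=cpu_count,
)
print(llm("Q: Name a color. A:", max_new_tokens=8))
```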
@@ -347,22 +351,18 @@ with gr.Blocks(
     # """<center><a href="https://huggingface.co/spaces/mikeee/mpt-30b-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate"></a> and spin a CPU UPGRADE to avoid the queue</center>"""
     # )
     gr.Markdown(
-        f"""<…
-…
-        message to appear. Average streaming rate ~1 sec/chat. The bot only speaks English.
+        f"""<h5><center><{REPO_ID}>{MODEL_FILENAME}</center></h4>
+        The bot only speaks English.
 
-        Most examples are meant for another model.
+        Most examples are meant for another model.
+        You probably should try to test
         some related prompts.
-
-        Try to refresh the browser and try again when occasionally errors occur.
-
-        It takes about >100 seconds to get a response. Restarting the space takes about 2 minutes if the space is asleep due to inactivity. If the space crashes for some reason, it will also take about 2 minutes to restart. You need to refresh the browser to reload the new space.
         """,
         elem_classes="xsmall",
     )
 
     # chatbot = gr.Chatbot().style(height=700)  # 500
-    chatbot = gr.Chatbot(height=…
+    chatbot = gr.Chatbot(height=500)
     buff = gr.Textbox(show_label=False, visible=False)
     with gr.Row():
         with gr.Column(scale=5):
@@ -371,7 +371,7 @@ with gr.Blocks(
                 placeholder="Ask me anything (press Enter or click Submit to send)",
                 show_label=False,
             ).style(container=False)
-        with gr.Column(scale=1, min_width=…
+        with gr.Column(scale=1, min_width=50):
             with gr.Row():
                 submit = gr.Button("Submit", elem_classes="xsmall")
                 stop = gr.Button("Stop", visible=False)