ffreemt committed · Commit 61dfec7 · Parent: 2327177

Update filename_list
.flake8 DELETED
@@ -1,21 +0,0 @@
-[flake8]
-ignore =
-    # E203 whitespace before ':'
-    E203
-    D203
-    # line too long
-    E501
-per-file-ignores =
-    # imported but unused
-    # __init__.py: F401
-    test_*.py: F401
-exclude =
-    .git
-    __pycache__
-    docs/source/conf.py
-    old
-    build
-    dist
-    .venv
-    pad*.py app-.py
-max-complexity = 25
app.py CHANGED
@@ -17,8 +17,12 @@ from ctransformers import AutoModelForCausalLM
 from huggingface_hub import hf_hub_download
 from loguru import logger
 
-URL = …
+filename_list = ["Wizard-Vicuna-7B-Uncensored.ggmlv3.q2_K.bin", "Wizard-Vicuna-7B-Uncensored.ggmlv3.q3_K_L.bin", "Wizard-Vicuna-7B-Uncensored.ggmlv3.q3_K_M.bin", "Wizard-Vicuna-7B-Uncensored.ggmlv3.q3_K_S.bin", "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_0.bin", "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_1.bin", "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin", "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_S.bin", "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_0.bin", "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_1.bin", "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_K_M.bin", "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_K_S.bin", "Wizard-Vicuna-7B-Uncensored.ggmlv3.q6_K.bin", "Wizard-Vicuna-7B-Uncensored.ggmlv3.q8_0.bin"]
+
+URL = "https://huggingface.co/TheBloke/Wizard-Vicuna-7B-Uncensored-GGML/raw/main/Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin"  # 4.05G
 MODEL_FILENAME = Path(URL).name
+MODEL_FILENAME = filename_list[0]
+
 REPO_ID = "/".join(urlparse(URL).path.strip('/').split('/')[:2])  # TheBloke/Wizard-Vicuna-7B-Uncensored-GGML
 
 DESTINATION_FOLDER = "models"
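For context, a minimal sketch (not the Space's exact code) of how these constants fit together, reusing the expressions from the hunk above; the `hf_hub_download` call matches the import shown, though its exact use elsewhere in app.py is not part of this diff:

```python
# Sketch: derive REPO_ID and MODEL_FILENAME from URL, then fetch the weights.
from pathlib import Path
from urllib.parse import urlparse

from huggingface_hub import hf_hub_download

URL = "https://huggingface.co/TheBloke/Wizard-Vicuna-7B-Uncensored-GGML/raw/main/Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin"

# The first two segments of the URL path form the Hub repo id.
REPO_ID = "/".join(urlparse(URL).path.strip("/").split("/")[:2])
assert REPO_ID == "TheBloke/Wizard-Vicuna-7B-Uncensored-GGML"

# The last path segment is the model file name.
MODEL_FILENAME = Path(URL).name

# Download the quantized weights (served from cache on repeat calls).
model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
print(model_path)
```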
@@ -212,7 +216,7 @@ class GenerationConfig:
 
 def format_prompt(system_prompt: str, user_prompt: str):
     """Format prompt based on: https://huggingface.co/spaces/mosaicml/mpt-30b-chat/blob/main/app.py."""
-    # TODO
+    # TODO: fix prompts
 
     system_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
     user_prompt = f"<|im_start|>user\n{user_prompt}<|im_end|>\n"
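The hunk only shows the first two pieces of the ChatML-style prompt; here is a plausible completion of `format_prompt`, where the assistant tail is an assumption modeled on the mpt-30b-chat app cited in the docstring:

```python
# Sketch of the full prompt assembly; the assistant tail is assumed, not
# taken from this diff.
def format_prompt(system_prompt: str, user_prompt: str) -> str:
    """Format prompt based on: https://huggingface.co/spaces/mosaicml/mpt-30b-chat/blob/main/app.py."""
    system_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
    user_prompt = f"<|im_start|>user\n{user_prompt}<|im_end|>\n"
    assistant_prompt = "<|im_start|>assistant\n"  # model completes from here
    return f"{system_prompt}{user_prompt}{assistant_prompt}"


print(format_prompt("You are a helpful assistant.", "Hello!"))
```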
@@ -273,7 +277,7 @@ LLM = AutoModelForCausalLM.from_pretrained(
     # "TheBloke/WizardCoder-15B-1.0-GGML",
     REPO_ID,  # DESTINATION_FOLDER,  # model_path_or_repo_id: str required
     model_file=_,
-    model_type="llama",  # "starcoder", AutoConfig.from_pretrained(…
+    model_type="llama",  # "starcoder", AutoConfig.from_pretrained(REPO_ID)
     threads=cpu_count,
 )
 
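A self-contained sketch of the `from_pretrained` call this hunk touches; `cpu_count` is computed elsewhere in app.py, so deriving it from `os` here is an assumption, and the explicit file name stands in for the `_` variable in the diff:

```python
# Sketch: load a GGML checkpoint from the Hub with ctransformers.
import os

from ctransformers import AutoModelForCausalLM

cpu_count = os.cpu_count() or 1

llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Wizard-Vicuna-7B-Uncensored-GGML",  # REPO_ID in the diff
    model_file="Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin",
    model_type="llama",  # the GGML loader must be told the architecture family
    threads=cpu_count,
)
print(llm("Q: Name a color. A:", max_new_tokens=8))
```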
@@ -347,22 +351,18 @@ with gr.Blocks(
     # """<center><a href="https://huggingface.co/spaces/mikeee/mpt-30b-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate"></a> and spin a CPU UPGRADE to avoid the queue</center>"""
     # )
     gr.Markdown(
-        f"""<…
-…
-        message to appear. Average streaming rate ~1 sec/chat. The bot only speaks English.
+        f"""<h5><center><{REPO_ID}>{MODEL_FILENAME}</center></h4>
+        The bot only speaks English.
 
-        Most examples are meant for another model.
+        Most examples are meant for another model.
+        You probably should try to test
         some related prompts.
-
-        Try to refresh the browser and try again when occasionally errors occur.
-
-        It takes about >100 seconds to get a response. Restarting the space takes about 2 minutes if the space is asleep due to inactivity. If the space crashes for some reason, it will also take about 2 minutes to restart. You need to refresh the browser to reload the new space.
         """,
         elem_classes="xsmall",
     )
 
     # chatbot = gr.Chatbot().style(height=700)  # 500
-    chatbot = gr.Chatbot(height=…
+    chatbot = gr.Chatbot(height=500)
     buff = gr.Textbox(show_label=False, visible=False)
     with gr.Row():
         with gr.Column(scale=5):
@@ -371,7 +371,7 @@ with gr.Blocks(
                 placeholder="Ask me anything (press Enter or click Submit to send)",
                 show_label=False,
             ).style(container=False)
-        with gr.Column(scale=1, min_width=…
+        with gr.Column(scale=1, min_width=50):
             with gr.Row():
                 submit = gr.Button("Submit", elem_classes="xsmall")
                 stop = gr.Button("Stop", visible=False)