llama2-7b-chat-uncensored-test

Runtime error

App Files Community

Dalleon committed on Sep 13, 2023

Commit

76a48e1

•

1 Parent(s): d3b6503

Update app.py

Browse files

Files changed (1) hide show

app.py +2 -99

app.py CHANGED Viewed

@@ -57,55 +57,6 @@ if _:
     url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q4_K_M.bin"  # 2.87G
     url = "https://huggingface.co/TheBloke/llama2_7b_chat_uncensored-GGML/blob/main/llama2_7b_chat_uncensored.ggmlv3.q4_K_M.bin"  # 4.08G
-prompt_template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
-### Instruction: {user_prompt}
-### Response:
-"""
-prompt_template = """System: You are a helpful,
-respectful and honest assistant. Always answer as
-helpfully as possible, while being safe.  Your answers
-should not include any harmful, unethical, racist,
-sexist, toxic, dangerous, or illegal content. Please
-ensure that your responses are socially unbiased and
-positive in nature. If a question does not make any
-sense, or is not factually coherent, explain why instead
-of answering something not correct. If you don't know
-the answer to a question, please don't share false
-information.
-User: {prompt}
-Assistant: """
-prompt_template = """System: You are a helpful assistant.
-User: {prompt}
-Assistant: """
-prompt_template = """Question: {question}
-Answer: Let's work this out in a step by step way to be sure we have the right answer."""
-prompt_template = """[INST] <>
-You are a helpful, respectful and honest assistant. Always answer as helpfully as possible assistant. Think step by step.
-<>
-What NFL team won the Super Bowl in the year Justin Bieber was born?
-[/INST]"""
-prompt_template = """[INST] <<SYS>>
-You are an unhelpful assistant. Always answer as helpfully as possible. Think step by step. <</SYS>>
-{question} [/INST]
-"""
-prompt_template = """[INST] <<SYS>>
-You are a helpful assistant.
-<</SYS>>
-{question} [/INST]
-"""
 prompt_template = """### HUMAN:
 {question}
@@ -174,7 +125,8 @@ def generate(
     # print(_)
     prompt = prompt_template.format(question=question)
     return llm(
         prompt,
         **asdict(config),
@@ -195,21 +147,6 @@ def user1(user_message, history):
     history.append([user_message, None])
     return "", history  # clear user_message
-def bot_(history):
-    user_message = history[-1][0]
-    resp = random.choice(["How are you?", "I love you", "I'm very hungry"])
-    bot_message = user_message + ": " + resp
-    history[-1][1] = ""
-    for character in bot_message:
-        history[-1][1] += character
-        time.sleep(0.02)
-        yield history
-    history[-1][1] = resp
-    yield history
 def bot(history):
     user_message = history[-1][0]
     response = []
@@ -278,32 +215,12 @@ def predict_api(prompt):
     return response
-css = """
-    .importantButton {
-        background: linear-gradient(45deg, #7e0570,#5d1c99, #6e00ff) !important;
-        border: none !important;
-    }
-    .importantButton:hover {
-        background: linear-gradient(45deg, #ff00e0,#8500ff, #6e00ff) !important;
-        border: none !important;
-    }
-    .disclaimer {font-variant-caps: all-small-caps; font-size: xx-small;}
-    .xsmall {font-size: x-small;}
-"""
-etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
 logger.info("start block")
 with gr.Blocks(
     title=f"{Path(model_loc).name}",
-    theme=gr.themes.Soft(text_size="sm", spacing_size="sm"),
-#    css=css,
 ) as block:
-    # chatbot = gr.Chatbot().style(height=700)  # 500
     chatbot = gr.Chatbot(height=500)
-    # buff = gr.Textbox(show_label=False, visible=True)
     with gr.Row():
         with gr.Column(scale=5):
             msg = gr.Textbox(
@@ -379,25 +296,11 @@ with gr.Blocks(
         api_name="api",
     )
-    # block.load(update_buff, [], buff, every=1)
-    # block.load(update_buff, [buff_var], [buff_var, buff], every=1)
 # concurrency_count=5, max_size=20
 # max_size=36, concurrency_count=14
 # CPU cpu_count=2 16G, model 7G
 # CPU UPGRADE cpu_count=8 32G, model 7G
-# does not work
-_ = """
-# _ = int(psutil.virtual_memory().total / 10**9 // file_size - 1)
-# concurrency_count = max(_, 1)
-if psutil.cpu_count(logical=False) >= 8:
-    # concurrency_count = max(int(32 / file_size) - 1, 1)
-else:
-    # concurrency_count = max(int(16 / file_size) - 1, 1)
-# """
 concurrency_count = 1
 logger.info(f"{concurrency_count=}")
 block.queue(concurrency_count=concurrency_count, max_size=5).launch(debug=True)

     url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q4_K_M.bin"  # 2.87G
     url = "https://huggingface.co/TheBloke/llama2_7b_chat_uncensored-GGML/blob/main/llama2_7b_chat_uncensored.ggmlv3.q4_K_M.bin"  # 4.08G
 prompt_template = """### HUMAN:
 {question}
     # print(_)
     prompt = prompt_template.format(question=question)
+    print("prompt: " ,prompt)
     return llm(
         prompt,
         **asdict(config),
     history.append([user_message, None])
     return "", history  # clear user_message
 def bot(history):
     user_message = history[-1][0]
     response = []
     return response
 logger.info("start block")
 with gr.Blocks(
     title=f"{Path(model_loc).name}",
 ) as block:
     chatbot = gr.Chatbot(height=500)
     with gr.Row():
         with gr.Column(scale=5):
             msg = gr.Textbox(
         api_name="api",
     )
 # concurrency_count=5, max_size=20
 # max_size=36, concurrency_count=14
 # CPU cpu_count=2 16G, model 7G
 # CPU UPGRADE cpu_count=8 32G, model 7G
 concurrency_count = 1
 logger.info(f"{concurrency_count=}")
 block.queue(concurrency_count=concurrency_count, max_size=5).launch(debug=True)