Update app.py
app.py CHANGED
@@ -57,55 +57,6 @@ if _:
     url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q4_K_M.bin"  # 2.87G
     url = "https://huggingface.co/TheBloke/llama2_7b_chat_uncensored-GGML/blob/main/llama2_7b_chat_uncensored.ggmlv3.q4_K_M.bin"  # 4.08G
 
-
-prompt_template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
-
-### Instruction: {user_prompt}
-
-### Response:
-"""
-
-prompt_template = """System: You are a helpful,
-respectful and honest assistant. Always answer as
-helpfully as possible, while being safe. Your answers
-should not include any harmful, unethical, racist,
-sexist, toxic, dangerous, or illegal content. Please
-ensure that your responses are socially unbiased and
-positive in nature. If a question does not make any
-sense, or is not factually coherent, explain why instead
-of answering something not correct. If you don't know
-the answer to a question, please don't share false
-information.
-User: {prompt}
-Assistant: """
-
-prompt_template = """System: You are a helpful assistant.
-User: {prompt}
-Assistant: """
-
-prompt_template = """Question: {question}
-Answer: Let's work this out in a step by step way to be sure we have the right answer."""
-
-prompt_template = """[INST] <>
-You are a helpful, respectful and honest assistant. Always answer as helpfully as possible assistant. Think step by step.
-<>
-
-What NFL team won the Super Bowl in the year Justin Bieber was born?
-[/INST]"""
-
-prompt_template = """[INST] <<SYS>>
-You are an unhelpful assistant. Always answer as helpfully as possible. Think step by step. <</SYS>>
-
-{question} [/INST]
-"""
-
-prompt_template = """[INST] <<SYS>>
-You are a helpful assistant.
-<</SYS>>
-
-{question} [/INST]
-"""
-
 prompt_template = """### HUMAN:
 {question}
 
@@ -174,7 +125,8 @@ def generate(
     # print(_)
 
     prompt = prompt_template.format(question=question)
-
+    print("prompt: " ,prompt)
+
     return llm(
         prompt,
         **asdict(config),
@@ -195,21 +147,6 @@ def user1(user_message, history):
     history.append([user_message, None])
     return "", history  # clear user_message
 
-
-def bot_(history):
-    user_message = history[-1][0]
-    resp = random.choice(["How are you?", "I love you", "I'm very hungry"])
-    bot_message = user_message + ": " + resp
-    history[-1][1] = ""
-    for character in bot_message:
-        history[-1][1] += character
-        time.sleep(0.02)
-        yield history
-
-    history[-1][1] = resp
-    yield history
-
-
 def bot(history):
     user_message = history[-1][0]
     response = []
@@ -278,32 +215,12 @@ def predict_api(prompt):
 
     return response
 
-
-css = """
-    .importantButton {
-        background: linear-gradient(45deg, #7e0570,#5d1c99, #6e00ff) !important;
-        border: none !important;
-    }
-    .importantButton:hover {
-        background: linear-gradient(45deg, #ff00e0,#8500ff, #6e00ff) !important;
-        border: none !important;
-    }
-    .disclaimer {font-variant-caps: all-small-caps; font-size: xx-small;}
-    .xsmall {font-size: x-small;}
-"""
-etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
 logger.info("start block")
 
 with gr.Blocks(
     title=f"{Path(model_loc).name}",
-    theme=gr.themes.Soft(text_size="sm", spacing_size="sm"),
-    # css=css,
 ) as block:
-    # chatbot = gr.Chatbot().style(height=700)  # 500
     chatbot = gr.Chatbot(height=500)
-
-    # buff = gr.Textbox(show_label=False, visible=True)
-
     with gr.Row():
         with gr.Column(scale=5):
             msg = gr.Textbox(
@@ -379,25 +296,11 @@ with gr.Blocks(
         api_name="api",
     )
 
-    # block.load(update_buff, [], buff, every=1)
-    # block.load(update_buff, [buff_var], [buff_var, buff], every=1)
-
 # concurrency_count=5, max_size=20
 # max_size=36, concurrency_count=14
 # CPU cpu_count=2 16G, model 7G
 # CPU UPGRADE cpu_count=8 32G, model 7G
 
-# does not work
-_ = """
-# _ = int(psutil.virtual_memory().total / 10**9 // file_size - 1)
-# concurrency_count = max(_, 1)
-if psutil.cpu_count(logical=False) >= 8:
-    # concurrency_count = max(int(32 / file_size) - 1, 1)
-else:
-    # concurrency_count = max(int(16 / file_size) - 1, 1)
-# """
-
 concurrency_count = 1
 logger.info(f"{concurrency_count=}")
-
 block.queue(concurrency_count=concurrency_count, max_size=5).launch(debug=True)
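For reference, a minimal, self-contained sketch of how the surviving "### HUMAN:" prompt template and the newly added print call fit into generate(). The GenerationConfig dataclass and the llm callable below are placeholder stand-ins for the real model objects defined elsewhere in app.py, and the "### RESPONSE:" tail of the template is an assumption; the diff only shows the template's first two lines.

from dataclasses import asdict, dataclass


@dataclass
class GenerationConfig:
    # Placeholder generation settings; the real app defines its own config.
    max_new_tokens: int = 256
    temperature: float = 0.2


def llm(prompt: str, **kwargs) -> str:
    # Stand-in for the model call used in app.py.
    return f"(model output for a {len(prompt)}-char prompt, settings={kwargs})"


# Only the first two lines of this template appear in the diff; the tail is assumed.
prompt_template = """### HUMAN:
{question}

### RESPONSE:"""


def generate(question: str, config: GenerationConfig = GenerationConfig()) -> str:
    prompt = prompt_template.format(question=question)
    print("prompt: ", prompt)  # the debug line this commit adds
    return llm(prompt, **asdict(config))


if __name__ == "__main__":
    print(generate("What is the capital of France?"))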