ffreemt committed on
Commit
6f9f106
1 Parent(s): ad353f0

Update from collections import deque

Browse files
Files changed (1) hide show
  1. app.py +46 -30
app.py CHANGED
@@ -6,8 +6,8 @@ import os
6
  import platform
7
  import random
8
  import time
 
9
  from pathlib import Path
10
- from queue import deque
11
  from threading import Thread
12
  from typing import Any, Dict, List, Union
13
 
@@ -134,7 +134,7 @@ You are a helpful assistant. Think step by step.
134
  {input}
135
  ### RESPONSE:"""
136
 
137
- prompt_template = """You are a helpful assistant. Think step by step.
138
  {history}
139
  ### HUMAN:
140
  {input}
@@ -186,7 +186,7 @@ class DequeCallbackHandler(BaseCallbackHandler):
186
  self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
187
  ) -> None:
188
  """Run when LLM errors."""
189
- self.q.put(sig_end)
190
 
191
 
192
  _ = psutil.cpu_count(logical=False) - 1
@@ -203,6 +203,7 @@ except Exception as exc_:
203
  raise SystemExit(1) from exc_
204
 
205
  config = Config()
 
206
  config.stream = True
207
  config.stop = stop
208
  config.threads=cpu_count
@@ -241,7 +242,31 @@ conversation = ConversationChain(
241
  memory=memory,
242
  verbose=True,
243
  )
244
- logger.debug(f"{conversation.prompt.template=}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
 
246
  # conversation.predict(input="Hello, my name is Andrea")
247
 
@@ -286,6 +311,7 @@ def bot(history):
286
  response = []
287
  flag = 1
288
  then = time.time()
 
289
  with about_time() as atime: # type: ignore
290
  while True:
291
  if deq:
@@ -311,33 +337,18 @@ def bot(history):
311
 
312
 
313
  def predict_api(user_prompt):
314
- logger.debug(f"{user_prompt=}")
 
 
 
315
  try:
316
- # user_prompt = prompt
317
- Config(
318
- temperature=0.2,
319
- top_k=10,
320
- top_p=0.9,
321
- repetition_penalty=1.0,
322
- max_new_tokens=512, # adjust as needed
323
- seed=42,
324
- reset=True, # reset history (cache)
325
- stream=False,
326
- # threads=cpu_count,
327
- # stop=prompt_prefix[1:2],
328
- )
329
  _ = """
330
  response = generate(
331
  prompt,
332
  config=config,
333
  )
334
  # """
335
- conversation1 = ConversationChain(
336
- llm=LLM,
337
- prompt=prompt,
338
- verbose=True,
339
- )
340
- response = conversation1.predict(input=user_prompt)
341
  logger.debug(f"api: {response=}")
342
  except Exception as exc:
343
  logger.error(exc)
@@ -368,6 +379,8 @@ examples_list = [
368
  [
369
  "What NFL team won the Super Bowl in the year Justin Bieber was born? Think step by step."
370
  ],
 
 
371
  ["How to pick a lock? Provide detailed steps."],
372
  ["If it takes 10 hours to dry 10 clothes, assuming all the clothes are hanged together at the same time for drying , then how long will it take to dry a cloth?"],
373
  ["is infinity + 1 bigger than infinity?"],
@@ -506,6 +519,8 @@ with gr.Blocks(
506
  cancels=[msg_submit_event, submit_click_event],
507
  queue=False,
508
  )
 
 
509
  clear.click(lambda: None, None, chatbot, queue=False)
510
 
511
  with gr.Accordion("For Chat/Translation API", open=False, visible=False):
@@ -513,12 +528,13 @@ with gr.Blocks(
513
  api_btn = gr.Button("Go", variant="primary")
514
  out_text = gr.Text()
515
 
516
- api_btn.click(
517
- predict_api,
518
- input_text,
519
- out_text,
520
- api_name="api",
521
- )
 
522
 
523
  # block.load(update_buff, [], buff, every=1)
524
  # block.load(update_buff, [buff_var], [buff_var, buff], every=1)
 
6
  import platform
7
  import random
8
  import time
9
+ from collections import deque
10
  from pathlib import Path
 
11
  from threading import Thread
12
  from typing import Any, Dict, List, Union
13
 
 
134
  {input}
135
  ### RESPONSE:"""
136
 
137
+ prompt_template = """You are a helpful assistant. Let's think step by step.
138
  {history}
139
  ### HUMAN:
140
  {input}
 
186
  self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
187
  ) -> None:
188
  """Run when LLM errors."""
189
+ self.q.append(sig_end)
190
 
191
 
192
  _ = psutil.cpu_count(logical=False) - 1
 
203
  raise SystemExit(1) from exc_
204
 
205
  config = Config()
206
+ # Config(top_k=40, top_p=0.95, temperature=0.8, repetition_penalty=1.1, last_n_tokens=64, seed=-1, batch_size=8, threads=-1, max_new_tokens=256, stop=None, stream=False, reset=True, context_length=-1, gpu_layers=0)
207
  config.stream = True
208
  config.stop = stop
209
  config.threads=cpu_count
 
242
  memory=memory,
243
  verbose=True,
244
  )
245
+ logger.debug(f"{conversation.prompt.template=}") # type: ignore
246
+
247
+ # for api access ===
248
+ config = Config()
249
+ # Config(top_k=40, top_p=0.95, temperature=0.8, repetition_penalty=1.1, last_n_tokens=64, seed=-1, batch_size=8, threads=-1, max_new_tokens=256, stop=None, stream=False, reset=True, context_length=-1, gpu_layers=0)
250
+ config.stop = stop
251
+ config.threads=cpu_count
252
+
253
+ try:
254
+ LLM_api = CTransformers(
255
+ model=model_loc,
256
+ model_type="llama",
257
+ # callbacks=[StreamingStdOutCallbackHandler(), deqcb],
258
+ callbacks=[StreamingStdOutCallbackHandler()],
259
+ **vars(config),
260
+ )
261
+ conversation_api = ConversationChain(
262
+ llm=LLM_api, # need a separate LLM, or else deq may be messed up
263
+ prompt=prompt,
264
+ verbose=True,
265
+ )
266
+ except Exception as exc_:
267
+ logger.error(exc_)
268
+ conversation_api = None
269
+ logger.warning("Not able to instantiate conversation_api, api will not work")
270
 
271
  # conversation.predict(input="Hello, my name is Andrea")
272
 
 
311
  response = []
312
  flag = 1
313
  then = time.time()
314
+ prefix = "" # to please pyright
315
  with about_time() as atime: # type: ignore
316
  while True:
317
  if deq:
 
337
 
338
 
339
  def predict_api(user_prompt):
340
+ if conversation_api is None:
341
+ return "conversation_api is None, probably due to insufficient memory, api not usable"
342
+
343
+ logger.debug(f"api: {user_prompt=}")
344
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
345
  _ = """
346
  response = generate(
347
  prompt,
348
  config=config,
349
  )
350
  # """
351
+ response = conversation_api.predict(input=user_prompt)
 
 
 
 
 
352
  logger.debug(f"api: {response=}")
353
  except Exception as exc:
354
  logger.error(exc)
 
379
  [
380
  "What NFL team won the Super Bowl in the year Justin Bieber was born? Think step by step."
381
  ],
382
+ ["When was Justin Bieber born?"],
383
+ ["What NFL team won the Super Bowl in 1994?"],
384
  ["How to pick a lock? Provide detailed steps."],
385
  ["If it takes 10 hours to dry 10 clothes, assuming all the clothes are hanged together at the same time for drying , then how long will it take to dry a cloth?"],
386
  ["is infinity + 1 bigger than infinity?"],
 
519
  cancels=[msg_submit_event, submit_click_event],
520
  queue=False,
521
  )
522
+
523
+ # TODO: clear conversation memory as well
524
  clear.click(lambda: None, None, chatbot, queue=False)
525
 
526
  with gr.Accordion("For Chat/Translation API", open=False, visible=False):
 
528
  api_btn = gr.Button("Go", variant="primary")
529
  out_text = gr.Text()
530
 
531
+ if conversation_api is not None:
532
+ api_btn.click(
533
+ predict_api,
534
+ input_text,
535
+ out_text,
536
+ api_name="api",
537
+ )
538
 
539
  # block.load(update_buff, [], buff, every=1)
540
  # block.load(update_buff, [buff_var], [buff_var, buff], every=1)