ffreemt committed on
Commit
4180709
1 Parent(s): 6f9f106
Files changed (1) hide show
  1. app.py +13 -19
app.py CHANGED
@@ -29,7 +29,7 @@ from langchain.schema import LLMResult
29
  from loguru import logger
30
 
31
  deq = deque()
32
- sig_end = object() # signals the processing is done
33
 
34
  # from langchain.llms import OpenAI
35
 
@@ -206,7 +206,7 @@ config = Config()
206
  # Config(top_k=40, top_p=0.95, temperature=0.8, repetition_penalty=1.1, last_n_tokens=64, seed=-1, batch_size=8, threads=-1, max_new_tokens=256, stop=None, stream=False, reset=True, context_length=-1, gpu_layers=0)
207
  config.stream = True
208
  config.stop = stop
209
- config.threads=cpu_count
210
 
211
  deqcb = DequeCallbackHandler(deq)
212
 
@@ -221,14 +221,13 @@ LLM = CTransformers(
221
 
222
  logger.info(f"done load llm {model_loc=} {file_size=}G")
223
 
224
-
225
  prompt = PromptTemplate(
226
- input_variables=['history', 'input'],
227
  output_parser=None,
228
  partial_variables={},
229
  template=prompt_template,
230
- template_format='f-string',
231
- validate_template=True
232
  )
233
 
234
  memory = ConversationBufferWindowMemory(
@@ -248,7 +247,7 @@ logger.debug(f"{conversation.prompt.template=}") # type: ignore
248
  config = Config()
249
  # Config(top_k=40, top_p=0.95, temperature=0.8, repetition_penalty=1.1, last_n_tokens=64, seed=-1, batch_size=8, threads=-1, max_new_tokens=256, stop=None, stream=False, reset=True, context_length=-1, gpu_layers=0)
250
  config.stop = stop
251
- config.threads=cpu_count
252
 
253
  try:
254
  LLM_api = CTransformers(
@@ -332,7 +331,7 @@ def bot(history):
332
  f"{atime.duration/len(''.join(response)):.2f}s/char)" # type: ignore
333
  )
334
 
335
- history[-1][1] = "".join(response) + f"\n{_}"
336
  yield history
337
 
338
 
@@ -373,8 +372,8 @@ css = """
373
  """
374
  etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
375
  examples_list = [
376
- ["Hello I am mike."],
377
- ["What's my name?"],
378
  ["What NFL team won the Super Bowl in the year Justin Bieber was born?"],
379
  [
380
  "What NFL team won the Super Bowl in the year Justin Bieber was born? Think step by step."
@@ -382,7 +381,9 @@ examples_list = [
382
  ["When was Justin Bieber born?"],
383
  ["What NFL team won the Super Bowl in 1994?"],
384
  ["How to pick a lock? Provide detailed steps."],
385
- ["If it takes 10 hours to dry 10 clothes, assuming all the clothes are hanged together at the same time for drying , then how long will it take to dry a cloth?"],
 
 
386
  ["is infinity + 1 bigger than infinity?"],
387
  ["Explain the plot of Cinderella in a sentence."],
388
  [
@@ -429,7 +430,7 @@ with gr.Blocks(
429
  gr.Markdown(
430
  f"""<h5><center>{Path(model_loc).name}</center></h4>
431
  The bot can conduct multi-turn conversations, i.e. it remembers past dialogs. The process time is longer.
432
- It typically takes around 120 seconds for the first response to appear.
433
 
434
  Most examples are meant for another model.
435
  You probably should try to test
@@ -437,11 +438,8 @@ with gr.Blocks(
437
  elem_classes="xsmall",
438
  )
439
 
440
- # chatbot = gr.Chatbot().style(height=700) # 500
441
  chatbot = gr.Chatbot(height=500)
442
 
443
- # buff = gr.Textbox(show_label=False, visible=True)
444
-
445
  with gr.Row():
446
  with gr.Column(scale=5):
447
  msg = gr.Textbox(
@@ -482,7 +480,6 @@ with gr.Blocks(
482
  examples_per_page=40,
483
  )
484
 
485
- # with gr.Row():
486
  with gr.Accordion("Disclaimer", open=False):
487
  _ = Path(model_loc).name
488
  gr.Markdown(
@@ -536,9 +533,6 @@ with gr.Blocks(
536
  api_name="api",
537
  )
538
 
539
- # block.load(update_buff, [], buff, every=1)
540
- # block.load(update_buff, [buff_var], [buff_var, buff], every=1)
541
-
542
  # concurrency_count=5, max_size=20
543
  # max_size=36, concurrency_count=14
544
  # CPU cpu_count=2 16G, model 7G
 
29
  from loguru import logger
30
 
31
  deq = deque()
32
+ sig_end = object() # signals the processing is done
33
 
34
  # from langchain.llms import OpenAI
35
 
 
206
  # Config(top_k=40, top_p=0.95, temperature=0.8, repetition_penalty=1.1, last_n_tokens=64, seed=-1, batch_size=8, threads=-1, max_new_tokens=256, stop=None, stream=False, reset=True, context_length=-1, gpu_layers=0)
207
  config.stream = True
208
  config.stop = stop
209
+ config.threads = cpu_count
210
 
211
  deqcb = DequeCallbackHandler(deq)
212
 
 
221
 
222
  logger.info(f"done load llm {model_loc=} {file_size=}G")
223
 
 
224
  prompt = PromptTemplate(
225
+ input_variables=["history", "input"],
226
  output_parser=None,
227
  partial_variables={},
228
  template=prompt_template,
229
+ template_format="f-string",
230
+ validate_template=True,
231
  )
232
 
233
  memory = ConversationBufferWindowMemory(
 
247
  config = Config()
248
  # Config(top_k=40, top_p=0.95, temperature=0.8, repetition_penalty=1.1, last_n_tokens=64, seed=-1, batch_size=8, threads=-1, max_new_tokens=256, stop=None, stream=False, reset=True, context_length=-1, gpu_layers=0)
249
  config.stop = stop
250
+ config.threads = cpu_count
251
 
252
  try:
253
  LLM_api = CTransformers(
 
331
  f"{atime.duration/len(''.join(response)):.2f}s/char)" # type: ignore
332
  )
333
 
334
+ history[-1][1] = "".join(response) + f"\n{_}"
335
  yield history
336
 
337
 
 
372
  """
373
  etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
374
  examples_list = [
375
+ ["Hello I am mike."],
376
+ ["What's my name?"],
377
  ["What NFL team won the Super Bowl in the year Justin Bieber was born?"],
378
  [
379
  "What NFL team won the Super Bowl in the year Justin Bieber was born? Think step by step."
 
381
  ["When was Justin Bieber born?"],
382
  ["What NFL team won the Super Bowl in 1994?"],
383
  ["How to pick a lock? Provide detailed steps."],
384
+ [
385
+ "If it takes 10 hours to dry 10 clothes, assuming all the clothes are hanged together at the same time for drying , then how long will it take to dry a cloth?"
386
+ ],
387
  ["is infinity + 1 bigger than infinity?"],
388
  ["Explain the plot of Cinderella in a sentence."],
389
  [
 
430
  gr.Markdown(
431
  f"""<h5><center>{Path(model_loc).name}</center></h4>
432
  The bot can conduct multi-turn conversations, i.e. it remembers past dialogs. The process time is longer.
433
+ It typically takes about 120 seconds for the first response to appear.
434
 
435
  Most examples are meant for another model.
436
  You probably should try to test
 
438
  elem_classes="xsmall",
439
  )
440
 
 
441
  chatbot = gr.Chatbot(height=500)
442
 
 
 
443
  with gr.Row():
444
  with gr.Column(scale=5):
445
  msg = gr.Textbox(
 
480
  examples_per_page=40,
481
  )
482
 
 
483
  with gr.Accordion("Disclaimer", open=False):
484
  _ = Path(model_loc).name
485
  gr.Markdown(
 
533
  api_name="api",
534
  )
535
 
 
 
 
536
  # concurrency_count=5, max_size=20
537
  # max_size=36, concurrency_count=14
538
  # CPU cpu_count=2 16G, model 7G