NGUYEN, Xuan Phi committed on
Commit
5622434
1 Parent(s): c14f353
Files changed (1)
  1. app.py +6 -5
app.py CHANGED
@@ -470,7 +470,7 @@ def chat_response(message, history, temperature: float, max_tokens: int, system_
     return f'{out}'


-def vllm_abort(self: LLM):
+def vllm_abort(self: Any):
     scheduler = self.llm_engine.scheduler
     for state_queue in [scheduler.waiting, scheduler.running, scheduler.swapped]:
         for seq_group in state_queue:
@@ -482,7 +482,8 @@ def vllm_abort(self: LLM):
                     continue
                 scheduler.free_seq(seq, SequenceStatus.FINISHED_ABORTED)

-def _vllm_run_engine(self: LLM, use_tqdm: bool = False) -> Dict[str, RequestOutput]:
+# def _vllm_run_engine(self: LLM, use_tqdm: bool = False) -> Dict[str, RequestOutput]:
+def _vllm_run_engine(self: Any, use_tqdm: bool = False) -> Dict[str, Any]:
     # Initialize tqdm.
     if use_tqdm:
         num_requests = self.llm_engine.get_num_unfinished_requests()
@@ -512,10 +513,10 @@ def _vllm_run_engine(self: LLM, use_tqdm: bool = False) -> Dict[str, RequestOutp
 def vllm_generate_stream(
     self: LLM,
     prompts: Optional[Union[str, List[str]]] = None,
-    sampling_params: Optional[SamplingParams] = None,
+    sampling_params: Optional[Any] = None,
     prompt_token_ids: Optional[List[List[int]]] = None,
     use_tqdm: bool = False,
-) -> Dict[str, RequestOutput]:
+) -> Dict[str, Any]:
     """Generates the completions for the input prompts.

     NOTE: This class automatically batches the given prompts, considering
@@ -661,7 +662,7 @@ def debug_chat_response_echo(
     frequency_penalty: float = 0.4,
     system_prompt: str = SYSTEM_PROMPT_1,
 ) -> str:
-    yield message
+    yield f"repeat: {message}"


 # ============ CONSTANT ============
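The substantive change is loosening the vLLM-specific annotations (LLM, SamplingParams, RequestOutput) to Any. A plausible reading is that this keeps app.py importable when vllm itself is unavailable, since those names no longer need to exist at import time. A minimal sketch of the guarded-import pattern that pairs with such annotations; the try/except block is an assumption, not part of this commit:

from typing import Any, Dict, List, Optional, Union

# Assumption: vllm may be missing on the deployment hardware, so fall back
# to Any-valued placeholders instead of failing at import time.
try:
    from vllm import LLM, SamplingParams, RequestOutput
except ImportError:
    LLM = SamplingParams = RequestOutput = Any

def vllm_abort(self: Any):  # as in the commit: the annotation no longer requires vllm
    ...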
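All three helpers take self as an explicit first parameter, so they can be called directly on an LLM instance or bound onto the class as methods. A short usage sketch; it assumes vllm is installed, and the abort / generate_stream attribute names are illustrative, not defined by this commit:

from vllm import LLM

llm = LLM(model="facebook/opt-125m")  # small public checkpoint, as in vLLM's own examples

vllm_abort(llm)  # direct explicit-self call: drains the waiting/running/swapped
                 # queues, marking unfinished sequences FINISHED_ABORTED

LLM.abort = vllm_abort                      # or monkey-patch them on as methods
LLM.generate_stream = vllm_generate_stream
llm.abort()                                 # equivalent to vllm_abort(llm)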
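One detail worth noting in the last hunk: debug_chat_response_echo is annotated -> str but contains yield, so calling it returns a generator, and each yielded string is one streamed chunk, which is what a streaming chat UI handler consumes. A tiny sketch of that, assuming its leading parameters mirror chat_response(message, history, ...); only frequency_penalty and system_prompt are actually visible in the hunk:

# Hypothetical call; the positional (message, history) parameters are assumed.
for chunk in debug_chat_response_echo("hello", []):
    print(chunk)  # prints "repeat: hello" after this commit (previously "hello")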