NGUYEN, Xuan Phi committed on
Commit
5622434
1 Parent(s): c14f353
Files changed (1)
  1. app.py +6 -5
app.py CHANGED
@@ -470,7 +470,7 @@ def chat_response(message, history, temperature: float, max_tokens: int, system_
     return f'{out}'


-def vllm_abort(self: LLM):
+def vllm_abort(self: Any):
     scheduler = self.llm_engine.scheduler
     for state_queue in [scheduler.waiting, scheduler.running, scheduler.swapped]:
         for seq_group in state_queue:
@@ -482,7 +482,8 @@ def vllm_abort(self: LLM):
                     continue
                 scheduler.free_seq(seq, SequenceStatus.FINISHED_ABORTED)

-def _vllm_run_engine(self: LLM, use_tqdm: bool = False) -> Dict[str, RequestOutput]:
+# def _vllm_run_engine(self: LLM, use_tqdm: bool = False) -> Dict[str, RequestOutput]:
+def _vllm_run_engine(self: Any, use_tqdm: bool = False) -> Dict[str, Any]:
     # Initialize tqdm.
     if use_tqdm:
         num_requests = self.llm_engine.get_num_unfinished_requests()
@@ -512,10 +513,10 @@ def _vllm_run_engine(self: LLM, use_tqdm: bool = False) -> Dict[str, RequestOutp
 def vllm_generate_stream(
     self: LLM,
     prompts: Optional[Union[str, List[str]]] = None,
-    sampling_params: Optional[SamplingParams] = None,
+    sampling_params: Optional[Any] = None,
     prompt_token_ids: Optional[List[List[int]]] = None,
     use_tqdm: bool = False,
-) -> Dict[str, RequestOutput]:
+) -> Dict[str, Any]:
     """Generates the completions for the input prompts.

     NOTE: This class automatically batches the given prompts, considering
@@ -661,7 +662,7 @@ def debug_chat_response_echo(
     frequency_penalty: float = 0.4,
     system_prompt: str = SYSTEM_PROMPT_1,
 ) -> str:
-    yield message
+    yield f"repeat: {message}"


 # ============ CONSTANT ============
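The substantive change is loosening the vLLM-specific annotations (LLM, SamplingParams, RequestOutput) to Any. A plausible reading is that this keeps app.py importable when vllm itself is unavailable, since those names no longer need to exist at import time. A minimal sketch of the guarded-import pattern that pairs with such annotations; the try/except block is an assumption, not part of this commit:

from typing import Any, Dict, List, Optional, Union

# Assumption: vllm may be missing on the deployment hardware, so fall back
# to Any-valued placeholders instead of failing at import time.
try:
    from vllm import LLM, SamplingParams, RequestOutput
except ImportError:
    LLM = SamplingParams = RequestOutput = Any

def vllm_abort(self: Any):  # as in the commit: the annotation no longer requires vllm
    ...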
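All three helpers take self as an explicit first parameter, so they can be called directly on an LLM instance or bound onto the class as methods. A short usage sketch; it assumes vllm is installed, and the abort / generate_stream attribute names are illustrative, not defined by this commit:

from vllm import LLM

llm = LLM(model="facebook/opt-125m")  # small public checkpoint, as in vLLM's own examples

vllm_abort(llm)  # direct explicit-self call: drains the waiting/running/swapped
                 # queues, marking unfinished sequences FINISHED_ABORTED

LLM.abort = vllm_abort                      # or monkey-patch them on as methods
LLM.generate_stream = vllm_generate_stream
llm.abort()                                 # equivalent to vllm_abort(llm)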
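One detail worth noting in the last hunk: debug_chat_response_echo is annotated -> str but contains yield, so calling it returns a generator, and each yielded string is one streamed chunk, which is what a streaming chat UI handler consumes. A tiny sketch of that, assuming its leading parameters mirror chat_response(message, history, ...); only frequency_penalty and system_prompt are actually visible in the hunk:

# Hypothetical call; the positional (message, history) parameters are assumed.
for chunk in debug_chat_response_echo("hello", []):
    print(chunk)  # prints "repeat: hello" after this commit (previously "hello")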