nxphi47 committed on
Commit
6355b7b
1 Parent(s): b829cc7

Update app.py

Files changed (1):
app.py +35 -48
app.py CHANGED
@@ -3,7 +3,7 @@
 
 # Description:
 """
-VLLM-based demo script to launch Language chat model for South East Asian Languages
+VLLM-based demo script to launch a language chat model for Southeast Asian Languages
 """
 
 
@@ -29,12 +29,16 @@ from huggingface_hub import snapshot_download
 
 DEBUG = bool(int(os.environ.get("DEBUG", "1")))
 BLOCK_ZH = bool(int(os.environ.get("BLOCK_ZH", "1")))
+# for language blocking, whether to also block based on the chat history
+LANG_BLOCK_HISTORY = bool(int(os.environ.get("LANG_BLOCK_HISTORY", "0")))
 TENSOR_PARALLEL = int(os.environ.get("TENSOR_PARALLEL", "1"))
 DTYPE = os.environ.get("DTYPE", "bfloat16")
 
 # ! (no debug) whether to download HF_MODEL_NAME and save to MODEL_PATH
 DOWNLOAD_SNAPSHOT = bool(int(os.environ.get("DOWNLOAD_SNAPSHOT", "0")))
 LOG_RESPONSE = bool(int(os.environ.get("LOG_RESPONSE", "0")))
+# ! show model path in the demo page, for internal use only
+DISPLAY_MODEL_PATH = bool(int(os.environ.get("DISPLAY_MODEL_PATH", "1")))
 
 # ! uploaded model path, will be downloaded to MODEL_PATH
 HF_MODEL_NAME = os.environ.get("HF_MODEL_NAME", "DAMO-NLP-SG/seal-13b-chat-a")
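Note: all boolean flags above use the `bool(int(os.environ.get(...)))` idiom, which only accepts the literal strings `"0"` and `"1"`. A minimal sketch of the behavior (values here are illustrative):

```python
import os

os.environ["BLOCK_ZH"] = "0"
print(bool(int(os.environ.get("BLOCK_ZH", "1"))))  # False

os.environ["BLOCK_ZH"] = "1"
print(bool(int(os.environ.get("BLOCK_ZH", "1"))))  # True

# Anything else fails fast: int("true") raises ValueError,
# so "true"/"false" are not valid values for these flags.
```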
@@ -80,7 +84,6 @@ MODEL_PATH=./seal-13b-chat-a
 """
 
 
-
 # ==============================
 print(f'DEBUG mode: {DEBUG}')
 print(f'Torch version: {torch.__version__}')
@@ -113,9 +116,10 @@ EOS_TOKEN = '</s>'
 B_INST, E_INST = "[INST]", "[/INST]"
 B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
 
-SYSTEM_PROMPT_1 = """You are a multilingual, helpful, respectful and honest assistant. Your name is SeaL and you are built by DAMO Academy, Alibaba Group. Always answer as helpfully as possible, while being safe. Your \
-answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure\
-that your responses are socially unbiased and positive in nature.
+SYSTEM_PROMPT_1 = """You are a multilingual, helpful, respectful and honest assistant. Your name is SeaLLM and you are built by DAMO Academy, Alibaba Group. \
+Please always answer as helpfully as possible, while being safe. Your \
+answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure \
+that your responses are socially unbiased and positive in nature.
 
 If a question does not make any sense, or is not factually coherent, explain why instead of answering something not \
 correct. If you don't know the answer to a question, please don't share false information.
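Note: `SYSTEM_PROMPT_1` is injected via the `B_INST`/`B_SYS` markers defined just above, following the Llama-2 chat template. The helper referenced elsewhere in this file (`llama_chat_sys_input_seq_constructor`) presumably assembles prompts along these lines; this sketch is an assumption, not the committed implementation:

```python
B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"

def build_single_turn_prompt(message: str, sys_prompt: str) -> str:
    # Llama-2 convention: the system prompt rides inside the first [INST]
    # block, wrapped in <<SYS>> ... <</SYS>> tags.
    return f"{B_INST} {B_SYS}{sys_prompt}{E_SYS}{message} {E_INST}"
```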
@@ -127,8 +131,8 @@ Your response should adapt to the norms and customs of the respective language a
 # ============ CONSTANT ============
 # https://github.com/gradio-app/gradio/issues/884
 MODEL_NAME = "SeaLLM-13B"
-MODEL_TITLE = "SeaLLM-13B - An Assistant for South East Asian Languages"
-# ! add icon: "<img src='file/lion.jpg' alt='image One'>"
+MODEL_TITLE = "SeaLLM-13B - An Assistant for Southeast Asian Languages"
+
 MODEL_TITLE = """
 <div class="container" style="
     align-items: center;
@@ -150,13 +154,13 @@ MODEL_TITLE = """
     padding-top: 2%;
     float: left;
 ">
-    <h1>SeaLLM-13B - An Assistant for South East Asian Languages</h1>
+    <h1>SeaLLM-13B - An Assistant for Southeast Asian Languages</h1>
 </div>
 </div>
 """
 MODEL_DESC = """
 <span style="font-size: larger">
-This is SeaLLM-13B - a chatbot assistant optimized for South East Asian Languages. It can produce helpful responses in English 🇬🇧, Vietnamese 🇻🇳, Indonesian 🇮🇩 and Thai 🇹🇭.
+This is SeaLLM-13B - a chatbot assistant optimized for Southeast Asian Languages. It can produce helpful responses in English 🇬🇧, Vietnamese 🇻🇳, Indonesian 🇮🇩 and Thai 🇹🇭.
 </span>
 <br>
 <span style="color: red">NOTICE: The chatbot may produce inaccurate and harmful information about people, places, or facts. \
@@ -171,19 +175,12 @@ If you find our project useful, hope you can star our repo and cite our paper as
 ```
 @article{damonlpsg2023seallm,
   author = {???},
-  title = {SeaLLM: A language model for South East Asian Languages},
+  title = {SeaLLM: A language model for Southeast Asian Languages},
   year = 2023,
 }
 ```
 """
 
-# warning_markdown = """
-# ## Warning:
-# <span style="color: red">The chatbot may produce inaccurate and harmful information about people, places, or facts.</span>
-# <span style="color: red">We strongly advise against misuse of the chatbot to knowingly generate harmful or unethical content, \
-# or content that violates locally applicable and international laws or regulations, including hate speech, violence, pornography, deception, etc!</span>
-# """
-
 path_markdown = """
 #### Model path:
 {model_path}
@@ -191,12 +188,12 @@ path_markdown = """
 
 
 def _detect_lang(text):
+    # Disable languages that may pose a safety risk
     from langdetect import detect as detect_lang
     dlang = None
     try:
         dlang = detect_lang(text)
     except Exception as e:
-        # No features in text.
         print(f'Error: {e}')
         if "No features in text." in str(e):
             return "en"
@@ -491,7 +488,7 @@ def new_llama_load_weights(
     load_format: str = "auto",
     revision: Optional[str] = None
 ):
-    # If use newest vllm
+    # If using the newest vllm; not thoroughly tested yet.
     from vllm.model_executor.weight_utils import (
         load_tensor_parallel_weights, hf_model_weights_iterator
     )
@@ -886,24 +883,6 @@ def _setup_events(self) -> None:
 gr.ChatInterface._setup_stop_events = _setup_stop_events
 gr.ChatInterface._setup_events = _setup_events
 
-def chat_response(message, history, temperature: float, max_tokens: int, system_prompt: str = '') -> str:
-    global llm
-    assert llm is not None
-    from vllm import LLM, SamplingParams
-    temperature = float(temperature)
-    max_tokens = int(max_tokens)
-    if system_prompt.strip() != '':
-        # chat version, add system prompt
-        message = llama_chat_sys_input_seq_constructor(
-            message.strip(),
-            sys_prompt=system_prompt
-        )
-
-    sampling_params = SamplingParams(temperature=temperature, max_tokens=max_tokens)
-    gen = llm.generate(message, sampling_params)
-    out = gen[0].outputs[0].text
-    return f'{out}'
-
 
 def vllm_abort(self: Any):
     from vllm.sequence import SequenceStatus
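Note: the deleted `chat_response` helper was the non-streaming path (the streaming path below supersedes it). The core vLLM call it wrapped looks roughly like this; the model name and sampling values are placeholders taken from this file's defaults:

```python
from vllm import LLM, SamplingParams

# One-off, non-streaming generation against the same checkpoint the demo serves.
llm = LLM(model="DAMO-NLP-SG/seal-13b-chat-a", dtype="bfloat16")
sampling_params = SamplingParams(temperature=0.7, max_tokens=512)
outputs = llm.generate("Hello! How are you today?", sampling_params)
print(outputs[0].outputs[0].text)  # first candidate of the first prompt
```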
@@ -991,16 +970,19 @@ def vllm_generate_stream(
     yield from _vllm_run_engine(self, use_tqdm)
 
 
-BLOCK_MESSAGE = """Sorry, Chinese is not currently supported. Please clear the chat box for a new conversation.
-抱歉,目前不支持中文。 请清除聊天框以进行新对话。"""
-
-KEYWORD_BLOCK_MESSAGE = "Sorry, I cannot fulfill your request. If you have any unrelated questions, I'll be glad to help."
+# ! a generic refusal that avoids naming the unsupported language
+LANG_BLOCK_MESSAGE = """Sorry, the language you have asked in is currently not supported. If you have questions in other supported languages, I'll be glad to help. \
+Please also consider clearing the chat box for a better experience."""
+
+KEYWORD_BLOCK_MESSAGE = "Sorry, I cannot fulfill your request. If you have any unrelated questions, I'll be glad to help."
 
 
 def block_zh(
     message: str,
-    history: List[Tuple[str, str]]
+    history: List[Tuple[str, str]] = None,
 ) -> str:
-    if history is not None and any((BLOCK_MESSAGE in x[1].strip()) for x in history):
+    # history-based blocking, enabled only when LANG_BLOCK_HISTORY is set
+    if LANG_BLOCK_HISTORY and history is not None and any((LANG_BLOCK_MESSAGE in x[1].strip()) for x in history):
         return True
     elif 'zh' in _detect_lang(message):
         print(f'Detect zh: {message}')
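Note: the substring check `'zh' in _detect_lang(message)` deliberately matches both the `zh-cn` and `zh-tw` codes that langdetect can return. Hypothetical calls, assuming the module is imported and langdetect is installed:

```python
print(block_zh("你好,今天天气怎么样?"))  # True  (detected as zh-cn)
print(block_zh("Hello there!"))            # False (detected as en)
```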
@@ -1021,10 +1003,10 @@ def safety_check(text, history=None, ) -> Optional[str]:
     if BLOCK_ZH:
         if history is not None:
             if block_zh(text, history):
-                return BLOCK_MESSAGE
+                return LANG_BLOCK_MESSAGE
         else:
             if "zh" in _detect_lang(text):
-                return BLOCK_MESSAGE
+                return LANG_BLOCK_MESSAGE
 
     if len(KEYWORDS) > 0 and any(x in text.lower() for x in KEYWORDS):
         return KEYWORD_BLOCK_MESSAGE
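Note: `safety_check` returns a canned message when the input (or, optionally, the history) must be blocked, and `None` otherwise. A sketch of the intended caller contract; `guarded_reply` and `generate_fn` are illustrative, not part of this file:

```python
def guarded_reply(text, history, generate_fn):
    blocked = safety_check(text, history=history)
    if blocked is not None:
        return blocked          # short-circuit with the block message
    return generate_fn(text)    # otherwise run the real model
```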
@@ -1149,9 +1131,12 @@ def launch():
     ckpt_info = "None"
 
     print(
-        f'Launch config: {model_title=} / {tensor_parallel=} / {dtype=} / {max_tokens} | {BLOCK_ZH=} '
+        f'Launch config: {tensor_parallel=} / {dtype=} / {max_tokens} | {BLOCK_ZH=} '
+        f'\n| model_title=`{model_title}` '
         f'\n| STREAM_YIELD_MULTIPLE={STREAM_YIELD_MULTIPLE} '
         f'\n| STREAM_CHECK_MULTIPLE={STREAM_CHECK_MULTIPLE} '
+        f'\n| DISPLAY_MODEL_PATH={DISPLAY_MODEL_PATH} '
+        f'\n| LANG_BLOCK_HISTORY={LANG_BLOCK_HISTORY} '
         f'\n| frequence_penalty={frequence_penalty} '
         f'\n| temperature={temperature} '
         f'\n| hf_model_name={hf_model_name} '
@@ -1159,8 +1144,8 @@ def launch():
         f'\n| DOWNLOAD_SNAPSHOT={DOWNLOAD_SNAPSHOT} '
         f'\n| gpu_memory_utilization={gpu_memory_utilization} '
         f'\n| KEYWORDS={KEYWORDS} '
-        f'\nsys={SYSTEM_PROMPT_1}'
-        f'\ndesc={model_desc}'
+        f'\n| Sys={SYSTEM_PROMPT_1}'
+        f'\n| Desc={model_desc}'
     )
 
     if DEBUG:
@@ -1230,7 +1215,8 @@ def launch():
     with demo:
         # gr.Markdown(warning_markdown)
         gr.Markdown(cite_markdown)
-        gr.Markdown(path_markdown.format(model_path=model_path))
+        if DISPLAY_MODEL_PATH:
+            gr.Markdown(path_markdown.format(model_path=model_path))
 
     demo.queue()
     demo.launch(server_port=PORT)
@@ -1243,3 +1229,4 @@ def main():
 
 if __name__ == "__main__":
     main()
+