Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -6,6 +6,7 @@
 VLLM-based demo script to launch Language chat model for Southeast Asian Languages
 """
 
+
 import os
 import numpy as np
 import argparse
@@ -972,53 +973,6 @@ gr.ChatInterface._setup_stop_events = _setup_stop_events
 gr.ChatInterface._setup_events = _setup_events
 
 
-
-@document()
-class CustomTabbedInterface(gr.Blocks):
-    def __init__(
-        self,
-        interface_list: list[gr.Interface],
-        tab_names: Optional[list[str]] = None,
-        title: Optional[str] = None,
-        description: Optional[str] = None,
-        theme: Optional[gr.Theme] = None,
-        analytics_enabled: Optional[bool] = None,
-        css: Optional[str] = None,
-    ):
-        """
-        Parameters:
-            interface_list: a list of interfaces to be rendered in tabs.
-            tab_names: a list of tab names. If None, the tab names will be "Tab 1", "Tab 2", etc.
-            title: a title for the interface; if provided, appears above the input and output components in large font. Also used as the tab title when opened in a browser window.
-            analytics_enabled: whether to allow basic telemetry. If None, will use GRADIO_ANALYTICS_ENABLED environment variable or default to True.
-            css: custom css or path to custom css file to apply to entire Blocks
-        Returns:
-            a Gradio Tabbed Interface for the given interfaces
-        """
-        super().__init__(
-            title=title or "Gradio",
-            theme=theme,
-            analytics_enabled=analytics_enabled,
-            mode="tabbed_interface",
-            css=css,
-        )
-        self.description = description
-        if tab_names is None:
-            tab_names = [f"Tab {i}" for i in range(len(interface_list))]
-        with self:
-            if title:
-                gr.Markdown(
-                    f"<h1 style='text-align: center; margin-bottom: 1rem'>{title}</h1>"
-                )
-            if description:
-                gr.Markdown(description)
-            with gr.Tabs():
-                for interface, tab_name in zip(interface_list, tab_names):
-                    with gr.Tab(label=tab_name):
-                        interface.render()
-
-
-
 def vllm_abort(self: Any):
     sh = self.llm_engine.scheduler
     for g in (sh.waiting + sh.running + sh.swapped):
@@ -1297,7 +1251,7 @@ def format_conversation(history):
 
 def maybe_upload_to_dataset():
     global LOG_FILE, DATA_SET_REPO_PATH, SAVE_LOGS
-    if SAVE_LOGS and os.path.exists(LOG_PATH) and DATA_SET_REPO_PATH
+    if SAVE_LOGS and os.path.exists(LOG_PATH) and DATA_SET_REPO_PATH is not "":
         with open(LOG_PATH, 'r', encoding='utf-8') as f:
             convos = {}
             for l in f:
@@ -1396,7 +1350,6 @@ def maybe_delete_folder():
         except Exception as e:
             print('Failed to delete %s. Reason: %s' % (file_path, e))
 
-
 AGREE_POP_SCRIPTS = """
 async () => {
     alert("To use our service, you are required to agree to the following terms:\\nYou must not use our service to generate any harmful, unethical or illegal content that violates local and international laws, including but not limited to hate speech, violence and deception.\\nThe service may collect user dialogue data for performance improvement, and reserves the right to distribute it under CC-BY or similar license. So do not enter any personal information!");
@@ -1413,7 +1366,6 @@ def debug_file_function(
     stop_strings: str = "[STOP],<s>,</s>",
     current_time: Optional[float] = None,
 ):
-    """This is only for debug purpose"""
     files = files if isinstance(files, list) else [files]
     print(files)
     filenames = [f.name for f in files]
@@ -1439,9 +1391,7 @@ def debug_file_function(
 
 
 def validate_file_item(filename, index, item: Dict[str, str]):
-    """
-    check safety for items in files
-    """
+    # BATCH_INFER_MAX_PROMPT_TOKENS
     message = item['prompt'].strip()
 
     if len(message) == 0:
@@ -1449,7 +1399,7 @@ def validate_file_item(filename, index, item: Dict[str, str]):
 
     message_safety = safety_check(message, history=None)
     if message_safety is not None:
-        raise gr.Error(f'Prompt {index}
+        raise gr.Error(f'Prompt {index} unsafe or supported: {message_safety}')
 
     tokenizer = llm.get_tokenizer() if llm is not None else None
     if tokenizer is None or len(tokenizer.encode(message, add_special_tokens=False)) >= BATCH_INFER_MAX_PROMPT_TOKENS:
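The guard above rejects prompts that the tokenizer would expand past BATCH_INFER_MAX_PROMPT_TOKENS. A minimal, self-contained sketch of the same length check using a Hugging Face tokenizer; the model name and the limit below are placeholders, not values taken from app.py:

```python
# Sketch of the prompt-length guard, assuming a Hugging Face tokenizer.
from transformers import AutoTokenizer

BATCH_INFER_MAX_PROMPT_TOKENS = 512                 # placeholder limit
tokenizer = AutoTokenizer.from_pretrained("gpt2")   # placeholder tokenizer

def prompt_too_long(prompt: str) -> bool:
    # Count tokens without special tokens, mirroring the check in validate_file_item.
    n_tokens = len(tokenizer.encode(prompt, add_special_tokens=False))
    return n_tokens >= BATCH_INFER_MAX_PROMPT_TOKENS

print(prompt_too_long("Hello world"))  # False for a short prompt
```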
@@ -1473,33 +1423,25 @@ def read_validate_json_files(files: Union[str, List[str]]):
             validate_file_item(fname, i, x)
 
         all_items.extend(items)
-
     if len(all_items) > BATCH_INFER_MAX_ITEMS:
         raise gr.Error(f"Num samples {len(all_items)} > {BATCH_INFER_MAX_ITEMS} allowed.")
 
-    return all_items
+    return all_items
 
 
-def remove_gradio_cache(
-    """remove gradio cache to avoid flooding"""
+def remove_gradio_cache():
     import shutil
     for root, dirs, files in os.walk('/tmp/gradio/'):
         for f in files:
-
-
-
-            os.unlink(os.path.join(root, f))
-            # for d in dirs:
-            #     # if not any(d in ef for ef in except_files):
-            #     if exclude_names is None or not any(ef in d for ef in exclude_names):
-            #         print(f'Remove d: {d}')
-            #         shutil.rmtree(os.path.join(root, d))
+            os.unlink(os.path.join(root, f))
+        for d in dirs:
+            shutil.rmtree(os.path.join(root, d))
 
 
 def maybe_upload_batch_set(pred_json_path):
     global LOG_FILE, DATA_SET_REPO_PATH, SAVE_LOGS
 
-    if SAVE_LOGS and DATA_SET_REPO_PATH
+    if SAVE_LOGS and DATA_SET_REPO_PATH is not "":
         try:
             from huggingface_hub import upload_file
             path_in_repo = "misc/" + os.path.basename(pred_json_path).replace(".json", f'.{time.time()}.json')
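The upload path in maybe_upload_batch_set comes down to a single huggingface_hub.upload_file call. A minimal sketch under assumed values; the local file path, the dataset repo id, and the HF_TOKEN environment variable are placeholders, not values from app.py:

```python
# Sketch of the dataset-upload pattern used above, assuming a dataset repo exists
# and an access token is available in the environment.
import os
import time
from huggingface_hub import upload_file

pred_json_path = "/tmp/batch_predictions.json"      # hypothetical local file
DATA_SET_REPO_PATH = "your-org/your-logs-dataset"   # hypothetical dataset repo id

path_in_repo = "misc/" + os.path.basename(pred_json_path).replace(".json", f".{time.time()}.json")
upload_file(
    path_or_fileobj=pred_json_path,
    path_in_repo=path_in_repo,
    repo_id=DATA_SET_REPO_PATH,
    repo_type="dataset",
    token=os.environ.get("HF_TOKEN"),
)
```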
@@ -1528,7 +1470,7 @@ def batch_inference(
     system_prompt: Optional[str] = SYSTEM_PROMPT_1
 ):
     """
-
+    Must handle
 
     """
     global LOG_FILE, LOG_PATH, DEBUG, llm, RES_PRINTED
@@ -1551,10 +1493,11 @@ def batch_inference(
     frequency_penalty = float(frequency_penalty)
     max_tokens = int(max_tokens)
 
-    all_items
+    all_items = read_validate_json_files(files)
 
     # remove all items in /tmp/gradio/
-    remove_gradio_cache(
+    remove_gradio_cache()
+
 
     if prompt_mode == 'chat':
         prompt_format_fn = llama_chat_multiturn_sys_input_seq_constructor
@@ -1594,6 +1537,7 @@ def batch_inference(
     for res, item in zip(responses, all_items):
         item['response'] = res
 
+    # save_path = "/mnt/workspace/workgroup/phi/test.json"
     save_path = BATCH_INFER_SAVE_TMP_FILE
     os.makedirs(os.path.dirname(save_path), exist_ok=True)
     with open(save_path, 'w', encoding='utf-8') as f:
@@ -1608,14 +1552,60 @@ def batch_inference(
 
 
 # BATCH_INFER_MAX_ITEMS
-
-
+FILE_UPLOAD_DESC = f"""File upload json format, with JSON object as list of dict with < {BATCH_INFER_MAX_ITEMS} items"""
+FILE_UPLOAD_DESCRIPTION = FILE_UPLOAD_DESC + """
 ```
-[ {
+[ {\"id\": 0, \"prompt\": \"Hello world\"} , {\"id\": 1, \"prompt\": \"Hi there?\"}]
 ```
 """
 
 
+# https://huggingface.co/spaces/yuntian-deng/ChatGPT4Turbo/blob/main/app.py
+@document()
+class CusTabbedInterface(gr.Blocks):
+    def __init__(
+        self,
+        interface_list: list[gr.Interface],
+        tab_names: Optional[list[str]] = None,
+        title: Optional[str] = None,
+        description: Optional[str] = None,
+        theme: Optional[gr.Theme] = None,
+        analytics_enabled: Optional[bool] = None,
+        css: Optional[str] = None,
+    ):
+        """
+        Parameters:
+            interface_list: a list of interfaces to be rendered in tabs.
+            tab_names: a list of tab names. If None, the tab names will be "Tab 1", "Tab 2", etc.
+            title: a title for the interface; if provided, appears above the input and output components in large font. Also used as the tab title when opened in a browser window.
+            analytics_enabled: whether to allow basic telemetry. If None, will use GRADIO_ANALYTICS_ENABLED environment variable or default to True.
+            css: custom css or path to custom css file to apply to entire Blocks
+        Returns:
+            a Gradio Tabbed Interface for the given interfaces
+        """
+        super().__init__(
+            title=title or "Gradio",
+            theme=theme,
+            analytics_enabled=analytics_enabled,
+            mode="tabbed_interface",
+            css=css,
+        )
+        self.description = description
+        if tab_names is None:
+            tab_names = [f"Tab {i}" for i in range(len(interface_list))]
+        with self:
+            if title:
+                gr.Markdown(
+                    f"<h1 style='text-align: center; margin-bottom: 1rem'>{title}</h1>"
+                )
+            if description:
+                gr.Markdown(description)
+            with gr.Tabs():
+                for interface, tab_name in zip(interface_list, tab_names):
+                    with gr.Tab(label=tab_name):
+                        interface.render()
+
+
 def launch():
     global demo, llm, DEBUG, LOG_FILE
     model_desc = MODEL_DESC
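The new FILE_UPLOAD_DESC documents the upload format for batch inference: a JSON list of objects with "id" and "prompt" keys. A minimal sketch that writes such a file; the filename and prompts below are illustrative only:

```python
# Write a batch-inference input file in the format described by FILE_UPLOAD_DESC.
import json

items = [
    {"id": 0, "prompt": "Hello world"},
    {"id": 1, "prompt": "Hi there?"},
]

with open("batch_input.json", "w", encoding="utf-8") as f:  # illustrative filename
    json.dump(items, f, ensure_ascii=False, indent=2)
```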
@@ -1713,33 +1703,33 @@ def launch():
 
     if ENABLE_BATCH_INFER:
 
-
+        demo_file = gr.Interface(
             batch_inference,
             inputs=[
                 gr.File(file_count='single', file_types=['json']),
                 gr.Radio(["chat", "few-shot"], value='chat', label="Chat or Few-shot mode", info="Chat's output more user-friendly, Few-shot's output more consistent with few-shot patterns."),
-                gr.Number(value=temperature, label='Temperature
-                gr.Number(value=max_tokens, label='Max tokens
-                gr.Number(value=frequence_penalty, label='Frequency penalty
-                gr.Number(value=presence_penalty, label='Presence penalty
-                gr.Textbox(value="[STOP],[END],<s>,</s>", label='
+                gr.Number(value=temperature, label='Temperature (higher -> more random)'),
+                gr.Number(value=max_tokens, label='Max generated tokens (increase if want more generation)'),
+                gr.Number(value=frequence_penalty, label='Frequency penalty (> 0 encourage new tokens over repeated tokens)'),
+                gr.Number(value=presence_penalty, label='Presence penalty (> 0 encourage new tokens, < 0 encourage existing tokens)'),
+                gr.Textbox(value="[STOP],[END],<s>,</s>", label='Comma-separated STOP string to stop generation only in few-shot mode', lines=1),
                 gr.Number(value=0, label='current_time', visible=False),
             ],
             outputs=[
                 # "file",
                 gr.File(label="Generated file"),
+                # gr.Textbox(),
                 # "json"
-                gr.JSON(label='Example outputs (
+                gr.JSON(label='Example outputs (max 2 samples)')
             ],
-
-
-
-
-
-            ],
-            # cache_examples=True,
+            # examples=[[[os.path.join(os.path.dirname(__file__),"files/titanic.csv"),
+            #             os.path.join(os.path.dirname(__file__),"files/titanic.csv"),
+            #             os.path.join(os.path.dirname(__file__),"files/titanic.csv")]]],
+            # cache_examples=True
+            description=FILE_UPLOAD_DESCRIPTION
         )
 
+
         demo_chat = gr.ChatInterface(
             response_fn,
             chatbot=ChatBot(
@@ -1767,8 +1757,8 @@ def launch():
             # gr.Textbox(value=sys_prompt, label='System prompt', lines=8)
         ],
     )
-    demo =
-        interface_list=[demo_chat,
+    demo = CusTabbedInterface(
+        interface_list=[demo_chat, demo_file],
         tab_names=["Chat Interface", "Batch Inference"],
         title=f"{model_title}",
         description=f"{model_desc}",
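CusTabbedInterface mirrors Gradio's stock gr.TabbedInterface while also rendering a description line under the title. A minimal sketch of the same tab wiring using the built-in class; the toy functions and tab names are illustrative and not part of app.py:

```python
# Equivalent tab wiring with Gradio's built-in TabbedInterface (no description line).
import gradio as gr

def echo(text: str) -> str:   # toy function standing in for the chat tab
    return text

def shout(text: str) -> str:  # toy function standing in for the batch tab
    return text.upper()

tab_a = gr.Interface(echo, inputs="text", outputs="text")
tab_b = gr.Interface(shout, inputs="text", outputs="text")

demo = gr.TabbedInterface(
    interface_list=[tab_a, tab_b],
    tab_names=["Echo", "Shout"],
    title="Tabbed demo",
)

if __name__ == "__main__":
    demo.launch()
```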
@@ -1834,4 +1824,3 @@ def main():
 
 if __name__ == "__main__":
     main()
-