NGUYEN, Xuan Phi committed on
Commit
f028d50
1 Parent(s): 203c3cd
Files changed (1) hide show
  1. app.py +46 -25
app.py CHANGED
@@ -10,43 +10,45 @@ tensor_parallel must == 1
10
 
11
  """
12
 
13
- import torch
14
  import os
15
  import numpy as np
16
  import argparse
17
- from vllm import LLM, SamplingParams
18
  import gradio as gr
19
- from gradio_client.documentation import document, set_documentation_group
20
-
21
- from typing import List, Optional, Union, Dict, Tuple
22
-
23
- from tqdm import tqdm
24
- from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast
25
-
26
- from vllm.engine.arg_utils import EngineArgs
27
- from vllm.engine.llm_engine import LLMEngine
28
- from vllm.outputs import RequestOutput
29
- from vllm.sampling_params import SamplingParams
30
- from vllm.utils import Counter
31
- from vllm.sequence import (Sequence, SequenceData, SequenceGroup,
32
- SequenceGroupMetadata, SequenceOutputs,
33
- SequenceStatus)
34
-
35
- # ! reconfigure vllm to faster llama
36
  from typing import Any, Iterator
37
  from typing import Iterator, List, Optional, Tuple
38
  import filelock
39
  import glob
40
  import json
41
- import os
42
- from huggingface_hub import snapshot_download
43
 
 
 
 
44
  from tqdm.auto import tqdm
 
 
 
 
 
45
 
46
- from vllm.model_executor.model_loader import _MODEL_REGISTRY
47
- from vllm.model_executor.models import LlamaForCausalLM
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
- _MODEL_REGISTRY['FasterLlamaForCausalLM'] = LlamaForCausalLM
 
50
 
51
 
52
  def hf_model_weights_iterator(
@@ -661,18 +663,35 @@ def debug_chat_response_echo(
661
  yield message
662
 
663
 
 
664
  MODEL_TITLE = "DAMO-SeaL-13B - An Assistant for South East Asian Languages"
665
  MODEL_DESC = """
666
  This is a 13B DAMO-SeaL-Chat assistant model built by DAMO Academy, Alibaba Group. It can produce helpful responses in English, Vietnamese, Indonesian and Thai.
667
  """.strip()
668
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
669
  TENSOR_PARALLEL = int(os.environ.get("TENSOR_PARALLEL", "1"))
670
  DTYPE = 'bfloat16'
671
  DTYPE = 'float16'
672
 
673
  MODEL_PATH = os.environ.get("MODEL_PATH", "notfound, please set `export MODEL_PATH=`")
674
 
675
- DEBUG = 1
 
676
 
677
  def launch():
678
  global demo, llm, DEBUG
@@ -720,6 +739,8 @@ def launch():
720
  gr.Number(value=0.4, label='Frequency penalty (> 0 encourage new tokens)'),
721
  gr.Textbox(value=sys_prompt, label='System prompt', lines=8)],
722
  )
 
 
723
  demo.queue()
724
  # demo.launch(server_port=args.port)
725
  demo.launch()
 
10
 
11
  """
12
 
13
+
14
  import os
15
  import numpy as np
16
  import argparse
17
+ import torch
18
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  from typing import Any, Iterator
20
  from typing import Iterator, List, Optional, Tuple
21
  import filelock
22
  import glob
23
  import json
 
 
24
 
25
+ from gradio_client.documentation import document, set_documentation_group
26
+
27
+ from typing import List, Optional, Union, Dict, Tuple
28
  from tqdm.auto import tqdm
29
+ from huggingface_hub import snapshot_download
30
+
31
+ DEBUG = True
32
+
33
+ if not DEBUG:
34
 
35
+ # vllm import
36
+ from vllm import LLM, SamplingParams
37
+ from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast
38
+ from vllm.engine.arg_utils import EngineArgs
39
+ from vllm.engine.llm_engine import LLMEngine
40
+ from vllm.outputs import RequestOutput
41
+ from vllm.sampling_params import SamplingParams
42
+ from vllm.utils import Counter
43
+ from vllm.sequence import (Sequence, SequenceData, SequenceGroup,
44
+ SequenceGroupMetadata, SequenceOutputs,
45
+ SequenceStatus)
46
+ # ! reconfigure vllm to faster llama
47
+ from vllm.model_executor.model_loader import _MODEL_REGISTRY
48
+ from vllm.model_executor.models import LlamaForCausalLM
49
 
50
+
51
+ _MODEL_REGISTRY['FasterLlamaForCausalLM'] = LlamaForCausalLM
52
 
53
 
54
  def hf_model_weights_iterator(
 
663
  yield message
664
 
665
 
666
+ # ============ CONSTANT ============
667
  MODEL_TITLE = "DAMO-SeaL-13B - An Assistant for South East Asian Languages"
668
  MODEL_DESC = """
669
  This is a 13B DAMO-SeaL-Chat assistant model built by DAMO Academy, Alibaba Group. It can produce helpful responses in English, Vietnamese, Indonesian and Thai.
670
  """.strip()
671
 
672
+
673
+ cite_markdown = """
674
+ ## Citation
675
+ If you find our project useful, hope you can star our repo and cite our paper as follows:
676
+ ```
677
+ @article{damonlpsg2023seallm,
678
+ author = {???},
679
+ title = {SeaL: A language model for South East Asian Languages},
680
+ year = 2023,
681
+ }
682
+ """
683
+ # journal = {arXiv preprint arXiv:2306.02858}
684
+ # url = {https://arxiv.org/abs/2306.02858}
685
+
686
+
687
  TENSOR_PARALLEL = int(os.environ.get("TENSOR_PARALLEL", "1"))
688
  DTYPE = 'bfloat16'
689
  DTYPE = 'float16'
690
 
691
  MODEL_PATH = os.environ.get("MODEL_PATH", "notfound, please set `export MODEL_PATH=`")
692
 
693
+
694
+
695
 
696
  def launch():
697
  global demo, llm, DEBUG
 
739
  gr.Number(value=0.4, label='Frequency penalty (> 0 encourage new tokens)'),
740
  gr.Textbox(value=sys_prompt, label='System prompt', lines=8)],
741
  )
742
+
743
+ gr.Markdown(cite_markdown)
744
  demo.queue()
745
  # demo.launch(server_port=args.port)
746
  demo.launch()