qianhuiwu committed on
Commit
b8f3522
1 Parent(s): ab61a2b

Remove env setup. Update Readme.

Browse files
Makefile DELETED
@@ -1,16 +0,0 @@
1
- .PHONY: install style test
2
-
3
- PYTHON := python
4
- CHECK_DIRS := llmlingua tests
5
-
6
- install:
7
- @${PYTHON} setup.py bdist_wheel
8
- @${PYTHON} -m pip install dist/sdtools*
9
-
10
- style:
11
- black $(CHECK_DIRS)
12
- isort -rc $(CHECK_DIRS)
13
- flake8 $(CHECK_DIRS)
14
-
15
- test:
16
- @${PYTHON} -m pytest -n auto --dist=loadfile -s -v ./tests/
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -9,5 +9,42 @@ app_file: app.py
9
  pinned: false
10
  license: cc-by-nc-sa-4.0
11
  ---
12
-
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  pinned: false
10
  license: cc-by-nc-sa-4.0
11
  ---
12
+
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
14
+
15
+ LLMLingua-2 is one branch of the [LLMLingua Series](https://llmlingua.com/). Please check the links below for more information.
16
+ <div style="display: flex; align-items: center;">
17
+ <div style="width: 100px; margin-right: 10px; height:auto;" align="left">
18
+ <img src="images/LLMLingua_logo.png" alt="LLMLingua" width="100" align="left">
19
+ </div>
20
+ <div style="flex-grow: 1;" align="center">
21
+ <h2 align="center">LLMLingua Series | Effectively Deliver Information to LLMs via Prompt Compression</h2>
22
+ </div>
23
+ </div>
24
+
25
+ <p align="center">
26
+ | <a href="https://llmlingua.com/"><b>Project Page</b></a> |
27
+ <a href="https://aclanthology.org/2023.emnlp-main.825/"><b>LLMLingua</b></a> |
28
+ <a href="https://arxiv.org/abs/2310.06839"><b>LongLLMLingua</b></a> |
29
+ <a href="https://arxiv.org/abs/2403.12968"><b>LLMLingua-2</b></a> |
30
+ <a href="https://huggingface.co/spaces/microsoft/LLMLingua"><b>LLMLingua Demo</b></a> |
31
+ <a href="https://huggingface.co/spaces/microsoft/LLMLingua-2"><b>LLMLingua-2 Demo</b></a> |
32
+ </p>
33
+
34
+
35
+ ## Brief Introduction
36
+
37
+ **LLMLingua** utilizes a compact, well-trained language model (e.g., GPT2-small, LLaMA-7B) to identify and remove non-essential tokens in prompts. This approach enables efficient inference with large language models (LLMs), achieving up to 20x compression with minimal performance loss.
38
+
39
+ - [LLMLingua: Compressing Prompts for Accelerated Inference of Large Language Models](https://aclanthology.org/2023.emnlp-main.825/) (EMNLP 2023)<br>
40
+ _Huiqiang Jiang, Qianhui Wu, Chin-Yew Lin, Yuqing Yang and Lili Qiu_
41
+
42
+ **LongLLMLingua** mitigates the 'lost in the middle' issue in LLMs, enhancing long-context information processing. It reduces costs and boosts efficiency with prompt compression, improving RAG performance by up to 21.4% using only 1/4 of the tokens.
43
+
44
+ - [LongLLMLingua: Accelerating and Enhancing LLMs in Long Context Scenarios via Prompt Compression](https://arxiv.org/abs/2310.06839) (ICLR ME-FoMo 2024)<br>
45
+ _Huiqiang Jiang, Qianhui Wu, Xufang Luo, Dongsheng Li, Chin-Yew Lin, Yuqing Yang and Lili Qiu_
46
+
47
+ **LLMLingua-2**, a small-size yet powerful prompt compression method trained via data distillation from GPT-4 for token classification with a BERT-level encoder, excels in task-agnostic compression. It surpasses LLMLingua in handling out-of-domain data, offering 3x-6x faster performance.
48
+
49
+ - [LLMLingua-2: Context-Aware Data Distillation for Efficient and Faithful Task-Agnostic Prompt Compression](https://arxiv.org/abs/2403.12968) (Under Review)<br>
50
+ _Zhuoshi Pan, Qianhui Wu, Huiqiang Jiang, Menglin Xia, Xufang Luo, Jue Zhang, Qingwei Lin, Victor Ruehle, Yuqing Yang, Chin-Yew Lin, H. Vicky Zhao, Lili Qiu, Dongmei Zhang_
app.py CHANGED
@@ -1,9 +1,3 @@
1
-
2
- # build the environment
3
- import sys
4
- import subprocess
5
- subprocess.run([sys.executable, "-m", "pip", "install", "-e", "."])
6
-
7
  # import the required libraries
8
  import gradio as gr
9
  import json
@@ -60,15 +54,12 @@ def compress(original_prompt, compression_rate, base_model="xlm-roberta-large",
60
 
61
 
62
  title = "LLMLingua-2"
63
- header = ("""
64
- <div align='center'>
65
- <h1></h1>
66
- <h1>LLMLingua-2: Efficient and Faithful Task-Agnostic Prompt Compression via Data Distillation</h1>
67
- <h3>Zhuoshi Pan, Qianhui Wu, Huiqiang Jiang, Menglin Xia, Xufang Luo, Jue Zhang, Qingwei Lin, Victor Ruehle, Yuqing Yang, Chin-Yew Lin, H. Vicky Zhao, Lili Qiu, and Dongmei Zhang</h3>
68
- <h3><a href='https://llmlingua.com/llmlingua2.html' target='_blank' rel='noopener'>[Project]</a><a href='https://arxiv.org/pdf/2403.12968.pdf' target='_blank' rel='noopener'>[Paper]</a><a href='https://github.com/microsoft/LLMLingua' target='_blank' rel='noopener'>[Code]</a>
69
- </div>
70
- """
71
- )
72
  theme = "soft"
73
  css = """#anno-img .mask {opacity: 0.5; transition: all 0.2s ease-in-out;}
74
  #anno-img .mask.active {opacity: 0.7}"""
@@ -76,8 +67,8 @@ css = """#anno-img .mask {opacity: 0.5; transition: all 0.2s ease-in-out;}
76
  original_prompt_text = """John: So, um, I've been thinking about the project, you know, and I believe we need to, uh, make some changes. I mean, we want the project to succeed, right? So, like, I think we should consider maybe revising the timeline.
77
  Sarah: I totally agree, John. I mean, we have to be realistic, you know. The timeline is, like, too tight. You know what I mean? We should definitely extend it.
78
  """
79
- # with gr.Blocks(title=title, theme=gr.themes.Soft(), css=css) as app:
80
- with gr.Blocks(title=title, css=css) as app: # 'YenLai/Superhuman' 'HaleyCH/HaleyCH_Theme' 'gradio/monochrome' ''zkunn/Alipay_Gradio_theme''
81
  gr.Markdown(header)
82
  with gr.Row():
83
  with gr.Column(scale=3):
 
 
 
 
 
 
 
1
  # import the required libraries
2
  import gradio as gr
3
  import json
 
54
 
55
 
56
  title = "LLMLingua-2"
57
+
58
+ header = """# LLMLingua-2: Efficient and Faithful Task-Agnostic Prompt Compression via Data Distillation
59
+ _Zhuoshi Pan, Qianhui Wu, Huiqiang Jiang, Menglin Xia, Xufang Luo, Jue Zhang, Qingwei Lin, Victor Ruehle, Yuqing Yang, Chin-Yew Lin, H. Vicky Zhao, Lili Qiu, Dongmei Zhang_<br/>
60
+ [[project page]](https://llmlingua.com/llmlingua2.html) [[paper]](https://arxiv.org/abs/2403.12968) [[code]](https://github.com/microsoft/LLMLingua)
61
+ """
62
+
 
 
 
63
  theme = "soft"
64
  css = """#anno-img .mask {opacity: 0.5; transition: all 0.2s ease-in-out;}
65
  #anno-img .mask.active {opacity: 0.7}"""
 
67
  original_prompt_text = """John: So, um, I've been thinking about the project, you know, and I believe we need to, uh, make some changes. I mean, we want the project to succeed, right? So, like, I think we should consider maybe revising the timeline.
68
  Sarah: I totally agree, John. I mean, we have to be realistic, you know. The timeline is, like, too tight. You know what I mean? We should definitely extend it.
69
  """
70
+
71
+ with gr.Blocks(title=title, css=css) as app:
72
  gr.Markdown(header)
73
  with gr.Row():
74
  with gr.Column(scale=3):
images/LLMLingua_logo.png ADDED
llmlingua/__init__.py DELETED
@@ -1,4 +0,0 @@
1
- # Copyright (c) 2024 Microsoft
2
- # Licensed under The cc-by-nc-sa-4.0 License [see LICENSE for details]
3
- # flake8: noqa
4
- from .prompt_compressor import PromptCompressor
 
 
 
 
 
llmlingua/prompt_compressor.py DELETED
The diff for this file is too large to render. See raw diff
 
llmlingua/utils.py DELETED
@@ -1,98 +0,0 @@
1
- import torch
2
- from torch.utils.data import Dataset
3
- import random, os
4
- import numpy as np
5
- import torch
6
- import string
7
-
8
- class TokenClfDataset(Dataset):
9
- def __init__(
10
- self,
11
- texts,
12
- max_len=512,
13
- tokenizer=None,
14
- model_name="bert-base-multilingual-cased",
15
- ):
16
- self.len = len(texts)
17
- self.texts = texts
18
- self.tokenizer = tokenizer
19
- self.max_len = max_len
20
- self.model_name = model_name
21
- if "bert-base-multilingual-cased" in model_name:
22
- self.cls_token = "[CLS]"
23
- self.sep_token = "[SEP]"
24
- self.unk_token = "[UNK]"
25
- self.pad_token = "[PAD]"
26
- self.mask_token = "[MASK]"
27
- elif "xlm-roberta-large" in model_name:
28
- self.bos_token = "<s>"
29
- self.eos_token = "</s>"
30
- self.sep_token = "</s>"
31
- self.cls_token = "<s>"
32
- self.unk_token = "<unk>"
33
- self.pad_token = "<pad>"
34
- self.mask_token = "<mask>"
35
- else:
36
- raise NotImplementedError()
37
-
38
- def __getitem__(self, index):
39
- text = self.texts[index]
40
- tokenized_text = self.tokenizer.tokenize(text)
41
-
42
- tokenized_text = (
43
- [self.cls_token] + tokenized_text + [self.sep_token]
44
- ) # add special tokens
45
-
46
- if len(tokenized_text) > self.max_len:
47
- tokenized_text = tokenized_text[: self.max_len]
48
- else:
49
- tokenized_text = tokenized_text + [
50
- self.pad_token for _ in range(self.max_len - len(tokenized_text))
51
- ]
52
-
53
- attn_mask = [1 if tok != self.pad_token else 0 for tok in tokenized_text]
54
-
55
- ids = self.tokenizer.convert_tokens_to_ids(tokenized_text)
56
-
57
- return {
58
- "ids": torch.tensor(ids, dtype=torch.long),
59
- "mask": torch.tensor(attn_mask, dtype=torch.long),
60
- }
61
-
62
- def __len__(self):
63
- return self.len
64
-
65
-
66
- def seed_everything(seed: int):
67
- random.seed(seed)
68
- os.environ["PYTHONHASHSEED"] = str(seed)
69
- np.random.seed(seed)
70
- torch.manual_seed(seed)
71
- torch.cuda.manual_seed(seed)
72
- torch.backends.cudnn.deterministic = True
73
- torch.backends.cudnn.benchmark = False
74
-
75
- def is_begin_of_new_word(token, model_name, force_tokens, token_map):
76
- if "bert-base-multilingual-cased" in model_name:
77
- if token.lstrip("##") in force_tokens or token.lstrip("##") in set(token_map.values()):
78
- return True
79
- return not token.startswith("##")
80
- elif "xlm-roberta-large" in model_name:
81
- if token in string.punctuation or token in force_tokens or token in set(token_map.values()):
82
- return True
83
- return token.startswith("▁")
84
- else:
85
- raise NotImplementedError()
86
-
87
- def replace_added_token(token, token_map):
88
- for ori_token, new_token in token_map.items():
89
- token = token.replace(new_token, ori_token)
90
- return token
91
-
92
- def get_pure_token(token, model_name):
93
- if "bert-base-multilingual-cased" in model_name:
94
- return token.lstrip("##")
95
- elif "xlm-roberta-large" in model_name:
96
- return token.lstrip("▁")
97
- else:
98
- raise NotImplementedError()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
llmlingua/version.py DELETED
@@ -1,14 +0,0 @@
1
- # Copyright (c) 2023 Microsoft
2
- # Licensed under The MIT License [see LICENSE for details]
3
-
4
- _MAJOR = "0"
5
- _MINOR = "1"
6
- # On master and in a nightly release the patch should be one ahead of the last
7
- # released build.
8
- _PATCH = "6"
9
- # This is mainly for nightly builds which have the suffix ".dev$DATE". See
10
- # https://semver.org/#is-v123-a-semantic-version for the semantics.
11
- _SUFFIX = ""
12
-
13
- VERSION_SHORT = "{0}.{1}".format(_MAJOR, _MINOR)
14
- VERSION = "{0}.{1}.{2}{3}".format(_MAJOR, _MINOR, _PATCH, _SUFFIX)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
setup.cfg DELETED
@@ -1,28 +0,0 @@
1
- [isort]
2
- default_section = FIRSTPARTY
3
- ensure_newline_before_comments = True
4
- force_grid_wrap = 0
5
- include_trailing_comma = True
6
- known_first_party = sdtools
7
- known_third_party =
8
- imblearn
9
- numpy
10
- pandas
11
- pytorch-tabnet
12
- scipy
13
- sklearn
14
- torch
15
- torchaudio
16
- torchvision
17
- torch_xla
18
- tqdm
19
- xgboost
20
-
21
- line_length = 119
22
- lines_after_imports = 2
23
- multi_line_output = 3
24
- use_parentheses = True
25
-
26
- [flake8]
27
- ignore = E203, E501, E741, W503, W605
28
- max-line-length = 119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
setup.py DELETED
@@ -1,70 +0,0 @@
1
- # Copyright (c) 2023 Microsoft
2
- # Licensed under The MIT License [see LICENSE for details]
3
-
4
- from setuptools import find_packages, setup
5
-
6
- # PEP0440 compatible formatted version, see:
7
- # https://www.python.org/dev/peps/pep-0440/
8
- #
9
- # release markers:
10
- # X.Y
11
- # X.Y.Z # For bugfix releases
12
- #
13
- # pre-release markers:
14
- # X.YaN # Alpha release
15
- # X.YbN # Beta release
16
- # X.YrcN # Release Candidate
17
- # X.Y # Final release
18
-
19
- # version.py defines the VERSION and VERSION_SHORT variables.
20
- # We use exec here so we don't import allennlp whilst setting up.
21
- VERSION = {} # type: ignore
22
- with open("llmlingua/version.py", "r") as version_file:
23
- exec(version_file.read(), VERSION)
24
-
25
- INSTALL_REQUIRES = [
26
- "transformers>=4.26.0",
27
- "accelerate",
28
- "torch",
29
- "tiktoken",
30
- "nltk",
31
- "numpy",
32
- ]
33
- QUANLITY_REQUIRES = [
34
- "black==21.4b0",
35
- "flake8>=3.8.3",
36
- "isort>=5.5.4",
37
- "pre-commit",
38
- "pytest",
39
- "pytest-xdist",
40
- ]
41
- DEV_REQUIRES = INSTALL_REQUIRES + QUANLITY_REQUIRES
42
-
43
- setup(
44
- name="llmlingua",
45
- version=VERSION["VERSION"],
46
- author="The LLMLingua team",
47
- author_email="[email protected]",
48
- description="To speed up LLMs' inference and enhance LLM's perceive of key information, compress the prompt and KV-Cache, which achieves up to 20x compression with minimal performance loss.",
49
- long_description=open("README.md", encoding="utf8").read(),
50
- long_description_content_type="text/markdown",
51
- keywords="Prompt Compression, LLMs, Inference Acceleration, Black-box LLMs, Efficient LLMs",
52
- license="MIT License",
53
- url="https://github.com/microsoft/LLMLingua",
54
- classifiers=[
55
- "Intended Audience :: Science/Research",
56
- "Development Status :: 3 - Alpha",
57
- "Programming Language :: Python :: 3",
58
- "Topic :: Scientific/Engineering :: Artificial Intelligence",
59
- ],
60
- package_dir={"": "."},
61
- packages=find_packages("."),
62
- extras_require={
63
- "dev": DEV_REQUIRES,
64
- "quality": QUANLITY_REQUIRES,
65
- },
66
- install_requires=INSTALL_REQUIRES,
67
- include_package_data=True,
68
- python_requires=">=3.8.0",
69
- zip_safe=False,
70
- )