michaelfeil committed on
Commit
d8749c7
1 Parent(s): 9f7ef37

Upload EleutherAI/gpt-neox-20b ctranslate fp16 weights

Files changed (6)
  1. .gitattributes +48 -8
  2. README.md +226 -0
  3. merges.txt +0 -0
  4. model.bin +2 -2
  5. special_tokens_map.json +1 -0
  6. vocab.json +0 -0
.gitattributes CHANGED
@@ -1,28 +1,22 @@
  *.7z filter=lfs diff=lfs merge=lfs -text
  *.arrow filter=lfs diff=lfs merge=lfs -text
  *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
  *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
  *.ftz filter=lfs diff=lfs merge=lfs -text
  *.gz filter=lfs diff=lfs merge=lfs -text
  *.h5 filter=lfs diff=lfs merge=lfs -text
  *.joblib filter=lfs diff=lfs merge=lfs -text
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
  *.model filter=lfs diff=lfs merge=lfs -text
  *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
  *.onnx filter=lfs diff=lfs merge=lfs -text
  *.ot filter=lfs diff=lfs merge=lfs -text
  *.parquet filter=lfs diff=lfs merge=lfs -text
  *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
  *.pt filter=lfs diff=lfs merge=lfs -text
  *.pth filter=lfs diff=lfs merge=lfs -text
  *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.tar.* filter=lfs diff=lfs merge=lfs -text
  *.tflite filter=lfs diff=lfs merge=lfs -text
@@ -30,5 +24,51 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.wasm filter=lfs diff=lfs merge=lfs -text
  *.xz filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ model-00010-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00012-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00015-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00031-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00029-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00005-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00037-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00025-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00027-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00018-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00044-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00021-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00017-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00023-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00040-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00034-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00046-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00022-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00033-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00035-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00002-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00020-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00026-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00042-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00013-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00019-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00008-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00016-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00014-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00030-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00028-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00045-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00024-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00009-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00036-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00039-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00006-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00011-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00007-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00032-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00041-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00001-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00043-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00038-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00004-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00003-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,226 @@
---
language:
- en
tags:
- ctranslate2
- int8
- float16
- pytorch
- causal-lm
license: apache-2.0
datasets:
- the_pile
---
# Fast-Inference with CTranslate2
Speed up inference and reduce memory by 2x-4x using int8 inference in C++ on CPU or GPU.

Quantized version of [EleutherAI/gpt-neox-20b](https://huggingface.co/EleutherAI/gpt-neox-20b).
```bash
pip install "hf-hub-ctranslate2>=2.0.6"
```
Converted on 2023-05-19 using
```bash
ct2-transformers-converter --model EleutherAI/gpt-neox-20b --output_dir /home/feil_m/tmp-ct2fast-gpt-neox-20b --force --copy_files merges.txt tokenizer.json README.md tokenizer_config.json vocab.json special_tokens_map.json .gitattributes --quantization float16
```

Checkpoint compatible with [ctranslate2>=3.13.0](https://github.com/OpenNMT/CTranslate2) and [hf-hub-ctranslate2>=2.0.6](https://github.com/michaelfeil/hf-hub-ctranslate2)
- `compute_type=int8_float16` for `device="cuda"`
- `compute_type=int8` for `device="cpu"`

```python
from hf_hub_ctranslate2 import TranslatorCT2fromHfHub, GeneratorCT2fromHfHub
from transformers import AutoTokenizer

model_name = "michaelfeil/ct2fast-gpt-neox-20b"
# use either TranslatorCT2fromHfHub or GeneratorCT2fromHfHub here, depending on the model
model = GeneratorCT2fromHfHub(
    # load in int8 on CUDA
    model_name_or_path=model_name,
    device="cuda",
    compute_type="int8_float16",
    tokenizer=AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
)
outputs = model.generate(
    text=["How do you call a fast Flan-ingo?", "User: How are you doing? Bot:"],
)
print(outputs)
```
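
If you prefer to call CTranslate2 directly rather than going through the `hf-hub-ctranslate2` wrapper, a minimal sketch could look like the following (not part of the original card; it assumes `ctranslate2>=3.13.0` and `huggingface_hub` are installed, and the sampling parameters are arbitrary):

```python
import ctranslate2
from huggingface_hub import snapshot_download
from transformers import AutoTokenizer

# download the converted weights from this repo to a local directory
model_dir = snapshot_download("michaelfeil/ct2fast-gpt-neox-20b")

tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
generator = ctranslate2.Generator(model_dir, device="cuda", compute_type="int8_float16")

# CTranslate2 consumes token strings, not token ids
prompt_tokens = tokenizer.convert_ids_to_tokens(
    tokenizer.encode("How do you call a fast Flan-ingo?")
)
results = generator.generate_batch([prompt_tokens], max_length=64, sampling_topk=10)

# the result contains the generated token strings (prompt included by default)
output_ids = tokenizer.convert_tokens_to_ids(results[0].sequences[0])
print(tokenizer.decode(output_ids))
```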

# License and other remarks:
This is just a quantized version. License conditions are intended to be identical to those of the original Hugging Face repo.

# Original description

GPT-NeoX-20B is a 20 billion parameter autoregressive language model trained
on [the Pile](https://pile.eleuther.ai/) using the [GPT-NeoX
library](https://github.com/EleutherAI/gpt-neox). Its architecture intentionally
resembles that of GPT-3, and is almost identical to that of
[GPT-J-6B](https://huggingface.co/EleutherAI/gpt-j-6B). Its training dataset contains
a multitude of English-language texts, reflecting the general-purpose nature
of this model. See the [accompanying paper](https://arxiv.org/abs/2204.06745)
for details about model architecture (including how it differs from GPT-3),
training procedure, and additional evaluations.

### Model details

- Developed by: [EleutherAI](http://eleuther.ai)
- Model type: Transformer-based Language Model
- Language: English
- Learn more: [GPT-NeoX-20B: An Open-Source Autoregressive Language
Model](https://arxiv.org/abs/2204.06745). For details about the training dataset,
see [the Pile paper](https://arxiv.org/abs/2101.00027), and [its data
sheet](https://arxiv.org/abs/2201.07311).
- License: Apache 2.0
- Contact: to ask questions about this model, join the [EleutherAI
Discord](https://discord.gg/zBGx3azzUn), and post them in `#release-discussion`.
Please read the existing GPT-NeoX-20B documentation before asking about the model
on Discord. For general correspondence: [contact@eleuther.ai](mailto:contact@eleuther.ai).

<figure style="width:30em">

| Hyperparameter | Value |
| ---------------------- | ----------- |
| n<sub>parameters</sub> | 20554567680 |
| n<sub>layers</sub> | 44 |
| d<sub>model</sub> | 6144 |
| n<sub>heads</sub> | 64 |
| d<sub>head</sub> | 96 |
| n<sub>vocab</sub> | 50257 |
| Sequence Length | 2048 |
| Learning Rate | 0.97 x 10<sup>-5</sup> |
| Positional Encoding | [Rotary Position Embedding (RoPE)](https://arxiv.org/abs/2104.09864) |
</figure>

### Uses and limitations

#### Intended use

GPT-NeoX-20B was developed primarily for research purposes. It learns an inner
representation of the English language that can be used to extract features
useful for downstream tasks.

In addition to scientific uses, you may also further fine-tune and adapt
GPT-NeoX-20B for deployment, as long as your use is in accordance with the
Apache 2.0 license. This model works with the [Transformers
Library](https://huggingface.co/docs/transformers/index). If you decide to use
pre-trained GPT-NeoX-20B as a basis for your fine-tuned model, please note that
you need to conduct your own risk and bias assessment.

#### Out-of-scope use

GPT-NeoX-20B is **not** intended for deployment as-is. It is not a product
and cannot be used for human-facing interactions without supervision.

GPT-NeoX-20B has not been fine-tuned for downstream tasks for which language
models are commonly deployed, such as writing genre prose, or commercial
chatbots. This means GPT-NeoX-20B will likely **not** respond to a given prompt
the way products such as ChatGPT do. This is because, unlike GPT-NeoX-20B,
ChatGPT was fine-tuned using methods such as Reinforcement Learning from Human
Feedback (RLHF) to better “understand” human instructions and dialogue.

This model is English-language only, and thus cannot be used for translation
or generating text in other languages.

#### Limitations and biases

The core functionality of GPT-NeoX-20B is to take a string of text and predict
the next token. Remember that the statistically most likely next token need
not result in the most “accurate” text. Never rely on GPT-NeoX-20B to produce
factually accurate output.

This model was trained on [the Pile](https://pile.eleuther.ai/), a dataset
known to contain profanity and texts that are lewd or otherwise offensive.
See [Section 6 of the Pile paper](https://arxiv.org/abs/2101.00027) for a
discussion of documented biases with regards to gender, religion, and race.
GPT-NeoX-20B may produce socially unacceptable or undesirable text, *even if*
the prompt itself does not include anything explicitly offensive.

We recommend curating the outputs of this model before presenting them to a human
reader. Please inform your audience that you are using artificially generated
text.

#### How to use
If you simply want to try out some prompts, check out [this
playground](https://20b.eleuther.ai/).

GPT-NeoX-20B can be loaded using the `AutoModelForCausalLM` functionality:
```python
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neox-20b")
```
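
As a minimal sketch of what generation with the loaded model could look like (not part of the original card; the fp16 dtype, `device_map="auto"` loading, which requires `accelerate`, and the sampling settings are illustrative assumptions):

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
# fp16 halves memory vs fp32; device_map="auto" spreads the 20B weights across available GPUs
model = AutoModelForCausalLM.from_pretrained(
    "EleutherAI/gpt-neox-20b",
    torch_dtype=torch.float16,
    device_map="auto",
)

inputs = tokenizer("GPT-NeoX-20B is a language model that", return_tensors="pt").to(model.device)
output_ids = model.generate(**inputs, max_new_tokens=40, do_sample=True, temperature=0.8)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```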

### Training

#### Training dataset

The Pile is a 825GiB general-purpose dataset in English. It was created by
EleutherAI specifically for training large language models. It contains texts
from 22 diverse sources, roughly broken down into five categories: academic
writing (e.g. arXiv), internet (e.g. CommonCrawl), prose (e.g. Project
Gutenberg), dialogue (e.g. YouTube subtitles), and miscellaneous (e.g. GitHub,
Enron Emails). See [the Pile paper](https://arxiv.org/abs/2101.00027) for
a breakdown of all data sources, methodology, and a discussion of ethical
implications. Consult [the datasheet](https://arxiv.org/abs/2201.07311) for
more detailed documentation about the Pile and its component datasets. The
Pile can be downloaded from the [official website](https://pile.eleuther.ai/),
or from a [community mirror](https://the-eye.eu/public/AI/pile/).

The Pile was **not** deduplicated before being used to train GPT-NeoX-20B.

#### Training procedure

GPT-NeoX-20B was trained with a batch size of approximately 3.15M tokens
(1538 sequences of 2048 tokens each), for a total of 150,000 steps. Tensor
parallelism and pipeline parallelism were used to distribute the model across
GPUs. Additional details about the training procedure are in [Section 3 of
the accompanying paper](https://arxiv.org/abs/2204.06745).
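
(For scale, 1538 x 2048 = 3,149,824 ≈ 3.15M tokens per step, so 150,000 steps amount to roughly 472B training tokens.)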

### Evaluations

<figure style="width:55em">

| Model | OpenAI’s LAMBADA | SciQ | PIQA | TriviaQA | ARC (Challenge) |
| ------------- | :--------------: | :-----------: | :-----------: | :-----------: | :-------------: |
| GPT-J-6B | 0.683 ± 0.006 | 0.910 ± 0.009 | 0.752 ± 0.010 | 0.170 ± 0.004 | 0.340 ± 0.014 |
| FairSeq 6.7B | 0.673 ± 0.007 | 0.895 ± 0.010 | 0.762 ± 0.010 | 0.221 ± 0.004 | 0.329 ± 0.014 |
| GPT-3 Curie | 0.693 ± 0.006 | 0.918 ± 0.009 | 0.767 ± 0.010 | 0.196 ± 0.004 | 0.334 ± 0.014 |
| FairSeq 13B | 0.709 ± 0.006 | 0.910 ± 0.009 | 0.769 ± 0.010 | 0.270 ± 0.004 | 0.345 ± 0.014 |
| GPT-NeoX-20B | 0.720 ± 0.006 | 0.928 ± 0.008 | 0.779 ± 0.010 | 0.259 ± 0.004 | 0.380 ± 0.014 |
| GPT-3 DaVinci | 0.752 ± 0.006 | 0.949 ± 0.007 | 0.791 ± 0.009 | 0.409 ± 0.005 | 0.435 ± 0.014 |
<figcaption>Zero-shot performance on selected natural language tasks.</figcaption>
</figure>

This is a heavily abridged version of the evaluation results. Appendix D of the
[GPT-NeoX-20B paper](https://arxiv.org/abs/2204.06745) compares more model
sizes, and contains additional evaluations, including on: zero and five-shot
natural language tasks, zero and five-shot Basic Arithmetic and MATH,
and zero-shot Hendrycks tasks.

### BibTeX

To cite the GPT-NeoX-20B paper:

```bibtex
@misc{https://doi.org/10.48550/arxiv.2204.06745,
  doi = {10.48550/ARXIV.2204.06745},
  url = {https://arxiv.org/abs/2204.06745},
  author = {Black, Sid and Biderman, Stella and Hallahan, Eric and Anthony, Quentin and Gao, Leo and Golding, Laurence and He, Horace and Leahy, Connor and McDonell, Kyle and Phang, Jason and Pieler, Michael and Prashanth, USVSN Sai and Purohit, Shivanshu and Reynolds, Laria and Tow, Jonathan and Wang, Ben and Weinbach, Samuel},
  keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences, FOS: Computer and information sciences},
  title = {GPT-NeoX-20B: An Open-Source Autoregressive Language Model},
  publisher = {arXiv},
  year = {2022},
  copyright = {Creative Commons Attribution 4.0 International}
}
```
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9017c96177e17d51d870f64af2fa29133fa3b367e398d85eeb6ca57c409046e2
- size 20567760329
+ oid sha256:b622e32bb32c06036afbc59dc0231c03a599c55b5bfe6f9d6ff37e1167e216c6
+ size 41108655105
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "unk_token": "<|endoftext|>"}
vocab.json ADDED
The diff for this file is too large to render. See raw diff