michaelfeil committed
Commit • d8749c7
Parent(s): 9f7ef37

Upload EleutherAI/gpt-neox-20b ctranslate fp16 weights

Browse files:
- .gitattributes +48 -8
- README.md +226 -0
- merges.txt +0 -0
- model.bin +2 -2
- special_tokens_map.json +1 -0
- vocab.json +0 -0
.gitattributes CHANGED

```diff
@@ -1,28 +1,22 @@
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
+*.bin.* filter=lfs diff=lfs merge=lfs -text
 *.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
 *.ftz filter=lfs diff=lfs merge=lfs -text
 *.gz filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text
 *.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
 *.model filter=lfs diff=lfs merge=lfs -text
 *.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
 *.onnx filter=lfs diff=lfs merge=lfs -text
 *.ot filter=lfs diff=lfs merge=lfs -text
 *.parquet filter=lfs diff=lfs merge=lfs -text
 *.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
 *.pt filter=lfs diff=lfs merge=lfs -text
 *.pth filter=lfs diff=lfs merge=lfs -text
 *.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
@@ -30,5 +24,51 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
-*.
+*.zstandard filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+model-00010-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00012-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00015-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00031-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00029-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00005-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00037-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00025-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00027-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00018-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00044-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00021-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00017-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00023-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00040-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00034-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00046-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00022-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00033-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00035-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00002-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00020-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00026-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00042-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00013-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00019-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00008-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00016-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00014-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00030-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00028-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00045-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00024-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00009-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00036-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00039-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00006-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00011-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00007-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00032-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00041-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00001-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00043-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00038-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00004-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00003-of-00046.safetensors filter=lfs diff=lfs merge=lfs -text
```
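The entries above are gitattributes glob patterns: any path matching one is stored via Git LFS instead of the regular object database. A rough illustration of the matching, using Python's `fnmatch` as a stand-in (an approximation; Git's matcher differs in edge cases such as `**` handling):

```python
import fnmatch

# A few of the patterns from the new .gitattributes above.
lfs_patterns = ["*.bin", "*.bin.*", "*.onnx", "model-00001-of-00046.safetensors"]

def tracked_by_lfs(path: str) -> bool:
    """True if the path matches any LFS-tracked pattern (approximate semantics)."""
    return any(fnmatch.fnmatch(path, pat) for pat in lfs_patterns)

print(tracked_by_lfs("model.bin"))                         # True: matches *.bin
print(tracked_by_lfs("model-00001-of-00046.safetensors"))  # True: exact entry
print(tracked_by_lfs("README.md"))                         # False: stays in git
```

Note that the blanket `*.safetensors` rule is removed in this commit; each of the 46 shards is instead listed by its exact filename.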
README.md ADDED

@@ -0,0 +1,226 @@
---
language:
- en
tags:
- ctranslate2
- int8
- float16
- pytorch
- causal-lm
license: apache-2.0
datasets:
- the_pile
---
# Fast inference with CTranslate2
Speed up inference while reducing memory 2x-4x using int8 inference in C++ on CPU or GPU.

Quantized version of [EleutherAI/gpt-neox-20b](https://huggingface.co/EleutherAI/gpt-neox-20b)
```bash
pip install "hf-hub-ctranslate2>=2.0.6"
```
Converted on 2023-05-19 using
```bash
ct2-transformers-converter --model EleutherAI/gpt-neox-20b --output_dir /home/feil_m/tmp-ct2fast-gpt-neox-20b --force --copy_files merges.txt tokenizer.json README.md tokenizer_config.json vocab.json special_tokens_map.json .gitattributes --quantization float16
```

Checkpoint compatible with [ctranslate2>=3.13.0](https://github.com/OpenNMT/CTranslate2) and [hf-hub-ctranslate2>=2.0.6](https://github.com/michaelfeil/hf-hub-ctranslate2)
- `compute_type=int8_float16` for `device="cuda"`
- `compute_type=int8` for `device="cpu"`

```python
from hf_hub_ctranslate2 import TranslatorCT2fromHfHub, GeneratorCT2fromHfHub
from transformers import AutoTokenizer

model_name = "michaelfeil/ct2fast-gpt-neox-20b"
# Use either TranslatorCT2fromHfHub or GeneratorCT2fromHfHub here, depending on the model.
model = GeneratorCT2fromHfHub(
    # load in int8 on CUDA
    model_name_or_path=model_name,
    device="cuda",
    compute_type="int8_float16",
    tokenizer=AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
)
outputs = model.generate(
    text=["How do you call a fast Flan-ingo?", "User: How are you doing? Bot:"],
)
print(outputs)
```
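The 2x-4x memory figure follows from bytes per weight: int8 stores one byte per parameter versus two for float16 and four for float32. A back-of-the-envelope sketch (my own arithmetic, using the parameter count from the hyperparameter table below; weight storage only, ignoring activations, quantization scales, and the KV cache):

```python
# Approximate weight-storage footprint of GPT-NeoX-20B at different precisions.
N_PARAMS = 20_554_567_680  # from the model card's hyperparameter table

def weight_bytes_gb(bytes_per_param: int) -> float:
    """Gigabytes needed to store the weights at the given precision."""
    return N_PARAMS * bytes_per_param / 1e9

fp32_gb = weight_bytes_gb(4)  # ~82.2 GB
fp16_gb = weight_bytes_gb(2)  # ~41.1 GB (this repo's model.bin is ~41 GB)
int8_gb = weight_bytes_gb(1)  # ~20.6 GB

print(f"fp32: {fp32_gb:.1f} GB, fp16: {fp16_gb:.1f} GB, int8: {int8_gb:.1f} GB")
print(f"int8 saves {fp32_gb / int8_gb:.0f}x vs fp32, {fp16_gb / int8_gb:.0f}x vs fp16")
```

So the 2x end of the range is relative to a float16 checkpoint, the 4x end relative to float32.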

# Licence and other remarks:
This is just a quantized version. Licence conditions are intended to be identical to those of the original Hugging Face repo.

# Original description

GPT-NeoX-20B is a 20 billion parameter autoregressive language model trained on [the Pile](https://pile.eleuther.ai/) using the [GPT-NeoX library](https://github.com/EleutherAI/gpt-neox). Its architecture intentionally resembles that of GPT-3, and is almost identical to that of [GPT-J-6B](https://huggingface.co/EleutherAI/gpt-j-6B). Its training dataset contains a multitude of English-language texts, reflecting the general-purpose nature of this model. See the [accompanying paper](https://arxiv.org/abs/2204.06745) for details about model architecture (including how it differs from GPT-3), training procedure, and additional evaluations.

### Model details

- Developed by: [EleutherAI](http://eleuther.ai)
- Model type: Transformer-based Language Model
- Language: English
- Learn more: [GPT-NeoX-20B: An Open-Source Autoregressive Language Model](https://arxiv.org/abs/2204.06745). For details about the training dataset, see [the Pile paper](https://arxiv.org/abs/2101.00027) and [its data sheet](https://arxiv.org/abs/2201.07311).
- License: Apache 2.0
- Contact: to ask questions about this model, join the [EleutherAI Discord](https://discord.gg/zBGx3azzUn) and post them in `#release-discussion`. Please read the existing GPT-NeoX-20B documentation before asking about the model on Discord. For general correspondence: [[email protected]](mailto:[email protected]).

<figure style="width:30em">

| Hyperparameter         | Value       |
| ---------------------- | ----------- |
| n<sub>parameters</sub> | 20554567680 |
| n<sub>layers</sub>     | 44          |
| d<sub>model</sub>      | 6144        |
| n<sub>heads</sub>      | 64          |
| d<sub>head</sub>       | 96          |
| n<sub>vocab</sub>      | 50257       |
| Sequence Length        | 2048        |
| Learning Rate          | 0.97 x 10<sup>-5</sup> |
| Positional Encoding    | [Rotary Position Embedding (RoPE)](https://arxiv.org/abs/2104.09864) |
</figure>

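The table's entries are mutually consistent: each of the 44 layers carries roughly 12·d<sub>model</sub>² weights (attention QKV and output projections plus the 4x MLP), and the untied input/output embeddings add about 2·n<sub>vocab</sub>·d<sub>model</sub>. This sanity check (my own arithmetic, not from the original card) lands within 0.1% of the listed total; the remainder is biases and layer norms:

```python
# Rough parameter count for GPT-NeoX-20B from its architecture hyperparameters.
n_layers, d_model, n_vocab = 44, 6144, 50257
listed_total = 20_554_567_680

# Per layer: attention QKV (3*d^2) + output projection (d^2)
# + MLP up and down projections (4*d^2 each) = 12*d^2 weights.
per_layer = 12 * d_model ** 2
# Untied input embedding and output (unembedding) matrices.
embeddings = 2 * n_vocab * d_model

estimate = n_layers * per_layer + embeddings
print(f"estimate: {estimate:,} vs listed: {listed_total:,}")
print(f"relative error: {abs(estimate - listed_total) / listed_total:.4%}")
```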
### Uses and limitations

#### Intended use

GPT-NeoX-20B was developed primarily for research purposes. It learns an inner representation of the English language that can be used to extract features useful for downstream tasks.

In addition to scientific uses, you may also further fine-tune and adapt GPT-NeoX-20B for deployment, as long as your use is in accordance with the Apache 2.0 license. This model works with the [Transformers Library](https://huggingface.co/docs/transformers/index). If you decide to use pre-trained GPT-NeoX-20B as a basis for your fine-tuned model, please note that you need to conduct your own risk and bias assessment.

#### Out-of-scope use

GPT-NeoX-20B is **not** intended for deployment as-is. It is not a product and cannot be used for human-facing interactions without supervision.

GPT-NeoX-20B has not been fine-tuned for downstream tasks for which language models are commonly deployed, such as writing genre prose or commercial chatbots. This means GPT-NeoX-20B will likely **not** respond to a given prompt the way products such as ChatGPT do. This is because, unlike GPT-NeoX-20B, ChatGPT was fine-tuned using methods such as Reinforcement Learning from Human Feedback (RLHF) to better “understand” human instructions and dialogue.

This model is English-language only, and thus cannot be used for translation or generating text in other languages.

#### Limitations and biases

The core functionality of GPT-NeoX-20B is to take a string of text and predict the next token. Remember that the statistically most likely next token need not result in the most “accurate” text. Never rely on GPT-NeoX-20B to produce factually accurate output.

This model was trained on [the Pile](https://pile.eleuther.ai/), a dataset known to contain profanity and texts that are lewd or otherwise offensive. See [Section 6 of the Pile paper](https://arxiv.org/abs/2101.00027) for a discussion of documented biases with regards to gender, religion, and race. GPT-NeoX-20B may produce socially unacceptable or undesirable text, *even if* the prompt itself does not include anything explicitly offensive.

We recommend curating the outputs of this model before presenting them to a human reader. Please inform your audience that you are using artificially generated text.

#### How to use
If you simply want to try out some prompts, check out [this playground](https://20b.eleuther.ai/).

GPT-NeoX-20B can be loaded using the `AutoModelForCausalLM` functionality:
```python
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neox-20b")
```

### Training

#### Training dataset

The Pile is an 825GiB general-purpose dataset in English. It was created by EleutherAI specifically for training large language models. It contains texts from 22 diverse sources, roughly broken down into five categories: academic writing (e.g. arXiv), internet (e.g. CommonCrawl), prose (e.g. Project Gutenberg), dialogue (e.g. YouTube subtitles), and miscellaneous (e.g. GitHub, Enron Emails). See [the Pile paper](https://arxiv.org/abs/2101.00027) for a breakdown of all data sources, methodology, and a discussion of ethical implications. Consult [the datasheet](https://arxiv.org/abs/2201.07311) for more detailed documentation about the Pile and its component datasets. The Pile can be downloaded from the [official website](https://pile.eleuther.ai/) or from a [community mirror](https://the-eye.eu/public/AI/pile/).

The Pile was **not** deduplicated before being used to train GPT-NeoX-20B.

#### Training procedure

GPT-NeoX-20B was trained with a batch size of approximately 3.15M tokens (1538 sequences of 2048 tokens each), for a total of 150,000 steps. Tensor parallelism and pipeline parallelism were used to distribute the model across GPUs. Additional details about the training procedure are in [Section 3 of the accompanying paper](https://arxiv.org/abs/2204.06745).

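The batch-size figures are internally consistent, and they also imply the total number of training tokens (my own arithmetic; the derived total is not stated in this card):

```python
# Check the stated batch size and derive the total training-token count.
sequences_per_batch = 1538
seq_len = 2048
steps = 150_000

tokens_per_batch = sequences_per_batch * seq_len
print(f"tokens per batch: {tokens_per_batch:,}")  # 3,149,824 ~= 3.15M, as stated

total_tokens = tokens_per_batch * steps
print(f"total training tokens: ~{total_tokens / 1e9:.0f}B")  # ~472B
```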
### Evaluations

<figure style="width:55em">

| Model         | OpenAI’s LAMBADA | SciQ          | PIQA          | TriviaQA      | ARC (Challenge) |
| ------------- | :--------------: | :-----------: | :-----------: | :-----------: | :-------------: |
| GPT-J-6B      | 0.683 ± 0.006    | 0.910 ± 0.009 | 0.752 ± 0.010 | 0.170 ± 0.004 | 0.340 ± 0.014   |
| FairSeq 6.7B  | 0.673 ± 0.007    | 0.895 ± 0.010 | 0.762 ± 0.010 | 0.221 ± 0.004 | 0.329 ± 0.014   |
| GPT-3 Curie   | 0.693 ± 0.006    | 0.918 ± 0.009 | 0.767 ± 0.010 | 0.196 ± 0.004 | 0.334 ± 0.014   |
| FairSeq 13B   | 0.709 ± 0.006    | 0.910 ± 0.009 | 0.769 ± 0.010 | 0.270 ± 0.004 | 0.345 ± 0.014   |
| GPT-NeoX-20B  | 0.720 ± 0.006    | 0.928 ± 0.008 | 0.779 ± 0.010 | 0.259 ± 0.004 | 0.380 ± 0.014   |
| GPT-3 DaVinci | 0.752 ± 0.006    | 0.949 ± 0.007 | 0.791 ± 0.009 | 0.409 ± 0.005 | 0.435 ± 0.014   |
<figcaption>Zero-shot performance on selected natural language tasks.</figcaption>
</figure>

This is a heavily abridged version of the evaluation results. Appendix D of the [GPT-NeoX-20B paper](https://arxiv.org/abs/2204.06745) compares more model sizes and contains additional evaluations, including on: zero- and five-shot natural language tasks, zero- and five-shot Basic Arithmetic and MATH, and zero-shot Hendrycks tasks.

### BibTeX

To cite the GPT-NeoX-20B paper:

```
@misc{https://doi.org/10.48550/arxiv.2204.06745,
  doi = {10.48550/ARXIV.2204.06745},
  url = {https://arxiv.org/abs/2204.06745},
  author = {Black, Sid and Biderman, Stella and Hallahan, Eric and Anthony, Quentin and Gao, Leo and Golding, Laurence and He, Horace and Leahy, Connor and McDonell, Kyle and Phang, Jason and Pieler, Michael and Prashanth, USVSN Sai and Purohit, Shivanshu and Reynolds, Laria and Tow, Jonathan and Wang, Ben and Weinbach, Samuel},
  keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences, FOS: Computer and information sciences},
  title = {GPT-NeoX-20B: An Open-Source Autoregressive Language Model},
  publisher = {arXiv},
  year = {2022},
  copyright = {Creative Commons Attribution 4.0 International}
}
```
merges.txt ADDED

The diff for this file is too large to render. See raw diff.
model.bin CHANGED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:b622e32bb32c06036afbc59dc0231c03a599c55b5bfe6f9d6ff37e1167e216c6
+size 41108655105
```
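The new pointer's size is consistent with a float16 export of the full model: at two bytes per parameter, the ~20.55B weights come to ~41.1 GB, essentially matching the 41,108,655,105 bytes recorded in the LFS pointer. A quick check (my own arithmetic; the small residual is down to format overhead and the exact parameter accounting):

```python
# Sanity-check model.bin's size against a 2-bytes-per-parameter fp16 export.
n_params = 20_554_567_680   # GPT-NeoX-20B parameter count
file_size = 41_108_655_105  # size recorded in the LFS pointer above

expected = n_params * 2     # float16 stores 2 bytes per weight
rel_diff = abs(expected - file_size) / file_size
print(f"expected ~{expected:,} bytes, actual {file_size:,} ({rel_diff:.5%} apart)")
```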
special_tokens_map.json ADDED

```diff
@@ -0,0 +1 @@
+{"bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "unk_token": "<|endoftext|>"}
```
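All three special tokens in this file map to the same `<|endoftext|>` marker, which GPT-NeoX uses for beginning-of-sequence, end-of-sequence, and unknown alike; a minimal parse confirms this:

```python
import json

# The single line added to special_tokens_map.json above.
raw = '{"bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "unk_token": "<|endoftext|>"}'
tokens = json.loads(raw)

# GPT-NeoX reuses one <|endoftext|> token for BOS, EOS, and unknown.
assert tokens["bos_token"] == tokens["eos_token"] == tokens["unk_token"] == "<|endoftext|>"
print(sorted(tokens))  # ['bos_token', 'eos_token', 'unk_token']
```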
vocab.json ADDED

The diff for this file is too large to render. See raw diff.