michaelfeil committed · Commit 483e626 · Parent(s): 08cdb4c

Upload bigcode/starcoder ctranslate fp16 weights

Files changed:
- README.md +16 -15
- config.json +41 -4
- model.bin +2 -2
- vocabulary.json +0 -0
README.md
CHANGED

@@ -264,30 +264,21 @@ Speedup inference while reducing memory by 2x-4x using int8 inference in C++ on
 
 quantized version of [bigcode/starcoder](https://huggingface.co/bigcode/starcoder)
 ```bash
-pip install hf-hub-ctranslate2>=2.0
+pip install hf-hub-ctranslate2>=2.12.0 ctranslate2>=3.16.0
 ```
-Converted on 2023-06-01 using
-```
-ct2-transformers-converter --model bigcode/starcoder --output_dir /home/michael/tmp-ct2fast-starcoder --force --copy_files merges.txt tokenizer.json README.md tokenizer_config.json vocab.json generation_config.json special_tokens_map.json .gitattributes --quantization int8_float16 --trust_remote_code
-```
-
-Checkpoint compatible with [ctranslate2>=3.14.0](https://github.com/OpenNMT/CTranslate2)
-and [hf-hub-ctranslate2>=2.0.8](https://github.com/michaelfeil/hf-hub-ctranslate2)
-- `compute_type=int8_float16` for `device="cuda"`
-- `compute_type=int8` for `device="cpu"`
-
+
 ```python
-from hf_hub_ctranslate2 import GeneratorCT2fromHfHub
-from transformers import AutoTokenizer
-
+# from transformers import AutoTokenizer
 model_name = "michaelfeil/ct2fast-starcoder"
 
+
+from hf_hub_ctranslate2 import GeneratorCT2fromHfHub
 model = GeneratorCT2fromHfHub(
     # load in int8 on CUDA
     model_name_or_path=model_name,
     device="cuda",
     compute_type="int8_float16",
-    # tokenizer=AutoTokenizer.from_pretrained("bigcode/starcoder")
+    # tokenizer=AutoTokenizer.from_pretrained("{ORG}/{NAME}")
 )
 outputs = model.generate(
     text=["def fibonnaci(", "User: How are you doing? Bot:"],
@@ -297,6 +288,16 @@ outputs = model.generate(
 print(outputs)
 ```
 
+Checkpoint compatible with [ctranslate2>=3.16.0](https://github.com/OpenNMT/CTranslate2)
+and [hf-hub-ctranslate2>=2.12.0](https://github.com/michaelfeil/hf-hub-ctranslate2)
+- `compute_type=int8_float16` for `device="cuda"`
+- `compute_type=int8` for `device="cpu"`
+
+Converted on 2023-06-27 using
+```
+ct2-transformers-converter --model bigcode/starcoder --output_dir ~/tmp-ct2fast-starcoder --force --copy_files merges.txt tokenizer.json README.md tokenizer_config.json vocab.json generation_config.json special_tokens_map.json .gitattributes --quantization int8_float16 --trust_remote_code
+```
+
 # Licence and other remarks:
 This is just a quantized version. Licence conditions are intended to be identical to the original huggingface repo.
 
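The README example in the diff covers only the CUDA path end to end. For the CPU bullet (`compute_type=int8` for `device="cpu"`), here is a minimal sketch assuming the same `GeneratorCT2fromHfHub` API shown above; the `max_length` argument is an assumption, since the diff view truncates the `model.generate(...)` call:

```python
# Sketch only: CPU variant of the README's CUDA example above.
from hf_hub_ctranslate2 import GeneratorCT2fromHfHub

model = GeneratorCT2fromHfHub(
    model_name_or_path="michaelfeil/ct2fast-starcoder",
    device="cpu",
    compute_type="int8",  # per the README bullets: int8 on CPU, int8_float16 on CUDA
)
outputs = model.generate(
    text=["def fibonnaci("],
    max_length=64,  # assumed argument; the generate(...) call is cut off in the diff view
)
print(outputs)
```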
config.json
CHANGED

@@ -1,5 +1,42 @@
 {
-  "bos_token": "<|endoftext|>",
-  "eos_token": "<|endoftext|>",
-  "unk_token": "<|endoftext|>"
-}
+  "_name_or_path": "/fsx/bigcode/experiments/pretraining/conversions/starcoderpy/large-model",
+  "activation_function": "gelu",
+  "architectures": [
+    "GPTBigCodeForCausalLM"
+  ],
+  "attention_softmax_in_fp32": true,
+  "multi_query": true,
+  "attn_pdrop": 0.1,
+  "bos_token_id": 0,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 0,
+  "inference_runner": 0,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": null,
+  "max_batch_size": null,
+  "max_sequence_length": null,
+  "model_type": "gpt_bigcode",
+  "n_embd": 6144,
+  "n_head": 48,
+  "n_inner": 24576,
+  "n_layer": 40,
+  "n_positions": 8192,
+  "pad_key_length": true,
+  "pre_allocate_kv_cache": false,
+  "resid_pdrop": 0.1,
+  "scale_attention_softmax_in_fp32": true,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.28.1",
+  "use_cache": true,
+  "validate_runner_input": true,
+  "vocab_size": 49152,
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}
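Nothing in the diff states a parameter count, but the added GPTBigCode fields are enough for a back-of-the-envelope check against the model.bin size below. A sketch, assuming standard GPTBigCode weight shapes with multi-query attention (one shared key/value head of dimension n_embd/n_head) and ignoring biases and layer norms:

```python
# Rough parameter count from the config values in the + lines above.
n_embd, n_head, n_inner, n_layer = 6144, 48, 24576, 40
vocab_size, n_positions = 49152, 8192

head_dim = n_embd // n_head                          # 128
embeddings = (vocab_size + n_positions) * n_embd     # token + position tables
attn = n_embd * (n_embd + 2 * head_dim) + n_embd**2  # QKV (multi-query) + output proj
mlp = 2 * n_embd * n_inner                           # up + down projection
total = embeddings + n_layer * (attn + mlp)

print(f"~{total / 1e9:.2f}B parameters")             # ~15.51B
# At int8_float16, roughly one byte per weight, which lines up with the
# ~15.58 GB model.bin recorded in the git-lfs pointer below.
```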
model.bin
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:f6ab1ecb43fb0d2e5aafb356836f92bd816e90e4d46297955c12b9a3f8c1c35d
+size 15577671723
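model.bin is stored via git-lfs, so only the hash and byte size live in git. A quick integrity check for a downloaded copy (the local path is hypothetical, not part of the repo):

```python
# Verify a downloaded model.bin against the sha256 in the LFS pointer above.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for block in iter(lambda: f.read(chunk_size), b""):
            h.update(block)
    return h.hexdigest()

expected = "f6ab1ecb43fb0d2e5aafb356836f92bd816e90e4d46297955c12b9a3f8c1c35d"
assert sha256_of("model.bin") == expected  # path is hypothetical
```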
vocabulary.json
ADDED

The diff for this file is too large to render. See raw diff.
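The vocabulary diff is unrendered, but in the usual CTranslate2 layout vocabulary.json is a flat JSON list of token strings, so its length can be cross-checked against the `vocab_size` added to config.json. A sketch under that assumption:

```python
# Cross-check the (unrendered) vocabulary.json against config.json.
import json

with open("vocabulary.json") as f:
    vocab = json.load(f)  # assumed: a flat list of token strings
print(len(vocab))  # expected 49152, the "vocab_size" from config.json
```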