michaelfeil committed on
Commit
483e626
·
1 Parent(s): 08cdb4c

Upload bigcode/starcoder ctranslate fp16 weights

Browse files
Files changed (4) hide show
  1. README.md +16 -15
  2. config.json +41 -4
  3. model.bin +2 -2
  4. vocabulary.json +0 -0
README.md CHANGED
@@ -264,30 +264,21 @@ Speedup inference while reducing memory by 2x-4x using int8 inference in C++ on
264
 
265
  quantized version of [bigcode/starcoder](https://huggingface.co/bigcode/starcoder)
266
  ```bash
267
- pip install hf-hub-ctranslate2>=2.0.8 ctranslate2>=3.14.0
268
- ```
269
- Converted on 2023-06-01 using
270
- ```
271
- ct2-transformers-converter --model bigcode/starcoder --output_dir /home/michael/tmp-ct2fast-starcoder --force --copy_files merges.txt tokenizer.json README.md tokenizer_config.json vocab.json generation_config.json special_tokens_map.json .gitattributes --quantization int8_float16 --trust_remote_code
272
  ```
273
 
274
- Checkpoint compatible to [ctranslate2>=3.14.0](https://github.com/OpenNMT/CTranslate2)
275
- and [hf-hub-ctranslate2>=2.0.8](https://github.com/michaelfeil/hf-hub-ctranslate2)
276
- - `compute_type=int8_float16` for `device="cuda"`
277
- - `compute_type=int8` for `device="cpu"`
278
-
279
  ```python
280
- from hf_hub_ctranslate2 import TranslatorCT2fromHfHub, GeneratorCT2fromHfHub
281
- from transformers import AutoTokenizer
282
-
283
  model_name = "michaelfeil/ct2fast-starcoder"
284
- # use either TranslatorCT2fromHfHub or GeneratorCT2fromHfHub here, depending on model.
 
 
285
  model = GeneratorCT2fromHfHub(
286
  # load in int8 on CUDA
287
  model_name_or_path=model_name,
288
  device="cuda",
289
  compute_type="int8_float16",
290
- # tokenizer=AutoTokenizer.from_pretrained("bigcode/starcoder")
291
  )
292
  outputs = model.generate(
293
  text=["def fibonnaci(", "User: How are you doing? Bot:"],
@@ -297,6 +288,16 @@ outputs = model.generate(
297
  print(outputs)
298
  ```
299
 
 
 
 
 
 
 
 
 
 
 
300
  # Licence and other remarks:
301
  This is just a quantized version. Licence conditions are intended to be identical to the original Hugging Face repo.
302
 
 
264
 
265
  quantized version of [bigcode/starcoder](https://huggingface.co/bigcode/starcoder)
266
  ```bash
267
+ pip install "hf-hub-ctranslate2>=2.12.0" "ctranslate2>=3.16.0"
 
 
 
 
268
  ```
269
 
 
 
 
 
 
270
  ```python
271
+ # from transformers import AutoTokenizer
 
 
272
  model_name = "michaelfeil/ct2fast-starcoder"
273
+
274
+
275
+ from hf_hub_ctranslate2 import GeneratorCT2fromHfHub
276
  model = GeneratorCT2fromHfHub(
277
  # load in int8 on CUDA
278
  model_name_or_path=model_name,
279
  device="cuda",
280
  compute_type="int8_float16",
281
+ # tokenizer=AutoTokenizer.from_pretrained("bigcode/starcoder")
282
  )
283
  outputs = model.generate(
284
  text=["def fibonnaci(", "User: How are you doing? Bot:"],
 
288
  print(outputs)
289
  ```
290
 
291
+ Checkpoint compatible with [ctranslate2>=3.16.0](https://github.com/OpenNMT/CTranslate2)
292
+ and [hf-hub-ctranslate2>=2.12.0](https://github.com/michaelfeil/hf-hub-ctranslate2)
293
+ - `compute_type=int8_float16` for `device="cuda"`
294
+ - `compute_type=int8` for `device="cpu"`
295
+
296
+ Converted on 2023-06-27 using
297
+ ```
298
+ ct2-transformers-converter --model bigcode/starcoder --output_dir ~/tmp-ct2fast-starcoder --force --copy_files merges.txt tokenizer.json README.md tokenizer_config.json vocab.json generation_config.json special_tokens_map.json .gitattributes --quantization int8_float16 --trust_remote_code
299
+ ```
300
+
301
  # Licence and other remarks:
302
  This is just a quantized version. Licence conditions are intended to be identical to the original Hugging Face repo.
303
 
config.json CHANGED
@@ -1,5 +1,42 @@
1
  {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
4
- "unk_token": "<|endoftext|>"
5
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  {
2
+ "_name_or_path": "/fsx/bigcode/experiments/pretraining/conversions/starcoderpy/large-model",
3
+ "activation_function": "gelu",
4
+ "architectures": [
5
+ "GPTBigCodeForCausalLM"
6
+ ],
7
+ "attention_softmax_in_fp32": true,
8
+ "multi_query": true,
9
+ "attn_pdrop": 0.1,
10
+ "bos_token_id": 0,
11
+ "embd_pdrop": 0.1,
12
+ "eos_token_id": 0,
13
+ "inference_runner": 0,
14
+ "initializer_range": 0.02,
15
+ "layer_norm_epsilon": null,
16
+ "max_batch_size": null,
17
+ "max_sequence_length": null,
18
+ "model_type": "gpt_bigcode",
19
+ "n_embd": 6144,
20
+ "n_head": 48,
21
+ "n_inner": 24576,
22
+ "n_layer": 40,
23
+ "n_positions": 8192,
24
+ "pad_key_length": true,
25
+ "pre_allocate_kv_cache": false,
26
+ "resid_pdrop": 0.1,
27
+ "scale_attention_softmax_in_fp32": true,
28
+ "scale_attn_weights": true,
29
+ "summary_activation": null,
30
+ "summary_first_dropout": 0.1,
31
+ "summary_proj_to_labels": true,
32
+ "summary_type": "cls_index",
33
+ "summary_use_proj": true,
34
+ "torch_dtype": "float32",
35
+ "transformers_version": "4.28.1",
36
+ "use_cache": true,
37
+ "validate_runner_input": true,
38
+ "vocab_size": 49152,
39
+ "bos_token": "<|endoftext|>",
40
+ "eos_token": "<|endoftext|>",
41
+ "unk_token": "<|endoftext|>"
42
+ }
model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fca03f2f72aba1df8d42f08ea8c40ed5329bd05b50801dd2768cd80f6ebb1136
3
- size 15577671563
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6ab1ecb43fb0d2e5aafb356836f92bd816e90e4d46297955c12b9a3f8c1c35d
3
+ size 15577671723
vocabulary.json ADDED
The diff for this file is too large to render. See raw diff