xzyao committed
Commit f997c76 · verified · 1 Parent(s): e5ef159

Upload folder using huggingface_hub

README.md ADDED
@@ -0,0 +1,44 @@
+ ---
+ datasets:
+ - HuggingFaceH4/ultrachat_200k
+ base_model:
+ - meta-llama/Llama-2-7b-chat-hf
+ library_name: transformers
+ ---
+
+ ## meta-llama/Llama-2-7b-chat-hf - W4A16 Compression
+
+ This is a W4A16-compressed version of meta-llama/Llama-2-7b-chat-hf, produced with [llmcompressor](https://github.com/vllm-project/llm-compressor).
+
+ ## Compression Configuration
+
+ - Base Model: meta-llama/Llama-2-7b-chat-hf
+ - Compression Scheme: W4A16
+ - Dataset: HuggingFaceH4/ultrachat_200k
+ - Dataset Split: train_sft
+ - Number of Samples: 512
+ - Preprocessor: chat
+ - Maximum Sequence Length: 4096
+
+ ## Sample Output
+
+ #### Prompt:
+
+ ```
+ <s>[INST] Who is Alan Turing? [/INST]
+ ```
+
+ #### Output:
+
+ ```
+ <s><s> [INST] Who is Alan Turing? [/INST] Alan Turing (1912-1954) was a British mathematician, computer scientist, logician, and cryptographer who made significant contributions to the fields of computer science, artificial intelligence, and cryptography.
+
+ Turing was born in London, England, and grew up in a family of intellectuals. He was educated at Cambridge University, where he studied mathematics and logic, and later worked at the University of Manchester, where he developed the concept of the universal Turing machine, a theoretical model for a computer.
+
+ During World War II, Turing worked at Blet
+ ```
+
+ ## Evaluation
+
+ <TODO>
+
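Since the checkpoint is stored in compressed-tensors format (see `config.json` below), it can be loaded directly by an engine that understands that format, such as vLLM. A minimal sketch, assuming vLLM with compressed-tensors support is installed; the repository id is a hypothetical placeholder (the actual Hub id of this upload is not shown in the diff), and the sampling values mirror `generation_config.json`:

```python
from vllm import LLM, SamplingParams

REPO_ID = "xzyao/Llama-2-7b-chat-hf-W4A16"  # hypothetical placeholder id

llm = LLM(model=REPO_ID)
params = SamplingParams(temperature=0.6, top_p=0.9, max_tokens=128)

# Llama-2 chat format, matching the sample prompt above; the tokenizer
# prepends <s> itself (add_bos_token: true), so it is omitted here.
outputs = llm.generate(["[INST] Who is Alan Turing? [/INST]"], params)
print(outputs[0].outputs[0].text)
```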
config.json ADDED
@@ -0,0 +1,69 @@
+ {
+   "_name_or_path": "meta-llama/Llama-2-7b-chat-hf",
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 11008,
+   "max_position_embeddings": 4096,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 32,
+   "pretraining_tp": 1,
+   "quantization_config": {
+     "config_groups": {
+       "group_0": {
+         "input_activations": null,
+         "output_activations": null,
+         "targets": [
+           "Linear"
+         ],
+         "weights": {
+           "actorder": null,
+           "block_structure": null,
+           "dynamic": false,
+           "group_size": 128,
+           "num_bits": 4,
+           "observer": "minmax",
+           "observer_kwargs": {},
+           "strategy": "group",
+           "symmetric": true,
+           "type": "int"
+         }
+       }
+     },
+     "format": "pack-quantized",
+     "global_compression_ratio": 1.8915253764830147,
+     "ignore": [
+       "lm_head"
+     ],
+     "kv_cache_scheme": null,
+     "quant_method": "compressed-tensors",
+     "quantization_status": "compressed",
+     "sparsity_config": {
+       "format": "dense",
+       "global_sparsity": 0.15245697016458498,
+       "ignore": [],
+       "registry_requires_subclass": false,
+       "sparsity_structure": "unstructured",
+       "targets": []
+     }
+   },
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": null,
+   "rope_theta": 10000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "float16",
+   "transformers_version": "4.46.2",
+   "use_cache": true,
+   "vocab_size": 32000
+ }
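The `weights` block above specifies symmetric 4-bit integer quantization in groups of 128, with `lm_head` left unquantized. A back-of-the-envelope sketch of the storage cost this implies for the quantized tensors; the exact on-disk layout of the `pack-quantized` format may add further metadata, so treat this as an estimate:

```python
# Per group of 128 weights: 128 x 4-bit packed ints plus one fp16 scale
# (symmetric quantization, so no zero-point needs to be stored).
GROUP_SIZE = 128
NUM_BITS = 4
bits_per_weight = NUM_BITS + 16 / GROUP_SIZE  # scale cost amortized per weight
print(f"{bits_per_weight:.3f} bits/weight, "
      f"~{16 / bits_per_weight:.2f}x smaller than fp16 for quantized tensors")
# Tensors in "ignore" (lm_head) and non-Linear weights stay fp16, so the
# checkpoint-wide figure (global_compression_ratio above) is a separate metric.
```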
generation_config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "bos_token_id": 1,
+   "do_sample": true,
+   "eos_token_id": 2,
+   "max_length": 4096,
+   "pad_token_id": 0,
+   "temperature": 0.6,
+   "top_p": 0.9,
+   "transformers_version": "4.46.2"
+ }
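These sampling defaults are picked up automatically by `generate()` in transformers; they can also be inspected on their own. A small sketch, again with a hypothetical placeholder repository id:

```python
from transformers import GenerationConfig

gen = GenerationConfig.from_pretrained("xzyao/Llama-2-7b-chat-hf-W4A16")  # placeholder id
print(gen.do_sample, gen.temperature, gen.top_p)  # True 0.6 0.9
```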
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:76164ff2c0f2633a238a93929b637ae3489ad89bd125c85f8bc56b12251781de
+ size 3864098368
recipe.yaml ADDED
@@ -0,0 +1,6 @@
+ DEFAULT_stage:
+   DEFAULT_modifiers:
+     GPTQModifier:
+       targets: Linear
+       ignore: [lm_head]
+       scheme: W4A16
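The recipe applies GPTQ with the W4A16 scheme to every `Linear` layer except `lm_head`. The exact command behind this upload is not recorded in the diff, but combined with the README metadata (ultrachat_200k, split train_sft, 512 samples, chat preprocessing, max sequence length 4096) it would correspond to an llmcompressor one-shot run along these lines; a hedged sketch following the library's published example pattern, not the verified original invocation:

```python
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from llmcompressor.modifiers.quantization import GPTQModifier
from llmcompressor.transformers import oneshot

MODEL_ID = "meta-llama/Llama-2-7b-chat-hf"
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# 512 calibration samples, rendered with the chat template ("Preprocessor: chat")
ds = load_dataset("HuggingFaceH4/ultrachat_200k", split="train_sft[:512]")
ds = ds.map(lambda ex: {
    "text": tokenizer.apply_chat_template(ex["messages"], tokenize=False)
})

# Same modifier as recipe.yaml above
recipe = GPTQModifier(targets="Linear", ignore=["lm_head"], scheme="W4A16")

oneshot(
    model=model,
    dataset=ds,
    recipe=recipe,
    max_seq_length=4096,
    num_calibration_samples=512,
    output_dir="Llama-2-7b-chat-hf-W4A16",  # hypothetical output path
)
```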
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "add_prefix_space": null,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "legacy": false,
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": null,
+   "padding_side": "right",
+   "sp_model_kwargs": {},
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false
+ }
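The `chat_template` above produces exactly the Llama-2 prompt format shown in the README sample. A quick sketch of rendering it, once more with a placeholder repository id:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("xzyao/Llama-2-7b-chat-hf-W4A16")  # placeholder id
messages = [{"role": "user", "content": "Who is Alan Turing?"}]
print(tok.apply_chat_template(messages, tokenize=False))
# -> <s>[INST] Who is Alan Turing? [/INST]
```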