zhiyucheng committed
Commit de6d8e4 · 1 Parent(s): 46eaa7f

update readme and add artifacts

README.md CHANGED
@@ -65,24 +65,28 @@ This model was obtained by quantizing the weights and activations of DeepSeek R1

### Deploy with TensorRT-LLM

- To deploy the quantized checkpoint with [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) LLM API, follow the sample codes below:
+ To deploy the quantized FP4 checkpoint with [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) LLM API, follow the sample code below (requires 8x B200 GPUs and TensorRT-LLM 0.18, or build and install from source from the latest main branch):

* LLM API sample usage:
```
- from tensorrt_llm import LLM, SamplingParams
+ from tensorrt_llm import SamplingParams
+ from tensorrt_llm._torch import LLM
+ from tensorrt_llm._torch.pyexecutor.config import PyTorchConfig


def main():

+     pytorch_config = PyTorchConfig()
+
    prompts = [
        "Hello, my name is",
        "The president of the United States is",
        "The capital of France is",
        "The future of AI is",
    ]
-     sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
+     sampling_params = SamplingParams(max_tokens=32)

-     llm = LLM(model="nvidia/DeepSeek-R1-FP4")
+     llm = LLM(model="nvidia/DeepSeek-R1-FP4", tensor_parallel_size=8, pytorch_backend_config=pytorch_config, enable_attention_dp=True)

    outputs = llm.generate(prompts, sampling_params)

@@ -111,7 +115,7 @@ tar -xf data/mmlu.tar -C data && mv data/data data/mmlu
2) Measure MMLU:

```sh
- python examples/mmlu_llmapi.py --data_dir data/mmlu --hf_model_dir nvidia/DeepSeek-R1-FP4 --backend=pytorch
+ python examples/mmlu_llmapi.py --data_dir data/mmlu --hf_model_dir nvidia/DeepSeek-R1-FP4 --tp_size 8 --backend=pytorch
```

* Throughputs evaluation:
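
The LLM API snippet in the diff above stops at `llm.generate`; assembled end to end it would look roughly like the following. This is a minimal sketch assuming the TensorRT-LLM 0.18 PyTorch-backend API shown in the diff and vLLM-style request outputs (`output.prompt`, `output.outputs[0].text`); treat it as illustrative rather than the exact README code.

```
from tensorrt_llm import SamplingParams
from tensorrt_llm._torch import LLM
from tensorrt_llm._torch.pyexecutor.config import PyTorchConfig


def main():
    # PyTorch-backend runtime options; defaults are enough for a smoke test.
    pytorch_config = PyTorchConfig()

    prompts = [
        "Hello, my name is",
        "The president of the United States is",
        "The capital of France is",
        "The future of AI is",
    ]
    sampling_params = SamplingParams(max_tokens=32)

    # The FP4 DeepSeek-R1 checkpoint is served across 8 GPUs with attention data parallelism.
    llm = LLM(model="nvidia/DeepSeek-R1-FP4",
              tensor_parallel_size=8,
              pytorch_backend_config=pytorch_config,
              enable_attention_dp=True)

    outputs = llm.generate(prompts, sampling_params)

    # Assumed vLLM-style output objects; field names may differ across versions.
    for output in outputs:
        print(f"Prompt: {output.prompt!r}, Generated text: {output.outputs[0].text!r}")


if __name__ == "__main__":
    main()
```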
config.json ADDED
@@ -0,0 +1,70 @@
+ {
+   "architectures": [
+     "DeepseekV3ForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "auto_map": {
+     "AutoConfig": "configuration_deepseek.DeepseekV3Config",
+     "AutoModel": "modeling_deepseek.DeepseekV3Model",
+     "AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
+   },
+   "aux_loss_alpha": 0.001,
+   "bos_token_id": 0,
+   "eos_token_id": 1,
+   "ep_size": 1,
+   "first_k_dense_replace": 3,
+   "hidden_act": "silu",
+   "hidden_size": 7168,
+   "initializer_range": 0.02,
+   "intermediate_size": 18432,
+   "kv_lora_rank": 512,
+   "max_position_embeddings": 163840,
+   "model_type": "deepseek_v3",
+   "moe_intermediate_size": 2048,
+   "moe_layer_freq": 1,
+   "n_group": 8,
+   "n_routed_experts": 256,
+   "n_shared_experts": 1,
+   "norm_topk_prob": true,
+   "num_attention_heads": 128,
+   "num_experts_per_tok": 8,
+   "num_hidden_layers": 61,
+   "num_key_value_heads": 128,
+   "num_nextn_predict_layers": 1,
+   "pretraining_tp": 1,
+   "q_lora_rank": 1536,
+   "qk_nope_head_dim": 128,
+   "qk_rope_head_dim": 64,
+   "quantization_config": {
+     "activation_scheme": "dynamic",
+     "fmt": "e4m3",
+     "quant_method": "fp8",
+     "weight_block_size": [
+       128,
+       128
+     ]
+   },
+   "rms_norm_eps": 1e-06,
+   "rope_scaling": {
+     "beta_fast": 32,
+     "beta_slow": 1,
+     "factor": 40,
+     "mscale": 1.0,
+     "mscale_all_dim": 1.0,
+     "original_max_position_embeddings": 4096,
+     "type": "yarn"
+   },
+   "rope_theta": 10000,
+   "routed_scaling_factor": 2.5,
+   "scoring_func": "sigmoid",
+   "seq_aux": true,
+   "tie_word_embeddings": false,
+   "topk_group": 4,
+   "topk_method": "noaux_tc",
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.46.3",
+   "use_cache": true,
+   "v_head_dim": 128,
+   "vocab_size": 129280
+ }
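
Since `config.json` maps the architecture to custom `DeepseekV3` classes via `auto_map`, loading it through `transformers` needs `trust_remote_code=True`. A minimal sketch, assuming the custom `configuration_deepseek` module ships with the checkpoint as the `auto_map` entries imply:

```
from transformers import AutoConfig

# trust_remote_code=True lets transformers import the custom
# configuration_deepseek.DeepseekV3Config referenced in auto_map.
config = AutoConfig.from_pretrained("nvidia/DeepSeek-R1-FP4", trust_remote_code=True)

print(config.model_type)           # deepseek_v3
print(config.num_hidden_layers)    # 61
print(config.quantization_config)  # block-wise FP8 metadata carried over from the base model;
                                   # the NVFP4 details live in hf_quant_config.json
```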
generation_config.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 0,
+   "eos_token_id": 1,
+   "do_sample": true,
+   "temperature": 0.6,
+   "top_p": 0.95,
+   "transformers_version": "4.39.3"
+ }
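
These defaults (`do_sample`, `temperature=0.6`, `top_p=0.95`) are the recommended sampling settings and can be mirrored in the TensorRT-LLM `SamplingParams` used in the README sample above. A small sketch, assuming `SamplingParams` accepts these keyword arguments as in that snippet (`max_tokens` is an arbitrary choice, not part of this file):

```
from tensorrt_llm import SamplingParams

# Mirror generation_config.json: temperature=0.6, top_p=0.95.
# max_tokens is our own illustrative value, not taken from the file.
sampling_params = SamplingParams(temperature=0.6, top_p=0.95, max_tokens=256)
```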
hf_quant_config.json ADDED
@@ -0,0 +1,258 @@
+ {
+   "producer": {
+     "name": "modelopt",
+     "version": "0.23.0"
+   },
+   "quantization": {
+     "quant_algo": "NVFP4",
+     "kv_cache_quant_algo": null,
+     "group_size": 16,
+     "exclude_modules": [
+       "model.layers.30.self_attn*",
+       "model.layers.33.input_layernorm",
+       "model.layers.39.input_layernorm",
+       "model.layers.26.mlp.gate",
+       "model.layers.36.mlp.gate",
+       "model.layers.16.input_layernorm",
+       "model.layers.45.post_attention_layernorm",
+       "model.layers.52.input_layernorm",
+       "model.layers.38.input_layernorm",
+       "model.layers.28.post_attention_layernorm",
+       "model.layers.59.self_attn*",
+       "model.layers.25.self_attn*",
+       "model.layers.34.input_layernorm",
+       "model.layers.44.mlp.gate",
+       "model.layers.8.mlp.gate",
+       "model.layers.59.input_layernorm",
+       "model.layers.2.self_attn*",
+       "model.layers.40.input_layernorm",
+       "model.layers.11.post_attention_layernorm",
+       "model.layers.40.mlp.gate",
+       "model.layers.5.self_attn*",
+       "model.layers.29.post_attention_layernorm",
+       "model.layers.54.self_attn*",
+       "model.layers.3.input_layernorm",
+       "model.layers.43.self_attn*",
+       "model.layers.51.post_attention_layernorm",
+       "model.layers.50.post_attention_layernorm",
+       "model.norm",
+       "model.layers.57.input_layernorm",
+       "model.layers.22.self_attn*",
+       "model.layers.27.self_attn*",
+       "model.layers.21.self_attn*",
+       "model.layers.9.mlp.gate",
+       "model.layers.60.self_attn*",
+       "model.layers.24.post_attention_layernorm",
+       "model.layers.27.input_layernorm",
+       "model.layers.1.input_layernorm",
+       "model.layers.3.post_attention_layernorm",
+       "model.layers.57.mlp.gate",
+       "model.layers.18.input_layernorm",
+       "model.layers.58.mlp.gate",
+       "model.layers.33.mlp.gate",
+       "model.layers.52.post_attention_layernorm",
+       "model.layers.10.mlp.gate",
+       "model.layers.44.self_attn*",
+       "model.layers.29.input_layernorm",
+       "model.layers.15.self_attn*",
+       "model.layers.21.mlp.gate",
+       "model.layers.48.post_attention_layernorm",
+       "model.layers.12.mlp.gate",
+       "model.layers.30.input_layernorm",
+       "model.layers.34.post_attention_layernorm",
+       "model.layers.41.self_attn*",
+       "model.layers.18.mlp.gate",
+       "model.layers.24.mlp.gate",
+       "model.layers.42.mlp.gate",
+       "model.layers.0.input_layernorm",
+       "model.layers.23.self_attn*",
+       "model.layers.20.mlp.gate",
+       "model.layers.6.mlp.gate",
+       "model.layers.52.self_attn*",
+       "model.layers.30.post_attention_layernorm",
+       "model.layers.35.post_attention_layernorm",
+       "model.layers.11.self_attn*",
+       "model.layers.23.post_attention_layernorm",
+       "model.layers.51.mlp.gate",
+       "model.layers.4.mlp.gate",
+       "model.layers.22.mlp.gate",
+       "model.layers.41.post_attention_layernorm",
+       "model.layers.6.input_layernorm",
+       "model.layers.53.mlp.gate",
+       "model.layers.46.post_attention_layernorm",
+       "model.layers.12.post_attention_layernorm",
+       "model.layers.13.input_layernorm",
+       "model.layers.4.self_attn*",
+       "model.layers.29.self_attn*",
+       "model.layers.45.self_attn*",
+       "model.layers.9.self_attn*",
+       "model.layers.56.mlp.gate",
+       "model.layers.31.post_attention_layernorm",
+       "model.layers.47.mlp.gate",
+       "model.layers.49.post_attention_layernorm",
+       "model.layers.7.input_layernorm",
+       "model.layers.20.post_attention_layernorm",
+       "model.layers.14.post_attention_layernorm",
+       "model.layers.11.input_layernorm",
+       "model.layers.12.self_attn*",
+       "model.layers.42.input_layernorm",
+       "model.layers.26.self_attn*",
+       "model.layers.43.post_attention_layernorm",
+       "model.layers.23.input_layernorm",
+       "model.layers.16.mlp.gate",
+       "model.layers.31.mlp.gate",
+       "model.layers.50.mlp.gate",
+       "model.layers.46.input_layernorm",
+       "model.layers.40.post_attention_layernorm",
+       "model.layers.1.post_attention_layernorm",
+       "model.layers.53.input_layernorm",
+       "model.layers.39.self_attn*",
+       "model.layers.27.post_attention_layernorm",
+       "model.layers.16.self_attn*",
+       "model.layers.33.self_attn*",
+       "model.layers.8.input_layernorm",
+       "model.layers.59.post_attention_layernorm",
+       "model.layers.37.input_layernorm",
+       "model.layers.22.post_attention_layernorm",
+       "model.layers.7.mlp.gate",
+       "model.layers.0.self_attn*",
+       "model.layers.37.self_attn*",
+       "model.layers.3.mlp.gate",
+       "model.layers.55.self_attn*",
+       "model.layers.2.post_attention_layernorm",
+       "model.layers.19.mlp.gate",
+       "model.layers.13.mlp.gate",
+       "model.layers.7.self_attn*",
+       "model.layers.47.input_layernorm",
+       "model.layers.32.mlp.gate",
+       "model.layers.10.input_layernorm",
+       "model.layers.50.input_layernorm",
+       "model.layers.51.input_layernorm",
+       "model.layers.55.post_attention_layernorm",
+       "model.layers.4.post_attention_layernorm",
+       "model.layers.20.input_layernorm",
+       "model.layers.45.input_layernorm",
+       "model.layers.49.self_attn*",
+       "model.layers.22.input_layernorm",
+       "model.layers.60.input_layernorm",
+       "model.layers.28.mlp.gate",
+       "model.layers.57.post_attention_layernorm",
+       "model.layers.51.self_attn*",
+       "model.layers.56.input_layernorm",
+       "model.layers.18.self_attn*",
+       "model.layers.11.mlp.gate",
+       "model.layers.17.input_layernorm",
+       "model.layers.14.self_attn*",
+       "model.layers.56.self_attn*",
+       "model.layers.15.post_attention_layernorm",
+       "model.layers.19.self_attn*",
+       "lm_head",
+       "model.layers.40.self_attn*",
+       "model.layers.41.input_layernorm",
+       "model.layers.44.input_layernorm",
+       "model.layers.25.mlp.gate",
+       "model.layers.12.input_layernorm",
+       "model.layers.53.post_attention_layernorm",
+       "model.layers.2.input_layernorm",
+       "model.layers.19.post_attention_layernorm",
+       "model.layers.48.input_layernorm",
+       "model.layers.31.self_attn*",
+       "model.layers.14.mlp.gate",
+       "model.layers.30.mlp.gate",
+       "model.layers.60.post_attention_layernorm",
+       "model.layers.41.mlp.gate",
+       "model.layers.1.self_attn*",
+       "model.layers.52.mlp.gate",
+       "model.layers.29.mlp.gate",
+       "model.layers.14.input_layernorm",
+       "model.layers.5.post_attention_layernorm",
+       "model.layers.23.mlp.gate",
+       "model.layers.42.post_attention_layernorm",
+       "model.layers.35.input_layernorm",
+       "model.layers.17.self_attn*",
+       "model.layers.28.self_attn*",
+       "model.layers.58.self_attn*",
+       "model.layers.13.post_attention_layernorm",
+       "model.layers.32.post_attention_layernorm",
+       "model.layers.10.self_attn*",
+       "model.layers.33.post_attention_layernorm",
+       "model.layers.38.mlp.gate",
+       "model.layers.5.input_layernorm",
+       "model.layers.26.post_attention_layernorm",
+       "model.layers.15.mlp.gate",
+       "model.layers.25.input_layernorm",
+       "model.layers.9.post_attention_layernorm",
+       "model.layers.43.input_layernorm",
+       "model.layers.47.self_attn*",
+       "model.layers.32.self_attn*",
+       "model.layers.61*",
+       "model.layers.35.self_attn*",
+       "model.layers.24.self_attn*",
+       "model.layers.46.self_attn*",
+       "model.layers.13.self_attn*",
+       "model.layers.53.self_attn*",
+       "model.layers.43.mlp.gate",
+       "model.layers.55.mlp.gate",
+       "model.layers.54.post_attention_layernorm",
+       "model.layers.18.post_attention_layernorm",
+       "model.layers.31.input_layernorm",
+       "model.layers.6.self_attn*",
+       "model.layers.17.post_attention_layernorm",
+       "model.layers.24.input_layernorm",
+       "model.layers.20.self_attn*",
+       "model.layers.36.post_attention_layernorm",
+       "model.layers.32.input_layernorm",
+       "model.layers.28.input_layernorm",
+       "model.layers.26.input_layernorm",
+       "model.layers.36.self_attn*",
+       "model.layers.0.post_attention_layernorm",
+       "model.layers.39.post_attention_layernorm",
+       "model.layers.56.post_attention_layernorm",
+       "model.layers.39.mlp.gate",
+       "model.layers.9.input_layernorm",
+       "model.layers.54.mlp.gate",
+       "model.layers.5.mlp.gate",
+       "model.layers.16.post_attention_layernorm",
+       "model.layers.55.input_layernorm",
+       "model.layers.46.mlp.gate",
+       "model.layers.57.self_attn*",
+       "model.layers.10.post_attention_layernorm",
+       "model.layers.48.self_attn*",
+       "model.layers.21.input_layernorm",
+       "model.layers.44.post_attention_layernorm",
+       "model.layers.17.mlp.gate",
+       "model.layers.37.post_attention_layernorm",
+       "model.layers.49.input_layernorm",
+       "model.layers.49.mlp.gate",
+       "model.layers.15.input_layernorm",
+       "model.layers.45.mlp.gate",
+       "model.layers.38.self_attn*",
+       "model.layers.47.post_attention_layernorm",
+       "model.layers.37.mlp.gate",
+       "model.layers.25.post_attention_layernorm",
+       "model.embed_tokens",
+       "model.layers.36.input_layernorm",
+       "model.layers.38.post_attention_layernorm",
+       "model.layers.35.mlp.gate",
+       "model.layers.59.mlp.gate",
+       "model.layers.50.self_attn*",
+       "model.layers.54.input_layernorm",
+       "model.layers.58.input_layernorm",
+       "model.layers.21.post_attention_layernorm",
+       "model.layers.3.self_attn*",
+       "model.layers.58.post_attention_layernorm",
+       "model.layers.34.mlp.gate",
+       "model.layers.6.post_attention_layernorm",
+       "model.layers.34.self_attn*",
+       "model.layers.7.post_attention_layernorm",
+       "model.layers.42.self_attn*",
+       "model.layers.19.input_layernorm",
+       "model.layers.48.mlp.gate",
+       "model.layers.4.input_layernorm",
+       "model.layers.27.mlp.gate",
+       "model.layers.8.self_attn*",
+       "model.layers.8.post_attention_layernorm",
+       "model.layers.60.mlp.gate"
+     ]
+   }
+ }
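
`hf_quant_config.json` marks the checkpoint as NVFP4 with a group size of 16 and keeps the listed modules (self-attention blocks, layer norms, MoE router gates, `lm_head`, `model.embed_tokens`, and the MTP layer `model.layers.61*`) out of FP4 quantization. The entries are glob-style patterns over module names; below is a purely illustrative sketch of matching them with `fnmatch` (the helper is hypothetical, not part of ModelOpt or TensorRT-LLM):

```
import fnmatch
import json

# Hypothetical helper: decide whether a module name is excluded from NVFP4
# quantization according to the exclude_modules glob patterns above.
def is_excluded(module_name: str, exclude_patterns: list[str]) -> bool:
    return any(fnmatch.fnmatch(module_name, pat) for pat in exclude_patterns)

with open("hf_quant_config.json") as f:
    quant_cfg = json.load(f)["quantization"]

patterns = quant_cfg["exclude_modules"]
print(is_excluded("model.layers.30.self_attn.q_proj", patterns))        # True  (matches "model.layers.30.self_attn*")
print(is_excluded("model.layers.31.mlp.experts.0.up_proj", patterns))   # False (stays NVFP4)
print(is_excluded("lm_head", patterns))                                  # True
```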
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,35 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "bos_token": {
+     "__type": "AddedToken",
+     "content": "<|begin▁of▁sentence|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "clean_up_tokenization_spaces": false,
+   "eos_token": {
+     "__type": "AddedToken",
+     "content": "<|end▁of▁sentence|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "legacy": true,
+   "model_max_length": 16384,
+   "pad_token": {
+     "__type": "AddedToken",
+     "content": "<|end▁of▁sentence|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "sp_model_kwargs": {},
+   "unk_token": null,
+   "tokenizer_class": "LlamaTokenizerFast",
+   "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\\n\\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' in message %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{'<|Assistant|>' + message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' not in message %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}"
+ }
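
The `chat_template` above wraps user turns in `<|User|>`, terminates assistant turns with `<|end▁of▁sentence|>`, and, when `add_generation_prompt` is set, opens the reply with `<|Assistant|><think>\n` so the model starts with its reasoning block. A minimal sketch of rendering a prompt with it, assuming the tokenizer files in this repo load through `transformers.AutoTokenizer`:

```
from transformers import AutoTokenizer

# Sketch: render a prompt with the chat template defined in tokenizer_config.json.
tokenizer = AutoTokenizer.from_pretrained("nvidia/DeepSeek-R1-FP4")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"},
]

# add_generation_prompt=True appends "<|Assistant|><think>\n" so the model
# begins its reasoning block, per the template's final branch.
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
```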