davidberenstein1957 HF staff committed on
Commit
d129960
·
1 Parent(s): 9b94fdb

add qwen stop sequences

Browse files
src/synthetic_dataset_generator/pipelines/chat.py CHANGED
@@ -125,13 +125,20 @@ DEFAULT_DATASET_DESCRIPTIONS = [
125
  "rude customer assistant for a phone company",
126
  "assistant that solves math puzzles using python",
127
  ]
128
-
129
- _STOP_SEQUENCES = [
130
- "<|eot_id|>",
131
- "<|start_header_id|>",
132
- "assistant",
133
- " \n\n",
134
- ]
 
 
 
 
 
 
 
135
 
136
 
137
  def _get_output_mappings(num_turns):
 
125
  "rude customer assistant for a phone company",
126
  "assistant that solves math puzzles using python",
127
  ]
128
+ if MAGPIE_PRE_QUERY_TEMPLATE == "llama3":
129
+ _STOP_SEQUENCES = [
130
+ "<|eot_id|>",
131
+ "<|start_header_id|>",
132
+ "assistant",
133
+ " \n\n",
134
+ ]
135
+ elif MAGPIE_PRE_QUERY_TEMPLATE == "qwen2":
136
+ _STOP_SEQUENCES = [
137
+ "<|im_end|>",
138
+ "<|im_start|>",
139
+ "assistant",
140
+ " \n",
141
+ ]
142
 
143
 
144
  def _get_output_mappings(num_turns):