Commit
·
d129960
1
Parent(s):
9b94fdb
add qwen stop sequences
Browse files
src/synthetic_dataset_generator/pipelines/chat.py
CHANGED
@@ -125,13 +125,20 @@ DEFAULT_DATASET_DESCRIPTIONS = [
|
|
125 |
"rude customer assistant for a phone company",
|
126 |
"assistant that solves math puzzles using python",
|
127 |
]
|
128 |
-
|
129 |
-
_STOP_SEQUENCES = [
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
|
136 |
|
137 |
def _get_output_mappings(num_turns):
|
|
|
125 |
"rude customer assistant for a phone company",
|
126 |
"assistant that solves math puzzles using python",
|
127 |
]
|
128 |
+
# Stop sequences used to cut generation, selected by the configured Magpie
# pre-query template. Each list holds that chat format's end-of-turn marker,
# its start-of-turn marker, the bare "assistant" role name, and a
# whitespace/newline sequence.
if MAGPIE_PRE_QUERY_TEMPLATE == "qwen2":
    _STOP_SEQUENCES = [
        "<|im_end|>",
        "<|im_start|>",
        "assistant",
        " \n",
    ]
else:
    # Covers "llama3" and any other value. Without a fallback branch an
    # unrecognized template would leave _STOP_SEQUENCES unbound and every
    # later use would raise NameError.
    # NOTE(review): defaulting unknown templates to the llama3 markers is an
    # assumption -- confirm this is the desired default.
    _STOP_SEQUENCES = [
        "<|eot_id|>",
        "<|start_header_id|>",
        "assistant",
        " \n\n",
    ]
|
142 |
|
143 |
|
144 |
def _get_output_mappings(num_turns):
|