Jan Philipp Harries Jan Philipp Harries committed on
Commit
c93655c
·
unverified ·
1 Parent(s): fe28543

Added Orca Mini prompt strategy (#263)

Browse files

* added Orca Mini prompt strategy

* maybe this fixed precommit errors?

* pre-commits passing

---------

Co-authored-by: Jan Philipp Harries <[email protected]>

src/axolotl/prompt_strategies/orcamini.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Prompt Strategy for finetuning Orca Mini (v2) models
3
+ see also https://huggingface.co/psmathur/orca_mini_v2_7b for more information
4
+
5
+ Use dataset type: orcamini in config.yml to use this prompt style.
6
+
7
+ Compared to the alpaca_w_system.open_orca dataset type,
8
+ this one specifies the system prompt with "### System:".
9
+
10
+ Not suited/tested for multiple-turn conversations without further adjustments.
11
+ """
12
+ from typing import Generator, Union
13
+
14
+ from axolotl.prompt_strategies.alpaca_w_system import OpenOrcaPromptTokenizingStrategy
15
+ from axolotl.prompters import AlpacaPrompter
16
+
17
+
18
class OrcaMiniPrompter(AlpacaPrompter):
    """Prompter that renders prompts in the Orca Mini (v2) layout.

    The layout consists of a "### System:" section, a "### User:" section
    and a trailing "### Response:" header after which the answer follows.
    """

    def match_prompt_style(self):
        """Install the Orca Mini template as the no-input turn format."""
        orca_mini_template = (
            "### System:\n{system}\n\n### User:\n{instruction}\n\n### Response:\n"
        )
        self.turn_no_input_format = orca_mini_template

    def build_prompt_w_system(
        self,
        system: str,
        instruction: str,
        output: Union[None, str] = None,
    ) -> Generator[str, None, None]:
        """Yield a single prompt string built from the system text and instruction.

        Args:
            system: Text substituted into the "### System:" section.
            instruction: Text substituted into the "### User:" section.
            output: Optional response/label; when truthy it is appended
                directly after the "### Response:" header.

        Yields:
            The rendered prompt, with ``output`` appended if one was given.
        """
        prompt = self.turn_no_input_format.format(
            system=system,
            instruction=instruction,
        )
        # An empty or missing output leaves the prompt ending at the response header.
        yield f"{prompt}{output}" if output else prompt
38
+
39
+
40
def load(tokenizer, cfg):
    """Create the tokenizing strategy for the ``orcamini`` dataset type.

    Args:
        tokenizer: Tokenizer handed through to the tokenizing strategy.
        cfg: Configuration object; ``train_on_inputs`` and ``sequence_len``
            are read from it.

    Returns:
        An ``OpenOrcaPromptTokenizingStrategy`` that uses an
        ``OrcaMiniPrompter`` to build its prompts.
    """
    prompter = OrcaMiniPrompter()
    return OpenOrcaPromptTokenizingStrategy(
        prompter, tokenizer, cfg.train_on_inputs, cfg.sequence_len
    )