Spaces:
Running
Running
from src.dataset import formatting_prompts_func | |
def test_formatting_prompts_func():
    """Exercise ``formatting_prompts_func`` on single, empty and batched input.

    All three scenarios share one three-slot template and one EOS marker,
    and inspect the strings stored under the ``"text"`` key of the value
    returned by ``formatting_prompts_func``.
    """
    prompt_template = "Instruction: {}\nInput: {}\nOutput: {}"
    end_marker = "<EOS>"

    # Scenario 1: one fully populated record.
    single_batch = {
        "instruction": ["Test instruction"],
        "input": ["Test input"],
        "output": ["Test output"],
    }
    single_out = formatting_prompts_func(single_batch, prompt_template, end_marker)
    # The result must expose a 'text' entry ...
    assert "text" in single_out
    # ... holding exactly one formatted string ...
    assert len(single_out["text"]) == 1
    # ... equal to the filled-in template followed by the EOS marker.
    single_expected = (
        "Instruction: Test instruction\nInput: Test input\nOutput: Test output<EOS>"
    )
    assert single_out["text"][0] == single_expected

    # Scenario 2 (edge case): every field is the empty string.
    blank_batch = {
        "instruction": [""],
        "input": [""],
        "output": [""],
    }
    blank_out = formatting_prompts_func(blank_batch, prompt_template, end_marker)
    assert blank_out["text"][0] == "Instruction: \nInput: \nOutput: <EOS>"

    # Scenario 3: two records supplied in a single call.
    paired_batch = {
        "instruction": ["Test instruction 1", "Test instruction 2"],
        "input": ["Test input 1", "Test input 2"],
        "output": ["Test output 1", "Test output 2"],
    }
    paired_out = formatting_prompts_func(paired_batch, prompt_template, end_marker)
    paired_expected = [
        "Instruction: Test instruction 1\nInput: Test input 1\nOutput: Test output 1<EOS>",
        "Instruction: Test instruction 2\nInput: Test input 2\nOutput: Test output 2<EOS>",
    ]
    assert len(paired_out["text"]) == 2
    # Compare position by position, in the same order as the inputs.
    for position, wanted in enumerate(paired_expected):
        assert paired_out["text"][position] == wanted