medichat_assignment / tests /test_dataset.py
helamouri's picture
update model
eca6215
from src.dataset import formatting_prompts_func
def test_formatting_prompts_func():
# Test case with basic input
examples = {
"instruction": ["Test instruction"],
"input": ["Test input"],
"output": ["Test output"],
}
template = "Instruction: {}\nInput: {}\nOutput: {}"
eos_token = "<EOS>"
result = formatting_prompts_func(examples, template, eos_token)
# Check if result contains the 'text' key
assert "text" in result
# Check if result contains exactly one formatted entry
assert len(result["text"]) == 1
# Check if the formatted text is correct
expected = "Instruction: Test instruction\nInput: Test input\nOutput: Test output<EOS>"
assert result["text"][0] == expected
# Test with empty inputs (edge case)
examples_empty = {
"instruction": [""],
"input": [""],
"output": [""],
}
result_empty = formatting_prompts_func(examples_empty, template, eos_token)
assert result_empty["text"][0] == "Instruction: \nInput: \nOutput: <EOS>"
# Test with multiple examples
examples_multi = {
"instruction": ["Test instruction 1", "Test instruction 2"],
"input": ["Test input 1", "Test input 2"],
"output": ["Test output 1", "Test output 2"],
}
result_multi = formatting_prompts_func(examples_multi, template, eos_token)
assert len(result_multi["text"]) == 2
assert result_multi["text"][0] == "Instruction: Test instruction 1\nInput: Test input 1\nOutput: Test output 1<EOS>"
assert result_multi["text"][1] == "Instruction: Test instruction 2\nInput: Test input 2\nOutput: Test output 2<EOS>"