Spaces:

helamouri
/

medichat_assignment

Running

medichat_assignment/tests/test_dataset.py

update model

eca6215 about 1 month ago

1.72 kB

	from src.dataset import formatting_prompts_func

	def test_formatting_prompts_func():
	    """Check ``formatting_prompts_func`` on one-row, empty-string and two-row batches.

	    Every batch is a dict of parallel lists under the keys ``instruction``,
	    ``input`` and ``output``.  The assertions require the returned mapping
	    to expose a ``text`` list whose entries are the template filled with
	    the row's three fields and immediately followed by the EOS token.
	    """
	    prompt_template = "Instruction: {}\nInput: {}\nOutput: {}"
	    eos = "<EOS>"

	    # --- Scenario 1: a single, fully populated row -----------------------
	    single_batch = dict(
	        instruction=["Test instruction"],
	        input=["Test input"],
	        output=["Test output"],
	    )
	    single_out = formatting_prompts_func(single_batch, prompt_template, eos)

	    # The result must carry a 'text' entry ...
	    assert "text" in single_out
	    # ... holding exactly one formatted string ...
	    assert len(single_out["text"]) == 1
	    # ... equal to the template filled in and terminated by the EOS token.
	    wanted_single = "Instruction: Test instruction\nInput: Test input\nOutput: Test output<EOS>"
	    assert single_out["text"][0] == wanted_single

	    # --- Scenario 2: every field is an empty string (edge case) ----------
	    blank_batch = dict(instruction=[""], input=[""], output=[""])
	    blank_out = formatting_prompts_func(blank_batch, prompt_template, eos)
	    assert blank_out["text"][0] == "Instruction: \nInput: \nOutput: <EOS>"

	    # --- Scenario 3: two rows in one batch -------------------------------
	    pair_batch = dict(
	        instruction=["Test instruction 1", "Test instruction 2"],
	        input=["Test input 1", "Test input 2"],
	        output=["Test output 1", "Test output 2"],
	    )
	    pair_out = formatting_prompts_func(pair_batch, prompt_template, eos)
	    assert len(pair_out["text"]) == 2

	    # Rows must come back in input order, each formatted independently.
	    wanted_first = "Instruction: Test instruction 1\nInput: Test input 1\nOutput: Test output 1<EOS>"
	    wanted_second = "Instruction: Test instruction 2\nInput: Test input 2\nOutput: Test output 2<EOS>"
	    assert pair_out["text"][0] == wanted_first
	    assert pair_out["text"][1] == wanted_second