kanhatakeyama
/

TestMoE

Text Generation

Model card Files Files and versions Community

TestMoE / MoEModel.py

kanhatakeyama's picture

Upload model

69bf2b0 verified 11 months ago

history blame contribute delete

2.42 kB

	from transformers import PreTrainedModel
	from .MoEConfig import MoEConfig
	from transformers import AutoModelForCausalLM
	import torch
	import numpy as np


	class MoeModel(PreTrainedModel):
	config_class = MoEConfig
	verbose = True
	fix_mode = False

	def __init__(self, config):
	super().__init__(config)
	self.model_list = []
	for model_name in self.config_class.model_list:
	self.append_model(model_name)

	self.set_model_id(0)

	"""
	def set_model(self, model_name):
	self.model = AutoModelForCausalLM.from_pretrained(
	model_name,
	device_map="auto",
	torch_dtype=torch.float16
	)
	"""

	def append_model(self, model_name):
	print("loading ", model_name)
	model = AutoModelForCausalLM.from_pretrained(
	model_name,
	device_map="auto",
	torch_dtype=torch.float16
	)
	self.model_list.append(model)

	# def set_tokenizer(self, tokenizer):
	# self.tokenizer = tokenizer

	def set_model_id(self, model_id):
	self.model = self.model_list[model_id]

	def calc_perplexity(self, tokenized_input):
	ppl_list = []
	for model in self.model_list:
	ppl_list.append(perplexity(model, tokenized_input))
	return np.array(ppl_list)

	def fix_model(self, model_id):
	self.set_model_id(model_id)
	self.fix_mode = True

	def set_flexible_mode(self):
	self.fix_mode = False

	def generate(self, input_ids, attention_mask,
	**generate_kwargs):

	if not self.fix_mode:
	ppl_array = self.calc_perplexity(input_ids)
	best_model_id = np.where(ppl_array == min(ppl_array))[0][0]
	self.set_model_id(best_model_id)

	if self.verbose:
	print(f"model {best_model_id} will be used")
	print("ppl array: ", ppl_array)

	ret = self.model.generate(input_ids=input_ids,
	attention_mask=attention_mask,
	**generate_kwargs)
	return ret


	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


	def perplexity(model, tokenized_input) -> torch.Tensor:
	with torch.inference_mode():
	output = model(tokenized_input.to(device), labels=tokenized_input)
	ppl = torch.exp(output.loss)
	return ppl.item()