# LoRA exercise
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig,get_peft_model,PeftModel,PeftConfig
from datasets import load_dataset
import bitsandbytes as bnb
import transformers
import torch.nn as nn
import torch
# check if cuda is available
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = AutoModelForCausalLM.from_pretrained(
    'bigscience/bloom-3b',
    torch_dtype=torch.float16,
    device_map='auto'
)
tokenizer = AutoTokenizer.from_pretrained("bigscience/tokenizer")
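# Note: bitsandbytes is imported above but not used directly in this script. A common
# memory-saving variant (a sketch, not what runs below) loads the base model in 8-bit instead:
#
#     model = AutoModelForCausalLM.from_pretrained(
#         'bigscience/bloom-3b',
#         load_in_8bit=True,
#         device_map='auto'
#     )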
# print the model architecture (useful for finding LoRA target module names)
print(model)
for param in model.parameters():
    param.requires_grad = False  # freeze all base-model weights
    if param.ndim == 1:  # cast the small parameters (e.g. LayerNorm) to fp32 for stability
        param.data = param.data.to(torch.float32)
model.gradient_checkpointing_enable()
model.enable_input_require_grads()
# CastOutputToFloat ensures the output of model.lm_head is cast to float32
# (the rest of the model runs in fp16, but the loss is more stable in fp32).
class CastOutputToFloat(nn.Sequential):
    def forward(self, x):
        return super().forward(x).to(torch.float32)

model.lm_head = CastOutputToFloat(model.lm_head)
# print trainable parameters
def print_trainable_parameters(model):
    """Print the number of trainable parameters vs. the total parameter count."""
    trainable_params = 0
    all_params = 0
    for _, param in model.named_parameters():
        all_params += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(f'Trainable Params: {trainable_params}, All Params: {all_params}, '
          f'trainable %: {100 * (trainable_params / all_params)}')
# LoRA configuration: rank-8 adapters on BLOOM's fused attention projection ('query_key_value')
config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=['query_key_value'],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
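# Optional sanity check (sketch): confirm that modules named 'query_key_value'
# actually exist in this architecture before targeting them with LoRA.
for name, _ in model.named_modules():
    if 'query_key_value' in name:
        print(name)
        break  # one match is enough to confirm the target name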
# Wrap the base model with the LoRA adapters and check how few parameters are now trainable
model = get_peft_model(model=model, peft_config=config)
print_trainable_parameters(model=model)
qa_dataset = load_dataset('squad_v2')
def create_prompt(context, question, answer):
    # SQuAD v2 contains unanswerable questions; fall back to a fixed refusal string
    if len(answer['text']) < 1:
        result = "I don't know the answer"
    else:
        result = answer['text'][0]
    prompt_template = f"### CONTEXT\n{context}\n\n### QUESTION\n{question}\n\n### ANSWER\n{result}</s>"
    return prompt_template
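# Quick check (sketch): print one formatted prompt from the training split before mapping,
# just to verify the template; the index 0 is arbitrary.
example = qa_dataset['train'][0]
print(create_prompt(example['context'], example['question'], example['answers']))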
mapped_dataset = qa_dataset.map(
    lambda samples: tokenizer(create_prompt(samples['context'], samples['question'], samples['answers']))
)
# Training setup: note that max_steps=100 overrides num_train_epochs, so this is a short demo run
trainer = transformers.Trainer(
    model=model,
    train_dataset=mapped_dataset['train'],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,
        warmup_steps=100,
        max_steps=100,
        num_train_epochs=3,
        learning_rate=1e-3,
        fp16=True,
        logging_steps=1,
        output_dir='outputs'
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
# Disable the KV cache during training: it is incompatible with gradient checkpointing
# (this silences the corresponding warning) and is only useful at generation time.
model.config.use_cache = False
trainer.train()
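# Optional (sketch): a quick qualitative check after training. The context/question text
# below is illustrative only; re-enable the cache for faster generation.
model.config.use_cache = True
eval_prompt = (
    "### CONTEXT\nThe Eiffel Tower is located in Paris, France.\n\n"
    "### QUESTION\nWhere is the Eiffel Tower located?\n\n"
    "### ANSWER\n"
)
inputs = tokenizer(eval_prompt, return_tensors='pt').to(device)
with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))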
# Upload the LoRA adapter weights to the Hugging Face Hub
model_name = "bloom7b__finetune_sample"
HUGGING_FACE_USER_NAME = "james92"
model.push_to_hub(f"{HUGGING_FACE_USER_NAME}/{model_name}", use_auth_token=True)
print("Model is saved to the Hugging Face Hub")