"""Inference helpers for a customer-support chatbot.

Loads the ``DSU-FDP/customer-support`` PEFT adapter on top of the
``TheBloke/zephyr-7B-beta-GPTQ`` base model and generates a reply for a
user query.
"""

import functools

from peft import AutoPeftModelForCausalLM
from peft import PeftConfig, PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    GenerationConfig,
    GPTQConfig,
)

_BASE_MODEL_ID = "TheBloke/zephyr-7B-beta-GPTQ"
_ADAPTER_ID = "DSU-FDP/customer-support"

# Instruction used when the caller supplies no prompt; this is the query the
# function previously always sent regardless of its argument.
_DEFAULT_INSTRUCTION = "i have a question about cancelling order {{Order Number}}"


def input_data_preprocessing(example):
    """Wrap a user instruction in the system/user/assistant prompt template.

    Args:
        example: Mapping with an ``"instruction"`` key holding the user's
            query as a string.

    Returns:
        The full prompt string, ending with the ``<|assistant|>`` tag so the
        model continues with the assistant's reply.
    """
    # NOTE: the template text is reproduced verbatim (including its wording);
    # presumably it matches what the adapter was fine-tuned on -- confirm
    # before editing it.
    processed_example = (
        "<|system|>\n You are a support chatbot who helps with user queries "
        "chatbot who always responds in the style of a professional.\n<|user|>\n"
        + example["instruction"]
        + "\n<|assistant|>\n"
    )
    return processed_example


@functools.lru_cache(maxsize=1)
def _load_model_and_tokenizer():
    """Load the adapter-wrapped model and its tokenizer once per process."""
    base_model = AutoModelForCausalLM.from_pretrained(_BASE_MODEL_ID)
    model = PeftModel.from_pretrained(base_model, _ADAPTER_ID)
    # The tokenizer must be loaded from a repo id/path string; passing the
    # model object (as the earlier code did) is not a valid identifier.
    tokenizer = AutoTokenizer.from_pretrained(_ADAPTER_ID)
    return model, tokenizer


def customerConverstaion(prompt):
    """Generate a chatbot reply for a customer query.

    Args:
        prompt: The customer's query text (presumably a ``str`` -- verify
            against callers). If empty or ``None``, a built-in sample
            instruction about cancelling an order is used instead.

    Returns:
        The raw output of ``model.generate`` (generated token ids, prompt
        tokens included); callers must decode it with the tokenizer to
        obtain text.
    """
    model, tokenizer = _load_model_and_tokenizer()

    instruction = prompt if prompt else _DEFAULT_INSTRUCTION
    input_string = input_data_preprocessing({"instruction": instruction})

    # Place the inputs on whatever device the model was loaded on rather than
    # assuming a hard-coded "cuda".
    inputs = tokenizer(input_string, return_tensors="pt").to(model.device)

    generation_config = GenerationConfig(
        do_sample=True,
        top_k=1,  # with top_k=1 sampling always picks the most likely token
        temperature=0.1,
        max_new_tokens=256,
        # Reuse EOS as the padding id for generation.
        pad_token_id=tokenizer.eos_token_id,
    )
    outputs = model.generate(**inputs, generation_config=generation_config)
    return outputs