from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
from peft import AutoPeftModelForCausalLM, PeftConfig, PeftModel
import torch

SYSTEM_PROMPT = (
    "<|system|>\n You are a support chatbot who helps with user queries "
    "and always responds in the style of a professional.\n<|user|>\n"
)


def input_data_preprocessing(example):
    """Wrap a raw instruction in the Zephyr chat-template markers."""
    return SYSTEM_PROMPT + example["instruction"] + "\n<|assistant|>\n"


def build_generation_config(tokenizer):
    """Shared decoding settings: with top_k=1 only the argmax token survives
    filtering, so sampling is effectively greedy and replies are deterministic."""
    return GenerationConfig(
        do_sample=True,
        top_k=1,
        temperature=0.1,
        max_new_tokens=256,
        pad_token_id=tokenizer.eos_token_id,
    )


def customer_conversation(prompt):
    """Generate a reply with the locally fine-tuned LoRA checkpoint."""
    input_string = input_data_preprocessing({"instruction": prompt})
    tokenizer = AutoTokenizer.from_pretrained("zephyrFT/checkpoint-100")
    # AutoPeftModelForCausalLM loads the base model and applies the adapter in one step.
    model = AutoPeftModelForCausalLM.from_pretrained(
        "zephyrFT/checkpoint-100",
        low_cpu_mem_usage=True,
        return_dict=True,
        torch_dtype=torch.float16,
        device_map="cuda",
    )
    inputs = tokenizer(input_string, return_tensors="pt").to("cuda")
    outputs = model.generate(**inputs, generation_config=build_generation_config(tokenizer))
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))


def customer_conversation_1(prompt):
    """Generate a reply with the GPTQ-quantized base model plus the published adapter."""
    # Check GPU availability
    print("Available GPU devices:", torch.cuda.device_count())
    print("Name of the first available GPU:", torch.cuda.get_device_name(0))

    # Load the quantized base model, then attach the LoRA adapter on top of it.
    base_model = AutoModelForCausalLM.from_pretrained(
        "TheBloke/zephyr-7B-beta-GPTQ", device_map="cuda"
    )
    model = PeftModel.from_pretrained(base_model, "DSU-FDP/customer-support")

    # Load the tokenizer from the adapter repo (from_pretrained expects a repo id
    # or path, not a model object) and make padding explicit.
    tokenizer = AutoTokenizer.from_pretrained("DSU-FDP/customer-support")
    tokenizer.padding_side = "right"
    tokenizer.pad_token = tokenizer.eos_token

    input_string = input_data_preprocessing({"instruction": prompt})
    inputs = tokenizer(input_string, return_tensors="pt").to("cuda")
    outputs = model.generate(**inputs, generation_config=build_generation_config(tokenizer))
    return outputs
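

# A minimal usage sketch (an addition, not part of the original script): it
# assumes a CUDA GPU and access to the "zephyrFT/checkpoint-100" and
# "DSU-FDP/customer-support" checkpoints referenced above. The query string is
# the example instruction used in the original code.
if __name__ == "__main__":
    query = "i have a question about cancelling order {{Order Number}}"

    # Variant 1: local fine-tuned adapter; prints the decoded reply.
    customer_conversation(query)

    # Variant 2: GPTQ base model + published adapter; returns raw token ids,
    # so the caller decodes them with a matching tokenizer.
    output_ids = customer_conversation_1(query)
    tok = AutoTokenizer.from_pretrained("DSU-FDP/customer-support")
    print(tok.decode(output_ids[0], skip_special_tokens=True))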