import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Load the fine-tuned model and tokenizer
model_path = './fine-tuned-t5-efficient-tiny'
tokenizer = T5Tokenizer.from_pretrained(model_path)
model = T5ForConditionalGeneration.from_pretrained(model_path)

def generate_response(input_text, model, tokenizer, max_length=256):
    # Tokenize the input text
    inputs = tokenizer(
        input_text,
        return_tensors='pt',
        truncation=True,
        padding='max_length',
        max_length=max_length
    )

    # Generate a response from the model
    outputs = model.generate(
        input_ids=inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_length=max_length,
        num_beams=1,          # Single beam (no beam search)
        do_sample=True,       # Enable sampling
        temperature=1.0,      # Sampling temperature
        top_p=0.9,            # Nucleus (top-p) sampling
        early_stopping=False  # Early stopping only applies to beam search
    )

    # Decode the response
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

def chat_with_model():
    print("Chatbot is ready! Type 'exit' to end the conversation.")
    while True:
        user_input = input("You: ")
        if user_input.lower() == 'exit':
            print("Goodbye!")
            break
        response = generate_response(user_input, model, tokenizer)
        print(f"Chatbot: {response}")

# Start the chat
chat_with_model()