import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Load the fine-tuned model and tokenizer
model_path = './fine-tuned-t5-efficient-tiny'
tokenizer = T5Tokenizer.from_pretrained(model_path)
model = T5ForConditionalGeneration.from_pretrained(model_path)

def generate_response(input_text, model, tokenizer, max_length=256):
    # Tokenize the input text
    inputs = tokenizer(
        input_text,
        return_tensors='pt',
        truncation=True,
        padding='max_length',
        max_length=max_length
    )

    # Generate a response from the model
    outputs = model.generate(
        input_ids=inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_length=max_length,
        num_beams=1,          # Single beam (no beam search)
        do_sample=True,       # Enable sampling
        temperature=1.0,      # Sampling temperature
        top_p=0.9,            # Nucleus (top-p) sampling
        early_stopping=False  # Early stopping only applies to beam search
    )

    # Decode the response
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

def chat_with_model():
    print("Chatbot is ready! Type 'exit' to end the conversation.")
    while True:
        user_input = input("You: ")
        if user_input.lower() == 'exit':
            print("Goodbye!")
            break
        response = generate_response(user_input, model, tokenizer)
        print(f"Chatbot: {response}")

# Start the chat
chat_with_model()