from transformers import GPT2Tokenizer, GPT2LMHeadModel
import gradio as gr

# Load the saved tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained("Rehman1603/new_crm_fine_tuned_gpt2_model")
model = GPT2LMHeadModel.from_pretrained("Rehman1603/new_crm_fine_tuned_gpt2_model")

# Function to generate responses
def generate_response(question, max_length=150):
    input_text = f" {question} :"
    input_ids = tokenizer.encode(input_text, return_tensors="pt")
    output = model.generate(
        input_ids,
        max_length=max_length,
        num_return_sequences=1,
        no_repeat_ngram_size=2,  # Prevent repeating n-grams
        top_k=50,                # Limit sampling to the top-k tokens
        top_p=0.95,              # Use nucleus sampling
        temperature=0.7,         # Control randomness
        do_sample=True,          # Enable sampling
    )
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    # Extract only the bot's response (text after the last ":")
    response = response.split(":")[-1].strip()
    return response

# Function to handle chat interaction
def chat(message, history):
    # Generate a response using the model
    response = generate_response(message)
    return response

# Example questions
examples = [
    "Hello! Can I get more info?",
    "Restaurant",
    "online or offline?",
    "isky charges kia ha",  # Roman Urdu: "What are its charges?"
]

# Gradio ChatInterface
demo = gr.ChatInterface(
    fn=chat,  # Function to handle chat
    examples=examples,  # Example questions
    title="Chat with the Fine-Tuned GPT-2 Model",  # Title of the interface
    description="Ask me anything about the software!",  # Description
)

# Launch the Gradio app
demo.launch()
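
# Optional quick check (an assumed usage pattern, not part of the original script):
# call generate_response() directly to sanity-test the fine-tuned model from the
# command line. Run these lines in place of demo.launch() above, since launching
# the Gradio app blocks the script, e.g.:
#
#   for q in examples:
#       print("Q:", q)
#       print("A:", generate_response(q))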