import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the model and tokenizer
model_name = "Artples/L-MChat-7b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Ensure the model uses CPU
device = torch.device("cpu")
model.to(device)

def chat_with_model(json_input):
    prompt = json_input["prompt"]

    # Tokenize the input prompt
    inputs = tokenizer.encode(prompt, return_tensors="pt")
    inputs = inputs.to(device)

    # Generate a response (max_length counts prompt tokens plus new tokens)
    output = model.generate(inputs, max_length=100, num_return_sequences=1)
    response_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return {"choices": [{"text": response_text}]}

# Describe the expected request shape. Gradio's JSON component does not
# validate inputs against a schema, so this dict is documentation only.
json_schema = {
    "title": "Request",
    "type": "object",
    "properties": {
        "prompt": {
            "type": "string",
            "description": "Enter your prompt here.",
        }
    },
    "required": ["prompt"],
}

# Create the Gradio interface. There is no gr.inputs.JSON component; use
# gr.JSON, which accepts arbitrary JSON input.
iface = gr.Interface(
    fn=chat_with_model,
    inputs=gr.JSON(label=json_schema["title"]),
    outputs="json",
    title="Chat with L-MChat-7b",
    description="API-like interface using Gradio to simulate OpenAI API behavior.",
)

# Run the Gradio app
iface.launch()
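
# --- Example client call (a sketch, run from a separate process) ---
# Assumes the app above is serving at Gradio's default local address
# (http://127.0.0.1:7860) and that the gradio_client package is installed;
# a gr.Interface exposes its function under the default endpoint "/predict".
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860/")
#   result = client.predict({"prompt": "Hello, who are you?"}, api_name="/predict")
#   print(result)  # expected shape: {"choices": [{"text": "..."}]}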