import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the model and tokenizer
model_name = "Artples/L-MChat-7b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Ensure the model uses CPU
device = torch.device("cpu")
model.to(device)

def chat_with_model(json_input):
    prompt = json_input["prompt"]

    # Tokenize the input prompt
    inputs = tokenizer.encode(prompt, return_tensors="pt")
    inputs = inputs.to(device)

    # Generate a response (max_length counts prompt tokens plus new tokens)
    output = model.generate(inputs, max_length=100, num_return_sequences=1)
    response_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return {"choices": [{"text": response_text}]}

# Describe the expected request shape. Gradio's JSON component does not
# validate inputs against a schema, so this dict is documentation only.
json_schema = {
    "title": "Request",
    "type": "object",
    "properties": {
        "prompt": {
            "type": "string",
            "description": "Enter your prompt here.",
        }
    },
    "required": ["prompt"],
}

# Create the Gradio interface. There is no gr.inputs.JSON component; use
# gr.JSON, which accepts arbitrary JSON input.
iface = gr.Interface(
    fn=chat_with_model,
    inputs=gr.JSON(label=json_schema["title"]),
    outputs="json",
    title="Chat with L-MChat-7b",
    description="API-like interface using Gradio to simulate OpenAI API behavior.",
)

# Run the Gradio app
iface.launch()
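
# --- Example client call (a sketch, run from a separate process) ---
# Assumes the app above is serving at Gradio's default local address
# (http://127.0.0.1:7860) and that the gradio_client package is installed;
# a gr.Interface exposes its function under the default endpoint "/predict".
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860/")
#   result = client.predict({"prompt": "Hello, who are you?"}, api_name="/predict")
#   print(result)  # expected shape: {"choices": [{"text": "..."}]}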