# Spaces: Running on Zero
import gradio as gr | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
import torch | |
# Load the model and tokenizer.
# Both are created once at import time and reused by every request handled
# by chat_with_model.
model_name = "Artples/L-MChat-7b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Ensure the model uses CPU; inputs are moved to this same device before
# generation so tensors and weights live together.
device = torch.device("cpu")
model.to(device)
def chat_with_model(json_input: dict) -> dict:
    """Generate text for the prompt carried in a JSON-style request.

    Args:
        json_input: Mapping that holds the prompt text under the
            ``"prompt"`` key.

    Returns:
        A dict shaped like ``{"choices": [{"text": <decoded output>}]}``.
        The text is the decoded first sequence returned by ``generate``
        with special tokens stripped.

    Raises:
        ValueError: If ``json_input`` is not a mapping or its ``"prompt"``
            entry is missing or not a string.
    """
    # Validate up front so a malformed request fails with a clear message
    # instead of a bare KeyError/TypeError.
    if not isinstance(json_input, dict):
        raise ValueError('Request must be a JSON object with a "prompt" field.')
    prompt = json_input.get("prompt")
    if not isinstance(prompt, str):
        raise ValueError('Request field "prompt" must be a string.')

    # Calling the tokenizer (rather than .encode) also produces the attention
    # mask, which is then passed to generate() together with the input ids.
    encoded = tokenizer(prompt, return_tensors="pt").to(device)

    # max_new_tokens limits only the continuation. The previous max_length=100
    # budget included the prompt tokens, so long prompts left no room to
    # generate anything.
    output = model.generate(**encoded, max_new_tokens=100, num_return_sequences=1)
    response_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return {"choices": [{"text": response_text}]}
# Define the JSON input component.
# JSON Schema for the request body: an object with a single required
# string property, "prompt".
json_schema = {
    "title": "Request",
    "type": "object",
    "properties": {
        "prompt": {
            "type": "string",
            "description": "Enter your prompt here.",
        },
    },
    "required": ["prompt"],
}
# Create Gradio interface
iface = gr.Interface(
    fn=chat_with_model,
    # Use the top-level gr.JSON component: the legacy gr.inputs namespace is
    # gone in current Gradio releases, and gr.JSON takes no `schema` argument.
    # NOTE(review): the expected request shape is {"prompt": "<text>"};
    # confirm the installed Gradio version passes the JSON payload through
    # to fn as a dict.
    inputs=gr.JSON(label="Request"),
    outputs="json",
    title="Chat with L-MChat-7b",
    description="API-like interface using Gradio to simulate OpenAI API behavior.",
)

# Run the Gradio app
iface.launch()