Spaces:

Artples
/

L-MChat-ZeroGPU

Running on Zero

L-MChat-ZeroGPU / app.py

Update app.py

49365db verified 10 months ago

1.33 kB

	import gradio as gr
	from transformers import AutoModelForCausalLM, AutoTokenizer
	import torch

	# Checkpoint to serve, and the device inference is pinned to (CPU).
	model_name = "Artples/L-MChat-7b"
	device = torch.device("cpu")
	# Fetch the tokenizer and the causal-LM weights for the checkpoint.
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	# Module.to() returns the module itself, so loading and placement chain.
	model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

	def chat_with_model(json_input):
	    """Generate text for a JSON request and wrap it in an OpenAI-like reply.

	    Args:
	        json_input: Dict carrying the prompt under the ``"prompt"`` key.

	    Returns:
	        ``{"choices": [{"text": ...}]}`` where the text is the decoded
	        first sequence returned by ``model.generate``.

	    Raises:
	        ValueError: If ``json_input`` is not a dict with a string prompt.
	    """
	    # Reject malformed requests up front with one clear message instead of
	    # an opaque KeyError/TypeError from deeper in the call chain.
	    prompt = json_input.get("prompt") if isinstance(json_input, dict) else None
	    if not isinstance(prompt, str):
	        raise ValueError(
	            'Request must be a JSON object with a string "prompt" field.'
	        )
	    # Calling the tokenizer (rather than .encode) also produces the
	    # attention mask, which is forwarded to generate() below.
	    encoded = tokenizer(prompt, return_tensors="pt").to(device)
	    # max_new_tokens bounds only the continuation; max_length also counted
	    # the prompt tokens, so long prompts left little or no room for a reply.
	    output = model.generate(**encoded, max_new_tokens=100, num_return_sequences=1)
	    response_text = tokenizer.decode(output[0], skip_special_tokens=True)
	    return {"choices": [{"text": response_text}]}

	# JSON Schema describing a request: an object with one required string
	# field, "prompt".
	_prompt_property = {
	    "type": "string",
	    "description": "Enter your prompt here.",
	}
	json_schema = {
	    "title": "Request",
	    "type": "object",
	    "properties": {"prompt": _prompt_property},
	    "required": ["prompt"],
	}

	# Create the Gradio interface.
	# The legacy gr.inputs namespace is gone in Gradio 4+, so the top-level
	# gr.JSON component is used for the request body. gr.JSON accepts no
	# schema argument, so json_schema is not passed to it.
	iface = gr.Interface(
	    fn=chat_with_model,
	    inputs=gr.JSON(label="Request"),
	    outputs="json",
	    title="Chat with L-MChat-7b",
	    description="API-like interface using Gradio to simulate OpenAI API behavior.",
	)

	# Start the Gradio server only when this file is executed as a script, so
	# importing the module (e.g. to reuse chat_with_model) has no such side effect.
	if __name__ == "__main__":
	    iface.launch()