import gradio as gr
import numpy as np
import random
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import spaces
from huggingface_hub import login, whoami
import os

# Authenticate against the Hugging Face Hub when a token is available
# (required if the model repo below is gated or private). whoami() is kept
# inside the guard: without a token it raises instead of printing user info.
if "HF_API_TOKEN" in os.environ:
    login(token=os.environ["HF_API_TOKEN"])
    info = whoami()
    print("Authenticated user info:", info)
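# Note: on Hugging Face Spaces, HF_API_TOKEN can be set as a Space secret so
# that it is exposed to the app as an environment variable at runtime.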
device = "cuda" if torch.cuda.is_available() else "cpu"
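# Assumption: on a ZeroGPU Space the `spaces` import patches torch so this
# reports "cuda" in the main process, even though a GPU is only actually
# attached inside @spaces.GPU-decorated calls.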
# Configuration
model_id = "raduqus/reco_1b_16bit"
MAX_SEED = np.iinfo(np.int32).max
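# On ZeroGPU Spaces, @spaces.GPU() requests a GPU for the duration of each
# decorated call; on regular hardware the decorator is effectively a no-op
# and inference runs on whatever `device` resolved to above.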
@spaces.GPU()
def infer(prompt, seed=0, randomize_seed=True, max_new_tokens=100, temperature=0.7, top_p=0.9):
    # Random seed handling: draw a fresh seed unless the user pinned one
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    torch.manual_seed(seed)
    # Load model and tokenizer on every call; caching them at module scope
    # would avoid the reload overhead at the cost of keeping weights resident.
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        device_map="auto" if device == "cuda" else None,
    )
    # `.to(device)` must not be called on a model dispatched with
    # device_map="auto", so only move the model explicitly on CPU.
    if device == "cpu":
        model = model.to(device)
    # Construct an Alpaca-style prompt with the standard newline layout
    alpaca_prompt = (
        "### Instruction:\nBased on user interests and past activities, "
        "recommend tasks they might enjoy.\n\n"
        f"### Input:\n{prompt}\n\n### Response:\n"
    )
    # Tokenize the input and move it to the model's device
    inputs = tokenizer(alpaca_prompt, return_tensors="pt").to(model.device)
    # Generate only new tokens after the prompt; unpacking `inputs` also
    # passes the attention mask, and an explicit pad token id silences the
    # "no pad token" warning for models that lack one.
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Decode only the generated continuation: slicing off the prompt tokens is
    # more robust than splitting the decoded text on "### Response:"
    generated = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(generated, skip_special_tokens=True).strip()
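# Illustrative direct call (hypothetical values; output depends on the model):
#   infer('{"user_interests": ["fitness"], "past_tasks": []}',
#         randomize_seed=False, seed=42, max_new_tokens=64)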
examples = [
    '{"user_interests": ["fitness", "food", "community"], "past_tasks": [{"title": "Led group runs", "description": "Organized weekly jogs."}, {"title": "Tried meal prep", "description": "Cooked for a full week."}, {"title": "Joined charity walks", "description": "Helped fundraise for causes."}]}',
]
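# The prompt is a JSON object with "user_interests" (a list of strings) and
# "past_tasks" (a list of {"title", "description"} objects), presumably
# matching the format used during fine-tuning.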
css = """
#col-container {
    margin: 0 auto;
    max-width: 520px;
}
"""
power_device = "GPU" if device == "cuda" else "CPU"
# Gradio app
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(f"""
# ZeroGPU Text-to-Text Recommendation
Currently running on {power_device}.
""")
        with gr.Row():
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Enter your prompt",
                show_label=False,
                lines=2,
            )
            run_button = gr.Button("Generate")
        result = gr.Textbox(label="Generated Recommendation", lines=4, interactive=False)
        with gr.Accordion("Advanced Settings", open=False):
            seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
            randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
            max_new_tokens = gr.Slider(label="Max New Tokens", minimum=10, maximum=200, step=1, value=100)
            temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.7)
            top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.9)
        gr.Examples(examples=examples, inputs=[prompt])
    run_button.click(
        fn=infer,
        inputs=[prompt, seed, randomize_seed, max_new_tokens, temperature, top_p],
        outputs=[result],
    )
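# queue() buffers concurrent requests so GPU calls run one at a time by default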
demo.queue().launch()