import gradio as gr
import numpy as np
import random
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import spaces
from huggingface_hub import login
import os
if "HF_API_TOKEN" in os.environ:
login(token=os.environ["HF_API_TOKEN"])
# Right after login(token=...):
from huggingface_hub import whoami
info = whoami()
print("Authenticated user info:", info)
device = "cuda" if torch.cuda.is_available() else "cpu"
# Configuration
model_id = "raduqus/reco_1b_16bit"
MAX_SEED = np.iinfo(np.int32).max
@spaces.GPU()
def infer(prompt, seed=0, randomize_seed=True, max_new_tokens=100, temperature=0.7, top_p=0.9):
    # Random seed handling
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    torch.manual_seed(seed)

    # Load model and tokenizer on each call (simple, but reloads weights per request)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        # fp16 on GPU; fall back to fp32 on CPU, where half-precision generation can fail
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        # device_map="auto" already places the model on the GPU, so no explicit .to() is needed
        device_map="auto" if device == "cuda" else None,
    )

    # Construct Alpaca-style prompt
    alpaca_prompt = f"### Instruction:Based on user interests and past activities, recommend tasks they might enjoy.\n ### Input:{prompt}\n\n### Response:\n"

    # Tokenize the input
    inputs = tokenizer(alpaca_prompt, return_tensors="pt").to(device)

    # Generate only the response
    outputs = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=max_new_tokens,  # generate only new tokens beyond the prompt
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
    )

    # Decode and return only the text after the response marker
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response_only = response.split("### Response:")[-1].strip()
    return response_only
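
# Hypothetical local smoke test (not part of the original app): outside of Spaces the
# spaces.GPU decorator is a pass-through, so infer() can be called directly. The sample
# payload and parameter values below are assumptions that mirror the UI example.
# if __name__ == "__main__":
#     print(infer(
#         '{"user_interests": ["fitness"], "past_tasks": [{"title": "Led group runs", "description": "Organized weekly jogs."}]}',
#         seed=42,
#         randomize_seed=False,
#         max_new_tokens=64,
#     ))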
# Example shown in the UI: the model expects a JSON string with user interests and past tasks.
examples = [
    '{"user_interests": ["fitness", "food", "community"], "past_tasks": [{"title": "Led group runs", "description": "Organized weekly jogs."}, {"title": "Tried meal prep", "description": "Cooked for a full week."}, {"title": "Joined charity walks", "description": "Helped fundraise for causes."}]}',
]
css = """
#col-container {
margin: 0 auto;
max-width: 520px;
}
"""
power_device = "GPU" if device == "cuda" else "CPU"
# Gradio app
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(f"""
# ZeroGPU Text-to-Text Recommendation
Currently running on {power_device}.
""")

        with gr.Row():
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Enter your prompt",
                show_label=False,
                lines=2,
            )
            run_button = gr.Button("Generate")

        result = gr.Textbox(label="Generated Recommendation", lines=4, interactive=False)

        with gr.Accordion("Advanced Settings", open=False):
            seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
            randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
            max_length = gr.Slider(label="Max New Tokens", minimum=10, maximum=200, value=100)
            temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.7)
            top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.9)

        gr.Examples(examples=examples, inputs=[prompt])

    run_button.click(
        fn=infer,
        inputs=[prompt, seed, randomize_seed, max_length, temperature, top_p],
        outputs=[result],
    )
demo.queue().launch()