import os
import random

import gradio as gr
import numpy as np
import spaces
import torch
from huggingface_hub import login, whoami
from transformers import AutoTokenizer, AutoModelForCausalLM
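
# A ZeroGPU Gradio Space: turns a JSON payload of user interests and past
# tasks into task recommendations using the raduqus/reco_1b_16bit model.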

# Log in to the Hub when a token is provided (needed for gated or private
# model repos), then confirm which account the token resolves to.
if "HF_API_TOKEN" in os.environ:
    login(token=os.environ["HF_API_TOKEN"])
    info = whoami()
    print("Authenticated user info:", info)

device = "cuda" if torch.cuda.is_available() else "cpu"

model_id = "raduqus/reco_1b_16bit"
MAX_SEED = np.iinfo(np.int32).max
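
# `@spaces.GPU()` asks ZeroGPU to attach a GPU for the duration of each call.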
@spaces.GPU()
def infer(prompt, seed=0, randomize_seed=True, max_new_tokens=100, temperature=0.7, top_p=0.9):
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    torch.manual_seed(seed)

    # Loaded per call so the weights are placed while the ZeroGPU device is
    # attached; a persistent Space could cache these at module level instead.
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        device_map="auto" if device == "cuda" else None,
    )
    if device != "cuda":
        # `.to()` is only valid when the model was not dispatched via device_map.
        model = model.to(device)

    # Alpaca-style prompt format the model was fine-tuned on.
    alpaca_prompt = (
        "### Instruction:\n"
        "Based on user interests and past activities, recommend tasks they might enjoy.\n\n"
        f"### Input:\n{prompt}\n\n"
        "### Response:\n"
    )

    inputs = tokenizer(alpaca_prompt, return_tensors="pt").to(device)

    outputs = model.generate(
        **inputs,  # forwards the attention mask along with input_ids
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,  # avoids a warning for models without a pad token
    )

    # Keep only the text generated after the "### Response:" marker.
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response.split("### Response:")[-1].strip()
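
# One-click example showing the JSON shape the model expects as input.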
examples = [
    '{"user_interests": ["fitness", "food", "community"], "past_tasks": [{"title": "Led group runs", "description": "Organized weekly jogs."}, {"title": "Tried meal prep", "description": "Cooked for a full week."}, {"title": "Joined charity walks", "description": "Helped fundraise for causes."}]}',
]

css = """
#col-container {
    margin: 0 auto;
    max-width: 520px;
}
"""

power_device = "GPU" if device == "cuda" else "CPU"
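
# UI: prompt box, generate button, result box, plus advanced sampling controls.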
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(f"""
        # ZeroGPU Text-to-Text Recommendation
        Currently running on {power_device}.
        """)

        with gr.Row():
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Enter your prompt",
                show_label=False,
                lines=2,
            )
            run_button = gr.Button("Generate")

        result = gr.Textbox(label="Generated Recommendation", lines=4, interactive=False)

        with gr.Accordion("Advanced Settings", open=False):
            seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
            randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
            # Renamed to match the `max_new_tokens` argument it feeds in `infer`.
            max_new_tokens = gr.Slider(label="Max New Tokens", minimum=10, maximum=200, step=1, value=100)
            temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.7)
            top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.9)

        gr.Examples(examples=examples, inputs=[prompt])

        run_button.click(
            fn=infer,
            inputs=[prompt, seed, randomize_seed, max_new_tokens, temperature, top_p],
            outputs=[result],
        )

demo.queue().launch()