import os
import random

import gradio as gr
import numpy as np
import spaces
import torch
from huggingface_hub import login, whoami
from transformers import AutoTokenizer, AutoModelForCausalLM

# Authenticate against the Hugging Face Hub if a token is available.
if "HF_API_TOKEN" in os.environ:
    login(token=os.environ["HF_API_TOKEN"])
    info = whoami()
    print("Authenticated as:", info.get("name"))

device = "cuda" if torch.cuda.is_available() else "cpu"

# Configuration
model_id = "raduqus/reco_1b_16bit"
MAX_SEED = np.iinfo(np.int32).max


@spaces.GPU()
def infer(prompt, seed=0, randomize_seed=True, max_new_tokens=100, temperature=0.7, top_p=0.9):
    # Random seed handling (torch.manual_seed also seeds CUDA devices)
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    torch.manual_seed(seed)

    # Load model and tokenizer inside the GPU-scoped call.
    # Note: .to(device) must not be combined with device_map="auto"
    # (accelerate-dispatched models cannot be moved), so only move
    # the model explicitly on CPU. float16 is also slow/unsupported
    # on many CPUs, so fall back to float32 there.
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        device_map="auto" if device == "cuda" else None,
    )
    if device == "cpu":
        model = model.to(device)

    # Construct an Alpaca-style prompt.
    alpaca_prompt = (
        "### Instruction:\n"
        "Based on user interests and past activities, recommend tasks they might enjoy.\n\n"
        f"### Input:\n{prompt}\n\n"
        "### Response:\n"
    )

    # Tokenize the input (passes input_ids and attention_mask together,
    # which avoids the missing-attention-mask warning in generate()).
    inputs = tokenizer(alpaca_prompt, return_tensors="pt").to(model.device)

    # Generate only new tokens beyond the prompt.
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
    )

    # Decode and return only the generated response.
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response_only = response.split("### Response:")[-1].strip()
    return response_only


examples = [
    '{"user_interests": ["fitness", "food", "community"], '
    '"past_tasks": [{"title": "Led group runs", "description": "Organized weekly jogs."}, '
    '{"title": "Tried meal prep", "description": "Cooked for a full week."}, '
    '{"title": "Joined charity walks", "description": "Helped fundraise for causes."}]}',
]

css = """
#col-container {
    margin: 0 auto;
    max-width: 520px;
}
"""

power_device = "GPU" if device == "cuda" else "CPU"

# Gradio app
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(f"""
        # ZeroGPU Text-to-Text Recommendation
        Currently running on {power_device}.
        """)

        with gr.Row():
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Enter your prompt",
                show_label=False,
                lines=2,
            )
            run_button = gr.Button("Generate")

        result = gr.Textbox(label="Generated Recommendation", lines=4, interactive=False)

        with gr.Accordion("Advanced Settings", open=False):
            seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
            randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
            max_new_tokens = gr.Slider(label="Max New Tokens", minimum=10, maximum=200, step=1, value=100)
            temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.7)
            top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.9)

        gr.Examples(examples=examples, inputs=[prompt])

    run_button.click(
        fn=infer,
        inputs=[prompt, seed, randomize_seed, max_new_tokens, temperature, top_p],
        outputs=[result],
    )

demo.queue().launch()
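
# Optional: a minimal local smoke test (a sketch, not part of the app).
# It assumes network access to the Hub and, if the model repo is gated,
# a valid HF_API_TOKEN; the @spaces.GPU() decorator is a no-op outside a
# ZeroGPU Space. Comment out demo.queue().launch() above and uncomment
# below to run a single generation from the command line:
#
# if __name__ == "__main__":
#     print(infer(examples[0], seed=42, randomize_seed=False, max_new_tokens=80))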