import gradio as gr
import numpy as np
import random
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import spaces
from huggingface_hub import login, whoami
import os
if "HF_API_TOKEN" in os.environ:
    login(token=os.environ["HF_API_TOKEN"])
# Right after login(token=...):
from huggingface_hub import whoami
info = whoami()
print("Authenticated user info:", info)

device = "cuda" if torch.cuda.is_available() else "cpu"

# Configuration
model_id = "raduqus/reco_1b_16bit"
MAX_SEED = np.iinfo(np.int32).max  # 2**31 - 1, upper bound for the seed slider

@spaces.GPU()
def infer(prompt, seed=0, randomize_seed=True, max_new_tokens=100, temperature=0.7, top_p=0.9):
    # Random seed handling
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    torch.manual_seed(seed)

    # Load model and tokenizer on each call. With device_map="auto" the weights
    # are already dispatched to the GPU, so calling .to(device) on the returned
    # model would raise; only move it explicitly when running on CPU.
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        device_map="auto" if device == "cuda" else None,
    )
    if device == "cpu":
        model = model.to(device)
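    # A common alternative (a sketch, assuming persistent GPU hardware rather
    # than ZeroGPU): load the tokenizer and model once at module scope and
    # reuse them across calls, avoiding the per-request initialization cost.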
    
    # Construct an Alpaca-style prompt: instruction, input, and response
    # sections, each on its own line and separated by blank lines.
    alpaca_prompt = (
        "### Instruction:\n"
        "Based on user interests and past activities, recommend tasks they might enjoy.\n\n"
        f"### Input:\n{prompt}\n\n"
        "### Response:\n"
    )

    # Tokenize the input
    inputs = tokenizer(alpaca_prompt, return_tensors="pt").to(device)

    # Generate only new tokens after the prompt; passing the attention mask and
    # an explicit pad token avoids warnings with models that lack a pad token.
    outputs = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode and return only the generated response
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response_only = response.split("### Response:")[-1].strip()
    return response_only

examples = [
    '{"user_interests": ["fitness", "food", "community"], "past_tasks": [{"title": "Led group runs", "description": "Organized weekly jogs."}, {"title": "Tried meal prep", "description": "Cooked for a full week."}, {"title": "Joined charity walks", "description": "Helped fundraise for causes."}]}',
]
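# The example prompt is a JSON object with two keys: "user_interests" (a list
# of strings) and "past_tasks" (a list of {"title", "description"} objects).
# Hypothetical smoke test, not run by the app:
#   print(infer(examples[0], randomize_seed=False, seed=42, max_new_tokens=50))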

css = """
#col-container {
    margin: 0 auto;
    max-width: 520px;
}
"""

power_device = "GPU" if device == "cuda" else "CPU"

# Gradio app
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(f"""
        # ZeroGPU Text-to-Text Recommendation
        Currently running on {power_device}.
        """)
        
        with gr.Row():
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Enter your prompt",
                show_label=False,
                lines=2,
            )
            run_button = gr.Button("Generate")
        
        result = gr.Textbox(label="Generated Recommendation", lines=4, interactive=False)
        
        with gr.Accordion("Advanced Settings", open=False):
            seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
            randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
            max_new_tokens = gr.Slider(label="Max New Tokens", minimum=10, maximum=200, step=1, value=100)
            temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.7)
            top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.9)
        
        gr.Examples(examples=examples, inputs=[prompt])

    run_button.click(
        fn=infer,
        inputs=[prompt, seed, randomize_seed, max_new_tokens, temperature, top_p],
        outputs=[result],
    )

demo.queue().launch()
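# queue() serializes concurrent requests, which matters on shared (Zero)GPU
# hardware; launch() then serves the app. Assuming gradio, torch, transformers,
# and spaces are installed, the script can also be run locally via `python app.py`.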