File size: 4,989 Bytes
6bcba58
 
 
 
 
 
 
 
 
 
5103369
0094ac8
e706c98
 
 
 
6bcba58
 
794a42a
 
 
 
6bcba58
 
 
6b02e11
 
0653671
6b02e11
 
 
 
454b0bf
 
8861375
1fc0a63
 
454b0bf
 
 
8861375
e17f0b6
 
 
 
 
 
 
 
 
 
 
 
 
 
794a42a
d989999
bdb72fd
 
 
 
 
 
 
 
 
d989999
 
 
6bcba58
2932ae3
 
 
 
 
 
6bcba58
d989999
 
 
 
 
 
6bcba58
 
d989999
 
 
6bcba58
 
 
d989999
6bcba58
 
 
d989999
6bcba58
 
d989999
 
6bcba58
 
 
d989999
 
 
 
6bcba58
 
d989999
 
 
 
 
0094ac8
6bcba58
d989999
6bcba58
 
 
 
d989999
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6bcba58
d989999
0094ac8
 
d989999
0094ac8
d989999
6bcba58
d989999
 
 
6bcba58
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import gradio as gr
import os
import spaces
from transformers import GemmaTokenizer, AutoModelForCausalLM
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from threading import Thread

# Hugging Face access token for the gated Meta-Llama weights.
# May be None when running locally without the secret set.
HF_TOKEN = os.environ.get("HF_TOKEN", None)


TITLE = "<h1><center>Chat with WellnessWhiz</center></h1>"


# Rendered through gr.HTML, which does NOT interpret Markdown — use real HTML
# tags (<strong>/<em>); the previous **bold**/*italics* markers displayed
# literally in the page.
DESCRIPTION = '''
<div>
<p>🤖 <strong>WellnessWhiz</strong>: Your 24/7 companion for diabetes management, weight loss, and overall wellbeing.</p>
<p>💡 Get instant, personalized answers to your health questions.</p>
<p>🌟 Whether you're looking for diet tips, exercise routines, or disease management strategies, WellnessWhiz is here to guide you towards a healthier lifestyle!</p>
<p>⚠️ <em>Disclaimer: WellnessWhiz provides general information only and is not a substitute for professional medical advice. Always consult with a qualified healthcare provider for personalized medical guidance.</em></p>
</div>
'''

LICENSE = """
<p/>

---
Built with Meta Llama 3
"""

# Placeholder shown inside the empty chatbot area.
PLACEHOLDER = """
<div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
   <img src="https://ysharma-dummy-chat-app.hf.space/file=/tmp/gradio/8e75e61cc9bab22b7ce3dec85ab0e6db1da5d107/Meta_lockup_positive%20primary_RGB.jpg" style="width: 80%; max-width: 550px; height: auto; opacity: 0.55;  "> 
   <h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">Meta llama3</h1>
   <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Ask me anything...</p>
</div>
"""


# NOTE(review): this lowercase `css` is never referenced — the Blocks demo
# below uses the uppercase CSS constant. Kept for backward compatibility in
# case it is imported elsewhere; consider removing.
css = """
h1 {
  text-align: center;
  display: block;
}

#duplicate-button {
  margin: auto;
  color: white;
  background: #1565c0;
  border-radius: 100vh;
}
"""

# Default system prompt used when the user leaves the "System" box empty.
DEFAULT_SYSTEM = '''You are an expert endocrinologist and you are here to assist users with diabetes management, weight loss, nutritional guidance and other medical needs. Your primary goal is to provide accurate, helpful information while maintaining an encouraging and supportive tone.'''


# Stylesheet actually passed to gr.Blocks(css=CSS) below.
CSS = """
.duplicate-button {
  margin: auto !important;
  color: white !important;
  background: black !important;
  border-radius: 100vh !important;
}
"""



# Load the Llama-3-8B-Instruct tokenizer and model once at import time.
# NOTE(review): the Meta weights are gated — presumably HF_TOKEN (read above)
# grants download access in this Space; confirm the token has been accepted
# for the repo.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
# device_map="auto" lets accelerate place weights on the available device(s).
model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", device_map="auto")  # alternative: .to("cuda:0")
# Token ids that end an assistant turn for Llama-3: the model's own EOS plus
# the chat-template end-of-turn marker <|eot_id|>.
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

@spaces.GPU
def stream_chat(message: str, history: list, system: str, temperature: float, max_new_tokens: int):
    """Stream an assistant reply from the Llama-3 model.

    Args:
        message: The latest user message.
        history: Prior turns as (user, assistant) string pairs, as supplied
            by gr.ChatInterface.
        system: System prompt text; falls back to DEFAULT_SYSTEM when empty.
        temperature: Sampling temperature; 0 switches to greedy decoding.
        max_new_tokens: Hard cap on the number of generated tokens.

    Yields:
        The accumulated reply text after each newly streamed token.
    """
    conversation = [{"role": "system", "content": system or DEFAULT_SYSTEM}]
    for prompt, answer in history:
        conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])

    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt").to(
        model.device
    )
    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)

    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        do_sample=True,
        # BUG FIX: `terminators` was defined at module level but never passed
        # to generate(), so Llama-3 did not stop at <|eot_id|> and kept
        # generating until max_new_tokens was exhausted.
        eos_token_id=terminators,
        # Silences the "pad_token_id is not set" warning for open-ended
        # generation; standard practice is to reuse EOS.
        pad_token_id=tokenizer.eos_token_id,
    )
    if temperature == 0:
        # Greedy decoding; also drop temperature so transformers does not
        # warn that it has no effect when do_sample=False.
        generate_kwargs["do_sample"] = False
        generate_kwargs.pop("temperature")

    # generate() blocks, so run it in a worker thread and consume the
    # streamer on this (generator) side.
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

    output = ""
    for new_token in streamer:
        output += new_token
        yield output


# Chat display shared with the ChatInterface below.
chatbot = gr.Chatbot(height=450)

with gr.Blocks(css=CSS) as demo:
    gr.HTML(TITLE)
    gr.HTML(DESCRIPTION)

    # Extra controls; render=False defers rendering until ChatInterface
    # places them inside the parameters accordion.
    system_prompt_box = gr.Text(value="", label="System", render=False)
    temperature_slider = gr.Slider(
        minimum=0,
        maximum=1,
        step=0.1,
        value=0.8,
        label="Temperature",
        render=False,
    )
    max_tokens_slider = gr.Slider(
        minimum=128,
        maximum=4096,
        step=1,
        value=1024,
        label="Max new tokens",
        render=False,
    )

    gr.ChatInterface(
        fn=stream_chat,
        chatbot=chatbot,
        fill_height=True,
        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
        additional_inputs=[system_prompt_box, temperature_slider, max_tokens_slider],
        examples=[
            ["My blood sugar is running in the 300s. Can you help me?"],
            ["I have hypothyroidism and is taking levothyroxine 100 mcg. I am feeling fatigued. Whats wrong?"],
            ["How do I lose weight?"],
            ["Can you generate 1 week 1500 calorie diet plan for me?"],
        ],
        cache_examples=False,
    )


if __name__ == "__main__":
    demo.launch()