import os
from threading import Thread

import gradio as gr
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
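# Assumed runtime dependencies (not pinned in the original): gradio, spaces,
# transformers, torch, and accelerate (required by device_map="auto" below).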
# Read the Hugging Face access token from the environment
HF_TOKEN = os.environ.get("HF_TOKEN", None)
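# Note: the meta-llama checkpoints are gated, so HF_TOKEN must belong to an
# account with access; huggingface_hub picks the token up from the environment.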
TITLE = "<h1><center>Chat with WellnessWhiz</center></h1>"
DESCRIPTION = '''
<div>
<p>🤖 <strong>WellnessWhiz</strong>: Your 24/7 companion for diabetes management, weight loss, and overall wellbeing.</p>
<p>💡 Get instant, personalized answers to your health questions.</p>
<p>🌟 Whether you're looking for diet tips, exercise routines, or disease management strategies, WellnessWhiz is here to guide you toward a healthier lifestyle!</p>
<p>⚠️ <em>Disclaimer: WellnessWhiz provides general information only and is not a substitute for professional medical advice. Always consult a qualified healthcare provider for personalized medical guidance.</em></p>
</div>
'''
LICENSE = """
<p/>
---
Built with Meta Llama 3
"""
PLACEHOLDER = """
<div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
<img src="https://ysharma-dummy-chat-app.hf.space/file=/tmp/gradio/8e75e61cc9bab22b7ce3dec85ab0e6db1da5d107/Meta_lockup_positive%20primary_RGB.jpg" style="width: 80%; max-width: 550px; height: auto; opacity: 0.55; ">
<h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">Meta Llama 3</h1>
<p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Ask me anything...</p>
</div>
"""
css = """
h1 {
text-align: center;
display: block;
}
#duplicate-button {
margin: auto;
color: white;
background: #1565c0;
border-radius: 100vh;
}
"""
DEFAULT_SYSTEM = '''You are an expert endocrinologist assisting users with diabetes management, weight loss, nutritional guidance, and other medical needs. Your primary goal is to provide accurate, helpful information while maintaining an encouraging and supportive tone.'''
CSS = """
.duplicate-button {
margin: auto !important;
color: white !important;
background: black !important;
border-radius: 100vh !important;
}
"""
# Load the tokenizer and model; device_map="auto" places weights on the available GPU(s)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", device_map="auto")
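# (Assumption, not in the original: passing torch_dtype=torch.bfloat16 above would
# roughly halve memory use on recent GPUs; it would also require `import torch`.)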
# Llama 3 marks the end of each assistant turn with <|eot_id|>, so stop on it
# in addition to the tokenizer's default EOS token
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]
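
# @spaces.GPU requests GPU time per call on ZeroGPU Spaces hardware; elsewhere
# the decorator is effectively a no-op.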
@spaces.GPU
def stream_chat(message: str, history: list, system: str, temperature: float, max_new_tokens: int):
    # Rebuild the full conversation: system prompt, prior turns, then the new message
    conversation = [{"role": "system", "content": system or DEFAULT_SYSTEM}]
    for prompt, answer in history:
        conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(
        conversation, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)

    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        do_sample=True,
        eos_token_id=terminators,  # previously defined but unused; stops generation at <|eot_id|>
    )
    if temperature == 0:
        # Temperature 0 makes sampling degenerate; fall back to greedy decoding
        generate_kwargs["do_sample"] = False

    # Generate on a background thread so partial output can be yielded as it streams in
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

    output = ""
    for new_token in streamer:
        output += new_token
        yield output
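
# A minimal sketch (hypothetical prompt) of driving the generator directly,
# outside the Gradio UI:
#   for partial in stream_chat("What does an HbA1c of 7% mean?", [], "", 0.8, 256):
#       print(partial)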
# Wire the placeholder HTML into the chat window (PLACEHOLDER was previously
# unused; gr.Chatbot's placeholder parameter requires a recent Gradio 4.x)
chatbot = gr.Chatbot(height=450, placeholder=PLACEHOLDER)

with gr.Blocks(css=CSS) as demo:
    gr.HTML(TITLE)
    gr.HTML(DESCRIPTION)
    # gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
    gr.ChatInterface(
        fn=stream_chat,
        chatbot=chatbot,
        fill_height=True,
        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
        additional_inputs=[
            gr.Text(
                value="",
                label="System",
                render=False,
            ),
            gr.Slider(
                minimum=0,
                maximum=1,
                step=0.1,
                value=0.8,
                label="Temperature",
                render=False,
            ),
            gr.Slider(
                minimum=128,
                maximum=4096,
                step=1,
                value=1024,
                label="Max new tokens",
                render=False,
            ),
        ],
        examples=[
            ["My blood sugar is running in the 300s. Can you help me?"],
            ["I have hypothyroidism and am taking levothyroxine 100 mcg. I am feeling fatigued. What's wrong?"],
            ["How do I lose weight?"],
            ["Can you generate a one-week, 1,500-calorie diet plan for me?"],
        ],
        cache_examples=False,
    )
    gr.Markdown(LICENSE)  # render the footer (LICENSE was previously unused)

if __name__ == "__main__":
    demo.launch()
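
# Running locally (assuming the dependencies noted above are installed):
#   python app.py    # Gradio serves on http://127.0.0.1:7860 by default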