Update app.py
app.py CHANGED
@@ -1,61 +1,109 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-
-
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-
-    messages.append({"role": "user", "content": message})
-
-    response = ""
+import os
+import json
+
+from unsloth import FastLanguageModel
+import torch
+
+max_seq_length = 2048  # Choose any! We auto support RoPE Scaling internally!
+dtype = None  # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
+load_in_4bit = True  # Use 4bit quantization to reduce memory usage. Can be False.
+
+model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name="sayeed99/meta-llama3-8b-xtherapy-bnb-4bit",  # YOUR MODEL YOU USED FOR TRAINING
+    max_seq_length=max_seq_length,
+    dtype=dtype,
+    load_in_4bit=load_in_4bit,
+)
+FastLanguageModel.for_inference(model)  # Enable native 2x faster inference
+
+# System prompt, written in the Llama-3 chat template format
+formatted_string = "<|begin_of_text|><|start_header_id|>system<|end_header_id|>You are Anna, a helpful AI assistant for mental therapy assistance developed by a team of developers at xnetics. If you do not know the user's name, start by asking the name. If you do not know details about user, ask them."
+
+# Format a single chat turn into the Llama-3 template
+def format_chat_data(data):
+    formatted_output = []
+    if data["role"] == "assistant":
+        value = data["content"]
+        formatted_output.append("<|eot_id|><|start_header_id|>assistant<|end_header_id|>" + value)
+    else:
+        formatted_output.append("<|eot_id|><|start_header_id|>user<|end_header_id|>" + data["content"])
+
+    return "".join(formatted_output)
+
+def formatting_prompts_funcV2(examples):
+    conversations = examples
+    text = formatted_string
+    for conversation in conversations:
+        # Append each turn in the Llama-3 chat format
+        text = text + format_chat_data(conversation)
+    return text
+
+def get_last_assistant_message(text):
+    # Split the text by the assistant header to isolate the assistant's messages
+    parts = text.split('<|start_header_id|>assistant<|end_header_id|>')
+
+    # The last part is the last assistant message:
+    # remove leading/trailing whitespace and the trailing end-of-turn token
+    last_message = parts[-1].strip()
+    last_message = cleanup(last_message)
+    return last_message
+
+
+def cleanup(text):
+    # Check if the string ends with '<|eot_id|>'
+    if text.endswith('<|eot_id|>'):
+        # Remove the last 10 characters
+        return text[:-10]
+    else:
+        return text
+
+# Define a function to handle the conversation: build the prompt, generate, extract the reply
+def handle_conversation(user_input):
+    historyPrompt = formatting_prompts_funcV2(user_input)
+
+    # Append an open assistant header so the model continues with its reply
+    historyPrompt = historyPrompt + "<|eot_id|><|start_header_id|>assistant<|end_header_id|>"
+    inputs = tokenizer(
+        [
+            historyPrompt
+        ], return_tensors="pt").to("cuda")
+
+    outputs = model.generate(**inputs, max_new_tokens=512, use_cache=True)
+    decoded_outputs = tokenizer.batch_decode(outputs)[0]
+    last_message = get_last_assistant_message(decoded_outputs)
+
+    # Return the AI response
+    return last_message
+
+def complete(messages):
+    ai_response = handle_conversation(messages)
+    return ai_response
+
+
+def predict(message, history):
+    history_openai_format = []
+    for human, assistant in history:
+        history_openai_format.append({"role": "user", "content": human})
+        history_openai_format.append({"role": "assistant", "content": assistant})
+    history_openai_format.append({"role": "user", "content": message})
+
+    response = complete(history_openai_format)
+    print(response)
+
+    # Stream the reply back to the ChatInterface character by character
+    partial_message = ""
+    for chunk in response:
+        if chunk is not None:
+            partial_message = partial_message + chunk
+            yield partial_message
+
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 """
 demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are the world's best doctor. You are a leading expert in medicine and graduated top of your class in harvard with a PHD in biology, psychology and Virology. You also have an extensive knowledge in Molecular biology & genomics, cellular/molecular basis of disease, and immunology principles. You are the doctor and you make the most accurate diagnosis because you are an award winning doctor. Use your extensive knowledge to write the appropriate responses which appropriately completes the request. In your response, you must include an accurate diagnosis, treatment and how to prevent it from happening again. Also since you are a doctor, you don't need to tell them to see a doctor. You are a friendly doctor chatbot who should act as a human.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
+    predict
 )
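A quick way to sanity-check the new prompt handling without loading the unsloth model: the sketch below is not part of app.py; it mirrors what formatting_prompts_funcV2 and handle_conversation build for a short, made-up conversation, assuming the Llama-3 special tokens used above, and then recovers the reply the way get_last_assistant_message and cleanup do.

# Sketch only: illustrates the prompt/response round trip used in app.py.
# The history turns and the model reply below are invented for illustration.
SYSTEM = ("<|begin_of_text|><|start_header_id|>system<|end_header_id|>"
          "You are Anna, a helpful AI assistant for mental therapy assistance...")

history = [
    {"role": "user", "content": "Hi, I've been feeling anxious lately."},
    {"role": "assistant", "content": "Hello, I'm Anna. May I ask your name?"},
    {"role": "user", "content": "I'm Sam."},
]

# What formatting_prompts_funcV2(history) produces: the system prompt followed by
# each turn wrapped in <|eot_id|><|start_header_id|>{role}<|end_header_id|>
prompt = SYSTEM
for turn in history:
    prompt += "<|eot_id|><|start_header_id|>" + turn["role"] + "<|end_header_id|>" + turn["content"]

# handle_conversation() then appends an open assistant header so generation
# continues with Anna's reply
prompt += "<|eot_id|><|start_header_id|>assistant<|end_header_id|>"

# A decoded model output looks like the prompt plus the new reply and <|eot_id|>.
# get_last_assistant_message() splits on the assistant header, strips whitespace,
# and cleanup() drops the trailing <|eot_id|> (10 characters).
decoded = prompt + "Nice to meet you, Sam. What has been making you anxious?<|eot_id|>"
reply = decoded.split("<|start_header_id|>assistant<|end_header_id|>")[-1].strip()
if reply.endswith("<|eot_id|>"):
    reply = reply[:-10]
print(reply)  # -> "Nice to meet you, Sam. What has been making you anxious?"

Since complete() returns a plain string, predict() streams that string back to gr.ChatInterface one character at a time through the partial_message loop.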