sayeed99 committed
Commit 49c812e · verified · 1 Parent(s): 42ddbf6

Update app.py

Files changed (1): app.py (+92, -44)
app.py CHANGED
@@ -1,61 +1,109 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
 
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("unsloth/llama-3-8b-bnb-4bit")
-
-
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-
-        response += token
-        yield response
+import os
+import json
+
+from unsloth import FastLanguageModel
+import torch
+max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
+dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
+load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.
+
+model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name = "sayeed99/meta-llama3-8b-xtherapy-bnb-4bit", # YOUR MODEL YOU USED FOR TRAINING
+    max_seq_length = max_seq_length,
+    dtype = dtype,
+    load_in_4bit = load_in_4bit,
+)
+FastLanguageModel.for_inference(model) # Enable native 2x faster inference
+
+# alpaca_prompt = You MUST copy from above!
+formatted_string = "<|begin_of_text|><|start_header_id|>system<|end_header_id|>You are Anna, a helpful AI assistant for mental therapy assistance developed by a team of developers at xnetics. If you do not know the user's name, start by asking the name. If you do not know details about user, ask them."
+
+# Function to format the string
+def format_chat_data(data):
+    formatted_output = []
+    if data["role"] == "assistant":
+        value = data["content"]
+        formatted_output.append("<|eot_id|><|start_header_id|>assistant<|end_header_id|>" + value)
+    else:
+        formatted_output.append("<|eot_id|><|start_header_id|>user<|end_header_id|>" + data["content"])
+
+    return "".join(formatted_output)
+
+def formatting_prompts_funcV2(examples):
+    conversations = examples
+    text = formatted_string
+    for conversation in conversations:
+        # Must add EOS_TOKEN, otherwise your generation will go on forever!
+        text = text + format_chat_data(conversation)
+    return text
+
+def get_last_assistant_message(text):
+    # Split the text by 'assistant' to isolate assistant's messages
+    parts = text.split('<|start_header_id|>assistant<|end_header_id|>')
+
+    # The last part is the last assistant message
+    # Remove leading/trailing whitespace and return
+    last_message = parts[-1].strip()
+    last_message = cleanup(last_message)
+    return last_message
+
+
+def cleanup(text):
+    # Check if the string ends with 'eot_id'
+    if text.endswith('<|eot_id|>'):
+        # Remove the last 10 characters
+        return text[:-10]
+    else:
+        return text
+
+# Define a function to handle the conversation and update the session
+def handle_conversation(user_input):
+
+    historyPrompt = formatting_prompts_funcV2(user_input)
+
+    historyPrompt = historyPrompt + "<|eot_id|><|start_header_id|>assistant<|end_header_id|>"
+    inputs = tokenizer(
+        [
+            historyPrompt
+        ], return_tensors="pt").to("cuda")
+
+    outputs = model.generate(**inputs, max_new_tokens=512, use_cache=True)
+    decoded_outputs = tokenizer.batch_decode(outputs)[0]
+    # decoded_outputs = "Hello Welcome"
+    last_message = get_last_assistant_message(decoded_outputs)
+
+    # Return the AI response
+    return last_message
+
+def complete(messages):
+    ai_response = handle_conversation(messages)
+    return ai_response
+
+
+
+def predict(message, history):
+    history_openai_format = []
+    for human, assistant in history:
+        history_openai_format.append({"role": "user", "content": human})
+        history_openai_format.append({"role": "assistant", "content": assistant})
+    history_openai_format.append({"role": "user", "content": message})
+
+    response = complete(history_openai_format)
+    print(response)
+
+    partial_message = ""
+    for chunk in response:
+        if chunk is not None:
+            partial_message = partial_message + chunk
+            yield partial_message
 
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 """
 demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are the world's best doctor. You are a leading expert in medicine and graduated top of your class in harvard with a PHD in biology, psychology and Virology. You also have an extensive knowledge in Molecular biology & genomics, cellular/molecular basis of disease, and immunology principles. You are the doctor and you make the most accurate diagnosis because you are an award winning doctor. Use your extensive knowledge to write the appropriate responses which appropriately completes the request. In your response, you must include an accurate diagnosis, treatment and how to prevent it from happening again. Also since you are a doctor, you don't need to tell them to see a doctor. You are a friendly doctor chatbot who should act as a human.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
+    predict
 )
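
Editor's note on the new prompt-assembly path: the snippet below replays the string-building logic of format_chat_data and formatting_prompts_funcV2 on a made-up two-turn history, with no model or GPU required, to show the Llama 3 special-token layout that handle_conversation ultimately feeds to the tokenizer. The sample messages and the shortened system text are illustrative only, not part of the commit.

# Standalone sketch of the prompt layout built by the diff above.
# The sample history is hypothetical; the logic mirrors format_chat_data
# and formatting_prompts_funcV2.
system_header = (
    "<|begin_of_text|><|start_header_id|>system<|end_header_id|>"
    "You are Anna, a helpful AI assistant..."  # shortened for the example
)

def render_turn(data):
    # <|eot_id|> closes the previous message; the header opens the next one.
    role = "assistant" if data["role"] == "assistant" else "user"
    return "<|eot_id|><|start_header_id|>" + role + "<|end_header_id|>" + data["content"]

history = [
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello! May I know your name?"},
    {"role": "user", "content": "I'm Sam."},
]

prompt = system_header + "".join(render_turn(turn) for turn in history)
# handle_conversation then appends an open assistant header, so the model
# generates the next assistant reply from this point:
prompt += "<|eot_id|><|start_header_id|>assistant<|end_header_id|>"
print(prompt)

Tokenizers shipped with Llama 3 chat models usually expose the reference form of this layout via tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True); that form also inserts newlines after each header, which the hand-rolled string omits, so it is worth comparing if generations look off.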
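
A second behavioral detail worth flagging: complete returns the fully decoded reply, so the "for chunk in response:" loop in predict iterates that string one character at a time and yields a growing prefix. The UI animates like a stream, but generation has already finished. If genuine token streaming is wanted, transformers.TextIteratorStreamer is the usual pattern; the sketch below is a hypothetical variant, not part of this commit, and assumes the model, tokenizer, and formatting_prompts_funcV2 globals defined above.

# Hypothetical streaming variant of handle_conversation (not in this commit).
from threading import Thread
from transformers import TextIteratorStreamer

def handle_conversation_streamed(messages):
    prompt = formatting_prompts_funcV2(messages)
    prompt = prompt + "<|eot_id|><|start_header_id|>assistant<|end_header_id|>"
    inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

    # The streamer yields decoded text fragments while generate() runs in a thread.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    Thread(target=model.generate,
           kwargs=dict(**inputs, streamer=streamer,
                       max_new_tokens=512, use_cache=True)).start()

    partial = ""
    for fragment in streamer:
        partial += fragment
        yield partial  # gr.ChatInterface renders each growing prefix

With this in place, predict could yield from the generator directly instead of re-chunking a finished string.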