beyoru committed · Commit 74cfe0f · verified · 1 Parent(s): f016e8b

Update app.py

Files changed (1)
  1. app.py +17 -40
app.py CHANGED
@@ -1,25 +1,28 @@
-import gradio as gr
+from adapters import AutoAdapterModel  # Ensure this library is correctly installed
 from transformers import AutoTokenizer
+import gradio as gr
 import onnxruntime as ort
 import numpy as np
 import string
 from huggingface_hub import InferenceClient
 import os
 
-# Initialize Qwen client
-qwen_client = InferenceClient(os.environ.get('MODEL_ID'))
+# Load Base Model and Adapter
+BASE_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"  # Replace with the actual base model ID
+ADAPTER_NAME = "ystemsrx/Qwen2.5-Sex"  # Replace with the correct adapter name
 
-# Model and ONNX setup
-HG_MODEL = "livekit/turn-detector"
+model = AutoAdapterModel.from_pretrained(BASE_MODEL)
+model.load_adapter(ADAPTER_NAME, set_active=True)
+tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
+
+# ONNX setup
 ONNX_FILENAME = "model_quantized.onnx"
+onnx_session = ort.InferenceSession(ONNX_FILENAME, providers=["CPUExecutionProvider"])
+
 PUNCS = string.punctuation.replace("'", "")
-MAX_HISTORY = 4  # Adjusted to use the last 4 messages
+MAX_HISTORY = 4
 MAX_HISTORY_TOKENS = 512
-EOU_THRESHOLD = 0.5  # Updated threshold to match original
-
-# Initialize ONNX model
-tokenizer = AutoTokenizer.from_pretrained(HG_MODEL)
-onnx_session = ort.InferenceSession(ONNX_FILENAME, providers=["CPUExecutionProvider"])
+EOU_THRESHOLD = 0.5
 
 # Softmax function
 def softmax(logits):
@@ -42,18 +45,15 @@ def format_chat_ctx(chat_ctx):
         msg["content"] = content
         new_chat_ctx.append(msg)
 
-    # Tokenize with chat template
     convo_text = tokenizer.apply_chat_template(
         new_chat_ctx, add_generation_prompt=False, add_special_tokens=False, tokenize=False
     )
-
-    # Remove EOU token from the current utterance
     ix = convo_text.rfind("<|im_end|>")
     return convo_text[:ix]
 
 # Calculate EOU probability
 def calculate_eou(chat_ctx, session):
-    formatted_text = format_chat_ctx(chat_ctx[-MAX_HISTORY:])  # Use the last 4 messages
+    formatted_text = format_chat_ctx(chat_ctx[-MAX_HISTORY:])
     inputs = tokenizer(
         formatted_text,
         return_tensors="np",
@@ -67,7 +67,6 @@ def calculate_eou(chat_ctx, session):
     eou_token_id = tokenizer.encode("<|im_end|>")[-1]
     return probs[eou_token_id]
 
-
 # Respond function
 def respond(
     message,
@@ -76,28 +75,18 @@ def respond(
     temperature,
     top_p,
 ):
-    # Keep the last 4 conversation pairs (user-assistant)
     messages = [{"role": "system", "content": os.environ.get("CHARACTER_DESC")}]
-
-    for val in history[-10:]:  # Only use the last 4 pairs
+    for val in history[-10:]:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
-
-    # Add the new user message to the context
     messages.append({"role": "user", "content": message})
-
-    # Calculate EOU probability
     eou_prob = calculate_eou(messages, onnx_session)
-    print(f"EOU Probability: {eou_prob}")  # Debug output
-
-    # If EOU is below the threshold, ask for more input
+    print(f"EOU Probability: {eou_prob}")
     if eou_prob < EOU_THRESHOLD:
         yield "[Waiting for user to continue input...]"
         return
-
-    # Generate response with Qwen
     response = ""
     for message in qwen_client.chat_completion(
         messages,
@@ -109,23 +98,11 @@ def respond(
             token = message.choices[0].delta.content
             response += token
             yield response
-
     print(f"Generated response: {response}")
 
-
 # Gradio interface
 demo = gr.ChatInterface(
     respond,
-    # additional_inputs=[
-    #     # Commented out to disable user modification of the system message
-    #     # gr.Textbox(value="You are an assistant.", label="System message"),
-    #     gr.Slider(minimum=1, maximum=4096, value=256, step=1, label="Max new tokens"),
-    #     gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-    #     gr.Slider(
-    #         minimum=0.1,
-    #         maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
-    #     ),
-    # ],
 )
 
 if __name__ == "__main__":
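
A caveat worth flagging: this revision deletes the qwen_client initialization, yet respond() still streams from qwen_client.chat_completion(...), so the app will raise a NameError as soon as a message clears the EOU threshold. A minimal sketch of a fix, assuming the parent revision's MODEL_ID environment variable is still set:

import os
from huggingface_hub import InferenceClient

# Restore the remote client that respond() still references after this commit.
# Assumes MODEL_ID is set, as in parent revision f016e8b.
qwen_client = InferenceClient(os.environ.get("MODEL_ID"))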
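
Relatedly, the newly loaded model (base plus adapter) is never called anywhere in respond(), so the adapter has no effect on generation. If the intent is to generate locally with the adapter active, a rough, hypothetical sketch follows; it assumes the AutoAdapterModel instance carries a causal-LM head (adapters may require adding one explicitly) and reuses the tokenizer and model defined above:

import torch

def generate_locally(messages, max_tokens, temperature, top_p):
    # Hypothetical helper, not part of this commit: run the adapter-activated
    # model locally instead of calling the removed qwen_client.
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    )
    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
        )
    # Decode only the tokens generated after the prompt.
    return tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)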