Spaces:

joe-muller
/

livekit-turn-detector

Running

App Files Files Community

jtmuller committed 22 days ago

Commit

00407e6

1 Parent(s): b1aea93

Update Space

Browse files

Files changed (1) hide show

app.py +19 -7

app.py CHANGED Viewed

@@ -19,16 +19,20 @@ PUNCS = string.punctuation.replace("'", "")
 # ------------------------------------------------
 # Utility functions
 # ------------------------------------------------
 def softmax(logits: np.ndarray, axis=None) -> np.ndarray:
     """Convert raw logits into probabilities with a numerically stable softmax.

     Args:
         logits: Array (or array-like) of unnormalized scores.
         axis: Axis along which to normalize. ``None`` (the default)
             normalizes over every element of the array, matching the
             behavior of the previous single-argument version. Pass e.g.
             ``axis=-1`` to normalize each row of a batch independently.

     Returns:
         An array with the same shape as ``logits`` whose entries are
         non-negative and sum to 1 along ``axis``.
     """
     logits = np.asarray(logits)
     # Subtracting the maximum leaves the result unchanged mathematically but
     # keeps np.exp from overflowing on large logits.
     shifted = logits - np.max(logits, axis=axis, keepdims=True)
     exp_logits = np.exp(shifted)
     return exp_logits / np.sum(exp_logits, axis=axis, keepdims=True)
 def normalize_text(text: str) -> str:
     """Return ``text`` with PUNCS characters removed, lowercased, and single-spaced.

     Characters listed in the module-level ``PUNCS`` constant are deleted,
     the remainder is lowercased, and any run of whitespace (including
     leading/trailing) is collapsed to a single space.
     """
     # Translation table that maps every PUNCS character to "deleted".
     removal_table = str.maketrans("", "", PUNCS)
     lowered = text.translate(removal_table).lower()
     # split() with no arguments drops empty fields, so rejoining with one
     # space collapses all whitespace runs.
     words = lowered.split()
     return " ".join(words)
 def calculate_eou(chat_ctx, session, tokenizer) -> float:
     """
     Given a conversation context (list of dicts with 'role' and 'content'),
@@ -62,11 +66,13 @@ def calculate_eou(chat_ctx, session, tokenizer) -> float:
     eou_token_id = tokenizer.encode("<|im_end|>")[-1]
     return probs[eou_token_id]
 # ------------------------------------------------
 # Load ONNX session & tokenizer once
 # ------------------------------------------------
 print("Loading ONNX model session...")
-onnx_session = ort.InferenceSession(ONNX_FILENAME, providers=["CPUExecutionProvider"])
 print("Loading tokenizer...")
 turn_detector_tokenizer = AutoTokenizer.from_pretrained(HG_MODEL)
@@ -80,6 +86,8 @@ client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 # ------------------------------------------------
 # Gradio Chat Handler
 # ------------------------------------------------
 def respond(message, history, system_message, max_tokens, temperature, top_p):
     """
     This function is called on each new user message in the ChatInterface.
@@ -93,19 +101,22 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
     #    [{'role': 'system', 'content': ...},
     #     {'role': 'user', 'content': ...},  ...]
-    messages = []
     if system_message.strip():
-        messages.append({"role": "system", "content": system_message})
     # history is a list of tuples: [(user1, assistant1), (user2, assistant2), ...]
-    for user_text, assistant_text in history:
         if user_text:
             messages.append({"role": "user", "content": user_text})
         if assistant_text:
             messages.append({"role": "assistant", "content": assistant_text})
     # Append the new user message
-    messages.append({"role": "user", "content": message})
     # 2) Calculate EOU probability on the entire conversation
     eou_prob = calculate_eou(messages, onnx_session, turn_detector_tokenizer)
@@ -113,9 +124,10 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
     # 3) Generate the assistant response from your HF model.
     #    (This code streams token-by-token.)
     response = ""
     yield f"[EOU Probability: {eou_prob:.4f}]"
 # ------------------------------------------------
 # Gradio ChatInterface
 # ------------------------------------------------
@@ -158,4 +170,4 @@ demo = gr.ChatInterface(
 )
 if __name__ == "__main__":
-    demo.launch()

 # ------------------------------------------------
 # Utility functions
 # ------------------------------------------------
 def softmax(logits: np.ndarray, axis=None) -> np.ndarray:
     """Convert raw logits into probabilities with a numerically stable softmax.

     Args:
         logits: Array (or array-like) of unnormalized scores.
         axis: Axis along which to normalize. ``None`` (the default)
             normalizes over every element of the array, matching the
             behavior of the previous single-argument version. Pass e.g.
             ``axis=-1`` to normalize each row of a batch independently.

     Returns:
         An array with the same shape as ``logits`` whose entries are
         non-negative and sum to 1 along ``axis``.
     """
     logits = np.asarray(logits)
     # Subtracting the maximum leaves the result unchanged mathematically but
     # keeps np.exp from overflowing on large logits.
     shifted = logits - np.max(logits, axis=axis, keepdims=True)
     exp_logits = np.exp(shifted)
     return exp_logits / np.sum(exp_logits, axis=axis, keepdims=True)
 def normalize_text(text: str) -> str:
     """Return ``text`` with PUNCS characters removed, lowercased, and single-spaced.

     Characters listed in the module-level ``PUNCS`` constant are deleted,
     the remainder is lowercased, and any run of whitespace (including
     leading/trailing) is collapsed to a single space.
     """
     # Translation table that maps every PUNCS character to "deleted".
     removal_table = str.maketrans("", "", PUNCS)
     lowered = text.translate(removal_table).lower()
     # split() with no arguments drops empty fields, so rejoining with one
     # space collapses all whitespace runs.
     words = lowered.split()
     return " ".join(words)
 def calculate_eou(chat_ctx, session, tokenizer) -> float:
     """
     Given a conversation context (list of dicts with 'role' and 'content'),
     eou_token_id = tokenizer.encode("<|im_end|>")[-1]
     return probs[eou_token_id]
 # ------------------------------------------------
 # Load ONNX session & tokenizer once
 # ------------------------------------------------
 print("Loading ONNX model session...")
+onnx_session = ort.InferenceSession(
+    ONNX_FILENAME, providers=["CPUExecutionProvider"])
 print("Loading tokenizer...")
 turn_detector_tokenizer = AutoTokenizer.from_pretrained(HG_MODEL)
 # ------------------------------------------------
 # Gradio Chat Handler
 # ------------------------------------------------
 def respond(message, history, system_message, max_tokens, temperature, top_p):
     """
     This function is called on each new user message in the ChatInterface.
     #    [{'role': 'system', 'content': ...},
     #     {'role': 'user', 'content': ...},  ...]
+    messages = [
+        {"role": "user",
+         "content": message}
+    ]
     if system_message.strip():
+        messages.insert(0, {"role": "system", "content": system_message})
     # history is a list of tuples: [(user1, assistant1), (user2, assistant2), ...]
+    """ for user_text, assistant_text in history:
         if user_text:
             messages.append({"role": "user", "content": user_text})
         if assistant_text:
             messages.append({"role": "assistant", "content": assistant_text})
     # Append the new user message
+    messages.append({"role": "user", "content": message}) """
     # 2) Calculate EOU probability on the entire conversation
     eou_prob = calculate_eou(messages, onnx_session, turn_detector_tokenizer)
     # 3) Generate the assistant response from your HF model.
     #    (This code streams token-by-token.)
     response = ""
     yield f"[EOU Probability: {eou_prob:.4f}]"
 # ------------------------------------------------
 # Gradio ChatInterface
 # ------------------------------------------------
 )
 if __name__ == "__main__":
+    demo.launch()