Spaces:

beyoru
/

qew

Running

App Files Files Community

beyoru committed 23 days ago

Commit

c7c557b

verified ·

1 Parent(s): beef6e2

Update app.py

Browse files

Files changed (1) hide show

app.py +79 -104

app.py CHANGED Viewed

@@ -2,115 +2,80 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 import string
-# Constants
-PUNCS = string.punctuation.replace("'", "")
-MAX_HISTORY = 4
-MAX_HISTORY_TOKENS = 512
-class EOUDetector:
-    def __init__(self, model_name="livekit/turn-detector"):
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-        self.model = AutoModelForCausalLM.from_pretrained(model_name)
-        self.eou_token_id = self.tokenizer.encode("<|im_end|>")[-1]
-    def _normalize_text(self, text: str) -> str:
-        """Normalize text by removing punctuation and extra spaces."""
-        text = text.translate(str.maketrans("", "", PUNCS))
-        return " ".join(text.lower().split())
-    def _format_chat_context(self, messages: list[dict]) -> str:
-        """Format chat context using the model's chat template."""
-        normalized_messages = []
-        for msg in messages[-MAX_HISTORY:]:  # Only keep last MAX_HISTORY messages
-            if msg["role"] not in ("user", "assistant"):
-                continue
-            content = self._normalize_text(msg["content"])
-            if content:
-                normalized_messages.append({
-                    "role": msg["role"],
-                    "content": content
-                })
-        # Apply chat template without generation prompt
-        conversation = self.tokenizer.apply_chat_template(
-            normalized_messages,
-            add_generation_prompt=False,
-            add_special_tokens=False,
-            tokenize=False
-        )
-        # Remove the EOU token from current utterance if present
-        ix = conversation.rfind("<|im_end|>")
-        if ix >= 0:
-            conversation = conversation[:ix]
-        return conversation
-    def compute_eou_probability(self, messages: list[dict]) -> float:
-        """Compute the probability of end of utterance."""
-        # Format the conversation
-        conversation = self._format_chat_context(messages)
-        # Tokenize with proper truncation
-        inputs = self.tokenizer(
-            conversation,
-            add_special_tokens=False,
-            return_tensors="pt",
-            max_length=MAX_HISTORY_TOKENS,
-            truncation=True,
-            truncation_side="left"
-        )
-        # Get model predictions
-        with torch.no_grad():
-            outputs = self.model(**inputs)
-        # Get logits for the last token
-        logits = outputs.logits[0, -1, :]
-        # Compute softmax properly
-        probabilities = torch.nn.functional.softmax(logits, dim=-1)
-        # Get probability for EOU token
-        eou_probability = probabilities[self.eou_token_id].item()
-        return eou_probability
 def respond(
-    message: str,
     history: list[tuple[str, str]],
-    system_message: str,
-    max_tokens: int,
-    temperature: float,
-    top_p: float,
-    eou_threshold: float = 0.2,
-) -> str:
-    # Initialize clients
-    eou_detector = EOUDetector()
-    client = InferenceClient("Qwen/Qwen2.5-3B-Instruct")
-    # Prepare messages
     messages = [{"role": "system", "content": system_message}]
-    for user_msg, assistant_msg in history:
-        if user_msg:
-            messages.append({"role": "user", "content": user_msg})
-        if assistant_msg:
-            messages.append({"role": "assistant", "content": assistant_msg})
-    # Add current message
-    messages.append({"role": "user", "content": message})
-    # Check EOU probability
-    eou_probability = eou_detector.compute_eou_probability(messages)
-    print(f"EOU Probability: {eou_probability}")
     if eou_probability >= eou_threshold:
-        # Generate response
         response = ""
         for message in client.chat_completion(
             messages,
             max_tokens=max_tokens,
@@ -122,19 +87,29 @@ def respond(
             response += token
             yield response
     else:
         yield "Waiting for user to finish... Please continue."
-# Gradio Interface
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Textbox(value="You are a helpful assistant", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
-        gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.05, label="EOU Threshold"),
     ],
 )
 if __name__ == "__main__":
-    demo.launch()

 from huggingface_hub import InferenceClient
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+# Inference client for the model that writes the assistant's replies; created once at import time and used by respond()
+client = InferenceClient("Qwen/Qwen2.5-3B-Instruct")
+# Tokenizer and causal LM used by compute_eou_probability() for end-of-utterance (EOU) scoring; also created once at import time
+tokenizer = AutoTokenizer.from_pretrained("livekit/turn-detector")
+model = AutoModelForCausalLM.from_pretrained("livekit/turn-detector")
+import re
 import string
+def normalize_text(text: str) -> str:
+    """Return *text* lowercased, without punctuation, with whitespace collapsed.
+
+    Every character in ``string.punctuation`` is deleted, the text is
+    lowercased, and each run of whitespace becomes a single space. Leading
+    and trailing whitespace is removed.
+
+    Args:
+        text: Raw message text.
+
+    Returns:
+        The normalized text; an empty string when nothing but punctuation
+        and whitespace was given.
+    """
+    # Delete punctuation before trimming: trimming first leaves a dangling
+    # space when punctuation sits next to the edge (e.g. "wait ...").
+    without_punctuation = text.translate(str.maketrans("", "", string.punctuation))
+    # split() with no argument drops leading/trailing whitespace and splits on
+    # any whitespace run, so the join both trims and collapses.
+    return " ".join(without_punctuation.lower().split())
+def compute_eou_probability(chat_ctx: list[dict[str, str]], max_tokens: int = 512) -> float:
+    """Return the probability that the last turn of the conversation is finished.
+
+    The user/assistant turns in ``chat_ctx`` are normalized and rendered as a
+    single chat-template string. The trailing ``<|im_end|>`` marker is removed
+    so the model has to predict it, and the softmax probability of
+    ``<|im_end|>`` at the final position is returned.
+
+    Args:
+        chat_ctx: Messages as ``{"role": ..., "content": ...}`` dicts. Only
+            ``user`` and ``assistant`` messages whose normalized content is
+            non-empty are scored.
+        max_tokens: Maximum number of tokens fed to the model. When the
+            conversation is longer, the most recent tokens are kept.
+
+    Returns:
+        The probability of ``<|im_end|>`` as the next token, or ``0.0`` when
+        there is no user or assistant text to score.
+
+    Raises:
+        ValueError: If ``<|im_end|>`` is not in the tokenizer vocabulary.
+    """
+    eou_token = "<|im_end|>"
+    # Validate by token string, before running the model: an id returned by
+    # encode() is always in the vocabulary, so checking the id proves nothing.
+    if eou_token not in tokenizer.get_vocab():
+        raise ValueError("EOU token '<|im_end|>' not found in tokenizer vocabulary.")
+    eou_token_id = tokenizer.convert_tokens_to_ids(eou_token)
+
+    turns = []
+    for msg in chat_ctx:
+        role = msg.get("role")
+        if role not in ("user", "assistant"):
+            continue
+        content = normalize_text(msg.get("content") or "")
+        if content:
+            turns.append({"role": role, "content": content})
+    if not turns:
+        return 0.0
+
+    # Build ONE string for the whole conversation. Passing a list of strings
+    # makes the tokenizer return a batch with one row per string, and reading
+    # logits[0, -1] would then score only the first row.
+    conversation = tokenizer.apply_chat_template(
+        turns, add_generation_prompt=False, tokenize=False
+    )
+    # Remove the final end-of-turn marker so it is predicted, not given.
+    marker_start = conversation.rfind(eou_token)
+    if marker_start >= 0:
+        conversation = conversation[:marker_start]
+
+    inputs = tokenizer(conversation, add_special_tokens=False, return_tensors="pt")
+    # Keep the newest tokens: the end of the conversation is what is being scored.
+    limit = max(int(max_tokens), 1)
+    input_ids = inputs["input_ids"][:, -limit:]
+    attention_mask = inputs["attention_mask"][:, -limit:]
+
+    with torch.no_grad():
+        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
+    probabilities = torch.nn.functional.softmax(outputs.logits[0, -1, :], dim=-1)
+    return probabilities[eou_token_id].item()
+# Gradio chat handler (generator): yields the growing reply when the EOU probability reaches eou_threshold, otherwise yields a "please continue" notice. NOTE(review): this diff view elides the lines between the chat_completion(...) arguments and `response += token`, so `token` is bound outside the visible code.
 def respond(
+    message,
     history: list[tuple[str, str]],
+    system_message,
+    max_tokens,
+    temperature,
+    top_p,
+    eou_threshold: float = 0.2,  # Minimum EOU probability required to reply. NOTE(review): the "EOU Threshold" slider in the UI defaults to 0.7, not 0.2
+):
     messages = [{"role": "system", "content": system_message}]
+    for val in history:
+        if val[0]:
+            messages.append({"role": "user", "content": val[0]})
+        if val[1]:
+            messages.append({"role": "assistant", "content": val[1]})
+    # Score the system message plus history. NOTE(review): the current `message` is appended only after this check, so it is not part of the score; `max_tokens` (the reply-length limit) is also reused as the scoring truncation length - confirm both are intended
+    eou_probability = compute_eou_probability(messages, max_tokens=max_tokens)
+    console.log(eou_probability)  # NOTE(review): `console` is never defined or imported in the visible code, so this looks like it raises NameError; print() was presumably intended
+    # Reply only when the EOU probability is at least the threshold (>=)
     if eou_probability >= eou_threshold:
+        # Add the current user message to the context sent to the response model
+        messages.append({"role": "user", "content": message})
         response = ""
         for message in client.chat_completion(  # NOTE: the loop variable rebinds the `message` parameter
             messages,
             max_tokens=max_tokens,
             response += token
             yield response
     else:
+        # EOU probability is below the threshold: ask the user to keep typing instead of replying
         yield "Waiting for user to finish... Please continue."
+        print("Waiting for user to finish... Please continue.")  # runs only if the generator is resumed after the yield above
+# Gradio chat UI built around respond(); the additional inputs are listed in the same order as respond()'s parameters after (message, history): system_message, max_tokens, temperature, top_p, eou_threshold
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
+        gr.Textbox(value="You are helpful assistant", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(
+            minimum=0.1,
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top-p (nucleus sampling)",
+        ),
+        gr.Slider(
+            minimum=0.0, maximum=1.0, value=0.7, step=0.05, label="EOU Threshold"
+        ),  # EOU threshold slider. NOTE(review): the UI default is 0.7 while respond() declares eou_threshold=0.2
     ],
 )
 if __name__ == "__main__":
+    demo.launch()