Update app.py
app.py
CHANGED
@@ -2,74 +2,115 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+import string

+# Constants
+PUNCS = string.punctuation.replace("'", "")
+MAX_HISTORY = 4
+MAX_HISTORY_TOKENS = 512

+class EOUDetector:
+    def __init__(self, model_name="livekit/turn-detector"):
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self.model = AutoModelForCausalLM.from_pretrained(model_name)
+        self.eou_token_id = self.tokenizer.encode("<|im_end|>")[-1]
+
+    def _normalize_text(self, text: str) -> str:
+        """Normalize text by removing punctuation and extra spaces."""
+        text = text.translate(str.maketrans("", "", PUNCS))
+        return " ".join(text.lower().split())
+
+    def _format_chat_context(self, messages: list[dict]) -> str:
+        """Format chat context using the model's chat template."""
+        normalized_messages = []
+
+        for msg in messages[-MAX_HISTORY:]:  # Only keep last MAX_HISTORY messages
+            if msg["role"] not in ("user", "assistant"):
+                continue
+
+            content = self._normalize_text(msg["content"])
+            if content:
+                normalized_messages.append({
+                    "role": msg["role"],
+                    "content": content
+                })
+
+        # Apply chat template without generation prompt
+        conversation = self.tokenizer.apply_chat_template(
+            normalized_messages,
+            add_generation_prompt=False,
+            add_special_tokens=False,
+            tokenize=False
+        )
+
+        # Remove the EOU token from current utterance if present
+        ix = conversation.rfind("<|im_end|>")
+        if ix >= 0:
+            conversation = conversation[:ix]
+
+        return conversation

+    def compute_eou_probability(self, messages: list[dict]) -> float:
+        """Compute the probability of end of utterance."""
+        # Format the conversation
+        conversation = self._format_chat_context(messages)
+
+        # Tokenize with proper truncation
+        inputs = self.tokenizer(
+            conversation,
+            add_special_tokens=False,
+            return_tensors="pt",
+            max_length=MAX_HISTORY_TOKENS,
+            truncation=True,
+            truncation_side="left"
+        )
+
+        # Get model predictions
+        with torch.no_grad():
+            outputs = self.model(**inputs)
+
+        # Get logits for the last token
+        logits = outputs.logits[0, -1, :]
+
+        # Compute softmax properly
+        probabilities = torch.nn.functional.softmax(logits, dim=-1)
+
+        # Get probability for EOU token
+        eou_probability = probabilities[self.eou_token_id].item()
+
+        return eou_probability

-# Respond function with EOU checking logic
 def respond(
-    message,
+    message: str,
     history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-    eou_threshold: float = 0.2,
-):
+    system_message: str,
+    max_tokens: int,
+    temperature: float,
+    top_p: float,
+    eou_threshold: float = 0.2,
+) -> str:
+    # Initialize clients
+    eou_detector = EOUDetector()
+    client = InferenceClient("Qwen/Qwen2.5-3B-Instruct")
+
+    # Prepare messages
     messages = [{"role": "system", "content": system_message}]
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
+    for user_msg, assistant_msg in history:
+        if user_msg:
+            messages.append({"role": "user", "content": user_msg})
+        if assistant_msg:
+            messages.append({"role": "assistant", "content": assistant_msg})

-    # Compute EOU probability before responding
-    eou_probability = compute_eou_probability(messages, max_tokens=max_tokens)
-    print(eou_probability)
-    # Only respond if EOU probability exceeds threshold
-        messages.append({"role": "user", "content": message})
+    # Add current message
+    messages.append({"role": "user", "content": message})

+    # Check EOU probability
+    eou_probability = eou_detector.compute_eou_probability(messages)
+    print(f"EOU Probability: {eou_probability}")

     if eou_probability >= eou_threshold:
+        # Generate response
         response = ""
         for message in client.chat_completion(
             messages,
             max_tokens=max_tokens,
@@ -81,29 +122,19 @@ def respond(
             response += token
             yield response
     else:
-        # Let the user continue typing if the EOU probability is low
         yield "Waiting for user to finish... Please continue."
-        print("Waiting for user to finish... Please continue.")

-# Gradio
+# Gradio Interface
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Textbox(value="
+        gr.Textbox(value="You are a helpful assistant", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-        gr.Slider(
-            minimum=0.0, maximum=1.0, value=0.7, step=0.05, label="EOU Threshold"
-        ),  # Add EOU threshold slider
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
+        gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.05, label="EOU Threshold"),
     ],
 )

 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
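
For reference, a minimal sketch of how the new EOUDetector could be exercised on its own, outside Gradio. The import path and the sample conversations are illustrative assumptions rather than part of the commit, and it assumes the livekit/turn-detector checkpoint can be downloaded:

from app import EOUDetector  # hypothetical import; adjust to your module layout

detector = EOUDetector()

# A message that trails off mid-thought should score a lower EOU probability...
unfinished = [
    {"role": "assistant", "content": "How can I help you today?"},
    {"role": "user", "content": "I wanted to ask about"},
]
print(detector.compute_eou_probability(unfinished))

# ...while a complete question should score a higher one.
finished = [
    {"role": "assistant", "content": "How can I help you today?"},
    {"role": "user", "content": "What time do you open tomorrow?"},
]
print(detector.compute_eou_probability(finished))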
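One design note on the new respond(): it constructs EOUDetector() on every chat turn, which reloads the turn-detector tokenizer and weights each time a message arrives. A hedged sketch of one way to avoid that, assuming the class definition from this commit is in scope; the helper name is invented for illustration:

import functools

@functools.lru_cache(maxsize=1)
def get_eou_detector() -> "EOUDetector":
    # Build the detector once on first use; later calls return the
    # same cached instance instead of reloading the model.
    return EOUDetector()

Inside respond(), the line eou_detector = EOUDetector() would then become eou_detector = get_eou_detector(), and the same treatment could apply to the InferenceClient.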