Spaces:

beyoru
/

Func_calling

Sleeping

App Files Files Community

beyoru committed on Jan 25

Commit

31391ab

verified ·

1 Parent(s): 9015f33

Update app.py

Browse files

Files changed (1) hide show

app.py +81 -24

app.py CHANGED Viewed

@@ -1,23 +1,30 @@
-import subprocess
-from threading import Thread
 import torch
-import spaces
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
-MODEL_NAME = MODEL_ID.split("/")[-1]
 CONTEXT_LENGTH = 4096
-def predict(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
-    stop_tokens = ["<|endoftext|>", "<|im_end|>", "|im_end|"]
-    instruction = '<|im_start|>system\n' + system_prompt + '\n<|im_end|>\n'
     for user, assistant in history:
         instruction += f'<|im_start|>user\n{user}\n<|im_end|>\n<|im_start|>assistant\n{assistant}\n<|im_end|>\n'
     instruction += f'<|im_start|>user\n{message}\n<|im_end|>\n<|im_start|>assistant\n'
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
     enc = tokenizer(instruction, return_tensors="pt", truncation=True, max_length=CONTEXT_LENGTH)
     input_ids, attention_mask = enc.input_ids, enc.attention_mask
@@ -36,27 +43,77 @@ def predict(message, history, system_prompt, temperature, max_new_tokens, top_k,
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
     outputs = []
     for new_token in streamer:
-        if new_token in stop_tokens:
-            break  # Stop generation but don't add the stop token
-        outputs.append(new_token)
-        yield "".join(outputs).replace("<|im_end|>", "")  # Ensure no leftover stop tokens
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
 gr.ChatInterface(
     predict,
     additional_inputs=[
-        gr.Textbox("You are a helpful assistant. Format responses clearly using natural Markdown formatting where appropriate.",
-                 label="System prompt"),
-        gr.Slider(0, 1, 0.6, label="Temperature"),
-        gr.Slider(0, 4096, 512, label="Max new tokens"),
-        gr.Slider(1, 80, 40, label="Top K sampling"),
-        gr.Slider(0, 2, 1.1, label="Repetition penalty"),
-        gr.Slider(0, 1, 0.95, label="Top P sampling"),
     ],
-    css=".message { white-space: pre-wrap; }",  # Preserve newlines
 ).queue().launch()

+import re
 import torch
+from threading import Thread
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
 CONTEXT_LENGTH = 4096
+# Add special tokens for thinking process
 # Load the tokenizer for MODEL_ID and register "<think>" / "</think>" as
 # additional special tokens, then load the model and resize its embedding
 # table to the tokenizer's current length.
 # NOTE(review): predict() builds its TextIteratorStreamer with
 # skip_special_tokens=True. With the tags registered as special tokens the
 # streamed text presumably no longer contains them, which would keep the
 # "<think>" / "</think>" detection in predict() from ever firing - confirm.
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+tokenizer.add_special_tokens({
+    "additional_special_tokens": ["<think>", "</think>"]
+})
+model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
+model.resize_token_embeddings(len(tokenizer))
+def predict(message, history, show_thinking, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
     # Generator used as the chat callback: builds a prompt from the system
     # prompt, the (user, assistant) history pairs and the new message, runs
     # model.generate on a background thread, and yields the cleaned,
     # accumulated reply each time new text is added to the output.
     # show_thinking controls whether text found between "<think>" and
     # "</think>" is included in what is yielded.
     # NOTE(review): stop_tokens is assigned but not referenced in any line of
     # this function visible in the diff - confirm whether the elided lines use it.
+    stop_tokens = ["<|endoftext|>", "<|im_end|>", "|im_end|", "</think>"]
+    instruction = f'<|im_start|>system\n{system_prompt}\n<|im_end|>\n'
+    # Format chat history
     for user, assistant in history:
         instruction += f'<|im_start|>user\n{user}\n<|im_end|>\n<|im_start|>assistant\n{assistant}\n<|im_end|>\n'
     # Open the assistant turn so generation continues from here.
     instruction += f'<|im_start|>user\n{message}\n<|im_end|>\n<|im_start|>assistant\n'
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
     # The prompt is truncated to at most CONTEXT_LENGTH tokens.
     enc = tokenizer(instruction, return_tensors="pt", truncation=True, max_length=CONTEXT_LENGTH)
     input_ids, attention_mask = enc.input_ids, enc.attention_mask
     # NOTE(review): the diff elides lines here; generate_kwargs is defined in
     # that hidden region and is not shown in this view.
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
     outputs = []
     # thinking_buffer collects text seen while inside a <think> section;
     # current_chunk holds the text of the chunk currently being processed.
+    thinking_buffer = []
+    in_thinking = False
+    current_chunk = ""
     for new_token in streamer:
+        current_chunk += new_token
+        # Check for thinking tags
         # NOTE(review): current_chunk is emptied at the end of every iteration,
         # so a tag split across two streamed chunks is not detected here.
+        if "<think>" in current_chunk and not in_thinking:
+            in_thinking = True
+            pre, _, post = current_chunk.partition("<think>")
             # Text before the opening tag is ordinary answer text.
+            if pre:
+                outputs.append(pre)
+                yield _clean_output("".join(outputs), show_thinking)
+            current_chunk = post
+        if "</think>" in current_chunk and in_thinking:
+            in_thinking = False
+            pre, _, post = current_chunk.partition("</think>")
+            thinking_buffer.append(pre)
             # NOTE(review): when show_thinking is true, earlier thinking chunks
             # were already appended to outputs in the in_thinking branch below,
             # so extending with the whole buffer adds that text a second time.
+            if show_thinking:
+                outputs.extend(thinking_buffer)
+            thinking_buffer = []
+            current_chunk = post
+        if in_thinking:
+            thinking_buffer.append(current_chunk)
             # Thinking text is only streamed to the caller when requested.
+            if show_thinking:
+                outputs.append(current_chunk)
+                yield _clean_output("".join(outputs), show_thinking)
+            current_chunk = ""
+        else:
+            if current_chunk:
+                outputs.append(current_chunk)
+                yield _clean_output("".join(outputs), show_thinking)
+                current_chunk = ""
+def _clean_output(text: str, show_thinking: bool) -> str:
     # Normalise any literal think tags left in `text` and return the result
     # with leading/trailing whitespace stripped.
     # NOTE(review): show_thinking is accepted but not used in this body.
+    # Remove residual tags and format thinking content
     # "<think>" (plus surrounding whitespace) becomes a "*Thinking:* " label
     # preceded by a blank line.
+    text = re.sub(r'\s*<think>\s*', '\n\n*Thinking:* ', text)
     # "</think>" (plus surrounding whitespace) collapses to a single space.
+    text = re.sub(r'\s*</think>\s*', ' ', text)
     # Ensure the label is always followed by a space.
+    text = re.sub(r'(\*Thinking:\*)(?! )', r'\1 ', text)
+    return text.strip()
+# Create interface with toggle
 # Build the chat UI around predict(), enable queuing and launch it.
 gr.ChatInterface(
     predict,
     # The inputs below are listed in the same order as predict()'s parameters
     # after (message, history): show_thinking, system_prompt, temperature,
     # max_new_tokens, top_k, repetition_penalty, top_p.
     additional_inputs=[
+        gr.Checkbox(value=True, label="🔍 Show Thinking Process"),
+        gr.Textbox(
+            "You are an AI assistant. First analyze requests using <think> tags, then provide answers. "
+            "Put all reasoning between <think> and </think> tags.",
+            label="System Prompt"
+        ),
+        gr.Slider(0, 1, 0.6, label="🌡️ Temperature"),
+        gr.Slider(0, 4096, 512, label="📏 Max New Tokens"),
+        gr.Slider(1, 80, 40, label="🎛️ Top K"),
+        gr.Slider(0.1, 2.0, 1.1, label="🔄 Repetition Penalty"),
+        gr.Slider(0, 1, 0.95, label="🧮 Top P"),
     ],
     # NOTE(review): the stylesheet defines a ".thinking" class, but no visible
     # line in this diff emits markup carrying that class - confirm it is used.
+    css="""
+    .thinking {
+        color: #666;
+        font-style: italic;
+        border-left: 3px solid #ddd;
+        padding-left: 1em;
+        margin: 0.5em 0;
+    }
+    """,
+    title="DeepSeek AI Assistant with Reasoning",
+    description="Toggle the 'Show Thinking Process' checkbox to view/hide the model's internal reasoning"
 ).queue().launch()