import hashlib
import re
from typing import Iterator

import gradio as gr
from huggingface_hub import InferenceClient

client = InferenceClient("Pinkstack/Superthoughts-lite-v1")
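
# `respond` below streams tokens from the hosted model via the chat-completion
# API. A minimal sketch of the underlying call (hypothetical one-off prompt,
# using the same `client` as above):
#
#   for chunk in client.chat_completion(
#       [{"role": "user", "content": "Hello"}], max_tokens=32, stream=True
#   ):
#       print(chunk.choices[0].delta.content or "", end="")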

def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
) -> Iterator[str]:
    messages = [{"role": "system", "content": system_message}]

    # Add history to messages
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add current message
    messages.append({"role": "user", "content": message})

    # Initialize response
    response = ""

    # Stream the response token by token
    try:
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            if chunk.choices[0].delta.content is not None:
                token = chunk.choices[0].delta.content
                response += token
                yield format_response(response)
    except Exception as e:
        yield f"Error: {str(e)}"
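
# Example of the transformation performed by `format_response` below: a reply
# such as
#   "<think>reasoning...</think><output>answer</output>"
# has its <think> block rewritten into a collapsible <details> element whose
# id is derived from an MD5 hash of the section content, so the same text
# re-rendered while streaming keeps a stable id (and its open/closed state).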

def format_response(response: str) -> str:
    """Format the response with collapsible thinking sections that maintain state."""

    def get_section_id(content: str) -> str:
        # Create a unique ID for each thinking section based on its content
        return hashlib.md5(content.encode()).hexdigest()[:8]

    # Find all thinking sections and replace them with uniquely identified
    # collapsible sections
    pattern = r"<think>(.*?)</think>"
    sections = re.findall(pattern, response, re.DOTALL)
    formatted = response
    for section in sections:
        section_id = get_section_id(section)
        old = f"<think>{section}</think>"
        new = (
            f'<details id="think_{section_id}" open>'
            f'<summary>Show thinking 🧠</summary>'
            f'<div class="thoughts">{section}</div></details>'
        )
        formatted = formatted.replace(old, new)
    return formatted

# Custom CSS for the collapsible thinking sections
css = """
.thoughts {
    border: 1px solid #ccc;
    padding: 10px;
    background-color: #000000;
    color: #ffffff;
    border-radius: 5px;
    margin: 5px 0;
}
details summary {
    cursor: pointer;
    padding: 5px;
    background-color: #000000;
    color: #ffffff;
    border-radius: 5px;
    font-weight: bold;
    margin: 5px 0;
}
/* Hide the browser's default disclosure marker and draw our own arrows */
details summary::-webkit-details-marker {
    display: none;
}
details summary:after {
    content: " ▶";
}
details[open] summary:after {
    content: " ▼";
}
"""

# Create the Gradio interface
with gr.Blocks(css=css) as demo:
    gr.Markdown("## Chat with Superthoughts lite! (1.7B)")
    gr.Markdown("**Note:** The first response may take a moment to initialize. Subsequent responses will be faster.")

    chatbot = gr.Chatbot(height=600)
    msg = gr.Textbox(label="Your message", placeholder="Type your message here...")

    with gr.Accordion("Advanced Settings", open=False):
        system_message = gr.Textbox(
            value="You must act in a conversational manner and always include <think> ... </think> <output> </output> tokens.",
            label="System message",
        )
        max_tokens = gr.Slider(
            minimum=1,
            maximum=4096,
            value=512,
            step=1,
            label="Max new tokens",
        )
        temperature = gr.Slider(
            minimum=0.1,
            maximum=4.0,
            value=0.7,
            step=0.1,
            label="Temperature",
        )
        top_p = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        )
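
    # Streaming works by yielding successively longer histories: Gradio
    # re-renders the Chatbot on every yield from `bot` below, after `user`
    # has echoed the new message with an empty assistant slot.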
    def user(user_message: str, history: list) -> tuple[str, list]:
        """Add the user's message to the history and clear the input box."""
        return "", history + [[user_message, None]]

    def bot(history: list, system_message: str, max_tokens: int, temperature: float, top_p: float) -> Iterator[list]:
        """Generate and stream the bot's response into the last history entry."""
        user_message, _ = history[-1]
        history[-1][1] = ""  # Initialize the bot's response
        for partial_response in respond(user_message, history[:-1], system_message, max_tokens, temperature, top_p):
            history[-1][1] = partial_response
            yield history

    # Set up chat message handling; keep a handle on the streaming step so
    # the Stop button can cancel it
    bot_event = msg.submit(
        user,
        [msg, chatbot],
        [msg, chatbot],
        queue=False,
    ).then(
        bot,
        [chatbot, system_message, max_tokens, temperature, top_p],
        chatbot,
    )

    with gr.Row():
        clear = gr.Button("Clear Conversation")
        stop = gr.Button("Stop Generation")

    # Wire up the buttons: Clear resets the chat, Stop cancels the streaming event
    clear.click(lambda: None, None, chatbot, queue=False)
    stop.click(None, None, None, cancels=[bot_event])

    # Add a disclaimer
    gr.Markdown(
        """
        ---
        ⚠️ **Disclaimer:** Superthoughts may make mistakes. Always verify important information.
        This chat interface is intended for testing and experimentation purposes only.
        """
    )

# Launch the interface
if __name__ == "__main__":
    demo.queue()
    demo.launch(share=True)
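
# Usage note: with share=True, Gradio serves the app locally and also prints a
# temporary public link in the console. Running this file assumes the `gradio`
# and `huggingface_hub` packages are installed (pip install gradio huggingface_hub).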