Spaces:

Pinkstack
/

Chat-with-superthoughts-lite

Running

File size: 5,210 Bytes

be82a8a
4971496
e0d2fc3
be82a8a
4971496
be82a8a
cdfe590
e0d2fc3
cdfe590
e0d2fc3
 
 
 
 
4971496
e0d2fc3
 
 
 
 
 
 
 
 
4971496
e0d2fc3
 
4971496
e0d2fc3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cdfe590
e0d2fc3
a6f10c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be82a8a
e0d2fc3
4971496
cdfe590
4971496
cdfe590
50a5b93
 
4971496
e0d2fc3
4971496
cdfe590
4971496
 
50a5b93
 
cdfe590
 
e0d2fc3
cdfe590
 
 
 
 
 
 
 
 
4971496
 
be82a8a
e0d2fc3
cdfe590
a6f10c7
 
e0d2fc3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cdfe590
e0d2fc3
 
cdfe590
 
e0d2fc3
 
cdfe590
e0d2fc3
 
cdfe590
e0d2fc3
 
cdfe590
e0d2fc3
 
 
 
 
 
 
 
 
 
cdfe590
be82a8a
a6f10c7
 
 
 
 
 
 
 
 
 
 
 
e0d2fc3

import gradio as gr
from huggingface_hub import InferenceClient
from typing import Iterator

# Module-level inference client bound to the "Pinkstack/Superthoughts-lite-v1"
# model. It is constructed once at import time and reused by respond() for
# every chat completion request.
client = InferenceClient("Pinkstack/Superthoughts-lite-v1")

def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
) -> Iterator[str]:
    """Stream the model's reply to ``message`` as progressively longer strings.

    The conversation sent to the model is: the system message, every
    non-empty user/assistant entry from ``history`` (in order), and finally
    ``message`` as the newest user turn.

    Args:
        message: The new user message to answer.
        history: Earlier ``(user, assistant)`` turns; empty/None entries of a
            pair are skipped.
        system_message: Content of the leading system message.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded as ``temperature``.
        top_p: Forwarded as ``top_p``.

    Yields:
        After each received token, the whole reply accumulated so far, passed
        through ``format_response``. If any exception is raised while
        streaming, a single ``"Error: ..."`` string is yielded instead and
        the generator ends.
    """
    messages = [{"role": "system", "content": system_message}]

    # Replay earlier turns, skipping empty halves of a pair (e.g. a pending
    # assistant slot that has not been filled yet).
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # The message being answered is always the last user turn.
    messages.append({"role": "user", "content": message})

    # Raw (unformatted) reply accumulated across stream chunks.
    response = ""

    try:
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Guard against chunks that carry no choices at all; indexing
            # them would raise IndexError and discard the reply streamed so
            # far. NOTE(review): presumably only some backends emit such
            # chunks (e.g. usage-only/final chunks) -- confirm.
            if not chunk.choices:
                continue

            token = chunk.choices[0].delta.content
            if token is None:
                continue

            response += token
            # Re-format the full text every time so the UI always shows a
            # consistent rendering of the reply so far.
            yield format_response(response)
    except Exception as e:
        # Top-level boundary for the UI: surface the failure as chat text
        # rather than crashing the event handler.
        yield f"Error: {str(e)}"

def format_response(response: str) -> str:
    """Render each complete ``<think>...</think>`` span as a collapsible block.

    Every non-overlapping ``<think>...</think>`` span (matched non-greedily,
    newlines allowed) is replaced by an open ``<details>`` element whose body
    is a ``<div class="thoughts">`` holding the original thoughts text. Text
    outside such spans, including an opening ``<think>`` that has no closing
    tag yet, is returned unchanged.

    The ``id`` of each ``<details>`` is derived from the thoughts text itself
    (first 8 hex digits of its MD5), so the same thoughts always map to the
    same id across repeated calls while a reply is being streamed.

    Args:
        response: Raw model output, possibly containing ``<think>`` sections.

    Returns:
        The text with every complete thinking section converted to HTML.
    """
    import re
    import hashlib

    def render_section(match):
        # Build the HTML replacement for one matched thinking section.
        thoughts = match.group(1)
        section_id = hashlib.md5(thoughts.encode()).hexdigest()[:8]
        return (
            f'<details id="think_{section_id}" open>'
            f'<summary>Show thinking 🧠</summary>'
            f'<div class="thoughts">{thoughts}</div></details>'
        )

    # Substitute in a single left-to-right pass. Replacing each section's
    # literal text globally (one str.replace per section) could rewrite text
    # inside a *later* match and leave that later <think> unconverted.
    return re.sub(r"<think>(.*?)</think>", render_section, response, flags=re.DOTALL)

# Custom CSS handed to gr.Blocks(css=...). It styles the collapsible thinking
# sections produced by format_response: the ".thoughts" container and the
# <details>/<summary> toggle (white text on black, rounded corners). The
# default WebKit disclosure marker is hidden and replaced by a ▶ / ▼ suffix
# that reflects the closed / open state.
css = """
.thoughts {
    border: 1px solid #ccc;
    padding: 10px;
    background-color: #000000;
    color: #ffffff;
    border-radius: 5px;
    margin: 5px 0;
}
details summary {
    cursor: pointer;
    padding: 5px;
    background-color: #000000;
    color: #ffffff;
    border-radius: 5px;
    font-weight: bold;
    margin: 5px 0;
}
details summary::-webkit-details-marker {
    display: none;
}
details summary:after {
    content: " ▶";
}
details[open] summary:after {
    content: " ▼";
}
"""

# Create Gradio interface
with gr.Blocks(css=css) as demo:
    # Page header.
    gr.Markdown("## Chat with Superthoughts lite! (1.7B)")
    gr.Markdown("**Note:** First response may take a moment to initialize. Subsequent responses will be faster.")

    # Main chat widgets: conversation view and the message input box.
    chatbot = gr.Chatbot(height=600)
    msg = gr.Textbox(label="Your message", placeholder="Type your message here...")

    # Generation parameters; their current values are passed to bot() on
    # every submit.
    with gr.Accordion("Advanced Settings", open=False):
        system_message = gr.Textbox(
            value="You must act in a conversational matter and always include <think> ... </think> <output> </output> tokens.",
            label="System message"
        )
        max_tokens = gr.Slider(
            minimum=1,
            maximum=4096,
            value=512,
            step=1,
            label="Max new tokens"
        )
        temperature = gr.Slider(
            minimum=0.1,
            maximum=4.0,
            value=0.7,
            step=0.1,
            label="Temperature"
        )
        top_p = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)"
        )

    def user(user_message: str, history: list) -> tuple[str, list]:
        """Append the submitted message to the chat as a pending turn.

        Returns an empty string (which clears the input box) and a new
        history list whose last entry is ``[user_message, None]``; the
        ``None`` slot is filled in by ``bot``. A missing history is treated
        as an empty conversation.
        """
        return "", (history or []) + [[user_message, None]]

    def bot(history: list, system_message: str, max_tokens: int, temperature: float, top_p: float) -> Iterator[list]:
        """Stream the assistant's answer into the last history entry.

        Takes the user message from the final ``[user, assistant]`` pair,
        passes all earlier pairs to ``respond`` as context, and yields the
        (mutated in place) history after every partial response so the
        chatbot display updates while tokens arrive.
        """
        user_message, _ = history[-1]
        history[-1][1] = ""  # Show an empty bot bubble until tokens arrive

        for partial_response in respond(user_message, history[:-1], system_message, max_tokens, temperature, top_p):
            history[-1][1] = partial_response
            yield history

    # On submit: first record the user turn immediately (unqueued), then
    # stream the bot reply. The event handle is kept so that the stop button
    # can cancel a generation that is still running.
    submit_event = msg.submit(
        user,
        [msg, chatbot],
        [msg, chatbot],
        queue=False
    ).then(
        bot,
        [chatbot, system_message, max_tokens, temperature, top_p],
        chatbot
    )

    with gr.Row():
        clear = gr.Button("Clear Conversation")
        stop = gr.Button("Stop Generation")

    # Wire up the buttons (previously created but never connected):
    # "Clear" resets the chatbot to an empty conversation, and "Stop"
    # runs no function of its own, it only cancels the in-flight submit chain.
    clear.click(lambda: None, None, chatbot, queue=False)
    stop.click(fn=None, inputs=None, outputs=None, cancels=[submit_event])

    # Add disclaimer
    gr.Markdown(
        """
        ---
        ⚠️ **Disclaimer:** Superthoughts may make mistakes. Always verify important information. 
        This chat interface is intended for testing and experimentation purposes only.
        """
    )

# Script entry point: start the app only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    # Enable the request queue on the interface, then launch it with
    # share=True.
    demo.queue().launch(share=True)