import gradio as gr
from huggingface_hub import InferenceClient
from typing import Iterator

# Module-level inference client constructed for "Pinkstack/Superthoughts-lite-v1";
# respond() sends every chat-completion request through this single instance.
client = InferenceClient("Pinkstack/Superthoughts-lite-v1")

def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
) -> Iterator[str]:
    """Stream the model's reply to ``message`` as progressively longer strings.

    The prompt is assembled as: the system message, then every earlier turn
    from ``history`` (empty/``None`` entries are skipped), then ``message``.

    Args:
        message: The user's newest message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs.
        system_message: Content of the leading ``system`` message.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        The reply accumulated so far, passed through ``format_response``,
        once for every streamed chunk that carries content. If an exception
        is raised while streaming, a final ``"Error: <exception text>"``
        string is yielded instead of propagating the exception.
    """
    messages = [{"role": "system", "content": system_message}]

    # Replay earlier turns, leaving out any half of a pair that is empty.
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": message})

    response = ""

    # Broad catch on purpose: this generator feeds the UI, so any failure is
    # surfaced to the user as text rather than raised.
    try:
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # A chunk without any choices has nothing to append; indexing it
            # would raise IndexError and be reported as an error below.
            if not chunk.choices:
                continue
            token = chunk.choices[0].delta.content
            if token is not None:
                response += token
                yield format_response(response)
    except Exception as e:
        yield f"Error: {e}"

def format_response(response: str) -> str:
    """Wrap each complete ``<think>...</think>`` span in a collapsible block.

    Every non-overlapping ``<think>...</think>`` span (shortest match, may
    cover several lines) is replaced by an open ``<details>`` element whose
    ``id`` is ``think_`` plus the first 8 hex digits of the MD5 of the span's
    content, so identical content always gets the same id. Text outside such
    spans, including a ``<think>`` that has no closing tag yet, is returned
    unchanged.

    Args:
        response: Raw model output, possibly partial.

    Returns:
        ``response`` with every complete thinking span converted to HTML.
    """
    import hashlib
    import re

    def render(match):
        thoughts = match.group(1)
        # MD5 is only a short content fingerprint for the element id, not a
        # security measure; saying so keeps it usable on FIPS-restricted builds.
        section_id = hashlib.md5(
            thoughts.encode(), usedforsecurity=False
        ).hexdigest()[:8]
        return f'<details id="think_{section_id}" open><summary>Show thinking 🧠</summary><div class="thoughts">{thoughts}</div></details>'

    # One left-to-right pass that replaces each span where it was matched.
    # Replacing by literal text afterwards could also rewrite the inside of a
    # different span that happens to contain the same "<think>...</think>" text.
    return re.sub(r"<think>(.*?)</think>", render, response, flags=re.DOTALL)

# Stylesheet handed to gr.Blocks(css=...). It styles the HTML that
# format_response() emits for thinking sections:
#   - `.thoughts` is the class on the <div> holding the thinking text
#     (white text on a black, bordered, rounded box);
#   - `details summary` styles the clickable "Show thinking" header;
#   - the WebKit disclosure marker is hidden and replaced by a trailing
#     ▶ (closed) or ▼ (open) added through `summary:after`.
css = """
.thoughts {
    border: 1px solid #ccc;
    padding: 10px;
    background-color: #000000;
    color: #ffffff;
    border-radius: 5px;
    margin: 5px 0;
}
details summary {
    cursor: pointer;
    padding: 5px;
    background-color: #000000;
    color: #ffffff;
    border-radius: 5px;
    font-weight: bold;
    margin: 5px 0;
}
details summary::-webkit-details-marker {
    display: none;
}
details summary:after {
    content: " ▶";
}
details[open] summary:after {
    content: " ▼";
}
"""

# Build the Gradio interface: chat window, message box, generation settings,
# and the event wiring that connects them to respond().
with gr.Blocks(css=css) as demo:
    gr.Markdown("## Chat with Superthoughts lite! (1.7B)")
    gr.Markdown("**Note:** First response may take a moment to initialize. Subsequent responses will be faster.")

    chatbot = gr.Chatbot(height=600)
    msg = gr.Textbox(label="Your message", placeholder="Type your message here...")

    with gr.Accordion("Advanced Settings", open=False):
        system_message = gr.Textbox(
            value="You must act in a conversational matter and always include <think> ... </think> <output> </output> tokens.",
            label="System message"
        )
        max_tokens = gr.Slider(
            minimum=1,
            maximum=4096,
            value=512,
            step=1,
            label="Max new tokens"
        )
        temperature = gr.Slider(
            minimum=0.1,
            maximum=4.0,
            value=0.7,
            step=0.1,
            label="Temperature"
        )
        top_p = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)"
        )

    def user(user_message: str, history: list) -> tuple[str, list]:
        """Append the submitted text to the chat as a turn with no reply yet.

        Returns an empty string (which clears the textbox) and a new history
        list ending in ``[user_message, None]``.
        """
        return "", history + [[user_message, None]]

    def bot(history: list, system_message: str, max_tokens: int, temperature: float, top_p: float) -> Iterator[list]:
        """Stream the reply for the last turn in ``history``.

        The last entry supplies the user message; all earlier entries are
        passed to ``respond`` as prior context. After every partial reply the
        last entry's reply slot is overwritten and the whole history is
        yielded so the chatbot re-renders.
        """
        user_message, _ = history[-1]
        history[-1][1] = ""  # Initialize bot's response

        for partial_response in respond(user_message, history[:-1], system_message, max_tokens, temperature, top_p):
            history[-1][1] = partial_response
            yield history

    # Submit: record the user's turn immediately (unqueued), then stream the
    # reply. The chained event is kept so the buttons below can cancel it.
    generation = msg.submit(
        user,
        [msg, chatbot],
        [msg, chatbot],
        queue=False
    ).then(
        bot,
        [chatbot, system_message, max_tokens, temperature, top_p],
        chatbot
    )

    with gr.Row():
        clear = gr.Button("Clear Conversation")
        stop = gr.Button("Stop Generation")

    # Wire the buttons, which previously had no handlers attached.
    # Clearing also cancels a running generation so a reply that is still
    # streaming cannot repopulate the chat that was just emptied.
    clear.click(lambda: None, None, chatbot, queue=False, cancels=[generation])
    stop.click(None, None, None, cancels=[generation])

    # Add disclaimer
    gr.Markdown(
        """
        ---
        ⚠️ **Disclaimer:** Superthoughts may make mistakes. Always verify important information. 
        This chat interface is intended for testing and experimentation purposes only.
        """
    )

# Script entry point: enable the request queue, then serve the app with a
# public share link.
if __name__ == "__main__":
    # queue() returns the same Blocks object, so the launch call can be chained.
    demo.queue().launch(share=True)