import gradio as gr
from huggingface_hub import InferenceClient
from typing import Iterator
import requests
from bs4 import BeautifulSoup
from urllib.parse import quote_plus

def search_web(query: str, num_results: int = 3) -> list[str]:
    """
    Search DuckDuckGo's HTML endpoint and return formatted result snippets.

    Args:
        query: Free-text search query; it is URL-encoded before being sent.
        num_results: Maximum number of formatted results to return.

    Returns:
        A list of at most ``num_results`` strings, each holding a "Title:"
        line and an "Excerpt:" line. The list is empty when the query is
        blank, when ``num_results`` is not positive, or when no result has
        both a title and a snippet. This function never raises: on any
        failure the list holds a single "Search error: ..." string.
    """
    # Nothing to look up or nothing requested: skip the network round-trip.
    # (A negative count would otherwise slice from the end of the results.)
    if not query or not query.strip() or num_results <= 0:
        return []

    try:
        # Encode the search query
        encoded_query = quote_plus(query)

        # Make request to DuckDuckGo
        url = f"https://html.duckduckgo.com/html/?q={encoded_query}"
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        }
        # Bound the wait so a stalled connection cannot hang the chat reply,
        # and treat HTTP error statuses as failures instead of parsing an
        # error page as if it were a result list.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Extract results. Incomplete entries are skipped *before* counting,
        # so they do not eat into the num_results budget.
        results = []
        for result in soup.find_all('div', class_='result'):
            title = result.find('a', class_='result__a')
            snippet = result.find('a', class_='result__snippet')
            if not (title and snippet):
                continue
            results.append(f"Title: {title.text.strip()}\nExcerpt: {snippet.text.strip()}\n")
            if len(results) >= num_results:
                break

        return results
    except Exception as e:
        # Best-effort boundary: search failures are reported as data, not raised.
        return [f"Search error: {str(e)}"]

# Shared Hugging Face inference client, created once at import time.
# respond() uses it to stream chat completions.
client = InferenceClient("Pinkstack/Superthoughts-lite-v1")

def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
    enable_search: bool,
) -> Iterator[str]:
    """
    Stream the model's reply to ``message``, optionally primed with web search results.

    Args:
        message: The user's newest message.
        history: Earlier (user, assistant) turns; falsy entries are skipped.
        system_message: Content of the leading system message.
        max_tokens: Forwarded to the chat completion call.
        temperature: Forwarded to the chat completion call.
        top_p: Forwarded to the chat completion call.
        enable_search: When true, search results for ``message`` are
            prepended to the user message sent to the model.

    Yields:
        The accumulated reply so far, passed through format_response(),
        once per received token. If the completion call fails, a single
        "Error: ..." string is yielded instead of raising.
    """
    messages = [{"role": "system", "content": system_message}]

    # If search is enabled, get search results and add to context
    search_context = ""
    if enable_search:
        # search_web() reports failures as "Search error: ..." entries. Drop
        # those so an error message is never handed to the model as if it
        # were a search result to base the answer on.
        search_results = [
            entry
            for entry in search_web(message)
            if not entry.startswith("Search error:")
        ]
        if search_results:
            search_context = "Search results:\n" + "\n".join(search_results) + "\n\nBased on these results, "

    # Add history to messages
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add current message; search_context is "" when search is off or empty
    messages.append({"role": "user", "content": search_context + message})

    # Initialize response
    response = ""

    # Stream the response
    try:
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Guard against a chunk with an empty `choices` list; indexing it
            # would raise and abort an otherwise healthy stream.
            if not chunk.choices:
                continue
            token = chunk.choices[0].delta.content
            if token is not None:
                response += token
                yield format_response(response)
    except Exception as e:
        yield f"Error: {str(e)}"

def format_response(response: str) -> str:
    """Swap the model's <think> markers for an expandable HTML <details> block."""
    opening_html = '<details open><summary>Show thinking 🧠</summary><div class="thoughts">'
    closing_html = "</div></details>"
    # Opening marker first, then the closing one.
    for marker, html in (("<think>", opening_html), ("</think>", closing_html)):
        response = response.replace(marker, html)
    return response

# Custom CSS handed to gr.Blocks(css=...).
# - `.thoughts` and the `details`/`summary` rules style the expandable block
#   that format_response() emits for <think> sections (the default disclosure
#   marker is hidden and replaced by the ▶ / ▼ glyphs).
# - NOTE(review): the `.chat-container`, `.chat-message`, `.user-message`,
#   `.bot-message`, `.message-text` and `.button-container` classes are not
#   assigned to any component in the part of the file reviewed — presumably
#   leftovers; confirm before removing.
css = """
.thoughts {
    border: 1px solid #ccc;
    padding: 10px;
    background-color: #000000;
    color: #ffffff;
    border-radius: 5px;
    margin: 5px 0;
}
details summary {
    cursor: pointer;
    padding: 5px;
    background-color: #000000;
    color: #ffffff;
    border-radius: 5px;
    font-weight: bold;
    margin: 5px 0;
}
details summary::-webkit-details-marker {
    display: none;
}
details summary:after {
    content: " ▶";
}
details[open] summary:after {
    content: " ▼";
}
/* ChatGPT-like UI */
.gradio-container {
    font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;
}
.chat-container {
    max-width: 800px;
    margin: auto;
}

.chat-message {
    padding: 10px;
    border-radius: 8px;
    margin-bottom: 10px;
}

.user-message {
    background-color: #f0f0f0;
    text-align: right;
}

.bot-message {
    background-color: #ffffff;
    text-align: left;
}

.message-text {
    white-space: pre-wrap;
}
.button-container {
    display: flex;
    justify-content: flex-end;
    gap: 10px; /* Space between buttons */
    margin-top: 5px;
} 
"""


# Create Gradio interface.
# Components are declared in display order: header text, chat window, input
# row, advanced settings, then the clear button and disclaimer.
with gr.Blocks(css=css) as demo:
    gr.Markdown("# Chat with Superthoughts lite! (1.7B)")
    gr.Markdown("## NEW! We have added online web abilities, in advanced settings enable Web search.")
    gr.Markdown("**Warning:** The first output from the AI may take a few moments. After the first message, it should work at a decent speed, keep in mind that this chat is only meant for testing and experimenting.")

    chatbot = gr.Chatbot(height=600)
    # Input row: message box plus Send / Stop buttons.
    with gr.Row():
        msg = gr.Textbox(
            label="Your message",
            placeholder="Type your message here...",
            scale=7,
            container=False
        )
        submit_btn = gr.Button("Send", variant="primary", scale=1)
        stop_btn = gr.Button("Stop", variant="stop", scale=1)

    # Generation settings; their values are passed to bot() on every turn.
    with gr.Accordion("Advanced Settings", open=False):
        enable_search = gr.Checkbox(
            label="Enable web search [Beta]",
            value=False,
            info="When enabled, the AI will search the web for relevant information before responding, powered by duckduckgo."
        )
        system_message = gr.Textbox(
            value="You must act in a conversational matter and always include at the start <think> ... </think> <output> ... </output> tokens.",
            label="System message"
        )
        max_tokens = gr.Slider(
            minimum=1,
            maximum=4096,
            value=512,
            step=1,
            label="Max new tokens"
        )
        temperature = gr.Slider(
            minimum=0.1,
            maximum=2.0,
            value=0.85,
            step=0.05,
            label="Temperature/Creativeness"
        )
        top_p = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)"
        )

    def user(user_message: str, history: list) -> tuple[str, list]:
        """Append the submitted message to the chat history and clear the textbox.

        Returns ("", new_history): the empty string is written back to the
        message box, and the new last row is [user_message, None], where None
        is the placeholder that bot() later fills with the reply.
        """
        return "", history + [[user_message, None]]

    def bot(
        history: list,
        system_message: str,
        max_tokens: int,
        temperature: float,
        top_p: float,
        enable_search: bool
    ) -> Iterator[list]:
        """Generate and stream the bot's reply for the newest history row.

        Reads the user message from the last row, calls respond() with the
        earlier rows as context, and yields the whole history after each
        partial response so the chat window updates incrementally.
        """
        user_message, _ = history[-1]
        history[-1][1] = ""  # Initialize bot's response

        # Only the earlier rows are passed as context; respond() appends the
        # current message itself.
        for partial_response in respond(
            user_message,
            history[:-1],
            system_message,
            max_tokens,
            temperature,
            top_p,
            enable_search
        ):
            history[-1][1] = partial_response
            yield history

    # Pressing Enter in the textbox: record the message via user(), then
    # chain the streaming bot() call.
    submit_event = msg.submit(
        user,
        [msg, chatbot],
        [msg, chatbot],
        queue=False
    ).then(
        bot,
        [chatbot, system_message, max_tokens, temperature, top_p, enable_search],
        chatbot
    )

    # The Send button runs the same two-step chain as the Enter key.
    submit_click_event = submit_btn.click(
        user,
        [msg, chatbot],
        [msg, chatbot],
        queue=False
    ).then(
        bot,
        [chatbot, system_message, max_tokens, temperature, top_p, enable_search],
        chatbot
    )

    # Stop runs no function of its own; it only cancels the two chains above.
    stop_btn.click(None, [], [], cancels=[submit_event, submit_click_event])

    # Add a clear button; it writes None to the chatbot component.
    clear = gr.Button("Clear Conversation")
    clear.click(lambda: None, None, chatbot, queue=False)

    # Add disclaimer
    gr.Markdown(
        """
        ---
        ⚠️ **Disclaimer:** Superthoughts may make mistakes. Always verify important information. 
        This chat interface is intended for testing and experimentation purposes only.
        """
    )

if __name__ == "__main__":
    # Start the app only when this file is run as a script, not on import.
    # queue() is called before launch().
    demo.queue()
    # NOTE(review): per Gradio's launch() docs, share=True also requests a
    # public share link — confirm that exposure is intended for this demo.
    demo.launch(share=True)