"""Gradio chat front-end for the Superthoughts-lite model with optional web search.

The module builds a ``gr.Blocks`` UI (exposed as ``demo``), streams chat
completions from the Hugging Face Inference API, and can prepend DuckDuckGo
search excerpts to the user's message.
"""

import gradio as gr
from huggingface_hub import InferenceClient
from typing import Iterator
import requests
from bs4 import BeautifulSoup
from urllib.parse import quote_plus

# Seconds to wait for DuckDuckGo before giving up; without a timeout a stalled
# connection would block the chat handler indefinitely.
SEARCH_TIMEOUT_SECONDS = 10

# Markers wrapping the model's reasoning, and the HTML they are rewritten to.
# NOTE(review): the tag literals were missing from the file as received (both
# replace() calls searched for an empty string). "<think>"/"</think>" and the
# <details>/<summary>/<div class="thoughts"> structure are reconstructed from
# the CSS rules below and the "Show thinking" label -- confirm against the
# model's documented output format, including whether <details> should start
# open.
THINK_OPEN_TAG = "<think>"
THINK_CLOSE_TAG = "</think>"
THINK_OPEN_HTML = '<details><summary>Show thinking 🧠</summary><div class="thoughts">'
THINK_CLOSE_HTML = "</div></details>"


def search_web(query: str, num_results: int = 3) -> list[str]:
    """Search DuckDuckGo and return formatted excerpts of the top results.

    Args:
        query: Free-text search query; it is URL-encoded before being sent.
        num_results: Maximum number of results to return.

    Returns:
        Up to ``num_results`` strings of the form
        ``"Title: ...\\nExcerpt: ...\\n"``. On any failure a single-element
        list containing ``"Search error: <message>"`` is returned instead of
        raising, so the caller can always treat the result as text.
    """
    try:
        # Encode the search query
        encoded_query = quote_plus(query)

        # Make request to DuckDuckGo's HTML-only endpoint
        url = f"https://html.duckduckgo.com/html/?q={encoded_query}"
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        }
        response = requests.get(url, headers=headers, timeout=SEARCH_TIMEOUT_SECONDS)
        soup = BeautifulSoup(response.text, "html.parser")

        # Extract results. Stop once enough *complete* results are collected,
        # so result blocks missing a title or snippet do not eat into the quota.
        results: list[str] = []
        for result in soup.find_all("div", class_="result"):
            if len(results) >= num_results:
                break
            title = result.find("a", class_="result__a")
            snippet = result.find("a", class_="result__snippet")
            if title and snippet:
                results.append(
                    f"Title: {title.text.strip()}\nExcerpt: {snippet.text.strip()}\n"
                )
        return results
    except Exception as e:
        # Deliberate best-effort: search is optional, so report the failure as
        # text rather than aborting the chat turn.
        return [f"Search error: {str(e)}"]


client = InferenceClient("Pinkstack/Superthoughts-lite-v1")


def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
    enable_search: bool,
) -> Iterator[str]:
    """Stream the model's reply to ``message``.

    Args:
        message: The current user message.
        history: Earlier ``(user, assistant)`` turns; empty entries are skipped.
        system_message: Content of the leading system message.
        max_tokens: Passed through to the chat-completion call.
        temperature: Passed through to the chat-completion call.
        top_p: Passed through to the chat-completion call.
        enable_search: When true, web search excerpts are prepended to
            ``message`` before it is sent to the model.

    Yields:
        The accumulated reply so far, passed through ``format_response``, once
        per received token. If the completion call raises, a single
        ``"Error: <message>"`` string is yielded instead.
    """
    messages = [{"role": "system", "content": system_message}]

    # If search is enabled, get search results and add to context
    search_context = ""
    if enable_search:
        search_results = search_web(message)
        if search_results:
            search_context = (
                "Search results:\n"
                + "\n".join(search_results)
                + "\n\nBased on these results, "
            )

    # Add history to messages
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add current message with search context if enabled
    full_message = search_context + message if search_context else message
    messages.append({"role": "user", "content": full_message})

    # Initialize response
    response = ""

    # Stream the response
    try:
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Skip chunks that carry no choices instead of failing on [0].
            if not chunk.choices:
                continue
            token = chunk.choices[0].delta.content
            if token is not None:
                response += token
                yield format_response(response)
    except Exception as e:
        yield f"Error: {str(e)}"


def format_response(response: str) -> str:
    """Format the response with collapsible thinking sections.

    Replaces the opening and closing thinking markers with HTML that wraps the
    enclosed text in a ``<details>`` element styled by the ``.thoughts`` CSS
    class. Text without markers is returned unchanged.
    """
    response = response.replace(THINK_OPEN_TAG, THINK_OPEN_HTML)
    response = response.replace(THINK_CLOSE_TAG, THINK_CLOSE_HTML)
    return response


# Custom CSS for styling
css = """
.thoughts {
    border: 1px solid #ccc;
    padding: 10px;
    background-color: #000000;
    color: #ffffff;
    border-radius: 5px;
    margin: 5px 0;
}
details summary {
    cursor: pointer;
    padding: 5px;
    background-color: #000000;
    color: #ffffff;
    border-radius: 5px;
    font-weight: bold;
    margin: 5px 0;
}
details summary::-webkit-details-marker {
    display: none;
}
details summary:after {
    content: " ▶";
}
details[open] summary:after {
    content: " ▼";
}
/* ChatGPT-like UI */
.gradio-container {
    font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;
}
.chat-container {
    max-width: 800px;
    margin: auto;
}
.chat-message {
    padding: 10px;
    border-radius: 8px;
    margin-bottom: 10px;
}
.user-message {
    background-color: #f0f0f0;
    text-align: right;
}
.bot-message {
    background-color: #ffffff;
    text-align: left;
}
.message-text {
    white-space: pre-wrap;
}
.button-container {
    display: flex;
    justify-content: flex-end;
    gap: 10px; /* Space between buttons */
    margin-top: 5px;
}
"""

# Create Gradio interface
with gr.Blocks(css=css) as demo:
    gr.Markdown("# Chat with Superthoughts lite! (1.7B)")
    gr.Markdown("## NEW! We have added online web abilities, in advanced settings enable Web search.")
    gr.Markdown("**Warning:** The first output from the AI may take a few moments. After the first message, it should work at a decent speed, keep in mind that this chat is only meant for testing and experimenting.")

    chatbot = gr.Chatbot(height=600)

    with gr.Row():
        msg = gr.Textbox(
            label="Your message",
            placeholder="Type your message here...",
            scale=7,
            container=False,
        )
        submit_btn = gr.Button("Send", variant="primary", scale=1)
        stop_btn = gr.Button("Stop", variant="stop", scale=1)

    with gr.Accordion("Advanced Settings", open=False):
        enable_search = gr.Checkbox(
            label="Enable web search [Beta]",
            value=False,
            info="When enabled, the AI will search the web for relevant information before responding, powered by duckduckgo.",
        )
        # NOTE(review): the "... ..." below looks like tag names were stripped
        # from this default prompt as well; the text is kept exactly as
        # received -- confirm the intended wording against the model card.
        system_message = gr.Textbox(
            value="You must act in a conversational matter and always include at the start ... ... tokens.",
            label="System message",
        )
        max_tokens = gr.Slider(
            minimum=1,
            maximum=4096,
            value=512,
            step=1,
            label="Max new tokens",
        )
        temperature = gr.Slider(
            minimum=0.1,
            maximum=2.0,
            value=0.85,
            step=0.05,
            label="Temperature/Creativeness",
        )
        top_p = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        )

    def user(user_message: str, history: list) -> tuple[str, list]:
        """Add user message to history and clear the input box."""
        return "", history + [[user_message, None]]

    def bot(
        history: list,
        system_message: str,
        max_tokens: int,
        temperature: float,
        top_p: float,
        enable_search: bool,
    ) -> Iterator[list]:
        """Generate and stream bot responses into the last history entry."""
        user_message, _ = history[-1]
        history[-1][1] = ""  # Initialize bot's response
        # The pending turn is passed as `message`, so only the earlier turns
        # go in as history.
        for partial_response in respond(
            user_message,
            history[:-1],
            system_message,
            max_tokens,
            temperature,
            top_p,
            enable_search,
        ):
            history[-1][1] = partial_response
            yield history

    submit_event = msg.submit(
        user,
        [msg, chatbot],
        [msg, chatbot],
        queue=False,
    ).then(
        bot,
        [chatbot, system_message, max_tokens, temperature, top_p, enable_search],
        chatbot,
    )

    submit_click_event = submit_btn.click(
        user,
        [msg, chatbot],
        [msg, chatbot],
        queue=False,
    ).then(
        bot,
        [chatbot, system_message, max_tokens, temperature, top_p, enable_search],
        chatbot,
    )

    # The stop button runs no function of its own; it only cancels whichever
    # generation event is in flight.
    stop_btn.click(None, [], [], cancels=[submit_event, submit_click_event])

    # Add a clear button
    clear = gr.Button("Clear Conversation")
    clear.click(lambda: None, None, chatbot, queue=False)

    # Add disclaimer
    gr.Markdown(
        """
        ---
        ⚠️ **Disclaimer:** Superthoughts may make mistakes. Always verify important information.
        This chat interface is intended for testing and experimentation purposes only.
        """
    )

if __name__ == "__main__":
    demo.queue()
    demo.launch(share=True)