import gradio as gr
from huggingface_hub import InferenceClient
from typing import Iterator
import requests
from bs4 import BeautifulSoup
from urllib.parse import quote_plus


def search_web(query: str, num_results: int = 3) -> list[str]:
    """Search DuckDuckGo's HTML endpoint and return the title and snippet
    text of the first ``num_results`` results."""
    try:
        encoded_query = quote_plus(query)
        url = f"https://html.duckduckgo.com/html/?q={encoded_query}"
        # A browser-like User-Agent makes the request less likely to be blocked.
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        }
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        results = []
        for result in soup.find_all("div", class_="result")[:num_results]:
            title = result.find("a", class_="result__a")
            snippet = result.find("a", class_="result__snippet")
            if title and snippet:
                results.append(
                    f"Title: {title.text.strip()}\nExcerpt: {snippet.text.strip()}\n"
                )
        return results
    except Exception as e:
        return [f"Search error: {str(e)}"]


# Hosted Hugging Face inference endpoint for the Superthoughts-lite model.
client = InferenceClient("Pinkstack/Superthoughts-lite-v1")


def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
    enable_search: bool,
) -> Iterator[str]:
    """Stream a chat completion, optionally grounded in web search results."""
    messages = [{"role": "system", "content": system_message}]

    # When search is enabled, gather results up front so they can be prepended
    # to the user's message.
    search_context = ""
    if enable_search:
        search_results = search_web(message)
        if search_results:
            search_context = (
                "Search results:\n"
                + "\n".join(search_results)
                + "\n\nBased on these results, "
            )

    # Replay the prior conversation turns.
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    full_message = search_context + message if search_context else message
    messages.append({"role": "user", "content": full_message})

    response = ""
    try:
        # Stream tokens, re-rendering the formatted response as each arrives.
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            if chunk.choices[0].delta.content is not None:
                response += chunk.choices[0].delta.content
                yield format_response(response)
    except Exception as e:
        yield f"Error: {str(e)}"


def format_response(response: str) -> str:
    """Render <think>...</think> sections as collapsible HTML details blocks."""
    response = response.replace(
        "<think>",
        '<details open><summary>Show thinking 🧠</summary><div class="thoughts">',
    )
    response = response.replace("</think>", "</div></details>")
    return response
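
# Illustration (hypothetical model output):
#   format_response("<think>step 1</think><output>Hi!</output>")
#   -> '<details open><summary>Show thinking 🧠</summary>'
#      '<div class="thoughts">step 1</div></details><output>Hi!</output>'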


css = """
.thoughts {
    border: 1px solid #ccc;
    padding: 10px;
    background-color: #000000;
    color: #ffffff;
    border-radius: 5px;
    margin: 5px 0;
}
details summary {
    cursor: pointer;
    padding: 5px;
    background-color: #000000;
    color: #ffffff;
    border-radius: 5px;
    font-weight: bold;
    margin: 5px 0;
}
details summary::-webkit-details-marker {
    display: none;
}
details summary:after {
    content: " ▶";
}
details[open] summary:after {
    content: " ▼";
}

/* ChatGPT-like UI */
.gradio-container {
    font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;
}
.chat-container {
    max-width: 800px;
    margin: auto;
}
.chat-message {
    padding: 10px;
    border-radius: 8px;
    margin-bottom: 10px;
}
.user-message {
    background-color: #f0f0f0;
    text-align: right;
}
.bot-message {
    background-color: #ffffff;
    text-align: left;
}
.message-text {
    white-space: pre-wrap;
}
.button-container {
    display: flex;
    justify-content: flex-end;
    gap: 10px;  /* space between buttons */
    margin-top: 5px;
}
"""


with gr.Blocks(css=css) as demo:
    gr.Markdown("# Chat with Superthoughts lite! (1.7B)")
    gr.Markdown(
        "**Warning:** The first output from the AI may take a few moments. "
        "After the first message, it should respond at a decent speed. Keep in "
        "mind that this chat is only meant for testing and experimenting."
    )

    chatbot = gr.Chatbot(height=600)
    with gr.Row():
        msg = gr.Textbox(
            label="Your message",
            placeholder="Type your message here...",
            scale=7,
            container=False,
        )
        submit_btn = gr.Button("Send", variant="primary", scale=1)
        stop_btn = gr.Button("Stop", variant="stop", scale=1)

    with gr.Accordion("Advanced Settings", open=False):
        enable_search = gr.Checkbox(
            label="Enable web search [Beta]",
            value=False,
            info="When enabled, the AI will search the web for relevant information before responding, powered by DuckDuckGo.",
        )
        system_message = gr.Textbox(
            value="You must act in a conversational manner and always include at the start <think> ... </think> <output> ... </output> tokens.",
            label="System message",
        )
        max_tokens = gr.Slider(
            minimum=1,
            maximum=4096,
            value=512,
            step=1,
            label="Max new tokens",
        )
        temperature = gr.Slider(
            minimum=0.1,
            maximum=2.0,
            value=0.6,
            step=0.1,
            label="Temperature (creativity)",
        )
        top_p = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        )

    def user(user_message: str, history: list) -> tuple[str, list]:
        """Append the user's message to the history and clear the textbox."""
        return "", history + [[user_message, None]]

    def bot(
        history: list,
        system_message: str,
        max_tokens: int,
        temperature: float,
        top_p: float,
        enable_search: bool,
    ) -> Iterator[list]:
        """Generate and stream bot responses into the last history entry."""
        user_message, _ = history[-1]
        history[-1][1] = ""

        for partial_response in respond(
            user_message,
            history[:-1],
            system_message,
            max_tokens,
            temperature,
            top_p,
            enable_search,
        ):
            history[-1][1] = partial_response
            yield history

    # Wire up both submit paths (Enter key and Send button): first append the
    # user message to the chat, then stream the bot's reply.
    submit_event = msg.submit(
        user,
        [msg, chatbot],
        [msg, chatbot],
        queue=False,
    ).then(
        bot,
        [chatbot, system_message, max_tokens, temperature, top_p, enable_search],
        chatbot,
    )

    submit_click_event = submit_btn.click(
        user,
        [msg, chatbot],
        [msg, chatbot],
        queue=False,
    ).then(
        bot,
        [chatbot, system_message, max_tokens, temperature, top_p, enable_search],
        chatbot,
    )

    stop_btn.click(None, [], [], cancels=[submit_event, submit_click_event])
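
    # Note: the Stop button works via Gradio's event cancellation. The `bot`
    # steps above run through the queue (enabled by `demo.queue()` below),
    # which is what allows them to be cancelled mid-stream.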

    clear = gr.Button("Clear Conversation")
    clear.click(lambda: None, None, chatbot, queue=False)

    gr.Markdown(
        """
        ---
        ⚠️ **Disclaimer:** Superthoughts may make mistakes. Always verify important information.
        This chat interface is intended for testing and experimentation purposes only.
        """
    )


if __name__ == "__main__":
    demo.queue()  # enable queuing; required for streaming output and cancellation
    demo.launch()