import hashlib
import re
from typing import Iterator

import gradio as gr
from huggingface_hub import InferenceClient

client = InferenceClient("Pinkstack/Superthoughts-lite-v1")
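
# `respond` below streams tokens from the hosted model via the chat-completion
# API. A minimal sketch of the underlying call (hypothetical one-off prompt,
# using the same `client` as above):
#
#   for chunk in client.chat_completion(
#       [{"role": "user", "content": "Hello"}], max_tokens=32, stream=True
#   ):
#       print(chunk.choices[0].delta.content or "", end="")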

def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
) -> Iterator[str]:
    messages = [{"role": "system", "content": system_message}]

    # Add history to messages
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add current message
    messages.append({"role": "user", "content": message})

    # Initialize response
    response = ""

    # Stream the response token by token
    try:
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            if chunk.choices[0].delta.content is not None:
                token = chunk.choices[0].delta.content
                response += token
                yield format_response(response)
    except Exception as e:
        yield f"Error: {str(e)}"
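
# Example of the transformation performed by `format_response` below: a reply
# such as
#   "<think>reasoning...</think><output>answer</output>"
# has its <think> block rewritten into a collapsible <details> element whose
# id is derived from an MD5 hash of the section content, so the same text
# re-rendered while streaming keeps a stable id (and its open/closed state).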

def format_response(response: str) -> str:
    """Format the response with collapsible thinking sections that maintain state."""

    def get_section_id(content: str) -> str:
        # Create a unique ID for each thinking section based on its content
        return hashlib.md5(content.encode()).hexdigest()[:8]

    # Find all thinking sections and replace them with uniquely identified
    # collapsible sections
    pattern = r"<think>(.*?)</think>"
    sections = re.findall(pattern, response, re.DOTALL)
    formatted = response
    for section in sections:
        section_id = get_section_id(section)
        old = f"<think>{section}</think>"
        new = (
            f'<details id="think_{section_id}" open>'
            f'<summary>Show thinking 🧠</summary>'
            f'<div class="thoughts">{section}</div></details>'
        )
        formatted = formatted.replace(old, new)
    return formatted

# Custom CSS for the collapsible thinking sections
css = """
.thoughts {
    border: 1px solid #ccc;
    padding: 10px;
    background-color: #000000;
    color: #ffffff;
    border-radius: 5px;
    margin: 5px 0;
}
details summary {
    cursor: pointer;
    padding: 5px;
    background-color: #000000;
    color: #ffffff;
    border-radius: 5px;
    font-weight: bold;
    margin: 5px 0;
}
/* Hide the browser's default disclosure marker and draw our own arrows */
details summary::-webkit-details-marker {
    display: none;
}
details summary:after {
    content: " ▶";
}
details[open] summary:after {
    content: " ▼";
}
"""

# Create the Gradio interface
with gr.Blocks(css=css) as demo:
    gr.Markdown("## Chat with Superthoughts lite! (1.7B)")
    gr.Markdown("**Note:** The first response may take a moment to initialize. Subsequent responses will be faster.")

    chatbot = gr.Chatbot(height=600)
    msg = gr.Textbox(label="Your message", placeholder="Type your message here...")

    with gr.Accordion("Advanced Settings", open=False):
        system_message = gr.Textbox(
            value="You must act in a conversational manner and always include <think> ... </think> <output> </output> tokens.",
            label="System message",
        )
        max_tokens = gr.Slider(
            minimum=1,
            maximum=4096,
            value=512,
            step=1,
            label="Max new tokens",
        )
        temperature = gr.Slider(
            minimum=0.1,
            maximum=4.0,
            value=0.7,
            step=0.1,
            label="Temperature",
        )
        top_p = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        )
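
    # Streaming works by yielding successively longer histories: Gradio
    # re-renders the Chatbot on every yield from `bot` below, after `user`
    # has echoed the new message with an empty assistant slot.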
    def user(user_message: str, history: list) -> tuple[str, list]:
        """Add the user's message to the history and clear the input box."""
        return "", history + [[user_message, None]]

    def bot(history: list, system_message: str, max_tokens: int, temperature: float, top_p: float) -> Iterator[list]:
        """Generate and stream the bot's response into the last history entry."""
        user_message, _ = history[-1]
        history[-1][1] = ""  # Initialize the bot's response
        for partial_response in respond(user_message, history[:-1], system_message, max_tokens, temperature, top_p):
            history[-1][1] = partial_response
            yield history

    # Set up chat message handling; keep a handle on the streaming step so
    # the Stop button can cancel it
    bot_event = msg.submit(
        user,
        [msg, chatbot],
        [msg, chatbot],
        queue=False,
    ).then(
        bot,
        [chatbot, system_message, max_tokens, temperature, top_p],
        chatbot,
    )

    with gr.Row():
        clear = gr.Button("Clear Conversation")
        stop = gr.Button("Stop Generation")

    # Wire up the buttons: Clear resets the chat, Stop cancels the streaming event
    clear.click(lambda: None, None, chatbot, queue=False)
    stop.click(None, None, None, cancels=[bot_event])

    # Add a disclaimer
    gr.Markdown(
        """
        ---
        ⚠️ **Disclaimer:** Superthoughts may make mistakes. Always verify important information.
        This chat interface is intended for testing and experimentation purposes only.
        """
    )

# Launch the interface
if __name__ == "__main__":
    demo.queue()
    demo.launch(share=True)
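
# Usage note: with share=True, Gradio serves the app locally and also prints a
# temporary public link in the console. Running this file assumes the `gradio`
# and `huggingface_hub` packages are installed (pip install gradio huggingface_hub).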