import os import gradio as gr from huggingface_hub import InferenceClient class XylariaChat: def __init__(self): # Securely load HuggingFace token self.hf_token = os.getenv("HF_TOKEN") if not self.hf_token: raise ValueError("HuggingFace token not found in environment variables") # Initialize the inference client self.client = InferenceClient( model="Qwen/QwQ-32B-Preview", # Changed model name token=self.hf_token ) # Initialize conversation history and persistent memory self.conversation_history = [] self.persistent_memory = {} # System prompt with more detailed instructions self.system_prompt = """You are Xylaria 1.4 Senoa, Made by Sk Md Saad Amin designed to provide helpful, accurate, and engaging support across a wide range of topics. Key guidelines for our interaction include: Core Principles: - Provide accurate and comprehensive assistance - Maintain a friendly and approachable communication style - Prioritize the user's needs and context Communication Style: - Be conversational and warm - Use clear, concise language - Occasionally use light, appropriate emoji to enhance communication - Adapt communication style to the user's preferences - Respond in english Important Notes: - I am an AI assistant created by an independent developer - I do not represent OpenAI or any other AI institution - For image-related queries, I can describe images or provide analysis, or generate or link to images directly Capabilities: - Assist with research, writing, analysis, problem-solving, and creative tasks - Answer questions across various domains - Provide explanations and insights - Offer supportive and constructive guidance """ def store_information(self, key, value): """Store important information in persistent memory""" self.persistent_memory[key] = value def retrieve_information(self, key): """Retrieve information from persistent memory""" return self.persistent_memory.get(key) def reset_conversation(self): """ Completely reset the conversation history and persistent memory This helps prevent exposing previous users' conversations """ self.conversation_history = [] self.persistent_memory = {} return [] def get_response(self, user_input): # Prepare messages with conversation context and persistent memory messages = [ {"role": "system", "content": self.system_prompt}, *self.conversation_history, {"role": "user", "content": user_input} ] # Add persistent memory context if available if self.persistent_memory: memory_context = "Remembered Information:\n" + "\n".join( [f"{k}: {v}" for k, v in self.persistent_memory.items()] ) messages.insert(1, {"role": "system", "content": memory_context}) # Generate response with streaming try: response_stream = self.client.text_generation( prompt=self.messages_to_prompt(messages), # Convert messages to prompt format max_new_tokens=1024, temperature=0.5, top_p=0.7, stream=True ) return response_stream except Exception as e: return f"Error generating response: {str(e)}" def messages_to_prompt(self, messages): """ Converts a list of messages in OpenAI format to a prompt string. """ prompt = "" for message in messages: if message["role"] == "system": prompt += f"<|im_start|>system\n{message['content']}<|im_end|>\n" elif message["role"] == "user": prompt += f"<|im_start|>user\n{message['content']}<|im_end|>\n" elif message["role"] == "assistant": prompt += f"<|im_start|>assistant\n{message['content']}<|im_end|>\n" prompt += "<|im_start|>assistant\n" return prompt def create_interface(self): # Local storage JavaScript functions (these are strings, not functions) load_from_local_storage_js = """ async () => { const savedHistory = localStorage.getItem('xylaria_chat_history'); return savedHistory ? JSON.parse(savedHistory) : []; } """ save_to_local_storage_js = """ async (chatHistory) => { localStorage.setItem('xylaria_chat_history', JSON.stringify(chatHistory)); } """ clear_local_storage_js = """ async () => { localStorage.removeItem('xylaria_chat_history'); } """ def streaming_response(message, chat_history): # Clear input textbox response_stream = self.get_response(message) # If it's an error, return immediately if isinstance(response_stream, str): return "", chat_history + [[message, response_stream]] # Prepare for streaming response full_response = "" updated_history = chat_history + [[message, ""]] # Streaming output for response_text in response_stream: full_response += response_text # Update the last message in chat history with partial response updated_history[-1][1] = full_response yield "", updated_history # Update conversation history self.conversation_history.append( {"role": "user", "content": message} ) self.conversation_history.append( {"role": "assistant", "content": full_response} ) # Limit conversation history to prevent token overflow if len(self.conversation_history) > 10: self.conversation_history = self.conversation_history[-10:] return "", updated_history # Custom CSS for Inter font custom_css = """ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap'); body, .gradio-container { font-family: 'Inter', sans-serif !important; } .chatbot-container .message { font-family: 'Inter', sans-serif !important; } .gradio-container input, .gradio-container textarea, .gradio-container button { font-family: 'Inter', sans-serif !important; } """ with gr.Blocks(theme='soft', css=custom_css) as demo: # Chat interface with improved styling with gr.Column(): chatbot = gr.Chatbot( label="Xylaria 1.4 Senoa", height=500, show_copy_button=True, # type="messages" # Use the 'messages' format ) # Input row with improved layout with gr.Row(): txt = gr.Textbox( show_label=False, placeholder="Type your message...", container=False, scale=4 ) btn = gr.Button("Send", scale=1) # Clear history and memory buttons clear = gr.Button("Clear Conversation") clear_memory = gr.Button("Clear Memory") # Use `gr.State` to manage initial chatbot value and `demo.load` for initialization initial_chat_history = gr.State([]) demo.load( fn=lambda: initial_chat_history.value, inputs=None, outputs=[chatbot], js=load_from_local_storage_js ) # Submit functionality with local storage save btn.click( fn=streaming_response, inputs=[txt, chatbot], outputs=[txt, chatbot] ).then( fn=None, inputs=[chatbot], # Pass chatbot history to JavaScript outputs=None, js=save_to_local_storage_js ) txt.submit( fn=streaming_response, inputs=[txt, chatbot], outputs=[txt, chatbot] ).then( fn=None, inputs=[chatbot], # Pass chatbot history to JavaScript outputs=None, js=save_to_local_storage_js ) # Clear conversation history with local storage clear clear.click( fn=lambda: [], inputs=None, outputs=[chatbot] ).then( fn=None, inputs=None, outputs=None, js=clear_local_storage_js ) # Clear persistent memory and reset conversation with local storage clear clear_memory.click( fn=self.reset_conversation, inputs=None, outputs=[chatbot] ).then( fn=None, inputs=None, outputs=None, js=clear_local_storage_js ) return demo # Launch the interface def main(): chat = XylariaChat() interface = chat.create_interface() interface.launch( share=True, # Optional: create a public link debug=True # Show detailed errors ) if __name__ == "__main__": main()