import os
import gradio as gr
from anthropic import Anthropic
from pypdf import PdfReader
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Set up your Anthropic API key in HF secrets
ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY')
os.environ["ANTHROPIC_API_KEY"] = ANTHROPIC_API_KEY

# Set up username and password in HF secrets
username = os.getenv('username')
password = os.getenv('password')

# Function to chunk the document
def chunk_text(text, chunk_size=1000, overlap=100):
    chunks = []
    start = 0
    while start < len(text):
        end = start + chunk_size
        chunk = text[start:end]
        chunks.append(chunk)
        start = end - overlap
    return chunks

# Function to find the most relevant chunks
def get_relevant_chunks(query, chunks, top_n=3):
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(chunks + [query])
    cosine_similarities = cosine_similarity(tfidf_matrix[-1], tfidf_matrix[:-1]).flatten()
    relevant_indices = cosine_similarities.argsort()[-top_n:][::-1]
    return [chunks[i] for i in relevant_indices]

# Add the path to your desired knowledge base
reference_document = "Rosenshine+Principles+red.pdf"
reader = PdfReader(reference_document)
full_text = ''.join(page.extract_text() for page in reader.pages)
text_chunks = chunk_text(full_text)

def chat_with_assistant(message, history):
    # Find relevant chunks based on the user message
    relevant_chunks = get_relevant_chunks(message, text_chunks)
    context = "\n".join(relevant_chunks)

    # Prepare the system prompt
    ai_message = f"""You are an AI assistant answering questions based on a reference document.
You provide short, clear answers in simple language.
Use the following as context for all of your answers:

{context}
"""

    # Customize instructions as needed
    instructions = """
    """

    system_message = f"{ai_message} {instructions}"

    # Prepare the message array (the system prompt is passed separately below,
    # since the Anthropic Messages API does not accept a "system" role here)
    messages = []

    # Add conversation history
    for human_msg, ai_msg in history:
        messages.append({"role": "user", "content": human_msg})
        messages.append({"role": "assistant", "content": ai_msg})

    # Add the current user message
    messages.append({"role": "user", "content": message})

    # Create Anthropic client
    client = Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])

    # Make the API call
    response = client.messages.create(
        model="claude-3-5-sonnet-20240620",
        # model="claude-3-haiku-20240307",
        max_tokens=500,
        system=system_message,
        messages=messages
    )

    return response.content[0].text.strip()

# Gradio theme for an Anthropic-looking style
anthropic_theme = gr.themes.Default().set(
    body_background_fill="#FAF9F6",                    # Light beige background
    block_background_fill="#FFFFFF",                   # White for input blocks
    block_title_text_color="#4A4A4A",                  # Dark gray for text
    block_label_background_fill="#F6E3CE",             # Very light orange for labels
    input_background_fill="#FFFFFF",                   # White for input fields
    button_primary_background_fill="#D97758",          # Anthropic orange for primary buttons
    button_primary_background_fill_hover="#8A2BE2",    # Purple accent on hover
    button_primary_text_color="#FFFFFF",               # White text on buttons
    button_secondary_background_fill="#F5D0A9",        # Light orange for secondary buttons
    button_secondary_background_fill_hover="#F5D0A9",  # Same light orange on hover
    button_secondary_text_color="#4A4A4A",             # Dark gray text for secondary buttons
    block_border_width="1px",
    block_border_color="#E0E0E0",                      # Light gray border
)

# Gradio interface
iface = gr.ChatInterface(
    chat_with_assistant,
    chatbot=gr.Chatbot(height=500),
    textbox=gr.Textbox(placeholder="Type your message here...", container=False, scale=7),
    # Change name and description as desired
    title="Claude Custom Assistant",
    description="Chat with an AI assistant powered by Claude 3.5 Sonnet, custom instructions, and a reference document",
    theme=anthropic_theme,
    # Change examples as desired
    examples=["What are the key principles of instructional design?", "What might be barriers to learning?"],
    cache_examples=True,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
)

iface.launch(auth=(username, password))
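
# Optional: a minimal sketch of how you might sanity-check the TF-IDF retrieval step
# on its own, assuming you run it in a separate script or REPL rather than while the
# Gradio app is serving. The sample query below is hypothetical; substitute any
# question about your own reference document.
#
#   sample_query = "How should teachers review prior learning?"
#   for chunk in get_relevant_chunks(sample_query, text_chunks):
#       print(chunk[:200], "...")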