|
import os |
|
import gradio as gr |
|
from anthropic import Anthropic |
|
from pypdf import PdfReader |
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
from sklearn.metrics.pairwise import cosine_similarity |
|
|
|
|
|
# --- Configuration read from environment variables ---------------------------

# The Anthropic API key is mandatory. Fail fast with a readable message rather
# than the opaque TypeError that assigning None into os.environ would raise.
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
if not ANTHROPIC_API_KEY:
    raise RuntimeError(
        "The ANTHROPIC_API_KEY environment variable must be set before starting the app."
    )
# Kept so later code that reads the key back from os.environ keeps working.
os.environ["ANTHROPIC_API_KEY"] = ANTHROPIC_API_KEY

# Login credentials for the web UI; either may be None when the variable is unset.
username = os.getenv("username")
password = os.getenv("password")
|
|
|
|
|
def chunk_text(text, chunk_size=1000, overlap=100):
    """Split *text* into fixed-size character chunks that overlap.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters shared by consecutive chunks; must be
            smaller than ``chunk_size``.

    Returns:
        A list of chunks in document order. An empty *text* yields ``[]``.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or ``overlap`` is not
            smaller than ``chunk_size`` (the window would never advance and
            the loop would not terminate).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")

    step = chunk_size - overlap
    chunks = []
    start = 0
    while start < len(text):
        end = start + chunk_size
        chunks.append(text[start:end])
        if end >= len(text):
            # This chunk already reaches the end of the text; one more window
            # would only repeat its tail.
            break
        start += step
    return chunks


# Load the reference document once at import time and pre-compute its chunks.
# This must come after chunk_text is defined, because it calls it immediately.
reference_document = "Rosenshine+Principles+red.pdf"
reader = PdfReader(reference_document)
# Pages are joined with a newline so the last word of one page does not fuse
# with the first word of the next; `or ""` covers a page whose extraction
# yields nothing.
full_text = "\n".join(page.extract_text() or "" for page in reader.pages)
text_chunks = chunk_text(full_text)
|
|
|
|
|
def get_relevant_chunks(query, chunks, top_n=3):
    """Return the chunks most similar to *query* by TF-IDF cosine similarity.

    Args:
        query: The text to match against.
        chunks: A list of candidate text chunks.
        top_n: Maximum number of chunks to return.

    Returns:
        Up to ``top_n`` chunks ordered from most to least similar. Returns
        ``[]`` when there are no chunks or ``top_n`` is not positive.
    """
    # Without these guards an empty chunk list fails inside scikit-learn, and
    # top_n == 0 would return every chunk because [-0:] is the full slice.
    if not chunks or top_n <= 0:
        return []

    vectorizer = TfidfVectorizer()
    # Fit on the chunks plus the query so both share one vocabulary; the
    # query is the last row of the resulting matrix.
    tfidf_matrix = vectorizer.fit_transform([*chunks, query])
    similarities = cosine_similarity(tfidf_matrix[-1], tfidf_matrix[:-1]).flatten()

    # argsort is ascending, so reverse it and keep the first top_n indices.
    best_indices = similarities.argsort()[::-1][:top_n]
    return [chunks[i] for i in best_indices]
|
|
|
def chat_with_assistant(message, history):
    """Answer *message* with Claude, using the reference document as context.

    Args:
        message: The user's latest message.
        history: Earlier turns as an iterable of
            ``(user_message, assistant_message)`` pairs.

    Returns:
        The text of the assistant's reply with surrounding whitespace removed.
    """
    # Retrieve the passages of the reference document closest to the question
    # and hand them to the model as context.
    relevant_chunks = get_relevant_chunks(message, text_chunks)
    context = "\n".join(relevant_chunks)

    ai_message = (
        "You are an AI assistant answering questions based on a reference document.\n"
        "You provide short, clear answers in simple language.\n"
        "Use the following as context for all of your answers:\n"
        f"{context}\n"
    )

    # Placeholder for extra custom instructions appended to the system prompt.
    instructions = ""

    system_message = f"{ai_message}\n{instructions}".strip()

    # The Messages API only accepts "user" and "assistant" roles in `messages`;
    # the system prompt is passed separately through the `system` argument.
    messages = []
    for human_msg, ai_msg in history:
        messages.append({"role": "user", "content": human_msg})
        messages.append({"role": "assistant", "content": ai_msg})
    messages.append({"role": "user", "content": message})

    client = Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])

    response = client.messages.create(
        # The previous id ("claude-3-sonnet-20240307") combined the Claude 3
        # Sonnet name with another model's release date. The UI description
        # advertises Claude 3.5 Sonnet, so that model is used here.
        # NOTE(review): confirm this model id is still offered for your account.
        model="claude-3-5-sonnet-20240620",
        max_tokens=500,
        system=system_message,
        messages=messages,
    )

    return response.content[0].text.strip()
|
|
|
|
|
# Custom colour scheme layered on top of Gradio's default theme. The overrides
# are collected in one mapping and unpacked into `set()` as keyword arguments.
anthropic_theme = gr.themes.Default().set(
    **{
        # Page and block surfaces
        "body_background_fill": "#FAF9F6",
        "block_background_fill": "#FFFFFF",
        "block_title_text_color": "#4A4A4A",
        "block_label_background_fill": "#F6E3CE",
        # Inputs
        "input_background_fill": "#FFFFFF",
        # Primary buttons
        "button_primary_background_fill": "#D97758",
        "button_primary_background_fill_hover": "#8A2BE2",
        "button_primary_text_color": "#FFFFFF",
        # Secondary buttons
        "button_secondary_background_fill": "#F5D0A9",
        "button_secondary_background_fill_hover": "#F5D0A9",
        "button_secondary_text_color": "#4A4A4A",
        # Block borders
        "block_border_width": "1px",
        "block_border_color": "#E0E0E0",
    }
)
|
|
|
|
|
# Chat UI: every submitted message is answered by `chat_with_assistant`.
iface = gr.ChatInterface(
    chat_with_assistant,
    chatbot=gr.Chatbot(height=500),
    textbox=gr.Textbox(placeholder="Type your message here...", container=False, scale=7),
    title="Claude Custom Assistant",
    description=(
        "Chat with an AI assistant powered by Claude 3.5 Sonnet, "
        "custom instructions, and a reference document"
    ),
    theme=anthropic_theme,
    examples=[
        "What are the key principles of instructional design?",
        "What might be barriers to learning?",
    ],
    # NOTE(review): with caching enabled Gradio is expected to run the example
    # prompts through `chat_with_assistant` up front -- confirm that the
    # resulting API calls at startup are intended.
    cache_examples=True,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
)

# Start the web server, protected by the username/password login pair.
iface.launch(auth=(username, password))