prepr /
MikeCraBash's picture
86bc341 verified
history blame
4.21 kB
# Date: 2024-5-16
# Basic Imports & Setup
import os
from openai import AsyncOpenAI
# Using Chainlit for our UI
import chainlit as cl
from chainlit.prompt import Prompt, PromptMessage
from chainlit.playground.providers import ChatOpenAI
# Getting the API key from the .env file
from dotenv import load_dotenv
# RAG pipeline imports and setup code
# Get the DeveloperWeek PDF file (future implementation: direct download from URL)
from langchain.document_loaders import PyMuPDFLoader
# Adjust the URL to the direct download format
file_id = "1UQnaQjBKKyWAiLdr6UlwSJovOp9zDdxr"
#file_id = "1JeA-w4kvbI3GHk9Dh_j19_Q0JUDE7hse"
# file_id = "12cvKg19CJf-wt98q5sPJctjp5fW-nsh6" //Used for MLOps Meetup
# A bare file ID is neither a local path nor a URL, so the loader cannot open
# it. Wrap it in a direct-download URL instead.
# NOTE(review): the ID format looks like a Google Drive file ID; confirm the
# host and that the file is shared publicly.
direct_url = f"https://drive.google.com/uc?export=download&id={file_id}"
# Now load the document using the direct URL
docs = PyMuPDFLoader(direct_url).load()
import tiktoken
def tiktoken_len(text: str) -> int:
    """Return the number of tokens ``text`` encodes to for gpt-3.5-turbo.

    Used as the ``length_function`` of the text splitter so that chunk sizes
    are measured in model tokens rather than characters.

    Args:
        text: The string to measure.

    Returns:
        The length of the token sequence produced by the gpt-3.5-turbo
        tiktoken encoding.
    """
    tokens = tiktoken.encoding_for_model("gpt-3.5-turbo").encode(
        text,
    )
    return len(tokens)
# Split the document into chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Sizes are measured with tiktoken_len, i.e. in tokens, not characters.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,  # 500 tokens per chunk, experiment with this value
    chunk_overlap=50,  # 50 tokens overlap between chunks, experiment with this value
    length_function=tiktoken_len,
)
# Break the loaded documents into overlapping chunks ready for embedding.
split_chunks = text_splitter.split_documents(docs)
# Load the embeddings model
from langchain_openai.embeddings import OpenAIEmbeddings
# Read variables from a local .env file (if one exists) before any OpenAI
# client is constructed. This is a no-op when no .env file is present.
load_dotenv()
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
# Load the vector store and retriever from Qdrant
from langchain_community.vectorstores import Qdrant
# Embed every chunk and index it in Qdrant.
# NOTE(review): ":memory:" (an in-process, non-persistent store rebuilt on
# every start) is assumed here; point `location`/`url` at a real Qdrant server
# if persistence is wanted.
qdrant_vectorstore = Qdrant.from_documents(
    split_chunks,
    embedding_model,
    location=":memory:",
)
qdrant_retriever = qdrant_vectorstore.as_retriever()
# This import rebinds the name ChatOpenAI, which was imported earlier from
# chainlit.playground.providers; from here on it refers to the LangChain model.
from langchain_openai import ChatOpenAI
openai_chat_model = ChatOpenAI(model="gpt-3.5-turbo")
from langchain_core.prompts import ChatPromptTemplate
# Prompt template for the RAG chain. The {context} and {question} placeholders
# are filled from the dict produced by the earlier chain steps; without them
# the retrieved documents and the user's message would never reach the model.
RAG_PROMPT = """
CONTEXT:
{context}

QUERY:
{question}

You are a professional personal assistant who can answer only 4 request:
1) Please take care of Dr. Algorithm’s Email
2) What is research
3) What is lecture
4) What is grading
ANSWER THESE REQUEST in the format shown below:
REQUEST 1) When asked: "Please take care of Dr. Algorithm’s Email", you respond:
You have 3 emails from Dr. Tba requiring your immediate attention.
Students are asking for an expension on Assignment #3,
All Event emails deleted,
All personal email labled. Have a nice day.
Is there anything else I can help you with?"
REQUEST 2) When asked: What is research, you respond:
Research Answer.
REQUEST 3) When asked: What is lecture, you rspond:
Lecture Answer.
REQUEST 4) When asked: What is grading, you respond:
Grading Answer.
If you can not answer the question, you respond: Insufficent Data for Further Analysis
"""
rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
from operator import itemgetter
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
# RAG chain. Input: {"question": str}. Output: {"response": <chat model
# message>, "context": <retrieved documents>}.
retrieval_augmented_qa_chain = (
    # Step 1: send the question to the retriever and keep the question alongside.
    {"context": itemgetter("question") | qdrant_retriever, "question": itemgetter("question")}
    # Step 2: pass the dict through, re-assigning "context" to itself.
    | RunnablePassthrough.assign(context=itemgetter("context"))
    # Step 3: format the prompt, call the chat model, and return the context too.
    | {"response": rag_prompt | openai_chat_model, "context": itemgetter("context")}
)
# Chainlit App
@cl.on_chat_start
async def start_chat():
    """Store the default generation settings in the Chainlit user session.

    Registered with ``@cl.on_chat_start`` so Chainlit runs it when a chat
    session begins. The settings are saved under the ``"settings"`` key; the
    message handler visible in this file does not read them back.
    """
    settings = {
        "model": "gpt-3.5-turbo",
        "temperature": 0,
        "max_tokens": 500,
        "top_p": 1,
        "frequency_penalty": 0,
        "presence_penalty": 0,
    }
    cl.user_session.set("settings", settings)
@cl.on_message
async def main(message: cl.Message):
    """Answer an incoming chat message with the RAG chain.

    Registered with ``@cl.on_message`` so Chainlit calls it for each user
    message.

    Args:
        message: The incoming Chainlit message; its ``content`` is used as
            the question.
    """
    chainlit_question = message.content
    #chainlit_question = "What was the total value of 'Cash and cash equivalents' as of December 31, 2023?"
    # Use the async entry point so retrieval and the model call do not block
    # the event loop that serves the other chat sessions.
    response = await retrieval_augmented_qa_chain.ainvoke({"question": chainlit_question})
    # "response" holds the chat model's message; send only its text content.
    chainlit_answer = response["response"].content
    msg = cl.Message(content=chainlit_answer)
    await msg.send()