# AI MAKERSPACE PREPR 
# Date: 2024-5-16 

# Basic Imports & Setup
import os

# Using Chainlit for our UI
import chainlit as cl

# Getting the API key from the .env file
from dotenv import load_dotenv
load_dotenv()

# RAG pipeline imports and setup code
# Load the DeveloperWeek PDF directly from Google Drive
from langchain_community.document_loaders import PyMuPDFLoader

# Build the direct-download URL for the Google Drive file
file_id = "1UQnaQjBKKyWAiLdr6UlwSJovOp9zDdxr"
# file_id = "1JeA-w4kvbI3GHk9Dh_j19_Q0JUDE7hse"
# file_id = "12cvKg19CJf-wt98q5sPJctjp5fW-nsh6"  # Used for MLOps Meetup
direct_url = f"https://drive.google.com/uc?export=download&id={file_id}"


# Now load the document using the direct URL
docs = PyMuPDFLoader(direct_url).load()
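# Quick sanity check (optional addition): PyMuPDFLoader returns one Document per page
print(f"Loaded {len(docs)} pages from the PDF")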

import tiktoken

# Token-length helper for the splitter (gpt-3.5-turbo uses the cl100k_base encoding)
def tiktoken_len(text):
    tokens = tiktoken.encoding_for_model("gpt-3.5-turbo").encode(text)
    return len(tokens)
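# Example (sanity check): with the cl100k_base encoding that gpt-3.5-turbo uses,
# "hello world" encodes to 2 tokens.
# assert tiktoken_len("hello world") == 2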

# Split the document into chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,      # 500 tokens per chunk; experiment with this value
    chunk_overlap=50,    # 50 tokens of overlap between chunks; experiment with this value
    length_function=tiktoken_len,
)

split_chunks = text_splitter.split_documents(docs)
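# Optional diagnostic: confirm the split and check the largest chunk against the token budget
print(f"Split {len(docs)} pages into {len(split_chunks)} chunks; "
      f"largest chunk is {max(tiktoken_len(c.page_content) for c in split_chunks)} tokens")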

# Load the embeddings model
from langchain_openai.embeddings import OpenAIEmbeddings

embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")

# Load the vector store and retriever from Qdrant
from langchain_community.vectorstores import Qdrant

qdrant_vectorstore = Qdrant.from_documents(
    split_chunks,
    embedding_model,
    location=":memory:",
    collection_name="Prepr",
)

qdrant_retriever = qdrant_vectorstore.as_retriever()
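# Smoke test (optional): uncomment to inspect which chunks the retriever surfaces
# for doc in qdrant_retriever.invoke("What is grading?"):
#     print(doc.page_content[:200], "\n---")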

from langchain_openai import ChatOpenAI
openai_chat_model = ChatOpenAI(model="gpt-3.5-turbo")

from langchain_core.prompts import ChatPromptTemplate

RAG_PROMPT = """
SYSTEM:
You are a professional personal assistant who can answer only 4 questions:
1) What is email? You respond: Email Answer.
2) What is research? You respond: Research Answer.
3) What is lecture? You respond: Lecture Answer.
4) What is grading? You respond: Grading Answer.

CONTEXT:
{context}

QUERY:
{question}

ALL ANSWERS ARE Email Answer, Research Answer, Lecture Answer, or Grading Answer. If you cannot answer the question, you respond: Insufficient Data for Further Analysis.
"""
rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
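# Optional: render the template to eyeball the final prompt before wiring the chain
# print(rag_prompt.format(context="<retrieved chunks>", question="What is email?"))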

from operator import itemgetter
from langchain.schema.runnable import RunnablePassthrough

# Build the RAG chain:
#  1) retrieve context for the question and carry the question along,
#  2) pass the retrieved context through unchanged so it stays available downstream,
#  3) format the prompt, call the model, and return the response alongside the context.
retrieval_augmented_qa_chain = (
    {"context": itemgetter("question") | qdrant_retriever, "question": itemgetter("question")}
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | {"response": rag_prompt | openai_chat_model, "context": itemgetter("context")}
)
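# End-to-end check (optional): exercise the chain outside Chainlit
# print(retrieval_augmented_qa_chain.invoke({"question": "What is grading?"})["response"].content)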

# Chainlit App
@cl.on_chat_start
async def start_chat():
    # Store default model settings in the user session for reference
    # (the RAG chain above uses its own ChatOpenAI instance)
    settings = {
        "model": "gpt-3.5-turbo",
        "temperature": 0,
        "max_tokens": 500,
        "top_p": 1,
        "frequency_penalty": 0,
        "presence_penalty": 0,
    }
    cl.user_session.set("settings", settings)

@cl.on_message
async def main(message: cl.Message):
    chainlit_question = message.content
    # chainlit_question = "What was the total value of 'Cash and cash equivalents' as of December 31, 2023?"
    # Use the async API so the LangChain call doesn't block Chainlit's event loop
    response = await retrieval_augmented_qa_chain.ainvoke({"question": chainlit_question})
    chainlit_answer = response["response"].content

    msg = cl.Message(content=chainlit_answer)
    await msg.send()