# AI Makerspace Hackathon: Prepr
# Date: 2024-05-16


# Basic Imports & Setup
import os
from transformers import AutoModelForCausalLM, AutoTokenizer

# Using Chainlit for our UI
import chainlit as cl

# Getting the API key from the .env file
from dotenv import load_dotenv
load_dotenv()
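# If the models below need authentication, the token can then be read from the
# environment, e.g. os.getenv("HF_TOKEN") -- the variable name is an assumption;
# match whatever you put in your .env file.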

# RAG pipeline imports and setup
# Load the DeveloperWeek PDF file from Google Drive via a direct-download URL
from langchain_community.document_loaders import PyMuPDFLoader

# Build the direct-download URL from the Drive file id
file_id = "1JeA-w4kvbI3GHk9Dh_j19_Q0JUDE7hse"
direct_url = f"https://drive.google.com/uc?export=download&id={file_id}"

# Load the document; the loader fetches web paths to a temporary file before parsing
docs = PyMuPDFLoader(direct_url).load()
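
# Optional sanity check: PyMuPDFLoader returns one Document per PDF page
print(f"Loaded {len(docs)} pages from the conference PDF")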

# Tokenizer for token-length measurements (loaded once, not on every call)
tokenizer = AutoTokenizer.from_pretrained("solar-1-mini-chat")

# Calculate token length using the Hugging Face tokenizer
def hf_token_len(text):
    return len(tokenizer.encode(text))

# Split the document into chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,     # 500 tokens per chunk; experiment with this value
    chunk_overlap=50,   # 50 tokens of overlap between chunks; experiment with this value
    length_function=hf_token_len,
)

split_chunks = text_splitter.split_documents(docs)
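
# Optional sanity check: confirm the chunks respect the 500-token budget
print(f"{len(split_chunks)} chunks; longest is "
      f"{max(hf_token_len(chunk.page_content) for chunk in split_chunks)} tokens")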

# Load the embeddings model (the id must resolve locally or on the Hugging Face Hub)
from langchain_community.embeddings import HuggingFaceEmbeddings

embedding_model = HuggingFaceEmbeddings(model_name="solar-embedding-1-large")

# Build the vector store and retriever with Qdrant
from langchain_community.vectorstores import Qdrant

qdrant_vectorstore = Qdrant.from_documents(
    split_chunks,
    embedding_model,
    location=":memory:",
    collection_name="Prepr",
)

qdrant_retriever = qdrant_vectorstore.as_retriever()
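
# The retriever returns the top 4 chunks by default; pass search_kwargs to tune
# it, e.g. qdrant_vectorstore.as_retriever(search_kwargs={"k": 6})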

# Load the Solar chat model (the id must resolve locally or on the Hugging Face Hub)
model = AutoModelForCausalLM.from_pretrained("solar-1-mini-chat")

# Wrap the raw transformers model so it can be piped into an LCEL chain;
# a bare AutoModelForCausalLM is not a LangChain Runnable
from transformers import pipeline
from langchain_community.llms import HuggingFacePipeline

llm = HuggingFacePipeline(pipeline=pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=500,      # matches the max_tokens setting used below
    return_full_text=False,  # return only the completion, not the echoed prompt
))

from langchain.prompts import ChatPromptTemplate

RAG_PROMPT = """
SYSTEM:
You are a professional, helpful personal assistant who provides information about conferences.
You provide helpful responses to busy professionals who ask questions about conferences.

You can have a long conversation with the user about conferences.
When you talk with the user about conferences, it can be a "transactional conversation" in a prompt-response format: one prompt from the user followed by one response from you.

Here is an example of a transactional conversation:
User: When is the conference?
You: The conference is on June 1st, 2024. What else would you like to know?

It can also be a chain of questions and answers, where you and the user continue the chain until the user says "Got it".
Here is an example of a chained conversation:
User: What sessions should I attend?
You: You should attend the keynote session by Bono. Would you like to know more?
User: Yes
You: The keynote session by Bono is on June 1st, 2024. What else would you like?

If asked a question about a session, you can provide detailed information about the session.
If there are multiple sessions, you can provide information about each session.

The format of session related replies is:
Title:
Description:
Speaker:
Background:
Date:
Topics to Be Covered:
Questions to Ask:

CONTEXT:
{context}

QUERY:
{question}
Most questions are about the date, location, and purpose of the conference.
You may be asked for fine details about the conference regarding the speakers, sponsors, and attendees.
You are capable of looking up information and providing detailed responses.
When asked a question about a conference, you should provide a detailed response.
After completing your response, ask the user if they would like to know more about the conference, e.g. "Hope that helps! Would you like to know more?".
If the user says "yes", provide more information about the conference. If the user says "no", say "Goodbye!" or ask if they would like to provide feedback.
If you are asked a question about Cher, you should respond with "Rock on With Your Bad Self!".
If you cannot answer the question, say "I am sorry, I do not have that information, but I am always here to help you with any other questions you may have."
"""
rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)

from operator import itemgetter
from langchain.schema.runnable import RunnablePassthrough

retrieval_augmented_qa_chain = (
    {"context": itemgetter("question") | qdrant_retriever, "question": itemgetter("question")}
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | {"response": rag_prompt | model, "context": itemgetter("context")}
)
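
# Quick smoke test outside Chainlit (uncomment to try it from the command line):
# print(retrieval_augmented_qa_chain.invoke({"question": "When is the conference?"})["response"])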

# Chainlit App
@cl.on_chat_start
async def start_chat():
    settings = {
        "model": "solar-1-mini-chat",
        "temperature": 0,
        "max_tokens": 500,
        "top_p": 1,
        "frequency_penalty": 0,
        "presence_penalty": 0,
    }
    cl.user_session.set("settings", settings)
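    # These settings are only stored for now; read them back in a handler with
    # cl.user_session.get("settings") if you want to wire them into generation.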

@cl.on_message
async def main(message: cl.Message):
    chainlit_question = message.content
    response = retrieval_augmented_qa_chain.invoke({"question": chainlit_question})
    chainlit_answer = response["response"]  # the HuggingFacePipeline LLM returns a plain string

    msg = cl.Message(content=chainlit_answer)
    await msg.send()
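
# To launch the app (assuming this file is saved as app.py):
#   chainlit run app.py -w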