Browse files
@@ -1,110 +1,7 @@
1 |
2 |
import time
3 |
import streamlit as st
4 |
from htmlTemplates import css, bot_template, user_template
5 |
from langchain.embeddings import HuggingFaceEmbeddings
6 |
from langchain.vectorstores import
7 |
from langchain.memory import ConversationBufferMemory
8 |
from langchain.chains import RetrievalQA
9 |
from pdfminer.high_level import extract_text
10 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
11 |
from transformers import AutoTokenizer, AutoModelForCausalLM
12 |
13 |
14 |
15 |
16 |
# Model identifier passed as `model_name` to HuggingFaceEmbeddings wherever embeddings are built in this file.
embeddings_model_name = 'sentence-transformers/all-MiniLM-L6-v2'
17 |
18 |
def get_pdf_text(pdf_path):
    """Extract the text of a PDF via pdfminer's ``extract_text``.

    Args:
        pdf_path: Passed unchanged to ``extract_text``.

    Returns:
        Whatever ``extract_text`` returns for the given input.
    """
    pdf_text = extract_text(pdf_path)
    return pdf_text
20 |
21 |
def get_pdf_text_chunks(pdf_text, *, chunk_size=1000, chunk_overlap=150):
    """Split extracted PDF text into overlapping chunks.

    Args:
        pdf_text: The text to split.
        chunk_size: ``chunk_size`` given to the splitter. Defaults to 1000,
            the value that was previously hard-coded.
        chunk_overlap: ``chunk_overlap`` given to the splitter. Defaults to
            150, the value that was previously hard-coded.

    Returns:
        The result of ``RecursiveCharacterTextSplitter.split_text``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(text=pdf_text)
24 |
25 |
def create_vector_store(target_source_chunks):
    """Build a Chroma store from text chunks using HuggingFace embeddings.

    Args:
        target_source_chunks: Texts handed to ``Chroma.from_texts``.

    Returns:
        The object returned by ``Chroma.from_texts``.

    Note:
        ``persist_directory`` is read from module scope; its definition is
        not visible in this excerpt.
    """
    embedder = HuggingFaceEmbeddings(model_name=embeddings_model_name)
    return Chroma.from_texts(
        texts=target_source_chunks,
        embedding=embedder,
        persist_directory=persist_directory,
    )
30 |
31 |
def get_vector_store(target_source_chunks):
    """Create a retriever over a Chroma instance bound to ``persist_directory``.

    Args:
        target_source_chunks: Used as ``k`` in the retriever's
            ``search_kwargs``.

    Returns:
        The object returned by ``Chroma.as_retriever``.

    Note:
        ``persist_directory`` is read from module scope; its definition is
        not visible in this excerpt.
    """
    embedder = HuggingFaceEmbeddings(model_name=embeddings_model_name)
    store = Chroma(persist_directory=persist_directory, embedding_function=embedder)
    search_options = {"k": target_source_chunks}
    return store.as_retriever(search_kwargs=search_options)
36 |
37 |
# Build the chain used to answer questions for the chat session.
# Loads the tokenizer and causal-LM weights for "TinyPixel/Llama-2-7B-bf16-sharded",
# creates a ConversationBufferMemory keyed on 'chat_history' (return_messages=True),
# and returns whatever RetrievalQA.from_llm produces.
# NOTE(review): the arguments of the RetrievalQA.from_llm( call are missing from this
# excerpt (the parenthesis is never closed), so how `retriever`, `model`, `tokenizer`
# and `memory` are wired into the chain cannot be confirmed here. Restore the missing
# lines from the original file before relying on this function.
def get_conversation_chain(retriever):
38 |
tokenizer = AutoTokenizer.from_pretrained("TinyPixel/Llama-2-7B-bf16-sharded")
39 |
model = AutoModelForCausalLM.from_pretrained("TinyPixel/Llama-2-7B-bf16-sharded")
40 |
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True,)
41 |
chain = RetrievalQA.from_llm(
42 |
43 |
44 |
45 |
46 |
return chain
47 |
48 |
49 |
def handle_userinput(user_question):
    """Send a question to the active chain and render the chat history.

    Shows a warning and returns without querying when no chain is stored in
    ``st.session_state.conversation``. Otherwise the chain is called with
    ``{'query': user_question}``, the ``'chat_history'`` entry of its
    response is saved to ``st.session_state.chat_history``, each message is
    rendered, and the elapsed time of the call is written out.

    Args:
        user_question: The text to send to the chain as ``query``.
    """
    # Local import keeps this block self-contained.
    import html

    conversation = st.session_state.conversation
    if conversation is None:
        st.warning("Please load the Vectorstore first!")
        # Stop here: calling ``None`` below would raise TypeError.
        return

    with st.spinner('Thinking...'):
        start_time = time.time()
        response = conversation({'query': user_question})
        end_time = time.time()

    st.session_state.chat_history = response['chat_history']

    for i, message in enumerate(st.session_state.chat_history):
        # Even positions use the user template, odd positions the bot
        # template (assumes the history alternates user/bot - confirm
        # against how the chain's memory is populated).
        template = user_template if i % 2 == 0 else bot_template
        # The content is spliced into HTML rendered with
        # unsafe_allow_html=True, so escape it to prevent markup injection.
        safe_content = html.escape(message.content)
        st.write(template.replace("{{MSG}}", safe_content), unsafe_allow_html=True)

    st.write('Elapsed time: {:.2f} seconds'.format(end_time - start_time))
68 |
69 |
70 |
71 |
72 |
73 |
# Streamlit entry point. Configures the page, renders three sidebar sliders,
# initialises `conversation` and `chat_history` in st.session_state, and shows
# the question input, the PDF uploader and the 'Start Chain' button. When the
# button is pressed with a file uploaded, it extracts the PDF text, chunks it,
# stores the result of create_vector_store in st.session_state.vector_store and
# assigns the result of get_conversation_chain to st.session_state.conversation.
# NOTE(review): this excerpt is a scraped diff view - indentation is lost and
# several lines are missing: the arguments of get_conversation_chain(, what
# appears to be an `else:` before the 'Please upload a PDF file first!' warning,
# the body of `if user_question:`, and the body of the `__main__` guard.
# Restore them from the original file.
def main():
74 |
75 |
st.set_page_config(page_title='Java Copilot :coffee:', page_icon=':rocket:', layout='wide', )
76 |
with st.sidebar.title(':gear: Parameters'):
77 |
model_n_ctx = st.sidebar.slider('Model N_CTX', min_value=128, max_value=2048, value=1024, step=2)
78 |
# NOTE(review): value=512 is above max_value whenever model_n_ctx is set below 512;
# a default outside [min_value, max_value] is expected to be rejected by Streamlit -
# consider value=min(512, model_n_ctx). Neither model_n_ctx nor model_n_batch is used
# anywhere else in the visible code.
model_n_batch = st.sidebar.slider('Model N_BATCH', min_value=1, max_value=model_n_ctx, value=512, step=2)
79 |
target_source_chunks = st.sidebar.slider('Target Source Chunks', min_value=1, max_value=10, value=4, step=1)
80 |
st.write(css, unsafe_allow_html=True)
81 |
82 |
# Seed the session-state keys that handle_userinput reads.
if "conversation" not in st.session_state:
83 |
st.session_state.conversation = None
84 |
if "chat_history" not in st.session_state:
85 |
st.session_state.chat_history = None
86 |
87 |
st.header('Java Copilot :coffee:')
88 |
st.subheader('Upload your PDF file and start chatting with it!')
89 |
user_question = st.text_input('Enter your message here:')
90 |
pdf_file = st.file_uploader("Upload PDF", type=['pdf'])
91 |
if st.button('Start Chain'):
92 |
if pdf_file is not None:
93 |
with st.spinner('Working in progress ...'):
94 |
pdf_text = get_pdf_text(pdf_file)
95 |
pdf_text_chunks = get_pdf_text_chunks(pdf_text)
96 |
st.session_state.vector_store = create_vector_store(pdf_text_chunks)
97 |
# NOTE(review): the argument list of this call is missing from the excerpt.
st.session_state.conversation = get_conversation_chain(
98 |
99 |
100 |
st.success('Vectorstore created successfully! You can start chatting now!')
101 |
102 |
st.warning('Please upload a PDF file first!')
103 |
104 |
105 |
# NOTE(review): the body of this `if` is missing from the excerpt.
if user_question:
106 |
107 |
108 |
109 |
# NOTE(review): the body of the entry-point guard is missing from the excerpt.
if __name__ == '__main__':
110 |
1 |
# Module names are case-sensitive: the package is `streamlit`, not `Streamlit`.
import streamlit as st
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Page title and header for the embedding-creation page.
st.title("Embedding Creation for Langchain")
st.header("This is a header")
# Uploader restricted to PDF files; several files may be selected at once.
files = st.file_uploader("Upload your files", accept_multiple_files=True, type="pdf")