red1xe committed on
Commit
a942c83
1 Parent(s): 49e9591

embeddings

Browse files
Files changed (1) hide show
  1. app.py +5 -108
app.py CHANGED
@@ -1,110 +1,7 @@
1
- import os
2
- import time
3
- import streamlit as st
4
- from htmlTemplates import css, bot_template, user_template
5
  from langchain.embeddings import HuggingFaceEmbeddings
6
- from langchain.vectorstores import Chroma
7
- from langchain.memory import ConversationBufferMemory
8
- from langchain.chains import RetrievalQA
9
- from pdfminer.high_level import extract_text
10
- from langchain.text_splitter import RecursiveCharacterTextSplitter
11
- from transformers import AutoTokenizer, AutoModelForCausalLM
12
 
13
-
14
- # Updated Prompt Template
15
- persist_directory = 'db'
16
- embeddings_model_name = 'sentence-transformers/all-MiniLM-L6-v2'
17
-
18
- def get_pdf_text(pdf_path):
19
- return extract_text(pdf_path)
20
-
21
- def get_pdf_text_chunks(pdf_text):
22
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
23
- return text_splitter.split_text(text=pdf_text)
24
-
25
- def create_vector_store(target_source_chunks):
26
- embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)
27
- db = Chroma.from_texts(texts=target_source_chunks, persist_directory=persist_directory, embedding=embeddings)
28
- db.persist()
29
- return db
30
-
31
- def get_vector_store(target_source_chunks):
32
- embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)
33
- db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
34
- retriver = db.as_retriever(search_kwargs={"k": target_source_chunks})
35
- return retriver
36
-
37
- def get_conversation_chain(retriever):
38
- tokenizer = AutoTokenizer.from_pretrained("TinyPixel/Llama-2-7B-bf16-sharded")
39
- model = AutoModelForCausalLM.from_pretrained("TinyPixel/Llama-2-7B-bf16-sharded")
40
- memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True,)
41
- chain = RetrievalQA.from_llm(
42
- llm=model,
43
- memory=memory,
44
- retriever=retriever,
45
- )
46
- return chain
47
-
48
-
49
- def handle_userinput(user_question):
50
- if st.session_state.conversation is None:
51
- st.warning("Please load the Vectorstore first!")
52
- return
53
- else:
54
- with st.spinner('Thinking...', ):
55
- start_time = time.time()
56
- response = st.session_state.conversation({'query': user_question})
57
- end_time = time.time()
58
-
59
- st.session_state.chat_history = response['chat_history']
60
-
61
- for i, message in enumerate(st.session_state.chat_history):
62
- if i % 2 == 0:
63
- st.write(user_template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
64
- else:
65
- st.write(bot_template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
66
-
67
- st.write('Elapsed time: {:.2f} seconds'.format(end_time - start_time))
68
- st.balloons()
69
-
70
-
71
-
72
-
73
- def main():
74
-
75
- st.set_page_config(page_title='Java Copilot :coffee:', page_icon=':rocket:', layout='wide', )
76
- with st.sidebar.title(':gear: Parameters'):
77
- model_n_ctx = st.sidebar.slider('Model N_CTX', min_value=128, max_value=2048, value=1024, step=2)
78
- model_n_batch = st.sidebar.slider('Model N_BATCH', min_value=1, max_value=model_n_ctx, value=512, step=2)
79
- target_source_chunks = st.sidebar.slider('Target Source Chunks', min_value=1, max_value=10, value=4, step=1)
80
- st.write(css, unsafe_allow_html=True)
81
-
82
- if "conversation" not in st.session_state:
83
- st.session_state.conversation = None
84
- if "chat_history" not in st.session_state:
85
- st.session_state.chat_history = None
86
-
87
- st.header('Java Copilot :coffee:')
88
- st.subheader('Upload your PDF file and start chatting with it!')
89
- user_question = st.text_input('Enter your message here:')
90
- pdf_file = st.file_uploader("Upload PDF", type=['pdf'])
91
- if st.button('Start Chain'):
92
- if pdf_file is not None:
93
- with st.spinner('Working in progress ...'):
94
- pdf_text = get_pdf_text(pdf_file)
95
- pdf_text_chunks = get_pdf_text_chunks(pdf_text)
96
- st.session_state.vector_store = create_vector_store(pdf_text_chunks)
97
- st.session_state.conversation = get_conversation_chain(
98
- retriever=st.session_state.vector_store,
99
- )
100
- st.success('Vectorstore created successfully! You can start chatting now!')
101
- else:
102
- st.warning('Please upload a PDF file first!')
103
-
104
-
105
- if user_question:
106
- handle_userinput(user_question)
107
-
108
-
109
- if __name__ == '__main__':
110
- main()
 
1
+ import streamlit as st
 
 
 
2
  from langchain.embeddings import HuggingFaceEmbeddings
3
+ from langchain.vectorstores import FAISS
 
 
 
 
 
4
 
5
+ st.title("Embedding Creation for Langchain")
6
+ st.header("This is a header")
7
+ files = st.file_uploader("Upload your files", accept_multiple_files=True, type="pdf")