File size: 2,605 Bytes
892f4c0
 
 
 
 
 
 
 
 
4b62a43
 
 
892f4c0
13977d9
79ab819
 
 
 
 
 
 
 
 
 
 
 
 
4b62a43
 
 
 
 
 
79ab819
4b62a43
79ab819
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
892f4c0
79ab819
 
 
 
 
 
13977d9
 
 
 
892f4c0
13977d9
892f4c0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import streamlit as st
import os
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.document_loaders import TextLoader
from tempfile import NamedTemporaryFile

import re
def _first_line(text):
    """Return the part of *text* that precedes its first newline."""
    return text.partition("\n")[0]


def _session_fingerprint(api_key, file_bytes):
    """Return a digest identifying one (API key, document content) pair.

    Used as a cache key so the expensive index/chain is rebuilt only when the
    key or the uploaded content actually changes. The key is hashed rather
    than stored so it is not kept verbatim in session state.
    """
    import hashlib  # local import: only this helper needs it

    digest = hashlib.sha256()
    digest.update(api_key.encode("utf-8"))
    digest.update(b"\0")  # separator so key/content boundaries cannot blur
    digest.update(file_bytes)
    return digest.hexdigest()


def _load_documents(file_bytes):
    """Load raw upload bytes through ``TextLoader`` using a temporary file.

    The temporary file is closed (and therefore flushed) before the loader
    opens it by name, and it is always removed afterwards.
    """
    with NamedTemporaryFile(delete=False) as tmp:
        tmp.write(file_bytes)
        tmp_path = tmp.name
    try:
        return TextLoader(tmp_path, encoding="utf-8").load()
    finally:
        os.remove(tmp_path)


def _build_conversation_chain(chunks):
    """Embed *chunks* into a FAISS store and wrap it in a conversational chain."""
    embeddings = OpenAIEmbeddings()
    vectorstore = FAISS.from_documents(chunks, embedding=embeddings)

    llm = ChatOpenAI(temperature=0.3, model_name="gpt-4-turbo")
    memory = ConversationBufferMemory(
        memory_key='chat_history', return_messages=True, output_key='answer')
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        chain_type="stuff",
        retriever=vectorstore.as_retriever(),
        memory=memory,
        return_source_documents=True
    )


def main():
    """Render the document Q&A page.

    Flow: collect an OpenAI API key, accept a ``.txt`` upload, index it, then
    answer questions about it. Streamlit re-runs this function on every
    widget interaction, so the chain and the most recent result are kept in
    ``st.session_state`` instead of being rebuilt (and re-billed) each time.
    """
    st.title('Document-Based Q&A System')

    # API key input (masked); exported via the environment for the OpenAI
    # clients constructed below.
    api_key = st.text_input("Enter your OpenAI API key:", type="password")
    if api_key:
        os.environ["OPENAI_API_KEY"] = api_key
        st.success("API Key has been set!")

    # File uploader
    uploaded_file = st.file_uploader("Upload your document", type=['txt'])
    if uploaded_file is None:
        return

    # A key may also come from the process environment, so check that rather
    # than only the text box.
    active_key = os.environ.get("OPENAI_API_KEY")
    if not active_key:
        st.warning("Please enter your OpenAI API key to process the document.")
        return

    state = st.session_state
    file_bytes = uploaded_file.getvalue()
    fingerprint = _session_fingerprint(active_key, file_bytes)

    # (Re)build the index and chain only for a new document or a new key.
    if state.get("qa_fingerprint") != fingerprint:
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        chunks = text_splitter.split_documents(_load_documents(file_bytes))
        if not chunks:
            st.warning("The uploaded document contains no text to index.")
            return
        state["qa_chain"] = _build_conversation_chain(chunks)
        # Record the fingerprint only after a successful build so a failed
        # build is retried on the next run.
        state["qa_fingerprint"] = fingerprint
        state["qa_last_query"] = None
        state["qa_last_result"] = None
    conversation_chain = state["qa_chain"]

    # Question input
    query = st.text_input("Ask a question about the document:")
    if not query:
        return

    # Call the LLM only when the question changes; a rerun triggered by the
    # checkbox below reuses the stored result and does not add a duplicate
    # turn to the conversation memory.
    if state.get("qa_last_query") != query:
        state["qa_last_result"] = conversation_chain({"question": query})
        state["qa_last_query"] = query
    result = state["qa_last_result"]

    st.write("Answer:", result["answer"])

    # Optionally display the first line of each retrieved source chunk
    if st.checkbox("Show source text snippets"):
        st.write("Source documents:")
        for doc in result["source_documents"]:
            st.write(_first_line(doc.page_content))


# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
# Initialize the Streamlit app
# st.title('Document-Based Q&A System')