File size: 3,331 Bytes
892f4c0 76c1eb0 892f4c0 4b62a43 892f4c0 13977d9 79ab819 2ca54f8 79ab819 76c1eb0 79ab819 2ca54f8 79ab819 4b62a43 79ab819 4b62a43 79ab819 76c1eb0 79ab819 76c1eb0 79ab819 2ca54f8 79ab819 f1a21dd 79ab819 2ca54f8 52b4bc3 aea3c48 13977d9 892f4c0 13977d9 892f4c0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
import streamlit as st
import os
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain_community.vectorstores import Qdrant
from langchain.memory import ConversationBufferMemory
from langchain.document_loaders import TextLoader
from tempfile import NamedTemporaryFile
import re
def main():
    """Streamlit app: upload a .txt document and ask questions about it.

    Builds an in-memory Qdrant vector store from the uploaded file, wraps it
    in a ConversationalRetrievalChain driven by GPT-4-Turbo, and renders the
    answer plus the first line of each retrieved source chunk as a citation.

    Requires OPENAI_API_KEY to be present in the environment (set via app
    settings; the interactive key prompt was deliberately removed).
    """
    st.title('Dokument-basiertes Q&A System')

    uploaded_file = st.file_uploader("Dokument hochladen", type=['txt'])
    if uploaded_file is None:
        # Nothing to do until the user uploads a document.
        return

    # Persist the upload to a real file so TextLoader can open it by path.
    # delete=False lets us close the write handle BEFORE TextLoader reads the
    # file (required on Windows, and guarantees the buffer is flushed); the
    # original never closed-before-read and never removed the file, leaking
    # one temp file per upload. We clean up ourselves in `finally`.
    tmp = NamedTemporaryFile(delete=False, suffix=".txt")
    try:
        tmp.write(uploaded_file.getbuffer())
        tmp.close()
        loader = TextLoader(tmp.name, encoding="utf-8")
        documents = loader.load()
    finally:
        tmp.close()          # no-op if already closed
        os.unlink(tmp.name)  # fix: reclaim the temp file

    # Chunk the document for embedding; 200-char overlap keeps context across
    # chunk boundaries.
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = text_splitter.split_documents(documents)

    # Build the vector store. NOTE(review): this (and the chain/memory below)
    # is rebuilt on EVERY Streamlit rerun, so embeddings are re-computed per
    # interaction and ConversationBufferMemory never actually accumulates a
    # conversation — consider caching the chain in st.session_state keyed by
    # the uploaded file. Left as-is to preserve current behavior.
    embeddings = OpenAIEmbeddings()
    # vectorstore = FAISS.from_documents(chunks, embedding=embeddings)
    vectorstore = Qdrant.from_documents(
        chunks,
        embeddings,
        location=":memory:",  # local mode, in-memory storage only
        collection_name="my_documents",
    )

    # Conversational retrieval chain: "stuff" all retrieved chunks into one
    # prompt; output_key='answer' so the memory knows which field to store.
    llm = ChatOpenAI(temperature=0.3, model_name="gpt-4-turbo")
    memory = ConversationBufferMemory(
        memory_key='chat_history', return_messages=True, output_key='answer')
    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        chain_type="stuff",
        retriever=vectorstore.as_retriever(),
        memory=memory,
        return_source_documents=True,
    )

    query = st.text_input("Frag deinen Dokumenten!")
    if query:
        # System instruction is prepended to the user question (this chain
        # variant has no separate system-prompt slot).
        systemquery = "You are a fraud analyst. You must help your colleague to answer the question below. Do not hallucinate. Provide all the relevant legal text. Answer in German"
        result = conversation_chain({"question": systemquery + "\n\n" + query})
        st.write("Antwort:", result["answer"])
        st.write("Quellen:")
        for doc in result["source_documents"]:
            # Cite each source by its first line only (partition replaces the
            # original regex + span-slicing, with identical output).
            first_line, _, _ = doc.page_content.partition("\n")
            st.write(first_line)
# Run the Streamlit app only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
# Initialize the Streamlit app
# st.title('Document-Based Q&A System')
|