Priyanka-Balivada committed on
Commit
6cc785b
•
1 Parent(s): 73d84aa

Upload 2 files

Browse files
Files changed (2) hide show
  1. docapp.py +72 -0
  2. requirements.txt +81 -0
docapp.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain.text_splitter import CharacterTextSplitter
3
+ from langchain.docstore.document import Document
4
+ from langchain.chains.summarize import load_summarize_chain
5
+ from langchain_community.llms import CTransformers
6
+ from langchain.callbacks.manager import CallbackManager
7
+ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
8
+ from pypdf import PdfReader
9
+
10
# Page title: the same text is used for the browser tab and the on-page heading.
APP_TITLE = '🦜🔗 Text Summarization App'
st.set_page_config(page_title=APP_TITLE)
st.title(APP_TITLE)
13
+
14
# Function to read all PDF files and return text
def get_pdf_text(pdf_docs):
    """Extract and concatenate the text of every page of every given PDF.

    Args:
        pdf_docs: Iterable of PDF sources, each passed as-is to
            ``PdfReader`` (``main`` supplies the value returned by
            ``st.file_uploader``). ``None`` or an empty iterable is accepted.

    Returns:
        One string holding the page texts in document order, then page
        order, with no separator inserted between pages. The string is
        empty when there are no documents or no page yields any text.
    """
    # NOTE(review): pypdf is imported by this file but is not pinned in the
    # accompanying requirements.txt -- confirm it is installed where the app
    # is deployed.
    page_texts = []
    for pdf in pdf_docs or ():
        # A page without extractable text contributes an empty string
        # instead of breaking the concatenation.
        page_texts.extend(
            page.extract_text() or "" for page in PdfReader(pdf).pages
        )
    # Join once at the end rather than growing a string page by page.
    return "".join(page_texts)
22
+
23
# Function to split the text into smaller chunks and convert it into document format
def chunks_and_document(txt):
    """Split ``txt`` into chunks and wrap each chunk in a ``Document``.

    Args:
        txt: The full text to split.

    Returns:
        A list of ``Document`` objects, one per chunk, in the order the
        splitter produced them.
    """
    # NOTE(review): the splitter runs with its library defaults; confirm the
    # resulting chunk sizes fit the context window configured for the LLM.
    splitter = CharacterTextSplitter()
    return [Document(page_content=chunk) for chunk in splitter.split_text(txt)]
29
+
30
# Loading the Llama 2's LLM
def load_llm():
    """Create the ``CTransformers`` LLM wrapper used for summarization.

    The model is loaded from the file ``llama-2-7b-chat.ggmlv3.q2_K.bin``
    (a path relative to the working directory) with model type ``llama``.

    Returns:
        The configured ``CTransformers`` instance.
    """
    # NOTE(review): max_new_tokens (600) is close to context_length (700);
    # confirm that the prompt plus a text chunk still fits in the remainder.
    generation_config = {
        'max_new_tokens': 600,
        'temperature': 0.5,
        'context_length': 700,
    }

    # Loading the LLM model
    return CTransformers(
        model="llama-2-7b-chat.ggmlv3.q2_K.bin",
        model_type="llama",
        config=generation_config,
    )
45
+
46
# Function to apply the LLM model with our document
def chains_and_response(docs):
    """Run a map-reduce summarization chain over ``docs``.

    Args:
        docs: The documents to summarize, handed unchanged to the chain.

    Returns:
        Whatever the chain's ``invoke`` call returns for ``docs``.
    """
    summarize_chain = load_summarize_chain(load_llm(), chain_type='map_reduce')
    return summarize_chain.invoke(docs)
51
+
52
def main():
    """Render the app: upload PDFs in the sidebar, then show their summary.

    When "Submit & Process" is clicked, the uploaded PDFs are converted to
    text, split into documents, and summarized. A warning is shown instead
    when nothing was uploaded or when no text could be extracted.
    """
    # Initialize messages if not already present
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Sidebar for uploading PDF files
    with st.sidebar:
        st.title("Menu:")
        pdf_docs = st.file_uploader(
            "Upload your PDF Files and Click on the Submit & Process Button",
            accept_multiple_files=True,
        )
        if st.button("Submit & Process"):
            if not pdf_docs:
                st.warning("Please upload at least one PDF file before processing.")
                return

            with st.spinner("Processing..."):
                txt_input = get_pdf_text(pdf_docs)
                docs = chunks_and_document(txt_input)
                # Only run the (slow) model when there is something to summarize.
                response = chains_and_response(docs) if docs else None

            if response is None:
                st.warning("No extractable text was found in the uploaded files.")
                return

            # The chain result is a mapping; iterating it directly would
            # display its keys rather than the summary, so pull the text out.
            if isinstance(response, dict):
                summaries = [response.get("output_text", "")]
            elif isinstance(response, str):
                summaries = [response]
            else:
                summaries = list(response)

            st.title('📝✅ Summarization Result')
            for summary in summaries:
                st.info(summary)


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohttp==3.9.3
2
+ aiosignal==1.3.1
3
+ altair==5.2.0
4
+ annotated-types==0.6.0
5
+ anyio==4.2.0
6
+ async-timeout==4.0.3
7
+ attrs==23.2.0
8
+ blinker==1.7.0
9
+ cachetools==5.3.2
10
+ certifi==2024.2.2
11
+ charset-normalizer==3.3.2
12
+ click==8.1.7
13
+ colorama==0.4.6
14
+ ctransformers==0.2.27
15
+ dataclasses-json==0.6.4
16
+ exceptiongroup==1.2.0
17
+ filelock==3.13.1
18
+ frozenlist==1.4.1
19
+ fsspec==2024.2.0
20
+ gitdb==4.0.11
21
+ GitPython==3.1.41
22
+ greenlet==3.0.3
23
+ huggingface-hub==0.20.3
24
+ idna==3.6
25
+ importlib-metadata==7.0.1
26
+ Jinja2==3.1.3
27
+ jsonpatch==1.33
28
+ jsonpointer==2.4
29
+ jsonschema==4.21.1
30
+ jsonschema-specifications==2023.12.1
31
+ langchain==0.1.6
32
+ langchain-community==0.0.19
33
+ langchain-core==0.1.22
34
+ langsmith==0.0.87
35
+ markdown-it-py==3.0.0
36
+ MarkupSafe==2.1.5
37
+ marshmallow==3.20.2
38
+ mdurl==0.1.2
39
+ multidict==6.0.5
40
+ mypy-extensions==1.0.0
41
+ numpy==1.26.4
42
+ packaging==23.2
43
+ pandas==2.2.0
44
+ pillow==10.2.0
45
+ protobuf==4.25.2
46
+ py-cpuinfo==9.0.0
47
+ pyarrow==15.0.0
48
+ pydantic==2.6.1
49
+ pydantic_core==2.16.2
50
+ pydeck==0.8.1b0
51
+ Pygments==2.17.2
52
+ python-dateutil==2.8.2
53
+ pytz==2024.1
54
+ PyYAML==6.0.1
55
+ referencing==0.33.0
56
+ regex==2023.12.25
57
+ requests==2.31.0
58
+ rich==13.7.0
59
+ rpds-py==0.17.1
60
+ safetensors==0.4.2
61
+ six==1.16.0
62
+ smmap==5.0.1
63
+ sniffio==1.3.0
64
+ SQLAlchemy==2.0.25
65
+ streamlit==1.31.0
66
+ tenacity==8.2.3
67
+ tokenizers==0.15.1
68
+ toml==0.10.2
69
+ toolz==0.12.1
70
+ tornado==6.4
71
+ tqdm==4.66.2
72
+ transformers==4.37.2
73
+ typing-inspect==0.9.0
74
+ typing_extensions==4.9.0
75
+ tzdata==2023.4
76
+ tzlocal==5.2
77
+ urllib3==2.2.0
78
+ validators==0.22.0
79
+ watchdog==4.0.0
80
+ yarl==1.9.4
81
+ zipp==3.17.0