nurindahpratiwi committed on
Commit
358d4fa
1 Parent(s): 6780269

first commit

Browse files
Files changed (2) hide show
  1. app.py +54 -0
  2. requirements.txt +14 -0
app.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
3
+ from langchain.document_loaders import PyPDFLoader
4
+ from transformers import T5Tokenizer, T5ForConditionalGeneration
5
+ from transformers import pipeline
6
+ import torch
7
+ import base64
8
+ from PIL import Image
9
+
10
st.image("https://huggingface.co/spaces/wiwaaw/summary/resolve/main/banner.png")

# MODEL AND TOKENIZER
model_checkpoint = "MBZUAI/LaMini-Flan-T5-783M"


@st.cache_resource
def _load_model_and_tokenizer(checkpoint):
    """Load the T5 tokenizer and model for *checkpoint* once per process.

    Streamlit re-executes the whole script on every widget interaction.
    Without ``st.cache_resource`` the checkpoint would be re-read and
    re-instantiated on each rerun; with it, the same objects are reused.

    Args:
        checkpoint: Hugging Face model identifier or local path.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = T5Tokenizer.from_pretrained(checkpoint)
    seq2seq_model = T5ForConditionalGeneration.from_pretrained(checkpoint)
    return tokenizer, seq2seq_model


# Module-level names are kept so the rest of the script can keep using them.
model_tokenizer, model = _load_model_and_tokenizer(model_checkpoint)
16
+
17
+ #FILE LOADER AND PREPROCESSING
18
def preprocess_pdf(file):
    """Extract the text of a PDF as a single string.

    The PDF is loaded page by page, split into small chunks, and the chunk
    texts are joined back together.

    Args:
        file: Path to the PDF file on disk.

    Returns:
        The concatenated text of all chunks, separated by single spaces.
        An empty string if no text could be extracted.
    """
    loader = PyPDFLoader(file)
    pages = loader.load_and_split()
    # The chunks are re-joined below, so they must not overlap: a non-zero
    # chunk_overlap would repeat the shared text in the joined output.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=170, chunk_overlap=0)
    texts = text_splitter.split_documents(pages)
    # Join with a space so the last word of one chunk is not fused with the
    # first word of the next one.
    return " ".join(text.page_content for text in texts)
27
+
28
@st.cache_data
def _summarize_pdf(filepath, content_digest):
    """Summarize the PDF at *filepath*; cached per (path, content digest).

    Args:
        filepath: Path to the PDF file on disk.
        content_digest: Hex digest of the file's bytes. It is not used in
            the body; it exists only so the cache key changes whenever the
            file content changes.

    Returns:
        The generated summary text.
    """
    summarization_pipeline = pipeline(
        'summarization',
        model=model,
        tokenizer=model_tokenizer,
        max_length=500,
        min_length=32,
    )
    input_text = preprocess_pdf(filepath)
    summary_result = summarization_pipeline(input_text)
    return summary_result[0]['summary_text']


# LLM PIPELINE
def language_model_pipeline(filepath):
    """Return a summary of the PDF stored at *filepath*.

    Results are cached, but the cache is keyed on the file's content as well
    as its path. Keying on the path alone would return a stale summary when
    a different document is written to a previously used path.

    Args:
        filepath: Path to the PDF file on disk.

    Returns:
        The generated summary text.

    Raises:
        OSError: If the file cannot be opened for reading.
    """
    import hashlib

    with open(filepath, "rb") as pdf_file:
        content_digest = hashlib.sha256(pdf_file.read()).hexdigest()
    return _summarize_pdf(filepath, content_digest)
42
+
43
import hashlib
import os
import tempfile

title = st.title("PDF Summarization using LaMini")
uploaded_file = st.file_uploader('Upload your PDF file', type=['pdf'])
if uploaded_file is not None:
    st.success("File Uploaded")
    if st.button("Summarize"):
        # getvalue() returns the whole upload regardless of the current
        # stream position, so repeated clicks never write an empty file.
        pdf_bytes = uploaded_file.getvalue()

        # Do not trust the client-supplied filename for a path on disk.
        # A content-addressed name in the temp directory avoids that and
        # gives distinct documents distinct paths.
        digest = hashlib.sha256(pdf_bytes).hexdigest()
        filepath = os.path.join(tempfile.gettempdir(), f"{digest}.pdf")
        with open(filepath, "wb") as temp_file:
            temp_file.write(pdf_bytes)

        try:
            summarized_result = language_model_pipeline(filepath)
        finally:
            # Remove the temporary copy once the summary has been produced.
            try:
                os.remove(filepath)
            except FileNotFoundError:
                pass
        st.info("Summarization Complete")
        st.success(summarized_result)
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ sentence_transformers
3
+ torch
4
+ sentencepiece
5
+ transformers
6
+ accelerate
7
+ chromadb
8
+ pypdf
9
+ tiktoken
10
+ streamlit
11
+ fastapi
12
+ uvicorn
13
+ python-multipart
14
+ aiofiles