wiwi-langing committed on
Commit
208fbeb
1 Parent(s): e8cd84c

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -62
app.py DELETED
@@ -1,62 +0,0 @@
1
- import streamlit as st
2
- from langchain.text_splitter import RecursiveCharacterTextSplitter
3
- from langchain.document_loaders import PyPDFLoader
4
- from transformers import T5Tokenizer, T5ForConditionalGeneration
5
- from transformers import pipeline
6
- import torch
7
- import base64
8
- import time
9
- from PIL import Image
10
-
11
- st.image("https://huggingface.co/spaces/wiwaaw/summary/resolve/main/banner.png")
12
-
13
- #MODEL AND TOKENIZER
14
- model_checkpoint = "MBZUAI/LaMini-Flan-T5-783M"
15
- model_tokenizer = T5Tokenizer.from_pretrained(model_checkpoint)
16
- model = T5ForConditionalGeneration.from_pretrained(model_checkpoint)
17
-
18
- #FILE LOADER AND PREPROCESSING
19
- def preprocess_pdf(file):
20
- loader = PyPDFLoader(file)
21
- pages = loader.load_and_split()
22
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=170, chunk_overlap=70)
23
- texts = text_splitter.split_documents(pages)
24
- final_text = ""
25
- for text in texts:
26
- final_text = final_text + text.page_content
27
- return final_text
28
-
29
- @st.cache_data
30
- #LLM PIPELINE
31
- def language_model_pipeline(filepath):
32
- summarization_pipeline = pipeline(
33
- 'summarization',
34
- model = model,
35
- tokenizer = model_tokenizer,
36
- max_length = 500,
37
- min_length = 32
38
- )
39
- input_text = preprocess_pdf(filepath)
40
- summary_result = summarization_pipeline(input_text)
41
- summarized_text = summary_result[0]['summary_text']
42
- return summarized_text
43
-
44
- title = st.title("PDF Summarization using LaMini")
45
- uploaded_file = st.file_uploader('Upload your PDF file', type=['pdf'])
46
- if uploaded_file is not None:
47
- st.success("File Uploaded")
48
- if st.button ("Summarize"):
49
- time.sleep(10)
50
-
51
- filepath = uploaded_file.name
52
- with open(filepath, "wb") as temp_file:
53
- temp_file.write(uploaded_file.read())
54
-
55
- summarized_result = language_model_pipeline(filepath)
56
- st.info("Summarization Complete")
57
- st.success(summarized_result)
58
-
59
-
60
-
61
-
62
-