Spaces:
Runtime error
Runtime error
import base64
import os
import tempfile
import time

import streamlit as st
import torch
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from PIL import Image
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import pipeline
st.image("https://huggingface.co/spaces/wiwaaw/summary/resolve/main/banner.png")

# MODEL AND TOKENIZER
model_checkpoint = "MBZUAI/LaMini-Flan-T5-783M"

# Streamlit re-executes this script from the top on every widget interaction,
# so an uncached load would re-read the whole checkpoint on each rerun.
# st.cache_resource keeps one shared copy per server process; on Streamlit
# versions that do not provide it, fall back to a plain (uncached) call.
_cache_model = getattr(st, "cache_resource", lambda func: func)


@_cache_model
def _load_model_and_tokenizer(checkpoint):
    """Return ``(tokenizer, model)`` loaded from the given checkpoint name."""
    return (
        T5Tokenizer.from_pretrained(checkpoint),
        T5ForConditionalGeneration.from_pretrained(checkpoint),
    )


model_tokenizer, model = _load_model_and_tokenizer(model_checkpoint)
# FILE LOADER AND PREPROCESSING
def preprocess_pdf(file):
    """Return the text of a PDF as a single concatenated string.

    The document is loaded with ``PyPDFLoader(file).load_and_split()``, split
    again with a ``RecursiveCharacterTextSplitter`` (``chunk_size=170``,
    ``chunk_overlap=70``), and the ``page_content`` of every resulting chunk
    is concatenated with no separator.

    Args:
        file: Passed unchanged to ``PyPDFLoader``; callers in this file pass
            the path of a PDF on disk.

    Returns:
        The concatenated chunk text, or ``""`` when the splitter yields no
        chunks.
    """
    loader = PyPDFLoader(file)
    pages = loader.load_and_split()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=170, chunk_overlap=70)
    texts = text_splitter.split_documents(pages)
    # NOTE(review): with a non-zero chunk_overlap, adjacent chunks presumably
    # share text, so the joined string may repeat passages -- confirm this is
    # intended before changing it.
    # str.join builds the result in one pass instead of re-copying the
    # accumulated string on every iteration.
    return "".join(text.page_content for text in texts)
# LLM PIPELINE
def language_model_pipeline(filepath):
    """Summarize the PDF at *filepath* and return the summary text.

    A ``'summarization'`` pipeline is built around the module-level ``model``
    and ``model_tokenizer`` with ``max_length=500`` and ``min_length=32``.
    It is then run on the string returned by ``preprocess_pdf(filepath)``.

    Args:
        filepath: Forwarded unchanged to ``preprocess_pdf``.

    Returns:
        The ``'summary_text'`` entry of the first result the pipeline returns.
    """
    length_limits = {"max_length": 500, "min_length": 32}
    summarizer = pipeline(
        'summarization',
        model=model,
        tokenizer=model_tokenizer,
        **length_limits,
    )
    document_text = preprocess_pdf(filepath)
    first_result = summarizer(document_text)[0]
    return first_result['summary_text']
# USER INTERFACE: upload a PDF, summarize it on demand, show the result.
title = st.title("PDF Summarization using LaMini")
uploaded_file = st.file_uploader('Upload your PDF file', type=['pdf'])
if uploaded_file is not None:
    st.success("File Uploaded")
    if st.button("Summarize"):
        # language_model_pipeline takes a path, so the upload has to exist on
        # disk. Use a private temp file, not the client-supplied name, so
        # nothing in the working directory can be overwritten.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_file:
            # getvalue() returns the whole upload regardless of the current
            # stream position, so a repeated click still writes the full PDF.
            temp_file.write(uploaded_file.getvalue())
            filepath = temp_file.name
        try:
            with st.spinner("Summarizing..."):
                summarized_result = language_model_pipeline(filepath)
        finally:
            # Remove the temp copy even when summarization raises.
            os.remove(filepath)
        st.info("Summarization Complete")
        st.success(summarized_result)