# summarizer / app.py
# Last commit: "Update app.py" (ad63b2b) by arithescientist
# File size: 1.21 kB
import gradio as gr
import numpy as np
import pytesseract as pt
import pdf2image
from fpdf import FPDF
import re
import nltk
from nltk.tokenize import sent_tokenize
from nltk.tokenize import word_tokenize
import os
import pdfkit
import yake
from summarizer import Summarizer,TransformerSummarizer
from transformers import pipelines
# Fetch the NLTK 'punkt' resource at startup.
nltk.download('punkt')
from transformers import AutoTokenizer, AutoModelForPreTraining, AutoConfig, AutoModel
from transformers import AutoTokenizer, AutoModelForCausalLM

# Checkpoint that is actually loaded below; it is also what the startup log
# reports. Other checkpoints noted as alternatives:
#   'distilbert-base-uncased', 'nlpaueb/legal-bert-base-uncased',
#   'laxya007/gpt2_legal', 'facebook/bart-large-cnn'
model_name = 'laxya007/gpt2_BSA_Legal_Initiproject_OE_OS_BRM'

# The extractive summarizer reads hidden states from the model output, so the
# custom model must be configured to return them.
model_config = AutoConfig.from_pretrained(model_name)
model_config.output_hidden_states = True

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, config=model_config)

# Name kept as-is because `lincoln` looks it up at call time.
bert_legal_model = Summarizer(custom_model=model, custom_tokenizer=tokenizer)
print('Using model {}\n'.format(model_name))
def lincoln(input_text: str) -> str:
    """Summarize ``input_text`` with the module-level ``bert_legal_model``.

    Args:
        input_text: Text to summarize, as passed in by the Gradio interface.

    Returns:
        The summary produced by ``bert_legal_model``, or an empty string when
        the input is ``None``, empty, or whitespace only.
    """
    # Nothing to summarize: skip the model call entirely.
    if not input_text or not input_text.strip():
        return ""
    # min_length and ratio are forwarded to the summarizer unchanged.
    output_text = bert_legal_model(input_text, min_length=8, ratio=0.05)
    # Return the summary; without this the caller only ever receives None.
    return output_text
# Gradio UI: a single text input handed to `lincoln`, and a single text output.
iface = gr.Interface(fn=lincoln, inputs="text", outputs="text")

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch(share=False)