Spaces:

mushroomsolutions
/

Image_Annotation

Runtime error

App Files Files Community

Image_Annotation / app.py

srinivas-mushroom

Update app.py

f33afb3 almost 2 years ago

raw

history blame contribute delete

1.83 kB

	import gradio as gr
	import PyPDF2
	import io
	import requests
	import torch
	from transformers import AutoTokenizer, AutoModelForQuestionAnswering

	# Hugging Face Hub checkpoint shared by the tokenizer and the QA model.
	model_name = "distilbert-base-cased-distilled-squad"
	# Both objects are built once at import time and reused by every request.
	model = AutoModelForQuestionAnswering.from_pretrained(model_name)
	tokenizer = AutoTokenizer.from_pretrained(model_name)

	# Fixed set of questions offered to the user in the "Question" dropdown.
	predefined_questions = [
	    "What is the purpose of this document?",
	    "What is the main topic of the document?",
	    "Who is the target audience?",
	    "What is the author's main argument?",
	    "What is the conclusion of the document?",
	]

	def _extract_pdf_text(pdf_file):
	    """Return the stripped, concatenated text of every page of a PDF.

	    Args:
	        pdf_file: The uploaded document. Accepted forms are a binary
	            file-like object exposing ``read()``, a filesystem path given
	            as ``str``, or the raw PDF content as ``bytes``/``bytearray``.
	            (Which form Gradio hands over depends on the Gradio version and
	            the File component's ``type`` setting.)

	    Returns:
	        The text of all pages joined together, with leading and trailing
	        whitespace removed. May be empty, e.g. for image-only PDFs.
	    """
	    # Normalise every accepted input form to raw bytes first.
	    if isinstance(pdf_file, (bytes, bytearray)):
	        pdf_bytes = bytes(pdf_file)
	    elif isinstance(pdf_file, str):
	        with open(pdf_file, "rb") as handle:
	            pdf_bytes = handle.read()
	    else:
	        pdf_bytes = pdf_file.read()
	    stream = io.BytesIO(pdf_bytes)

	    if hasattr(PyPDF2, "PdfReader"):
	        # Current API: the camelCase names used previously were removed
	        # in PyPDF2 3.0 and raise an error there.
	        reader = PyPDF2.PdfReader(stream)
	        page_texts = [page.extract_text() or "" for page in reader.pages]
	    else:
	        # Fallback for old PyPDF2 releases that only ship the legacy names.
	        reader = PyPDF2.PdfFileReader(stream)
	        page_texts = [
	            reader.getPage(index).extractText()
	            for index in range(reader.getNumPages())
	        ]

	    return "".join(page_texts).strip()


	def answer_questions(pdf_file, question):
	    """Answer ``question`` from the text of an uploaded PDF.

	    The PDF text is extracted, encoded together with the question, and fed
	    to the module-level question-answering ``model``; the answer is the
	    token span between the highest-scoring start and end positions.

	    Args:
	        pdf_file: The uploaded PDF (file-like object, path string, or
	            bytes), or ``None`` when nothing was uploaded.
	        question: The question text selected by the user.

	    Returns:
	        The predicted answer string. It is empty when the predicted end
	        position precedes the predicted start. A short explanatory message
	        is returned instead when the upload or question is missing, or
	        when the PDF contains no extractable text.
	    """
	    if pdf_file is None:
	        return "Please upload a PDF document."
	    if not question:
	        return "Please select a question."

	    text = _extract_pdf_text(pdf_file)
	    if not text:
	        return "No extractable text was found in the PDF."

	    # The model cannot process more positions than it has position
	    # embeddings for, so longer documents are cut to fit. Only the document
	    # (the second sequence) is truncated; the question is kept whole.
	    # NOTE: text beyond this limit is not searched for an answer.
	    max_tokens = getattr(model.config, "max_position_embeddings", 512)
	    input_ids = tokenizer.encode(
	        question,
	        text,
	        truncation="only_second",
	        max_length=max_tokens,
	    )

	    # Inference only: skip gradient bookkeeping.
	    with torch.no_grad():
	        outputs = model(torch.tensor([input_ids]), return_dict=True)
	    answer_start = outputs.start_logits.argmax().item()
	    answer_end = outputs.end_logits.argmax().item()

	    answer_tokens = tokenizer.convert_ids_to_tokens(
	        input_ids[answer_start:answer_end + 1]
	    )
	    return tokenizer.convert_tokens_to_string(answer_tokens)

	# Components are taken from the top-level ``gr`` namespace: the
	# ``gr.inputs`` / ``gr.outputs`` sub-modules were deprecated in Gradio 3
	# and removed in Gradio 4, where referencing them fails at import time.
	# NOTE(review): on Gradio 4+ the File component passes a path string to the
	# callback by default rather than a file object - confirm the callback
	# accepts the form delivered by the installed Gradio version.
	inputs = [
	    gr.File(label="PDF document"),
	    gr.Dropdown(label="Question", choices=predefined_questions),
	]

	outputs = gr.Textbox(label="Answer")

	# Build the web UI around ``answer_questions`` and start serving it.
	gr.Interface(
	    fn=answer_questions,
	    inputs=inputs,
	    outputs=outputs,
	    title="PDF Question Answering Tool",
	    description="Upload a PDF document and select a question from the dropdown. The app will use a pre-trained model to find the answer.",
	).launch()