# Examine_Docs / app.py
# Last commit by arssite: "Update app.py" (b5be996, verified)
import torch
import gradio as gr
from transformers import pipeline
from PyPDF2 import PdfReader # Alternative for PDF handling
from docx import Document # For handling .docx files
# Path to a locally cached snapshot of the model. It is not passed to the
# pipeline below (nor used anywhere else in the code visible here); the
# pipeline is given the model by name instead.
model_path = (
    "../Models/models--deepset--roberta-base-squad2/snapshots"
    + "/cbf50ba81465d4d8676b8bab348e31835147541b"
)

# Question-answering pipeline, created once when the module is loaded and
# reused for every request.
question_answer = pipeline(
    task="question-answering",
    model="deepset/roberta-base-squad2",
)
def read_file_content(file_obj):
    """Extract the plain text of an uploaded .txt, .pdf or .docx file.

    Args:
        file_obj: The uploaded file, given either as a filesystem path
            (``str`` or ``os.PathLike``) or as an object whose ``name``
            attribute holds that path. ``None`` means nothing was uploaded.

    Returns:
        The extracted text on success. Failures are reported in-band as a
        message string instead of being raised: an unsupported extension
        yields a message starting with "Unsupported file format.", and any
        other problem yields a message starting with "An error occurred: ".
    """
    import os  # Function-scope import keeps this block self-contained.

    if file_obj is None:
        # Give a readable message instead of leaking an AttributeError text.
        return "An error occurred: no file was provided."

    try:
        # Accept a bare path as well as a wrapper object exposing ``.name``.
        if isinstance(file_obj, (str, os.PathLike)):
            path = os.fspath(file_obj)
        else:
            path = file_obj.name

        # splitext() only inspects the last path component, so a dot in a
        # directory name is never mistaken for the file extension.
        file_extension = os.path.splitext(path)[1].lower()

        if file_extension == ".txt":
            # "utf-8-sig" decodes ordinary UTF-8 and additionally drops a
            # leading byte-order mark, which would otherwise end up in the
            # returned text as "\ufeff".
            with open(path, "r", encoding="utf-8-sig") as file:
                return file.read()

        if file_extension == ".pdf":
            reader = PdfReader(path)
            # Defensive ``or ""``: a page that yields no text (None or an
            # empty string) must not break the concatenation.
            return "".join(page.extract_text() or "" for page in reader.pages)

        if file_extension == ".docx":
            doc = Document(path)
            return "\n".join(para.text for para in doc.paragraphs)

        return "Unsupported file format. Please upload a .txt, .pdf, or .docx file."
    except Exception as e:
        # UI boundary: callers expect a message string, never an exception.
        return f"An error occurred: {e}"
def get_answer(file, question):
    """Answer a question using the text of an uploaded document.

    Args:
        file: The uploaded file as received from the file input; it is
            passed unchanged to ``read_file_content``.
        question: The question to answer from the document's text.

    Returns:
        The answer text produced by the question-answering pipeline, or a
        human-readable message when the question is empty, the file could
        not be read, or the file contains no text.
    """
    # The pipeline rejects empty inputs with an exception; answer with a
    # plain message instead.
    if not question or not question.strip():
        return "Please enter a question."

    context = read_file_content(file)

    # read_file_content reports failures as messages beginning with one of
    # these prefixes. Matching the prefix (rather than searching the whole
    # text) keeps a document that merely mentions "Unsupported" or
    # "An error occurred" from being mistaken for a failure.
    if context.startswith(("An error occurred: ", "Unsupported file format.")):
        return context

    if not context.strip():
        return "No readable text was found in the uploaded file."

    answer = question_answer(question=question, context=context)
    return answer["answer"]
# Widgets are created in the same order as before: file input, question
# input, then the answer output.
upload_input = gr.File(label="Upload your file")
question_input = gr.Textbox(label="Input your question", lines=1)
answer_output = gr.Textbox(label="Answer text", lines=1)

# Wire the widgets to get_answer: (file, question) in, answer text out.
demo = gr.Interface(
    fn=get_answer,
    inputs=[upload_input, question_input],
    outputs=[answer_output],
    title="Explore Documents",
    description="THIS APPLICATION WILL BE USED TO ANSWER QUESTIONS BASED ON CONTEXT PROVIDED.",
)

# Start the web UI as soon as the module is executed.
demo.launch()