# Hugging Face Space (status at capture time: Sleeping)
import torch | |
import gradio as gr | |
from transformers import pipeline | |
from PyPDF2 import PdfReader # Alternative for PDF handling | |
from docx import Document # For handling .docx files | |
# Path of a locally cached snapshot of the model. Nothing in the visible code
# reads this value; the pipeline below is created from the hub identifier.
model_path = (
    "../Models/models--deepset--roberta-base-squad2/snapshots"
    "/cbf50ba81465d4d8676b8bab348e31835147541b"
)

# Extractive question-answering pipeline shared by every request.
question_answer = pipeline(
    "question-answering",
    model="deepset/roberta-base-squad2",
)
class _UnsupportedFileFormat(ValueError):
    """Raised when an uploaded file has an extension this app cannot read."""


def _extract_text(file_obj):
    """Return the text of a .txt, .pdf or .docx upload, raising on failure.

    Args:
        file_obj: Either an object exposing the file path as ``.name`` or the
            path itself (some Gradio versions pass a plain path string).

    Raises:
        _UnsupportedFileFormat: If the extension is not txt, pdf or docx.
        ValueError: If ``file_obj`` is ``None``.
        Exception: Whatever the underlying reader raises (I/O, decoding,
            parsing).
    """
    if file_obj is None:
        raise ValueError("no file was provided")

    # Accept both "object with .name" and "bare path" uploads.
    path = str(getattr(file_obj, "name", file_obj))
    file_extension = path.split('.')[-1].lower()

    if file_extension == 'txt':
        with open(path, 'r', encoding='utf-8') as file:
            return file.read()

    if file_extension == 'pdf':
        reader = PdfReader(path)
        # A page without extractable text must not break the join, and a
        # newline keeps words from fusing across page boundaries.
        return "\n".join(page.extract_text() or "" for page in reader.pages)

    if file_extension == 'docx':
        doc = Document(path)
        return "\n".join(para.text for para in doc.paragraphs)

    raise _UnsupportedFileFormat(
        "Unsupported file format. Please upload a .txt, .pdf, or .docx file."
    )


def _load_context(file_obj):
    """Return ``(text, error_message)``; exactly one of the two is ``None``."""
    try:
        return _extract_text(file_obj), None
    except _UnsupportedFileFormat as exc:
        return None, str(exc)
    except Exception as e:  # UI boundary: report the failure, never crash.
        return None, f"An error occurred: {e}"


def read_file_content(file_obj):
    """Read an uploaded document and return its text.

    Supports ``.txt`` (UTF-8), ``.pdf`` (via PyPDF2) and ``.docx`` (via
    python-docx), chosen by the file extension.

    Args:
        file_obj: Uploaded file, either an object with a ``.name`` path
            attribute or a path string.

    Returns:
        The extracted text on success. On failure, a human-readable message:
        the "Unsupported file format..." text for unknown extensions, or
        ``"An error occurred: <details>"`` for any other problem.
    """
    text, error = _load_context(file_obj)
    return text if error is None else error


def get_answer(file, question):
    """Answer ``question`` using the text of the uploaded ``file``.

    Args:
        file: Uploaded file as delivered by the Gradio ``File`` component.
        question: The user's question.

    Returns:
        The answer span found by the question-answering pipeline, or a
        message explaining why no answer could be produced (unreadable or
        unsupported file, blank question, or a file with no text).
    """
    # Errors are carried separately from the text, so a document that merely
    # contains the words "Unsupported" or "An error occurred" is still queried.
    context, error = _load_context(file)
    if error is not None:
        return error

    if not question or not question.strip():
        return "Please enter a question."
    if not context.strip():
        return "No readable text was found in the uploaded file."

    answer = question_answer(question=question, context=context)
    return answer["answer"]
# Input widgets, in the order get_answer receives them: (file, question).
file_input = gr.File(label="Upload your file")
question_input = gr.Textbox(label="Input your question", lines=1)

# Single-line text box that shows the value returned by get_answer.
answer_output = gr.Textbox(label="Answer text", lines=1)

demo = gr.Interface(
    fn=get_answer,
    inputs=[file_input, question_input],
    outputs=[answer_output],
    title="Explore Documents",
    description="THIS APPLICATION WILL BE USED TO ANSWER QUESTIONS BASED ON CONTEXT PROVIDED.",
)

# Start the web UI as soon as the module is executed.
demo.launch()