# Hugging Face Spaces app (page status when captured: "Runtime error").
from haystack.nodes import PreProcessor, PDFToTextConverter, EmbeddingRetriever, TransformersReader | |
from haystack.document_stores import InMemoryDocumentStore | |
from haystack.pipelines import DocumentSearchPipeline, ExtractiveQAPipeline | |
import gradio as gr | |
# Cleaning and splitting options applied to converted documents before they
# are written to the document store: word-based splits of length 100 with an
# overlap of 3, keeping sentence boundaries intact.
_preprocessor_options = {
    "clean_empty_lines": True,
    "clean_whitespace": True,
    "clean_header_footer": True,
    "split_by": "word",
    "split_length": 100,
    "split_respect_sentence_boundary": True,
    "split_overlap": 3,
}
preprocessor = PreProcessor(**_preprocessor_options)
# In-memory store for the chunks of the uploaded PDFs. embedding_dim must
# match the vector size produced by the retriever's embedding model (384).
document_store = InMemoryDocumentStore(embedding_dim=384)

# The reader has to be an extractive question-answering checkpoint. The
# sentence-embedding model used by the retriever has no trained QA head, so
# it cannot produce meaningful answer spans when used as a reader.
reader = TransformersReader(
    model_name_or_path="distilbert-base-uncased-distilled-squad"
)

# Dense retriever: embeds the query and the stored chunks with the same model.
retriever = EmbeddingRetriever(
    document_store=document_store,
    embedding_model="sentence-transformers/all-MiniLM-L6-v2",
)

# Retriever selects candidate chunks, reader extracts answer spans from them.
pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever)

# Converts uploaded PDF files into Haystack documents.
converter = PDFToTextConverter(remove_numeric_tables=True)
def print_answers(results):
    """Reduce a pipeline result to a list of plain answer dictionaries.

    Despite the name, nothing is printed: the filtered answers are returned.

    Args:
        results: Mapping returned by the QA pipeline. Its "answers" entry is
            expected to be an iterable of objects exposing ``answer`` and
            ``score`` attributes. A missing or empty entry yields ``[]``.

    Returns:
        One dict per answer holding the "answer" and "score" fields whose
        value is not ``None``.
    """
    fields = ("answer", "score")  # add "context" to expose the passage too

    # Tolerate a missing/None result or "answers" entry instead of raising.
    answers = (results or {}).get("answers") or []

    filtered_answers = []
    for ans in answers:
        filtered_ans = {}
        for field in fields:
            # Single lookup per field; absent attributes are treated as None.
            value = getattr(ans, field, None)
            if value is not None:
                filtered_ans[field] = value
        filtered_answers.append(filtered_ans)
    return filtered_answers
def write_pdf(pdf_file):
    """Convert a PDF, split it into chunks and index it in the document store.

    Args:
        pdf_file: Either an uploaded-file object exposing the on-disk path as
            ``.name`` (what the Gradio File input is configured to provide),
            or a plain filesystem path.

    Side effects:
        Writes the preprocessed chunks to ``document_store`` and computes
        embeddings for documents that do not have one yet.
    """
    # NOTE(review): documents from earlier uploads stay in the store, so
    # answers may come from previously uploaded PDFs -- confirm this is wanted.
    file_path = getattr(pdf_file, "name", pdf_file)

    # Pass the converter's list straight through: PreProcessor.process takes a
    # list of documents, and this avoids an IndexError on an empty conversion.
    documents = converter.convert(file_path=file_path, meta=None)
    preprocessed_docs = preprocessor.process(documents)
    document_store.write_documents(preprocessed_docs)

    # Only embed the newly written chunks instead of re-embedding the whole
    # store on every upload.
    document_store.update_embeddings(retriever, update_existing_embeddings=False)
def predict(question, pdf_file):
    """Index the uploaded PDF and answer ``question`` from its content.

    Args:
        question: Query text entered by the user.
        pdf_file: Uploaded file from the Gradio File input; ``None`` when the
            user submitted the form without attaching a file.

    Returns:
        A list of answer dicts (see ``print_answers``), or a short message
        string when the question is blank or no file was uploaded.
    """
    # Guard clauses: without them a missing upload fails with AttributeError
    # inside write_pdf and a blank query is sent to the pipeline.
    if not question or not question.strip():
        return "Please enter a question."
    if pdf_file is None:
        return "Please upload a PDF file."

    write_pdf(pdf_file)
    # Retrieve the 2 best-matching chunks and let the reader extract answers.
    result = pipeline.run(query=question, params={"Retriever": {"top_k": 2}})
    return print_answers(result)
# Input widgets: a one-line query box and a single-file upload.
query_input = gr.components.Textbox(lines=1, label="Enter your search query here...")
file_input = gr.components.File(file_count="single", type="file", label="Upload a file here.")

# Web UI that feeds both inputs to predict() and shows the result as text.
interface = gr.Interface(
    fn=predict,
    inputs=[query_input, file_input],
    outputs="text",
    title="Search",
    interpretation=None,
    theme="default",  # alternatives: "huggingface", "dark-grass", "peach"
)
interface.launch()