Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -11,6 +11,7 @@ from langchain_community.vectorstores import FAISS
|
|
11 |
from langchain_community.document_loaders import PyPDFLoader
|
12 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
13 |
from llama_parse import LlamaParse
|
|
|
14 |
|
15 |
# Environment variables and configurations
|
16 |
huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
|
@@ -25,7 +26,7 @@ llama_parser = LlamaParse(
|
|
25 |
language="en",
|
26 |
)
|
27 |
|
28 |
-
def load_document(file: NamedTemporaryFile, parser: str = "pypdf") -> List[dict]:
|
29 |
"""Loads and splits the document into pages."""
|
30 |
if parser == "pypdf":
|
31 |
loader = PyPDFLoader(file.name)
|
@@ -33,7 +34,7 @@ def load_document(file: NamedTemporaryFile, parser: str = "pypdf") -> List[dict]
|
|
33 |
elif parser == "llamaparse":
|
34 |
try:
|
35 |
documents = llama_parser.load_data(file.name)
|
36 |
-
return [
|
37 |
except Exception as e:
|
38 |
print(f"Error using Llama Parse: {str(e)}")
|
39 |
print("Falling back to PyPDF parser")
|
|
|
11 |
from langchain_community.document_loaders import PyPDFLoader
|
12 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
13 |
from llama_parse import LlamaParse
|
14 |
+
from langchain_core.documents import Document
|
15 |
|
16 |
# Environment variables and configurations
|
17 |
huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
|
|
|
26 |
language="en",
|
27 |
)
|
28 |
|
29 |
+
def load_document(file: NamedTemporaryFile, parser: str = "pypdf") -> List[Document]:
|
30 |
"""Loads and splits the document into pages."""
|
31 |
if parser == "pypdf":
|
32 |
loader = PyPDFLoader(file.name)
|
|
|
34 |
elif parser == "llamaparse":
|
35 |
try:
|
36 |
documents = llama_parser.load_data(file.name)
|
37 |
+
return [Document(page_content=doc.text, metadata={"source": file.name}) for doc in documents]
|
38 |
except Exception as e:
|
39 |
print(f"Error using Llama Parse: {str(e)}")
|
40 |
print("Falling back to PyPDF parser")
|