XThomasBU
committed on
Commit
·
49140fa
1
Parent(s):
4265034
reverted simplistic check for informative changes
Browse files
code/modules/dataloader/data_loader.py
CHANGED
@@ -223,8 +223,8 @@ class ChunkProcessor:
|
|
223 |
file_metadata = {}
|
224 |
|
225 |
for doc in documents:
|
226 |
-
if len(doc.page_content) <= 400:
|
227 |
-
|
228 |
|
229 |
page_num = doc.metadata.get("page", 0)
|
230 |
file_data[page_num] = doc.page_content
|
|
|
223 |
file_metadata = {}
|
224 |
|
225 |
for doc in documents:
|
226 |
+
# if len(doc.page_content) <= 400: # better approach to filter out non-informative documents
|
227 |
+
# continue
|
228 |
|
229 |
page_num = doc.metadata.get("page", 0)
|
230 |
file_data[page_num] = doc.page_content
|