Try to fix the `unstructured` requirement pin and download the NLTK `averaged_perceptron_tagger_eng` resource
Browse files
pyproject.toml
CHANGED
@@ -27,7 +27,7 @@ dependencies = [
|
|
27 |
"pydantic>=2.10.5,<3.0.0",
|
28 |
"sentence-transformers>=3.2.0,<4.0.0",
|
29 |
"transformers>=4.44.2,<5.0.0",
|
30 |
-
"unstructured[md,pdf,docx]>=0.16.
|
31 |
"setuptools",
|
32 |
]
|
33 |
|
|
|
27 |
"pydantic>=2.10.5,<3.0.0",
|
28 |
"sentence-transformers>=3.2.0,<4.0.0",
|
29 |
"transformers>=4.44.2,<5.0.0",
|
30 |
+
"unstructured[md,pdf,docx]>=0.16.3,<1.0.0",
|
31 |
"setuptools",
|
32 |
]
|
33 |
|
src/synthetic_dataset_generator/apps/rag.py
CHANGED
@@ -52,7 +52,7 @@ from synthetic_dataset_generator.utils import (
|
|
52 |
swap_visibility,
|
53 |
)
|
54 |
nltk.download("punkt_tab")
|
55 |
-
nltk.download("
|
56 |
|
57 |
def _get_valid_columns(dataframe: pd.DataFrame):
|
58 |
doc_valid_columns = []
|
|
|
52 |
swap_visibility,
|
53 |
)
|
54 |
nltk.download("punkt_tab")
|
55 |
+
nltk.download("averaged_perceptron_tagger_eng")
|
56 |
|
57 |
def _get_valid_columns(dataframe: pd.DataFrame):
|
58 |
doc_valid_columns = []
|