Spaces:
Running
Running
First Commit
Browse files
- Makefile +11 -0
- README.md +51 -14
- app.py +34 -0
- dist/ragscraper-11.4.2023-py3-none-any.whl +0 -0
- dist/ragscraper-11.4.2023.tar.gz +3 -0
- dist/ragscraper-11.5.2023-py3-none-any.whl +0 -0
- dist/ragscraper-11.5.2023.tar.gz +3 -0
- examples/scrape-content.ipynb +0 -0
- examples/scrape-urls.ipynb +193 -0
- poetry.lock +611 -0
- pyproject.toml +30 -0
- rag_scraper/__init__.py +0 -0
- rag_scraper/__pycache__/__init__.cpython-310.pyc +0 -0
- rag_scraper/__pycache__/cli.cpython-310.pyc +0 -0
- rag_scraper/__pycache__/converter.cpython-310.pyc +0 -0
- rag_scraper/__pycache__/link_extractor.cpython-310.pyc +0 -0
- rag_scraper/__pycache__/scraper.cpython-310.pyc +0 -0
- rag_scraper/__pycache__/utils.cpython-310.pyc +0 -0
- rag_scraper/cli.py +62 -0
- rag_scraper/converter.py +58 -0
- rag_scraper/link_extractor.py +70 -0
- rag_scraper/scraper.py +14 -0
- rag_scraper/utils.py +12 -0
- requirements.txt +5 -0
- tests/__init__.py +0 -0
- tests/__pycache__/__init__.cpython-310.pyc +0 -0
- tests/__pycache__/test_cli.cpython-310-pytest-7.4.3.pyc +0 -0
- tests/__pycache__/test_cli.cpython-310.pyc +0 -0
- tests/__pycache__/test_converter.cpython-310-pytest-7.4.3.pyc +0 -0
- tests/__pycache__/test_converter.cpython-310.pyc +0 -0
- tests/__pycache__/test_link_extractor.cpython-310-pytest-7.4.3.pyc +0 -0
- tests/__pycache__/test_link_extractor.cpython-310.pyc +0 -0
- tests/__pycache__/test_utils.cpython-310-pytest-7.4.3.pyc +0 -0
- tests/__pycache__/test_utils.cpython-310.pyc +0 -0
- tests/test_converter.py +0 -0
- tests/test_link_extractor.py +0 -0
- tests/test_scraper.py +0 -0
- tests/test_utils.py +0 -0
Makefile
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
lint:
|
2 |
+
poetry run flake8 .
|
3 |
+
poetry run black --line-length 79 . --check
|
4 |
+
poetry run ruff .
|
5 |
+
|
6 |
+
format:
|
7 |
+
poetry run isort .
|
8 |
+
poetry run black --line-length 79 .
|
9 |
+
poetry run ruff . --fix
|
10 |
+
|
11 |
+
.PHONY: lint format
|
README.md
CHANGED
@@ -1,14 +1,51 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# RAGScraper
|
2 |
+
|
3 |
+
RAGScraper is a simple Python package that scrapes webpages and converts them to Markdown format for RAG usage.
|
4 |
+
|
5 |
+
## Installation
|
6 |
+
|
7 |
+
To install RAGScraper, simply run:
|
8 |
+
|
9 |
+
```bash
|
10 |
+
pip install ragscraper
|
11 |
+
```
|
12 |
+
|
13 |
+
## Usage
|
14 |
+
|
15 |
+
To use RAGScraper as a command-line tool:
|
16 |
+
|
17 |
+
```bash
|
18 |
+
rag-scraper <URL>
|
19 |
+
```
|
20 |
+
|
21 |
+
To use RAGScraper in a Python script:
|
22 |
+
|
23 |
+
```python
|
24 |
+
from rag_scraper.scraper import Scraper
|
25 |
+
from rag_scraper.converter import Converter
|
26 |
+
|
27 |
+
# Fetch HTML content
|
28 |
+
url = "https://example.com"
|
29 |
+
html_content = Scraper.fetch_html(url)
|
30 |
+
|
31 |
+
# Convert to Markdown
|
32 |
+
markdown_content = Converter.html_to_markdown(
|
33 |
+
html=html_content,
|
34 |
+
base_url=url,
|
35 |
+
parser_features='html.parser',
|
36 |
+
ignore_links=True
|
37 |
+
)
|
38 |
+
print(markdown_content)
|
39 |
+
```
|
40 |
+
|
41 |
+
## Development
|
42 |
+
|
43 |
+
To run the tests for RAGScraper, navigate to the package directory and run:
|
44 |
+
|
45 |
+
```bash
|
46 |
+
python -m unittest discover tests
|
47 |
+
```
|
48 |
+
|
49 |
+
## Contributing
|
50 |
+
|
51 |
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
app.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from rag_scraper.scraper import Scraper
|
3 |
+
from rag_scraper.converter import Converter
|
4 |
+
|
5 |
+
def scrape_and_convert(url):
    """Download the page at ``url`` and return it converted to Markdown.

    The HTML is obtained from ``Scraper.fetch_html(url)`` and handed to
    ``Converter.html_to_markdown`` with ``base_url=url``,
    ``parser_features='html.parser'`` and ``ignore_links=True``.

    Args:
        url: Address of the page to scrape.

    Returns:
        The Markdown produced by the converter, or a string of the form
        ``"Error: <message>"`` if any exception is raised while fetching
        or converting.
    """
    # Fixed conversion settings; only the HTML payload varies per call.
    conversion_options = {
        "base_url": url,
        "parser_features": "html.parser",
        "ignore_links": True,
    }
    try:
        page_html = Scraper.fetch_html(url)
        return Converter.html_to_markdown(html=page_html, **conversion_options)
    except Exception as exc:
        # Report the failure as text instead of propagating it to the caller.
        return f"Error: {exc!s}"
|
21 |
+
|
22 |
+
# Define Gradio interface
|
23 |
+
iface = gr.Interface(
|
24 |
+
fn=scrape_and_convert,
|
25 |
+
inputs=gr.Textbox(label="Enter URL"),
|
26 |
+
outputs=gr.Code(label="Markdown Output", language="markdown"),
|
27 |
+
title="RAGScraper",
|
28 |
+
description="Enter a URL to scrape and convert its content into Markdown format."
|
29 |
+
)
|
30 |
+
|
31 |
+
# Launch the Gradio app
|
32 |
+
if __name__ == "__main__":
|
33 |
+
iface.launch()
|
34 |
+
|
dist/ragscraper-11.4.2023-py3-none-any.whl
ADDED
Binary file (4.86 kB). View file
|
|
dist/ragscraper-11.4.2023.tar.gz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fcfb2dee80597a1b0b11a9002099d4288960daba1f5b4f8579dbd5034c48da23
|
3 |
+
size 3521
|
dist/ragscraper-11.5.2023-py3-none-any.whl
ADDED
Binary file (5.28 kB). View file
|
|
dist/ragscraper-11.5.2023.tar.gz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aea8b6d9f9c8ce77691ec9d964ae5dd6dff94ac84af11482738909a4013a23e7
|
3 |
+
size 3906
|
examples/scrape-content.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
examples/scrape-urls.ipynb
ADDED
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"websites = [\n",
|
10 |
+
" \"https://docs.wized.com/\",\n",
|
11 |
+
" \"https://v1.wized.com/\"\n",
|
12 |
+
"]"
|
13 |
+
]
|
14 |
+
},
|
15 |
+
{
|
16 |
+
"cell_type": "code",
|
17 |
+
"execution_count": 2,
|
18 |
+
"metadata": {},
|
19 |
+
"outputs": [],
|
20 |
+
"source": [
|
21 |
+
"from rag_scraper.link_extractor import LinkExtractor\n",
|
22 |
+
"\n",
|
23 |
+
"page_urls = []\n",
|
24 |
+
"for url in websites:\n",
|
25 |
+
" scraped_urls = LinkExtractor.scrape_url(url)\n",
|
26 |
+
" \n",
|
27 |
+
" page_urls.extend(scraped_urls)"
|
28 |
+
]
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"cell_type": "code",
|
32 |
+
"execution_count": 3,
|
33 |
+
"metadata": {},
|
34 |
+
"outputs": [
|
35 |
+
{
|
36 |
+
"data": {
|
37 |
+
"text/plain": [
|
38 |
+
"['https://docs.wized.com/requests/firebase/get-user/',\n",
|
39 |
+
" 'https://docs.wized.com/changelog/',\n",
|
40 |
+
" 'https://docs.wized.com/actions/after-actions/set-variable/',\n",
|
41 |
+
" 'https://docs.wized.com/actions/after-actions/perform-request/',\n",
|
42 |
+
" 'https://docs.wized.com/requests/firebase/sign-in-email-password/',\n",
|
43 |
+
" 'https://docs.wized.com/requests/supabase/sign-in-password/',\n",
|
44 |
+
" 'https://docs.wized.com/actions/after-actions/navigate-to/',\n",
|
45 |
+
" 'https://docs.wized.com/actions/element-actions/',\n",
|
46 |
+
" 'https://docs.wized.com/requests/firebase/',\n",
|
47 |
+
" 'https://docs.wized.com/requests/firebase/sign-up-email-password/',\n",
|
48 |
+
" 'https://docs.wized.com/requests/supabase/sign-in-oauth/',\n",
|
49 |
+
" 'https://docs.wized.com/',\n",
|
50 |
+
" 'https://docs.wized.com/requests/firebase/sign-out/',\n",
|
51 |
+
" 'https://docs.wized.com/function-editor/',\n",
|
52 |
+
" 'https://docs.wized.com/configurator/backups/',\n",
|
53 |
+
" 'https://docs.wized.com/requests/supabase/delete-item/',\n",
|
54 |
+
" 'https://docs.wized.com/embed-versions-comparison/',\n",
|
55 |
+
" 'https://docs.wized.com/function-editor/common-patterns/',\n",
|
56 |
+
" 'https://docs.wized.com/actions/element-actions/render-list/',\n",
|
57 |
+
" 'https://docs.wized.com/function-editor/parameters/',\n",
|
58 |
+
" 'https://docs.wized.com/actions/reactivity/',\n",
|
59 |
+
" 'https://docs.wized.com/configurator/settings/',\n",
|
60 |
+
" 'https://docs.wized.com/requests/rest/',\n",
|
61 |
+
" 'https://docs.wized.com/requests/firebase/send-password-reset/',\n",
|
62 |
+
" 'https://docs.wized.com/configurator/publishing/',\n",
|
63 |
+
" 'https://docs.wized.com/actions/after-actions/set-cookie/',\n",
|
64 |
+
" 'https://docs.wized.com/configurator/',\n",
|
65 |
+
" 'https://docs.wized.com/requests/supabase/sign-out/',\n",
|
66 |
+
" 'https://docs.wized.com/requests/supabase/get-session/',\n",
|
67 |
+
" 'https://docs.wized.com/actions/event-actions/page-starts-loading/',\n",
|
68 |
+
" 'https://docs.wized.com/actions/element-actions/set-visibility/',\n",
|
69 |
+
" 'https://docs.wized.com/data-store/navigation/',\n",
|
70 |
+
" 'https://docs.wized.com/actions/element-actions/set-form-values/',\n",
|
71 |
+
" 'https://docs.wized.com/requests/firebase/delete-item/',\n",
|
72 |
+
" 'https://docs.wized.com/configurator/canvas/',\n",
|
73 |
+
" 'https://docs.wized.com/actions/element-actions/set-html-attribute/',\n",
|
74 |
+
" 'https://docs.wized.com/actions/element-actions/add-param-to-link/',\n",
|
75 |
+
" 'https://docs.wized.com/requests/supabase/create-item/',\n",
|
76 |
+
" 'https://docs.wized.com/actions/after-actions/',\n",
|
77 |
+
" 'https://docs.wized.com/requests/firebase/get-list/',\n",
|
78 |
+
" 'https://docs.wized.com/requests/firebase/set-item/',\n",
|
79 |
+
" 'https://docs.wized.com/actions/event-actions/page-finishes-loading/',\n",
|
80 |
+
" 'https://docs.wized.com/requests/firebase/create-item/',\n",
|
81 |
+
" 'https://docs.wized.com/requests/firebase/update-email/',\n",
|
82 |
+
" 'https://docs.wized.com/actions/event-actions/attribute-present/',\n",
|
83 |
+
" 'https://docs.wized.com/actions/event-actions/custom-condition/',\n",
|
84 |
+
" 'https://docs.wized.com/requests/supabase/sign-in-magic-link/',\n",
|
85 |
+
" 'https://docs.wized.com/actions/event-actions/',\n",
|
86 |
+
" 'https://docs.wized.com/actions/element-actions/on-event/',\n",
|
87 |
+
" 'https://docs.wized.com/actions/',\n",
|
88 |
+
" 'https://docs.wized.com/data-store/input-fields/',\n",
|
89 |
+
" 'https://docs.wized.com/data-store/forms/',\n",
|
90 |
+
" 'https://docs.wized.com/data-store/variables/',\n",
|
91 |
+
" 'https://docs.wized.com/javascript-api/',\n",
|
92 |
+
" 'https://docs.wized.com/actions/element-actions/set-text/',\n",
|
93 |
+
" 'https://docs.wized.com/requests/supabase/get-item/',\n",
|
94 |
+
" 'https://docs.wized.com/requests/supabase/get-list/',\n",
|
95 |
+
" 'https://docs.wized.com/actions/after-actions/run-function/',\n",
|
96 |
+
" 'https://docs.wized.com/requests/firebase/update-item/',\n",
|
97 |
+
" 'https://docs.wized.com/actions/element-actions/set-input-value/',\n",
|
98 |
+
" 'https://docs.wized.com/requests/firebase/get-item/',\n",
|
99 |
+
" 'https://docs.wized.com/requests/supabase/send-password-reset/',\n",
|
100 |
+
" 'https://docs.wized.com/requests/firebase/sign-in-provider/',\n",
|
101 |
+
" 'https://v1.wized.com/',\n",
|
102 |
+
" 'https://docs.wized.com/data-store/requests-data/',\n",
|
103 |
+
" 'https://docs.wized.com/requests/supabase/',\n",
|
104 |
+
" 'https://docs.wized.com/requests/supabase/update-item/',\n",
|
105 |
+
" 'https://docs.wized.com/actions/event-actions/request-finishes/',\n",
|
106 |
+
" 'https://docs.wized.com/requests/supabase/sign-up/',\n",
|
107 |
+
" 'https://docs.wized.com/actions/element-actions/set-style/',\n",
|
108 |
+
" 'https://docs.wized.com/data-store/',\n",
|
109 |
+
" 'https://docs.wized.com/requests/',\n",
|
110 |
+
" 'https://docs.wized.com/actions/element-actions/set-class/',\n",
|
111 |
+
" 'https://docs.wized.com/requests/firebase/unsubscribe-real-time/',\n",
|
112 |
+
" 'https://docs.wized.com/requests/supabase/get-user/',\n",
|
113 |
+
" 'https://docs.wized.com/requests/supabase/update-user/',\n",
|
114 |
+
" 'https://docs.wized.com/requests/supabase/unsubscribe-real-time/',\n",
|
115 |
+
" 'https://docs.wized.com/data-store/cookies/',\n",
|
116 |
+
" 'https://docs.wized.com/requests/rest/file-uploads/',\n",
|
117 |
+
" 'https://v1.wized.com/beginner-learning-path/beginner-lessons/data-out',\n",
|
118 |
+
" 'https://v1.wized.com/beginner-learning-path/beginner-lessons/intro-to-web-applications',\n",
|
119 |
+
" 'https://v1.wized.com/beginner-learning-path/beginner-projects/build-a-weather-app',\n",
|
120 |
+
" 'https://v1.wized.com/advanced-learning-path/advanced-lessons/booleans',\n",
|
121 |
+
" 'https://v1.wized.com/naming-convention/variable-naming',\n",
|
122 |
+
" 'https://v1.wized.com/advanced-learning-path/advanced-lessons/social-sign-in-with-wized-and-xano',\n",
|
123 |
+
" 'https://v1.wized.com/advanced-learning-path/advanced-lessons/filtering-request-data',\n",
|
124 |
+
" 'https://v1.wized.com/guides/general/pages-from-the-website-arent-showing-up-in-the-configurator',\n",
|
125 |
+
" 'https://v1.wized.com/beginner-learning-path/beginner-lessons/configurator',\n",
|
126 |
+
" 'https://v1.wized.com/beginner-learning-path/beginner-lessons/authentication',\n",
|
127 |
+
" 'https://v1.wized.com/advanced-learning-path/advanced-lessons/master-data-types-in-wized',\n",
|
128 |
+
" 'https://v1.wized.com/',\n",
|
129 |
+
" 'https://v1.wized.com/guides/general/previewing-array-data-with-index-variables-doesnt-work',\n",
|
130 |
+
" 'https://v1.wized.com/naming-convention/action-naming',\n",
|
131 |
+
" 'https://v1.wized.com/cheat-sheets/prevent-content-flashing',\n",
|
132 |
+
" 'https://v1.wized.com/advanced-learning-path/advanced-lessons/operators',\n",
|
133 |
+
" 'https://v1.wized.com/beginner-learning-path/beginner-lessons/filtering-and-sorting-with-finsweet-attributes',\n",
|
134 |
+
" 'https://v1.wized.com/javascript-api/js-api-documentation',\n",
|
135 |
+
" 'https://v1.wized.com/advanced-learning-path/advanced-lessons',\n",
|
136 |
+
" 'https://v1.wized.com/advanced-learning-path/advanced-lessons/conditional-logic',\n",
|
137 |
+
" 'https://v1.wized.com/beginner-learning-path/beginner-lessons/magic-link-authentication',\n",
|
138 |
+
" 'https://v1.wized.com/advanced-learning-path/advanced-projects/build-a-bulk-ecommerce-store',\n",
|
139 |
+
" 'https://v1.wized.com/faq/frequently-asked-questions',\n",
|
140 |
+
" 'https://v1.wized.com/beginner-learning-path/beginner-lessons/data-in',\n",
|
141 |
+
" 'https://v1.wized.com/guides/general/how-to-render-a-list-of-loaded-items-in-wized',\n",
|
142 |
+
" 'https://v1.wized.com/beginner-learning-path/beginner-lessons/my-apps',\n",
|
143 |
+
" 'https://v1.wized.com/beginner-learning-path/beginner-lessons/elements',\n",
|
144 |
+
" 'https://v1.wized.com/beginner-learning-path/beginner-lessons/dashboard',\n",
|
145 |
+
" 'https://v1.wized.com/advanced-learning-path/advanced-lessons/file-upload',\n",
|
146 |
+
" 'https://v1.wized.com/advanced-learning-path/advanced-projects/build-a-custom-referral-program',\n",
|
147 |
+
" 'https://v1.wized.com/advanced-learning-path/advanced-projects/build-a-classified-ads-app',\n",
|
148 |
+
" 'https://v1.wized.com/cheat-sheets/debugging-wized-applications',\n",
|
149 |
+
" 'https://v1.wized.com/advanced-learning-path/advanced-lessons/stripe-checkout',\n",
|
150 |
+
" 'https://v1.wized.com/advanced-learning-path/advanced-lessons/storing-data-on-the-front-end',\n",
|
151 |
+
" 'https://v1.wized.com/beginner-learning-path/beginner-projects',\n",
|
152 |
+
" 'https://v1.wized.com/beginner-learning-path/beginner-lessons',\n",
|
153 |
+
" 'https://v1.wized.com/updates/',\n",
|
154 |
+
" 'https://v1.wized.com/naming-convention/request-naming',\n",
|
155 |
+
" 'https://v1.wized.com/guides/general/third-party-login-with-xano',\n",
|
156 |
+
" 'https://v1.wized.com/naming-convention/element-naming',\n",
|
157 |
+
" 'https://v1.wized.com/advanced-learning-path/advanced-projects',\n",
|
158 |
+
" 'https://v1.wized.com/beginner-learning-path/beginner-lessons/actions',\n",
|
159 |
+
" 'https://v1.wized.com/cheat-sheets/formulas']"
|
160 |
+
]
|
161 |
+
},
|
162 |
+
"execution_count": 3,
|
163 |
+
"metadata": {},
|
164 |
+
"output_type": "execute_result"
|
165 |
+
}
|
166 |
+
],
|
167 |
+
"source": [
|
168 |
+
"page_urls"
|
169 |
+
]
|
170 |
+
}
|
171 |
+
],
|
172 |
+
"metadata": {
|
173 |
+
"kernelspec": {
|
174 |
+
"display_name": "rag-scraper-L88jsp71-py3.10",
|
175 |
+
"language": "python",
|
176 |
+
"name": "python3"
|
177 |
+
},
|
178 |
+
"language_info": {
|
179 |
+
"codemirror_mode": {
|
180 |
+
"name": "ipython",
|
181 |
+
"version": 3
|
182 |
+
},
|
183 |
+
"file_extension": ".py",
|
184 |
+
"mimetype": "text/x-python",
|
185 |
+
"name": "python",
|
186 |
+
"nbconvert_exporter": "python",
|
187 |
+
"pygments_lexer": "ipython3",
|
188 |
+
"version": "3.10.12"
|
189 |
+
}
|
190 |
+
},
|
191 |
+
"nbformat": 4,
|
192 |
+
"nbformat_minor": 2
|
193 |
+
}
|
poetry.lock
ADDED
@@ -0,0 +1,611 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand.
|
2 |
+
|
3 |
+
[[package]]
|
4 |
+
name = "beautifulsoup4"
|
5 |
+
version = "4.12.2"
|
6 |
+
description = "Screen-scraping library"
|
7 |
+
optional = false
|
8 |
+
python-versions = ">=3.6.0"
|
9 |
+
files = [
|
10 |
+
{file = "beautifulsoup4-4.12.2-py3-none-any.whl", hash = "sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a"},
|
11 |
+
{file = "beautifulsoup4-4.12.2.tar.gz", hash = "sha256:492bbc69dca35d12daac71c4db1bfff0c876c00ef4a2ffacce226d4638eb72da"},
|
12 |
+
]
|
13 |
+
|
14 |
+
[package.dependencies]
|
15 |
+
soupsieve = ">1.2"
|
16 |
+
|
17 |
+
[package.extras]
|
18 |
+
html5lib = ["html5lib"]
|
19 |
+
lxml = ["lxml"]
|
20 |
+
|
21 |
+
[[package]]
|
22 |
+
name = "black"
|
23 |
+
version = "23.10.1"
|
24 |
+
description = "The uncompromising code formatter."
|
25 |
+
optional = false
|
26 |
+
python-versions = ">=3.8"
|
27 |
+
files = [
|
28 |
+
{file = "black-23.10.1-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:ec3f8e6234c4e46ff9e16d9ae96f4ef69fa328bb4ad08198c8cee45bb1f08c69"},
|
29 |
+
{file = "black-23.10.1-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:1b917a2aa020ca600483a7b340c165970b26e9029067f019e3755b56e8dd5916"},
|
30 |
+
{file = "black-23.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c74de4c77b849e6359c6f01987e94873c707098322b91490d24296f66d067dc"},
|
31 |
+
{file = "black-23.10.1-cp310-cp310-win_amd64.whl", hash = "sha256:7b4d10b0f016616a0d93d24a448100adf1699712fb7a4efd0e2c32bbb219b173"},
|
32 |
+
{file = "black-23.10.1-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:b15b75fc53a2fbcac8a87d3e20f69874d161beef13954747e053bca7a1ce53a0"},
|
33 |
+
{file = "black-23.10.1-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:e293e4c2f4a992b980032bbd62df07c1bcff82d6964d6c9496f2cd726e246ace"},
|
34 |
+
{file = "black-23.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d56124b7a61d092cb52cce34182a5280e160e6aff3137172a68c2c2c4b76bcb"},
|
35 |
+
{file = "black-23.10.1-cp311-cp311-win_amd64.whl", hash = "sha256:3f157a8945a7b2d424da3335f7ace89c14a3b0625e6593d21139c2d8214d55ce"},
|
36 |
+
{file = "black-23.10.1-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:cfcce6f0a384d0da692119f2d72d79ed07c7159879d0bb1bb32d2e443382bf3a"},
|
37 |
+
{file = "black-23.10.1-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:33d40f5b06be80c1bbce17b173cda17994fbad096ce60eb22054da021bf933d1"},
|
38 |
+
{file = "black-23.10.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:840015166dbdfbc47992871325799fd2dc0dcf9395e401ada6d88fe11498abad"},
|
39 |
+
{file = "black-23.10.1-cp38-cp38-win_amd64.whl", hash = "sha256:037e9b4664cafda5f025a1728c50a9e9aedb99a759c89f760bd83730e76ba884"},
|
40 |
+
{file = "black-23.10.1-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:7cb5936e686e782fddb1c73f8aa6f459e1ad38a6a7b0e54b403f1f05a1507ee9"},
|
41 |
+
{file = "black-23.10.1-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:7670242e90dc129c539e9ca17665e39a146a761e681805c54fbd86015c7c84f7"},
|
42 |
+
{file = "black-23.10.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ed45ac9a613fb52dad3b61c8dea2ec9510bf3108d4db88422bacc7d1ba1243d"},
|
43 |
+
{file = "black-23.10.1-cp39-cp39-win_amd64.whl", hash = "sha256:6d23d7822140e3fef190734216cefb262521789367fbdc0b3f22af6744058982"},
|
44 |
+
{file = "black-23.10.1-py3-none-any.whl", hash = "sha256:d431e6739f727bb2e0495df64a6c7a5310758e87505f5f8cde9ff6c0f2d7e4fe"},
|
45 |
+
{file = "black-23.10.1.tar.gz", hash = "sha256:1f8ce316753428ff68749c65a5f7844631aa18c8679dfd3ca9dc1a289979c258"},
|
46 |
+
]
|
47 |
+
|
48 |
+
[package.dependencies]
|
49 |
+
click = ">=8.0.0"
|
50 |
+
mypy-extensions = ">=0.4.3"
|
51 |
+
packaging = ">=22.0"
|
52 |
+
pathspec = ">=0.9.0"
|
53 |
+
platformdirs = ">=2"
|
54 |
+
tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
|
55 |
+
typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""}
|
56 |
+
|
57 |
+
[package.extras]
|
58 |
+
colorama = ["colorama (>=0.4.3)"]
|
59 |
+
d = ["aiohttp (>=3.7.4)"]
|
60 |
+
jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
|
61 |
+
uvloop = ["uvloop (>=0.15.2)"]
|
62 |
+
|
63 |
+
[[package]]
|
64 |
+
name = "certifi"
|
65 |
+
version = "2023.7.22"
|
66 |
+
description = "Python package for providing Mozilla's CA Bundle."
|
67 |
+
optional = false
|
68 |
+
python-versions = ">=3.6"
|
69 |
+
files = [
|
70 |
+
{file = "certifi-2023.7.22-py3-none-any.whl", hash = "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"},
|
71 |
+
{file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"},
|
72 |
+
]
|
73 |
+
|
74 |
+
[[package]]
|
75 |
+
name = "charset-normalizer"
|
76 |
+
version = "3.3.2"
|
77 |
+
description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
|
78 |
+
optional = false
|
79 |
+
python-versions = ">=3.7.0"
|
80 |
+
files = [
|
81 |
+
{file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"},
|
82 |
+
{file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"},
|
83 |
+
{file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"},
|
84 |
+
{file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"},
|
85 |
+
{file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"},
|
86 |
+
{file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"},
|
87 |
+
{file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"},
|
88 |
+
{file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"},
|
89 |
+
{file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"},
|
90 |
+
{file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"},
|
91 |
+
{file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"},
|
92 |
+
{file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"},
|
93 |
+
{file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"},
|
94 |
+
{file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"},
|
95 |
+
{file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"},
|
96 |
+
{file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"},
|
97 |
+
{file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"},
|
98 |
+
{file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"},
|
99 |
+
{file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"},
|
100 |
+
{file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"},
|
101 |
+
{file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"},
|
102 |
+
{file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"},
|
103 |
+
{file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"},
|
104 |
+
{file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"},
|
105 |
+
{file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"},
|
106 |
+
{file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"},
|
107 |
+
{file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"},
|
108 |
+
{file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"},
|
109 |
+
{file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"},
|
110 |
+
{file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"},
|
111 |
+
{file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"},
|
112 |
+
{file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"},
|
113 |
+
{file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"},
|
114 |
+
{file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"},
|
115 |
+
{file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"},
|
116 |
+
{file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"},
|
117 |
+
{file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"},
|
118 |
+
{file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"},
|
119 |
+
{file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"},
|
120 |
+
{file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"},
|
121 |
+
{file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"},
|
122 |
+
{file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"},
|
123 |
+
{file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"},
|
124 |
+
{file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"},
|
125 |
+
{file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"},
|
126 |
+
{file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"},
|
127 |
+
{file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"},
|
128 |
+
{file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"},
|
129 |
+
{file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"},
|
130 |
+
{file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"},
|
131 |
+
{file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"},
|
132 |
+
{file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"},
|
133 |
+
{file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"},
|
134 |
+
{file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"},
|
135 |
+
{file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"},
|
136 |
+
{file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"},
|
137 |
+
{file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"},
|
138 |
+
{file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"},
|
139 |
+
{file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"},
|
140 |
+
{file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"},
|
141 |
+
{file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"},
|
142 |
+
{file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"},
|
143 |
+
{file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"},
|
144 |
+
{file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"},
|
145 |
+
{file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"},
|
146 |
+
{file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"},
|
147 |
+
{file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"},
|
148 |
+
{file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"},
|
149 |
+
{file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"},
|
150 |
+
{file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"},
|
151 |
+
{file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"},
|
152 |
+
{file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"},
|
153 |
+
{file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"},
|
154 |
+
{file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"},
|
155 |
+
{file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"},
|
156 |
+
{file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"},
|
157 |
+
{file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"},
|
158 |
+
{file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"},
|
159 |
+
{file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"},
|
160 |
+
{file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"},
|
161 |
+
{file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"},
|
162 |
+
{file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"},
|
163 |
+
{file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"},
|
164 |
+
{file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"},
|
165 |
+
{file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"},
|
166 |
+
{file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"},
|
167 |
+
{file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"},
|
168 |
+
{file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"},
|
169 |
+
{file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"},
|
170 |
+
{file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"},
|
171 |
+
]
|
172 |
+
|
173 |
+
[[package]]
|
174 |
+
name = "click"
|
175 |
+
version = "8.1.7"
|
176 |
+
description = "Composable command line interface toolkit"
|
177 |
+
optional = false
|
178 |
+
python-versions = ">=3.7"
|
179 |
+
files = [
|
180 |
+
{file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"},
|
181 |
+
{file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"},
|
182 |
+
]
|
183 |
+
|
184 |
+
[package.dependencies]
|
185 |
+
colorama = {version = "*", markers = "platform_system == \"Windows\""}
|
186 |
+
|
187 |
+
[[package]]
|
188 |
+
name = "colorama"
|
189 |
+
version = "0.4.6"
|
190 |
+
description = "Cross-platform colored terminal text."
|
191 |
+
optional = false
|
192 |
+
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
|
193 |
+
files = [
|
194 |
+
{file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
|
195 |
+
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
|
196 |
+
]
|
197 |
+
|
198 |
+
[[package]]
|
199 |
+
name = "exceptiongroup"
|
200 |
+
version = "1.1.3"
|
201 |
+
description = "Backport of PEP 654 (exception groups)"
|
202 |
+
optional = false
|
203 |
+
python-versions = ">=3.7"
|
204 |
+
files = [
|
205 |
+
{file = "exceptiongroup-1.1.3-py3-none-any.whl", hash = "sha256:343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3"},
|
206 |
+
{file = "exceptiongroup-1.1.3.tar.gz", hash = "sha256:097acd85d473d75af5bb98e41b61ff7fe35efe6675e4f9370ec6ec5126d160e9"},
|
207 |
+
]
|
208 |
+
|
209 |
+
[package.extras]
|
210 |
+
test = ["pytest (>=6)"]
|
211 |
+
|
212 |
+
[[package]]
|
213 |
+
name = "flake8"
|
214 |
+
version = "6.1.0"
|
215 |
+
description = "the modular source code checker: pep8 pyflakes and co"
|
216 |
+
optional = false
|
217 |
+
python-versions = ">=3.8.1"
|
218 |
+
files = [
|
219 |
+
{file = "flake8-6.1.0-py2.py3-none-any.whl", hash = "sha256:ffdfce58ea94c6580c77888a86506937f9a1a227dfcd15f245d694ae20a6b6e5"},
|
220 |
+
{file = "flake8-6.1.0.tar.gz", hash = "sha256:d5b3857f07c030bdb5bf41c7f53799571d75c4491748a3adcd47de929e34cd23"},
|
221 |
+
]
|
222 |
+
|
223 |
+
[package.dependencies]
|
224 |
+
mccabe = ">=0.7.0,<0.8.0"
|
225 |
+
pycodestyle = ">=2.11.0,<2.12.0"
|
226 |
+
pyflakes = ">=3.1.0,<3.2.0"
|
227 |
+
|
228 |
+
[[package]]
|
229 |
+
name = "html2text"
|
230 |
+
version = "2020.1.16"
|
231 |
+
description = "Turn HTML into equivalent Markdown-structured text."
|
232 |
+
optional = false
|
233 |
+
python-versions = ">=3.5"
|
234 |
+
files = [
|
235 |
+
{file = "html2text-2020.1.16-py3-none-any.whl", hash = "sha256:c7c629882da0cf377d66f073329ccf34a12ed2adf0169b9285ae4e63ef54c82b"},
|
236 |
+
{file = "html2text-2020.1.16.tar.gz", hash = "sha256:e296318e16b059ddb97f7a8a1d6a5c1d7af4544049a01e261731d2d5cc277bbb"},
|
237 |
+
]
|
238 |
+
|
239 |
+
[[package]]
|
240 |
+
name = "idna"
|
241 |
+
version = "3.4"
|
242 |
+
description = "Internationalized Domain Names in Applications (IDNA)"
|
243 |
+
optional = false
|
244 |
+
python-versions = ">=3.5"
|
245 |
+
files = [
|
246 |
+
{file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"},
|
247 |
+
{file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"},
|
248 |
+
]
|
249 |
+
|
250 |
+
[[package]]
|
251 |
+
name = "iniconfig"
|
252 |
+
version = "2.0.0"
|
253 |
+
description = "brain-dead simple config-ini parsing"
|
254 |
+
optional = false
|
255 |
+
python-versions = ">=3.7"
|
256 |
+
files = [
|
257 |
+
{file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"},
|
258 |
+
{file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
|
259 |
+
]
|
260 |
+
|
261 |
+
[[package]]
|
262 |
+
name = "isort"
|
263 |
+
version = "5.12.0"
|
264 |
+
description = "A Python utility / library to sort Python imports."
|
265 |
+
optional = false
|
266 |
+
python-versions = ">=3.8.0"
|
267 |
+
files = [
|
268 |
+
{file = "isort-5.12.0-py3-none-any.whl", hash = "sha256:f84c2818376e66cf843d497486ea8fed8700b340f308f076c6fb1229dff318b6"},
|
269 |
+
{file = "isort-5.12.0.tar.gz", hash = "sha256:8bef7dde241278824a6d83f44a544709b065191b95b6e50894bdc722fcba0504"},
|
270 |
+
]
|
271 |
+
|
272 |
+
[package.extras]
|
273 |
+
colors = ["colorama (>=0.4.3)"]
|
274 |
+
pipfile-deprecated-finder = ["pip-shims (>=0.5.2)", "pipreqs", "requirementslib"]
|
275 |
+
plugins = ["setuptools"]
|
276 |
+
requirements-deprecated-finder = ["pip-api", "pipreqs"]
|
277 |
+
|
278 |
+
[[package]]
|
279 |
+
name = "markdown-it-py"
|
280 |
+
version = "3.0.0"
|
281 |
+
description = "Python port of markdown-it. Markdown parsing, done right!"
|
282 |
+
optional = false
|
283 |
+
python-versions = ">=3.8"
|
284 |
+
files = [
|
285 |
+
{file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"},
|
286 |
+
{file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"},
|
287 |
+
]
|
288 |
+
|
289 |
+
[package.dependencies]
|
290 |
+
mdurl = ">=0.1,<1.0"
|
291 |
+
|
292 |
+
[package.extras]
|
293 |
+
benchmarking = ["psutil", "pytest", "pytest-benchmark"]
|
294 |
+
code-style = ["pre-commit (>=3.0,<4.0)"]
|
295 |
+
compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"]
|
296 |
+
linkify = ["linkify-it-py (>=1,<3)"]
|
297 |
+
plugins = ["mdit-py-plugins"]
|
298 |
+
profiling = ["gprof2dot"]
|
299 |
+
rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"]
|
300 |
+
testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"]
|
301 |
+
|
302 |
+
[[package]]
|
303 |
+
name = "mccabe"
|
304 |
+
version = "0.7.0"
|
305 |
+
description = "McCabe checker, plugin for flake8"
|
306 |
+
optional = false
|
307 |
+
python-versions = ">=3.6"
|
308 |
+
files = [
|
309 |
+
{file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"},
|
310 |
+
{file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"},
|
311 |
+
]
|
312 |
+
|
313 |
+
[[package]]
|
314 |
+
name = "mdurl"
|
315 |
+
version = "0.1.2"
|
316 |
+
description = "Markdown URL utilities"
|
317 |
+
optional = false
|
318 |
+
python-versions = ">=3.7"
|
319 |
+
files = [
|
320 |
+
{file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"},
|
321 |
+
{file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"},
|
322 |
+
]
|
323 |
+
|
324 |
+
[[package]]
|
325 |
+
name = "mypy-extensions"
|
326 |
+
version = "1.0.0"
|
327 |
+
description = "Type system extensions for programs checked with the mypy type checker."
|
328 |
+
optional = false
|
329 |
+
python-versions = ">=3.5"
|
330 |
+
files = [
|
331 |
+
{file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"},
|
332 |
+
{file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
|
333 |
+
]
|
334 |
+
|
335 |
+
[[package]]
|
336 |
+
name = "nodeenv"
|
337 |
+
version = "1.8.0"
|
338 |
+
description = "Node.js virtual environment builder"
|
339 |
+
optional = false
|
340 |
+
python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*"
|
341 |
+
files = [
|
342 |
+
{file = "nodeenv-1.8.0-py2.py3-none-any.whl", hash = "sha256:df865724bb3c3adc86b3876fa209771517b0cfe596beff01a92700e0e8be4cec"},
|
343 |
+
{file = "nodeenv-1.8.0.tar.gz", hash = "sha256:d51e0c37e64fbf47d017feac3145cdbb58836d7eee8c6f6d3b6880c5456227d2"},
|
344 |
+
]
|
345 |
+
|
346 |
+
[package.dependencies]
|
347 |
+
setuptools = "*"
|
348 |
+
|
349 |
+
[[package]]
|
350 |
+
name = "packaging"
|
351 |
+
version = "23.2"
|
352 |
+
description = "Core utilities for Python packages"
|
353 |
+
optional = false
|
354 |
+
python-versions = ">=3.7"
|
355 |
+
files = [
|
356 |
+
{file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"},
|
357 |
+
{file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"},
|
358 |
+
]
|
359 |
+
|
360 |
+
[[package]]
|
361 |
+
name = "pathspec"
|
362 |
+
version = "0.11.2"
|
363 |
+
description = "Utility library for gitignore style pattern matching of file paths."
|
364 |
+
optional = false
|
365 |
+
python-versions = ">=3.7"
|
366 |
+
files = [
|
367 |
+
{file = "pathspec-0.11.2-py3-none-any.whl", hash = "sha256:1d6ed233af05e679efb96b1851550ea95bbb64b7c490b0f5aa52996c11e92a20"},
|
368 |
+
{file = "pathspec-0.11.2.tar.gz", hash = "sha256:e0d8d0ac2f12da61956eb2306b69f9469b42f4deb0f3cb6ed47b9cce9996ced3"},
|
369 |
+
]
|
370 |
+
|
371 |
+
[[package]]
|
372 |
+
name = "platformdirs"
|
373 |
+
version = "3.11.0"
|
374 |
+
description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
|
375 |
+
optional = false
|
376 |
+
python-versions = ">=3.7"
|
377 |
+
files = [
|
378 |
+
{file = "platformdirs-3.11.0-py3-none-any.whl", hash = "sha256:e9d171d00af68be50e9202731309c4e658fd8bc76f55c11c7dd760d023bda68e"},
|
379 |
+
{file = "platformdirs-3.11.0.tar.gz", hash = "sha256:cf8ee52a3afdb965072dcc652433e0c7e3e40cf5ea1477cd4b3b1d2eb75495b3"},
|
380 |
+
]
|
381 |
+
|
382 |
+
[package.extras]
|
383 |
+
docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.1)", "sphinx-autodoc-typehints (>=1.24)"]
|
384 |
+
test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)"]
|
385 |
+
|
386 |
+
[[package]]
|
387 |
+
name = "pluggy"
|
388 |
+
version = "1.3.0"
|
389 |
+
description = "plugin and hook calling mechanisms for python"
|
390 |
+
optional = false
|
391 |
+
python-versions = ">=3.8"
|
392 |
+
files = [
|
393 |
+
{file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"},
|
394 |
+
{file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"},
|
395 |
+
]
|
396 |
+
|
397 |
+
[package.extras]
|
398 |
+
dev = ["pre-commit", "tox"]
|
399 |
+
testing = ["pytest", "pytest-benchmark"]
|
400 |
+
|
401 |
+
[[package]]
|
402 |
+
name = "pycodestyle"
|
403 |
+
version = "2.11.1"
|
404 |
+
description = "Python style guide checker"
|
405 |
+
optional = false
|
406 |
+
python-versions = ">=3.8"
|
407 |
+
files = [
|
408 |
+
{file = "pycodestyle-2.11.1-py2.py3-none-any.whl", hash = "sha256:44fe31000b2d866f2e41841b18528a505fbd7fef9017b04eff4e2648a0fadc67"},
|
409 |
+
{file = "pycodestyle-2.11.1.tar.gz", hash = "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f"},
|
410 |
+
]
|
411 |
+
|
412 |
+
[[package]]
|
413 |
+
name = "pyflakes"
|
414 |
+
version = "3.1.0"
|
415 |
+
description = "passive checker of Python programs"
|
416 |
+
optional = false
|
417 |
+
python-versions = ">=3.8"
|
418 |
+
files = [
|
419 |
+
{file = "pyflakes-3.1.0-py2.py3-none-any.whl", hash = "sha256:4132f6d49cb4dae6819e5379898f2b8cce3c5f23994194c24b77d5da2e36f774"},
|
420 |
+
{file = "pyflakes-3.1.0.tar.gz", hash = "sha256:a0aae034c444db0071aa077972ba4768d40c830d9539fd45bf4cd3f8f6992efc"},
|
421 |
+
]
|
422 |
+
|
423 |
+
[[package]]
|
424 |
+
name = "pygments"
|
425 |
+
version = "2.16.1"
|
426 |
+
description = "Pygments is a syntax highlighting package written in Python."
|
427 |
+
optional = false
|
428 |
+
python-versions = ">=3.7"
|
429 |
+
files = [
|
430 |
+
{file = "Pygments-2.16.1-py3-none-any.whl", hash = "sha256:13fc09fa63bc8d8671a6d247e1eb303c4b343eaee81d861f3404db2935653692"},
|
431 |
+
{file = "Pygments-2.16.1.tar.gz", hash = "sha256:1daff0494820c69bc8941e407aa20f577374ee88364ee10a98fdbe0aece96e29"},
|
432 |
+
]
|
433 |
+
|
434 |
+
[package.extras]
|
435 |
+
plugins = ["importlib-metadata"]
|
436 |
+
|
437 |
+
[[package]]
|
438 |
+
name = "pyright"
|
439 |
+
version = "1.1.334"
|
440 |
+
description = "Command line wrapper for pyright"
|
441 |
+
optional = false
|
442 |
+
python-versions = ">=3.7"
|
443 |
+
files = [
|
444 |
+
{file = "pyright-1.1.334-py3-none-any.whl", hash = "sha256:dcb13e8358e021189672c4d6ebcad192ab061e4c7225036973ec493183c6da68"},
|
445 |
+
{file = "pyright-1.1.334.tar.gz", hash = "sha256:3adaf10f1f4209575dc022f9c897f7ef024639b7ea5b3cbe49302147e6949cd4"},
|
446 |
+
]
|
447 |
+
|
448 |
+
[package.dependencies]
|
449 |
+
nodeenv = ">=1.6.0"
|
450 |
+
|
451 |
+
[package.extras]
|
452 |
+
all = ["twine (>=3.4.1)"]
|
453 |
+
dev = ["twine (>=3.4.1)"]
|
454 |
+
|
455 |
+
[[package]]
|
456 |
+
name = "pytest"
|
457 |
+
version = "7.4.3"
|
458 |
+
description = "pytest: simple powerful testing with Python"
|
459 |
+
optional = false
|
460 |
+
python-versions = ">=3.7"
|
461 |
+
files = [
|
462 |
+
{file = "pytest-7.4.3-py3-none-any.whl", hash = "sha256:0d009c083ea859a71b76adf7c1d502e4bc170b80a8ef002da5806527b9591fac"},
|
463 |
+
{file = "pytest-7.4.3.tar.gz", hash = "sha256:d989d136982de4e3b29dabcc838ad581c64e8ed52c11fbe86ddebd9da0818cd5"},
|
464 |
+
]
|
465 |
+
|
466 |
+
[package.dependencies]
|
467 |
+
colorama = {version = "*", markers = "sys_platform == \"win32\""}
|
468 |
+
exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""}
|
469 |
+
iniconfig = "*"
|
470 |
+
packaging = "*"
|
471 |
+
pluggy = ">=0.12,<2.0"
|
472 |
+
tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}
|
473 |
+
|
474 |
+
[package.extras]
|
475 |
+
testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
|
476 |
+
|
477 |
+
[[package]]
|
478 |
+
name = "requests"
|
479 |
+
version = "2.31.0"
|
480 |
+
description = "Python HTTP for Humans."
|
481 |
+
optional = false
|
482 |
+
python-versions = ">=3.7"
|
483 |
+
files = [
|
484 |
+
{file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"},
|
485 |
+
{file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"},
|
486 |
+
]
|
487 |
+
|
488 |
+
[package.dependencies]
|
489 |
+
certifi = ">=2017.4.17"
|
490 |
+
charset-normalizer = ">=2,<4"
|
491 |
+
idna = ">=2.5,<4"
|
492 |
+
urllib3 = ">=1.21.1,<3"
|
493 |
+
|
494 |
+
[package.extras]
|
495 |
+
socks = ["PySocks (>=1.5.6,!=1.5.7)"]
|
496 |
+
use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
|
497 |
+
|
498 |
+
[[package]]
|
499 |
+
name = "rich"
|
500 |
+
version = "13.6.0"
|
501 |
+
description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
|
502 |
+
optional = false
|
503 |
+
python-versions = ">=3.7.0"
|
504 |
+
files = [
|
505 |
+
{file = "rich-13.6.0-py3-none-any.whl", hash = "sha256:2b38e2fe9ca72c9a00170a1a2d20c63c790d0e10ef1fe35eba76e1e7b1d7d245"},
|
506 |
+
{file = "rich-13.6.0.tar.gz", hash = "sha256:5c14d22737e6d5084ef4771b62d5d4363165b403455a30a1c8ca39dc7b644bef"},
|
507 |
+
]
|
508 |
+
|
509 |
+
[package.dependencies]
|
510 |
+
markdown-it-py = ">=2.2.0"
|
511 |
+
pygments = ">=2.13.0,<3.0.0"
|
512 |
+
|
513 |
+
[package.extras]
|
514 |
+
jupyter = ["ipywidgets (>=7.5.1,<9)"]
|
515 |
+
|
516 |
+
[[package]]
|
517 |
+
name = "ruff"
|
518 |
+
version = "0.1.4"
|
519 |
+
description = "An extremely fast Python linter and code formatter, written in Rust."
|
520 |
+
optional = false
|
521 |
+
python-versions = ">=3.7"
|
522 |
+
files = [
|
523 |
+
{file = "ruff-0.1.4-py3-none-macosx_10_7_x86_64.whl", hash = "sha256:864958706b669cce31d629902175138ad8a069d99ca53514611521f532d91495"},
|
524 |
+
{file = "ruff-0.1.4-py3-none-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:9fdd61883bb34317c788af87f4cd75dfee3a73f5ded714b77ba928e418d6e39e"},
|
525 |
+
{file = "ruff-0.1.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b4eaca8c9cc39aa7f0f0d7b8fe24ecb51232d1bb620fc4441a61161be4a17539"},
|
526 |
+
{file = "ruff-0.1.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a9a1301dc43cbf633fb603242bccd0aaa34834750a14a4c1817e2e5c8d60de17"},
|
527 |
+
{file = "ruff-0.1.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:78e8db8ab6f100f02e28b3d713270c857d370b8d61871d5c7d1702ae411df683"},
|
528 |
+
{file = "ruff-0.1.4-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:80fea754eaae06335784b8ea053d6eb8e9aac75359ebddd6fee0858e87c8d510"},
|
529 |
+
{file = "ruff-0.1.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6bc02a480d4bfffd163a723698da15d1a9aec2fced4c06f2a753f87f4ce6969c"},
|
530 |
+
{file = "ruff-0.1.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9862811b403063765b03e716dac0fda8fdbe78b675cd947ed5873506448acea4"},
|
531 |
+
{file = "ruff-0.1.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58826efb8b3efbb59bb306f4b19640b7e366967a31c049d49311d9eb3a4c60cb"},
|
532 |
+
{file = "ruff-0.1.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:fdfd453fc91d9d86d6aaa33b1bafa69d114cf7421057868f0b79104079d3e66e"},
|
533 |
+
{file = "ruff-0.1.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e8791482d508bd0b36c76481ad3117987301b86072158bdb69d796503e1c84a8"},
|
534 |
+
{file = "ruff-0.1.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:01206e361021426e3c1b7fba06ddcb20dbc5037d64f6841e5f2b21084dc51800"},
|
535 |
+
{file = "ruff-0.1.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:645591a613a42cb7e5c2b667cbefd3877b21e0252b59272ba7212c3d35a5819f"},
|
536 |
+
{file = "ruff-0.1.4-py3-none-win32.whl", hash = "sha256:99908ca2b3b85bffe7e1414275d004917d1e0dfc99d497ccd2ecd19ad115fd0d"},
|
537 |
+
{file = "ruff-0.1.4-py3-none-win_amd64.whl", hash = "sha256:1dfd6bf8f6ad0a4ac99333f437e0ec168989adc5d837ecd38ddb2cc4a2e3db8a"},
|
538 |
+
{file = "ruff-0.1.4-py3-none-win_arm64.whl", hash = "sha256:d98ae9ebf56444e18a3e3652b3383204748f73e247dea6caaf8b52d37e6b32da"},
|
539 |
+
{file = "ruff-0.1.4.tar.gz", hash = "sha256:21520ecca4cc555162068d87c747b8f95e1e95f8ecfcbbe59e8dd00710586315"},
|
540 |
+
]
|
541 |
+
|
542 |
+
[[package]]
|
543 |
+
name = "setuptools"
|
544 |
+
version = "68.2.2"
|
545 |
+
description = "Easily download, build, install, upgrade, and uninstall Python packages"
|
546 |
+
optional = false
|
547 |
+
python-versions = ">=3.8"
|
548 |
+
files = [
|
549 |
+
{file = "setuptools-68.2.2-py3-none-any.whl", hash = "sha256:b454a35605876da60632df1a60f736524eb73cc47bbc9f3f1ef1b644de74fd2a"},
|
550 |
+
{file = "setuptools-68.2.2.tar.gz", hash = "sha256:4ac1475276d2f1c48684874089fefcd83bd7162ddaafb81fac866ba0db282a87"},
|
551 |
+
]
|
552 |
+
|
553 |
+
[package.extras]
|
554 |
+
docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"]
|
555 |
+
testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"]
|
556 |
+
testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.1)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"]
|
557 |
+
|
558 |
+
[[package]]
|
559 |
+
name = "soupsieve"
|
560 |
+
version = "2.5"
|
561 |
+
description = "A modern CSS selector implementation for Beautiful Soup."
|
562 |
+
optional = false
|
563 |
+
python-versions = ">=3.8"
|
564 |
+
files = [
|
565 |
+
{file = "soupsieve-2.5-py3-none-any.whl", hash = "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7"},
|
566 |
+
{file = "soupsieve-2.5.tar.gz", hash = "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690"},
|
567 |
+
]
|
568 |
+
|
569 |
+
[[package]]
|
570 |
+
name = "tomli"
|
571 |
+
version = "2.0.1"
|
572 |
+
description = "A lil' TOML parser"
|
573 |
+
optional = false
|
574 |
+
python-versions = ">=3.7"
|
575 |
+
files = [
|
576 |
+
{file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
|
577 |
+
{file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
|
578 |
+
]
|
579 |
+
|
580 |
+
[[package]]
|
581 |
+
name = "typing-extensions"
|
582 |
+
version = "4.8.0"
|
583 |
+
description = "Backported and Experimental Type Hints for Python 3.8+"
|
584 |
+
optional = false
|
585 |
+
python-versions = ">=3.8"
|
586 |
+
files = [
|
587 |
+
{file = "typing_extensions-4.8.0-py3-none-any.whl", hash = "sha256:8f92fc8806f9a6b641eaa5318da32b44d401efaac0f6678c9bc448ba3605faa0"},
|
588 |
+
{file = "typing_extensions-4.8.0.tar.gz", hash = "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"},
|
589 |
+
]
|
590 |
+
|
591 |
+
[[package]]
|
592 |
+
name = "urllib3"
|
593 |
+
version = "2.0.7"
|
594 |
+
description = "HTTP library with thread-safe connection pooling, file post, and more."
|
595 |
+
optional = false
|
596 |
+
python-versions = ">=3.7"
|
597 |
+
files = [
|
598 |
+
{file = "urllib3-2.0.7-py3-none-any.whl", hash = "sha256:fdb6d215c776278489906c2f8916e6e7d4f5a9b602ccbcfdf7f016fc8da0596e"},
|
599 |
+
{file = "urllib3-2.0.7.tar.gz", hash = "sha256:c97dfde1f7bd43a71c8d2a58e369e9b2bf692d1334ea9f9cae55add7d0dd0f84"},
|
600 |
+
]
|
601 |
+
|
602 |
+
[package.extras]
|
603 |
+
brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"]
|
604 |
+
secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17.1.0)", "urllib3-secure-extra"]
|
605 |
+
socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
|
606 |
+
zstd = ["zstandard (>=0.18.0)"]
|
607 |
+
|
608 |
+
[metadata]
|
609 |
+
lock-version = "2.0"
|
610 |
+
python-versions = "^3.10"
|
611 |
+
content-hash = "dd127b620413062f17854364cb7006adb32eb0b27ebdc6163950489d6157610d"
|
pyproject.toml
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[tool.poetry]
|
2 |
+
name = "RAGScraper"
|
3 |
+
version = "11.5.2023"
|
4 |
+
description = "RAGScraper is a Python library designed for efficient and intelligent scraping of web documentation and content. Tailored for Retrieval-Augmented Generation systems, RAGScraper extracts and preprocesses text into structured, machine-learning-ready formats. It emphasizes precision, context preservation, and ease of integration with RAG models, making it an ideal tool for developers looking to enhance AI-driven applications with rich, web-sourced knowledge."
|
5 |
+
authors = ["kdcokenny <[email protected]>"]
|
6 |
+
license = "MIT"
|
7 |
+
readme = "README.md"
|
8 |
+
packages = [{include = "rag_scraper"}]
|
9 |
+
|
10 |
+
[tool.poetry.dependencies]
|
11 |
+
python = "^3.10"
|
12 |
+
requests = "^2.31.0"
|
13 |
+
beautifulsoup4 = "^4.12.2"
|
14 |
+
html2text = "^2020.1.16"
|
15 |
+
|
16 |
+
[tool.poetry.group.dev]
|
17 |
+
optional = true
|
18 |
+
|
19 |
+
[tool.poetry.group.dev.dependencies]
|
20 |
+
rich = "^13.6.0"
|
21 |
+
black = "^23.10.1"
|
22 |
+
flake8 = "^6.1.0"
|
23 |
+
ruff = "^0.1.4"
|
24 |
+
isort = "^5.12.0"
|
25 |
+
pyright = "^1.1.334"
|
26 |
+
pytest = "^7.4.3"
|
27 |
+
|
28 |
+
[build-system]
|
29 |
+
requires = ["poetry-core"]
|
30 |
+
build-backend = "poetry.core.masonry.api"
|
rag_scraper/__init__.py
ADDED
File without changes
|
rag_scraper/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (165 Bytes). View file
|
|
rag_scraper/__pycache__/cli.cpython-310.pyc
ADDED
Binary file (1.65 kB). View file
|
|
rag_scraper/__pycache__/converter.cpython-310.pyc
ADDED
Binary file (2.16 kB). View file
|
|
rag_scraper/__pycache__/link_extractor.cpython-310.pyc
ADDED
Binary file (2.33 kB). View file
|
|
rag_scraper/__pycache__/scraper.cpython-310.pyc
ADDED
Binary file (810 Bytes). View file
|
|
rag_scraper/__pycache__/utils.cpython-310.pyc
ADDED
Binary file (787 Bytes). View file
|
|
rag_scraper/cli.py
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
|
3 |
+
from rag_scraper.converter import Converter
|
4 |
+
from rag_scraper.link_extractor import LinkExtractor
|
5 |
+
from rag_scraper.scraper import Scraper
|
6 |
+
from rag_scraper.utils import URLUtils
|
7 |
+
|
8 |
+
|
9 |
+
def main():
    """Command-line entry point for RAGScraper.

    Parses the target URL and options, then performs one action:

    * ``--extract``: print every unique link found inside the selected
      element(s) of the page.
    * ``--convert``: fetch the page and print it as markdown.

    ``--extract`` wins when both flags are supplied. With neither flag a
    short usage hint is printed instead.
    """
    cli = argparse.ArgumentParser(
        description="RAGScraper: A tool to scrape, extract links, and convert webpages to markdown."
    )

    # Registration order is kept stable because it drives the --help layout.
    cli.add_argument("url", help="The URL of the webpage to scrape.")
    cli.add_argument(
        "--element_id",
        default=None,
        help="The ID of the element to search for links.",
    )
    cli.add_argument(
        "--element_type",
        default="nav",
        help='The type of the element to search for links. Default is "nav".',
    )
    cli.add_argument(
        "--convert",
        action="store_true",
        help="Convert the webpage to markdown.",
    )
    cli.add_argument(
        "--extract",
        action="store_true",
        help="Extract links from the specified element.",
    )

    options = cli.parse_args()
    target = options.url

    # Scheme + host of the target; handed to the converter in --convert mode.
    site_root = URLUtils.get_base_url(target)

    if options.extract:
        found_links = LinkExtractor.scrape_url(
            target,
            element_id=options.element_id,
            element_type=options.element_type,
        )
        print(f"Unique links for {target}:")
        for found in found_links:
            print(found)
        return

    if options.convert:
        page_html = Scraper.fetch_html(target)
        print(Converter.html_to_markdown(page_html, site_root))
        return

    print(
        "Please specify an action: --convert for markdown conversion or --extract for link extraction."
    )


if __name__ == "__main__":
    main()
|
rag_scraper/converter.py
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
from typing import Optional
|
3 |
+
from urllib.parse import urljoin
|
4 |
+
|
5 |
+
import html2text
|
6 |
+
from bs4 import BeautifulSoup, Tag
|
7 |
+
|
8 |
+
|
9 |
+
class Converter:
    """Convert HTML documents to markdown, rewriting media tags on the way."""

    @staticmethod
    def html_to_markdown(
        html: str,
        base_url: str,
        parser_features="html.parser",
        **conversion_options,
    ) -> str:
        """
        Convert an HTML document to markdown.

        :param html: The raw HTML to convert.
        :param base_url: URL against which relative media sources are resolved.
        :param parser_features: Parser name handed to BeautifulSoup.
        :param conversion_options: Attributes to set on the html2text
            converter before it runs (see ``convert_html_to_markdown``).
        :return: The markdown rendering of the document.
        """
        soup = BeautifulSoup(html, parser_features)
        cleaned_soup = Converter.replace_media_with_markdown(soup, base_url)
        return Converter.convert_html_to_markdown(
            str(cleaned_soup), **conversion_options
        )

    @staticmethod
    def replace_media_with_markdown(
        soup: BeautifulSoup, base_url: str
    ) -> BeautifulSoup:
        """
        Replace ``<img>`` and ``<video>`` tags with markdown image syntax.

        Tags whose ``src`` is an inline ``data:`` URI are removed from the
        tree. Tags without a ``src`` are left untouched. Every other tag is
        replaced in place by ``![<type>: <alt or title>](<absolute url>)``.

        :param soup: Parsed document; it is modified in place.
        :param base_url: URL against which relative ``src`` values are resolved.
        :return: The same ``soup`` object, after modification.
        """
        # Any data: URI counts as inline, whatever its MIME type or
        # parameters (e.g. "video/mp4", ";charset=utf-8").
        data_uri_pattern = re.compile(r"data:", re.IGNORECASE)

        def process_media_tag(tag: Tag, media_type: str) -> Optional[str]:
            media_url = (tag.get("src") or "").strip()
            if data_uri_pattern.match(media_url):
                tag.decompose()
                return None
            if not media_url:
                # Nothing to link to; leave the tag for html2text to handle.
                return None
            # urljoin resolves relative, root-relative ("/x") and
            # protocol-relative ("//host/x") references and returns
            # absolute URLs unchanged, so no manual slash stripping.
            media_url = urljoin(base_url, media_url)
            label = tag.get("alt", "") or tag.get("title", "")
            alt_text = f"{media_type}: {label}".strip()
            return f"![{alt_text}]({media_url})"

        for img in soup.find_all("img"):
            markdown_img = process_media_tag(img, "Image")
            if markdown_img:
                img.replace_with(markdown_img)

        for video in soup.find_all("video"):
            markdown_video = process_media_tag(video, "Video")
            if markdown_video:
                video.replace_with(markdown_video)

        return soup

    @staticmethod
    def convert_html_to_markdown(cleaned_html: str, **options) -> str:
        """
        Run html2text over ``cleaned_html``.

        :param cleaned_html: HTML source to convert.
        :param options: Each key/value pair is set as an attribute on the
            ``html2text.HTML2Text`` instance before conversion.
        :return: The markdown text produced by html2text.
        """
        converter = html2text.HTML2Text()
        for key, value in options.items():
            setattr(converter, key, value)
        return converter.handle(cleaned_html)
|
rag_scraper/link_extractor.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from enum import Enum, auto
|
2 |
+
from typing import Set
|
3 |
+
from urllib.parse import urljoin, urlparse
|
4 |
+
|
5 |
+
import requests
|
6 |
+
from bs4 import BeautifulSoup
|
7 |
+
|
8 |
+
|
9 |
+
class LinkType(Enum):
    """Selects which links ``LinkExtractor.scrape_url`` keeps."""

    # Keep every link, whatever its host.
    ALL = auto()
    # Keep only links whose host equals the scraped page's host.
    INTERNAL = auto()
    # Keep only links whose host differs from the scraped page's host.
    EXTERNAL = auto()
|
13 |
+
|
14 |
+
|
15 |
+
class LinkExtractor:
    """Collect hyperlinks from selected elements of a web page."""

    @staticmethod
    def scrape_url(
        url: str, link_type: LinkType = LinkType.ALL, **kwargs
    ) -> Set[str]:
        """
        Scrape a given URL for unique links within a specified element, with an option to choose between internal, external, or all links.
        Converts relative URLs to absolute URLs.

        :param url: The URL of the website to scrape.
        :param link_type: The type of links to scrape (LinkType.ALL, LinkType.INTERNAL, LinkType.EXTERNAL).
        :param kwargs: Optional settings:
            ``element_id`` - id the searched element must have (default: any);
            ``element_type`` - tag name to search within (default ``"nav"``);
            ``timeout`` - seconds to wait for the server (default 10).
        :return: A set of unique link URLs found within the specified element.
            An empty set is returned (and a message printed) on any failure.
        """
        element_id = kwargs.get("element_id")
        element_type = kwargs.get("element_type", "nav")
        # Without a timeout requests can block forever on a stalled server.
        timeout = kwargs.get("timeout", 10)
        # Host of the scraped page, computed once instead of per link.
        page_domain = urlparse(url).netloc

        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")

            if element_id:
                fetched_elements = soup.find_all(element_type, id=element_id)
            else:
                fetched_elements = soup.find_all(element_type)

            links = set()

            # Iterate over all found elements and extract links
            for element in fetched_elements:
                for a_tag in element.find_all("a", href=True):
                    absolute_url = urljoin(url, a_tag["href"])
                    is_internal = urlparse(absolute_url).netloc == page_domain

                    if (
                        link_type == LinkType.ALL
                        or (link_type == LinkType.INTERNAL and is_internal)
                        or (link_type == LinkType.EXTERNAL and not is_internal)
                    ):
                        links.add(absolute_url)

            return links
        except requests.RequestException as e:
            print(f"Request failed for {url}: {e}")
            return set()
        except Exception as e:
            # Deliberate best-effort: report and return nothing rather than
            # crash the caller on an unexpected parsing problem.
            print(f"An error occurred: {e}")
            return set()
|
rag_scraper/scraper.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
from bs4 import BeautifulSoup
|
3 |
+
|
4 |
+
|
5 |
+
class Scraper:
    """Thin helpers for downloading a page and parsing it."""

    @staticmethod
    def fetch_html(url: str, timeout: float = 10.0) -> str:
        """
        Download ``url`` and return the response body as text.

        :param url: The URL of the page to fetch.
        :param timeout: Seconds to wait for the server before giving up.
            Without a timeout the request could block indefinitely.
        :return: The decoded response body.
        :raises requests.RequestException: On connection problems, timeouts,
            or a 4xx/5xx status (via ``raise_for_status``).
        """
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return response.text

    @staticmethod
    def get_soup(html_content: str, **parser_options) -> BeautifulSoup:
        """
        Parse ``html_content`` with BeautifulSoup's ``html.parser``.

        :param html_content: The HTML source to parse.
        :param parser_options: Extra keyword arguments forwarded to the
            ``BeautifulSoup`` constructor.
        :return: The parsed document.
        """
        return BeautifulSoup(html_content, "html.parser", **parser_options)
|
rag_scraper/utils.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from urllib.parse import urljoin, urlparse
|
2 |
+
|
3 |
+
|
4 |
+
class URLUtils:
    """Small URL helpers built on ``urllib.parse``."""

    @staticmethod
    def get_base_url(url: str) -> str:
        """Return ``<scheme>://<netloc>`` for ``url``, dropping path, query and fragment."""
        parts = urlparse(url)
        return "://".join((parts.scheme, parts.netloc))

    @staticmethod
    def resolve_url(src: str, base_url: str) -> str:
        """Resolve ``src`` against ``base_url`` and return the combined URL."""
        resolved = urljoin(base_url, src)
        return resolved
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
requests
|
3 |
+
beautifulsoup4
|
4 |
+
lxml
|
5 |
+
|
tests/__init__.py
ADDED
File without changes
|
tests/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (159 Bytes). View file
|
|
tests/__pycache__/test_cli.cpython-310-pytest-7.4.3.pyc
ADDED
Binary file (1.51 kB). View file
|
|
tests/__pycache__/test_cli.cpython-310.pyc
ADDED
Binary file (1.4 kB). View file
|
|
tests/__pycache__/test_converter.cpython-310-pytest-7.4.3.pyc
ADDED
Binary file (3.16 kB). View file
|
|
tests/__pycache__/test_converter.cpython-310.pyc
ADDED
Binary file (2.01 kB). View file
|
|
tests/__pycache__/test_link_extractor.cpython-310-pytest-7.4.3.pyc
ADDED
Binary file (1.26 kB). View file
|
|
tests/__pycache__/test_link_extractor.cpython-310.pyc
ADDED
Binary file (805 Bytes). View file
|
|
tests/__pycache__/test_utils.cpython-310-pytest-7.4.3.pyc
ADDED
Binary file (1.35 kB). View file
|
|
tests/__pycache__/test_utils.cpython-310.pyc
ADDED
Binary file (702 Bytes). View file
|
|
tests/test_converter.py
ADDED
File without changes
|
tests/test_link_extractor.py
ADDED
File without changes
|
tests/test_scraper.py
ADDED
File without changes
|
tests/test_utils.py
ADDED
File without changes
|