Merge remote-tracking branch 'refs/remotes/origin/main'
crawl/crawl  CHANGED  (+4 -3)
```diff
@@ -18,6 +18,7 @@ import time
 import argparse
 from urllib.parse import urljoin, urlparse
 import requests
+
 try:
     from crawl4ai import WebCrawler  # type: ignore
 except ImportError as exc:
@@ -185,8 +186,8 @@ def retry_crawl(inner_url):
             save_result(inner_url)
             return
         except (AttributeError, ValueError) as inner_e:
-            print(f"[ERROR] 🚫 Failed to crawl {inner_url}, "
-                  f"error: {str(inner_e)}")
+            print(f"\033[91m[ERROR] 🚫 Failed to crawl {inner_url}, "
+                  f"error: {str(inner_e)}\033[0m")
             print("Retrying in 3 seconds...")
             time.sleep(3)
 
@@ -223,4 +224,4 @@ if __name__ == "__main__":
         try:
             future.result()
         except (AttributeError, ValueError) as e:
-            print(f"[ERROR] 🚫 Failed to crawl, error: {str(e)}")
+            print(f"\033[91m[ERROR] 🚫 Failed to crawl, error: {str(e)}\033[0m")
```
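The functional change is cosmetic: the two crawl-failure messages are wrapped in ANSI escape sequences so they render in bright red on the terminal (`\033[91m` switches to bright-red text, `\033[0m` resets all attributes). Below is a minimal sketch of the same pattern; the `red()` helper and the `isatty()` guard are illustrative assumptions, not code from this repository.

```python
import sys

# ANSI escape codes as used in the commit: 91 = bright red, 0 = reset.
RED = "\033[91m"
RESET = "\033[0m"

def red(text: str) -> str:
    """Wrap text in bright-red ANSI codes when stdout is a terminal.

    Hypothetical helper: skipping the codes for non-TTY output (pipes,
    log files) avoids writing raw escape sequences into captured logs.
    """
    if sys.stdout.isatty():
        return f"{RED}{text}{RESET}"
    return text

# Reproduces the style of the commit's error line with a sample URL.
print(red("[ERROR] 🚫 Failed to crawl https://example.com, error: timeout"))
```

One caveat with the committed version: it emits the escape codes unconditionally, so redirected output (for example `crawl ... > log.txt`) will contain raw `\033[91m` sequences; guarding on `sys.stdout.isatty()` as sketched above is a common way to avoid that.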