Revert "fix: Improve URL validation to catch invalid labels in image downloads"
Browse files
This reverts commit fc02b0fa6a40ba2798140e2743d47ea796e184b7.
- crawl/crawl +0 -4
crawl/crawl
CHANGED
@@ -80,10 +80,6 @@ def download_image(session, image_url, save_dir):
|
|
80 |
if not parsed_url.scheme or not parsed_url.netloc or ".." in parsed_url.path:
|
81 |
raise ValueError(f"Invalid URL: {image_url}")
|
82 |
|
83 |
-
# Additional check for invalid labels in the netloc
|
84 |
-
if any(part == ".." for part in parsed_url.netloc.split(".")):
|
85 |
-
raise ValueError(f"Invalid URL: {image_url}")
|
86 |
-
|
87 |
image_filename = os.path.basename(parsed_url.path).split("?")[0]
|
88 |
sanitized_image_filename = sanitize_filename(image_filename)
|
89 |
image_path = os.path.join(save_dir, sanitized_image_filename)
|
|
|
80 |
if not parsed_url.scheme or not parsed_url.netloc or ".." in parsed_url.path:
|
81 |
raise ValueError(f"Invalid URL: {image_url}")
|
82 |
|
|
|
|
|
|
|
|
|
83 |
image_filename = os.path.basename(parsed_url.path).split("?")[0]
|
84 |
sanitized_image_filename = sanitize_filename(image_filename)
|
85 |
image_path = os.path.join(save_dir, sanitized_image_filename)
|