k4d3 committed on
Commit
e4302bc
1 Parent(s): fc02b0f

Revert "fix: Improve URL validation to catch invalid labels in image downloads"

Browse files

This reverts commit fc02b0fa6a40ba2798140e2743d47ea796e184b7.

Files changed (1) hide show
  1. crawl/crawl +0 -4
crawl/crawl CHANGED
@@ -80,10 +80,6 @@ def download_image(session, image_url, save_dir):
80
  if not parsed_url.scheme or not parsed_url.netloc or ".." in parsed_url.path:
81
  raise ValueError(f"Invalid URL: {image_url}")
82
 
83
- # Additional check for invalid labels in the netloc
84
- if any(part == ".." for part in parsed_url.netloc.split(".")):
85
- raise ValueError(f"Invalid URL: {image_url}")
86
-
87
  image_filename = os.path.basename(parsed_url.path).split("?")[0]
88
  sanitized_image_filename = sanitize_filename(image_filename)
89
  image_path = os.path.join(save_dir, sanitized_image_filename)
 
80
  if not parsed_url.scheme or not parsed_url.netloc or ".." in parsed_url.path:
81
  raise ValueError(f"Invalid URL: {image_url}")
82
 
 
 
 
 
83
  image_filename = os.path.basename(parsed_url.path).split("?")[0]
84
  sanitized_image_filename = sanitize_filename(image_filename)
85
  image_path = os.path.join(save_dir, sanitized_image_filename)