# Spaces: Running Running  (hosting-page status residue; not part of the module)
import logging
import mimetypes
import os
import pathlib
from typing import Callable, Iterable, Optional, Tuple

from pip._internal.models.candidate import InstallationCandidate
from pip._internal.models.link import Link
from pip._internal.utils.urls import path_to_url, url_to_path
from pip._internal.vcs import is_url
logger = logging.getLogger(__name__)

# Iterable of installation candidates, as returned by ``page_candidates``.
FoundCandidates = Iterable[InstallationCandidate]
# Iterable of links, as returned by ``file_links``.
FoundLinks = Iterable[Link]
# Callback that takes a page link and returns the candidates found for it.
CandidatesFromPage = Callable[[Link], Iterable[InstallationCandidate]]
# Predicate over a link; _RemoteFileSource calls it before parsing a page.
PageValidator = Callable[[Link], bool]
class LinkSource:
    """Base interface for a place where links and candidates can be found.

    Every method here raises ``NotImplementedError``; concrete sources
    override all three.
    """

    # NOTE(review): ``link`` is a plain method in this copy. If callers access
    # it as an attribute, a ``@property`` decorator may have been lost when
    # the file was captured -- confirm against the call sites.
    def link(self) -> Optional[Link]:
        """Returns the underlying link, if there's one."""
        raise NotImplementedError()

    def page_candidates(self) -> FoundCandidates:
        """Candidates found by parsing an archive listing HTML file."""
        raise NotImplementedError()

    def file_links(self) -> FoundLinks:
        """Links found by specifying archives directly."""
        raise NotImplementedError()
def _is_html_file(file_url: str) -> bool: | |
return mimetypes.guess_type(file_url, strict=False)[0] == "text/html" | |
class _FlatDirectorySource(LinkSource):
    """Link source specified by ``--find-links=<path-to-dir>``.

    This looks at the content of the directory, and returns:

    * ``page_candidates``: Links listed on each HTML file in the directory.
    * ``file_links``: A link for every non-HTML entry in the directory.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        path: str,
    ) -> None:
        """Store the page-parsing callback and the directory to scan.

        *path* is passed through ``os.path.realpath`` before being wrapped
        in a ``pathlib.Path``.
        """
        self._candidates_from_page = candidates_from_page
        self._path = pathlib.Path(os.path.realpath(path))

    def link(self) -> Optional[Link]:
        """Always ``None``: a flat directory has no single underlying link."""
        return None

    def page_candidates(self) -> FoundCandidates:
        """Yield candidates parsed from each HTML entry of the directory."""
        for entry in self._path.iterdir():
            entry_url = path_to_url(str(entry))
            if _is_html_file(entry_url):
                yield from self._candidates_from_page(Link(entry_url))

    def file_links(self) -> FoundLinks:
        """Yield a link for each directory entry that is not an HTML file."""
        for entry in self._path.iterdir():
            entry_url = path_to_url(str(entry))
            if not _is_html_file(entry_url):
                yield Link(entry_url)
class _LocalFileSource(LinkSource):
    """``--find-links=<path-or-url>`` or ``--[extra-]index-url=<path-or-url>``.

    If a URL is supplied, it must be a ``file:`` URL. If a path is supplied to
    the option, it is converted to a URL first. This returns:

    * ``page_candidates``: Links listed on an HTML file.
    * ``file_links``: The non-HTML file.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        link: Link,
    ) -> None:
        """Store the page-parsing callback and the link to the local file."""
        self._candidates_from_page = candidates_from_page
        self._link = link

    def link(self) -> Optional[Link]:
        """Return the link this source was created with."""
        return self._link

    def page_candidates(self) -> FoundCandidates:
        """Yield candidates parsed from the file, only if it is an HTML file."""
        if _is_html_file(self._link.url):
            yield from self._candidates_from_page(self._link)

    def file_links(self) -> FoundLinks:
        """Yield the link itself, only if the file is not an HTML file."""
        if not _is_html_file(self._link.url):
            yield self._link
class _RemoteFileSource(LinkSource):
    """``--find-links=<url>`` or ``--[extra-]index-url=<url>``.

    This returns:

    * ``page_candidates``: Links listed on the page, if the page validator
      accepts the link.
    * ``file_links``: The link itself, yielded unconditionally.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        page_validator: PageValidator,
        link: Link,
    ) -> None:
        """Store the page-parsing callback, the validator, and the link."""
        self._candidates_from_page = candidates_from_page
        self._page_validator = page_validator
        self._link = link

    def link(self) -> Optional[Link]:
        """Return the link this source was created with."""
        return self._link

    def page_candidates(self) -> FoundCandidates:
        """Yield candidates parsed from the link if the validator accepts it."""
        if self._page_validator(self._link):
            yield from self._candidates_from_page(self._link)

    def file_links(self) -> FoundLinks:
        """Yield the link itself; no HTML check is made for remote links."""
        yield self._link
class _IndexDirectorySource(LinkSource):
    """``--[extra-]index-url=<path-to-directory>``.

    This is treated like a remote URL; ``candidates_from_page`` contains logic
    for this by appending ``index.html`` to the link.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        link: Link,
    ) -> None:
        """Store the page-parsing callback and the link to the directory."""
        self._candidates_from_page = candidates_from_page
        self._link = link

    def link(self) -> Optional[Link]:
        """Return the link this source was created with."""
        return self._link

    def page_candidates(self) -> FoundCandidates:
        """Yield whatever candidates the page callback finds for the link."""
        yield from self._candidates_from_page(self._link)

    def file_links(self) -> FoundLinks:
        """Return an empty tuple: an index directory offers no direct links."""
        return ()
def build_source(
    location: str,
    *,
    candidates_from_page: CandidatesFromPage,
    page_validator: PageValidator,
    expand_dir: bool,
    cache_link_parsing: bool,
) -> Tuple[Optional[str], Optional[LinkSource]]:
    """Classify *location* and build the matching link source.

    :param location: A filesystem path, a ``file:`` URL, or another URL.
    :param candidates_from_page: Callback handed to the created source.
    :param page_validator: Predicate handed to ``_RemoteFileSource`` only.
    :param expand_dir: For a directory, choose ``_FlatDirectorySource`` when
        true and ``_IndexDirectorySource`` when false.
    :param cache_link_parsing: Forwarded to every ``Link`` created here.
    :returns: ``(url, source)`` on success; ``(None, None)`` when *location*
        is neither an existing path nor a URL; ``(url, None)`` when it maps
        to a local path that is neither a file nor a directory. A warning is
        logged in both failure cases.
    """
    local_path: Optional[str] = None
    resolved_url: Optional[str] = None

    if os.path.exists(location):  # Is a local path.
        resolved_url = path_to_url(location)
        local_path = location
    elif location.startswith("file:"):  # A file: URL.
        resolved_url = location
        local_path = url_to_path(location)
    elif is_url(location):
        resolved_url = location

    if resolved_url is None:
        logger.warning(
            "Location '%s' is ignored: "
            "it is either a non-existing path or lacks a specific scheme.",
            location,
        )
        return (None, None)

    # No local path means the location is a non-file URL.
    if local_path is None:
        source: LinkSource = _RemoteFileSource(
            candidates_from_page=candidates_from_page,
            page_validator=page_validator,
            link=Link(resolved_url, cache_link_parsing=cache_link_parsing),
        )
        return (resolved_url, source)

    if os.path.isdir(local_path):
        if expand_dir:
            source = _FlatDirectorySource(
                candidates_from_page=candidates_from_page,
                path=local_path,
            )
        else:
            source = _IndexDirectorySource(
                candidates_from_page=candidates_from_page,
                link=Link(resolved_url, cache_link_parsing=cache_link_parsing),
            )
        return (resolved_url, source)

    if os.path.isfile(local_path):
        source = _LocalFileSource(
            candidates_from_page=candidates_from_page,
            link=Link(resolved_url, cache_link_parsing=cache_link_parsing),
        )
        return (resolved_url, source)

    # Reached for a file: URL whose path is missing, or for a path that is
    # neither a regular file nor a directory.
    logger.warning(
        "Location '%s' is ignored: it is neither a file nor a directory.",
        location,
    )
    return (resolved_url, None)