From a4f750859225236a0139991e5ede1d91756dde49 Mon Sep 17 00:00:00 2001 From: q0w <43147888+q0w@users.noreply.github.com> Date: Sun, 27 Feb 2022 15:39:06 +0300 Subject: [PATCH] Drop the doctype check (#10906) Co-authored-by: Pradyun Gedam --- news/10903.removal.rst | 1 + src/pip/_internal/exceptions.py | 24 ---------------- src/pip/_internal/index/collector.py | 27 +----------------- tests/functional/test_install_index.py | 16 +++++++++++ tests/unit/test_collector.py | 38 -------------------------- 5 files changed, 18 insertions(+), 88 deletions(-) create mode 100644 news/10903.removal.rst diff --git a/news/10903.removal.rst b/news/10903.removal.rst new file mode 100644 index 00000000000..42c996f6e4e --- /dev/null +++ b/news/10903.removal.rst @@ -0,0 +1 @@ +Drop the doctype check, that presented a warning for index pages that use non-compliant HTML 5. diff --git a/src/pip/_internal/exceptions.py b/src/pip/_internal/exceptions.py index dd20370bd10..97b9612a187 100644 --- a/src/pip/_internal/exceptions.py +++ b/src/pip/_internal/exceptions.py @@ -181,30 +181,6 @@ class UninstallationError(PipError): """General exception during uninstallation""" -class BadHTMLDoctypeDeclaration(DiagnosticPipError): - reference = "bad-index-doctype" - - def __init__(self, *, url: str) -> None: - super().__init__( - kind="warning", - message=( - "The package index page being used does not have a proper HTML " - "doctype declaration." - ), - context=f"Problematic URL: {escape(url)}", - note_stmt="This is an issue with the page at the URL mentioned above.", - hint_stmt=( - "You might need to reach out to the owner of that package index, " - "to get this fixed. " - "See https://github.com/pypa/pip/issues/10825 for context." - ), - ) - - -class MissingHTMLDoctypeDeclaration(BadHTMLDoctypeDeclaration): - reference = "missing-index-doctype" - - class MissingPyProjectBuildRequires(DiagnosticPipError): """Raised when pyproject.toml has `build-system`, but no `build-system.requires`.""" diff --git a/src/pip/_internal/index/collector.py b/src/pip/_internal/index/collector.py index ff722cfeb85..e6e9469af1a 100644 --- a/src/pip/_internal/index/collector.py +++ b/src/pip/_internal/index/collector.py @@ -32,11 +32,7 @@ from pip._vendor.requests import Response from pip._vendor.requests.exceptions import RetryError, SSLError -from pip._internal.exceptions import ( - BadHTMLDoctypeDeclaration, - MissingHTMLDoctypeDeclaration, - NetworkConnectionError, -) +from pip._internal.exceptions import NetworkConnectionError from pip._internal.models.link import Link from pip._internal.models.search_scope import SearchScope from pip._internal.network.session import PipSession @@ -401,33 +397,12 @@ class HTMLLinkParser(HTMLParser): def __init__(self, url: str) -> None: super().__init__(convert_charrefs=True) - self._dealt_with_doctype_issues = False self.url: str = url self.base_url: Optional[str] = None self.anchors: List[Dict[str, Optional[str]]] = [] - def handle_decl(self, decl: str) -> None: - self._dealt_with_doctype_issues = True - match = re.match( - r"""doctype\s+html\s*(?:SYSTEM\s+(["'])about:legacy-compat\1)?\s*$""", - decl, - re.IGNORECASE, - ) - if match is None: - logger.warning( - "[present-diagnostic] %s", - BadHTMLDoctypeDeclaration(url=self.url), - ) - def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None: - if not self._dealt_with_doctype_issues: - logger.warning( - "[present-diagnostic] %s", - MissingHTMLDoctypeDeclaration(url=self.url), - ) - self._dealt_with_doctype_issues = True - if tag == "base" and self.base_url is None: href = self.get_href(attrs) if href is not None: diff --git a/tests/functional/test_install_index.py b/tests/functional/test_install_index.py index 3308de504ac..a492863b542 100644 --- a/tests/functional/test_install_index.py +++ b/tests/functional/test_install_index.py @@ -1,4 +1,5 @@ import os +import shutil import textwrap import urllib.parse @@ -24,6 +25,21 @@ def test_find_links_relative_path(script: PipTestEnvironment, data: TestData) -> result.did_create(initools_folder) +def test_find_links_no_doctype(script: PipTestEnvironment, data: TestData) -> None: + shutil.copy(data.packages / "simple-1.0.tar.gz", script.scratch_path) + html = script.scratch_path.joinpath("index.html") + html.write_text('') + result = script.pip( + "install", + "simple==1.0", + "--no-index", + "--find-links", + script.scratch_path, + expect_stderr=True, + ) + assert not result.stderr + + @pytest.mark.usefixtures("with_wheel") def test_find_links_requirements_file_relative_path( script: PipTestEnvironment, data: TestData diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py index 219e0c0cc4c..f77794b55b9 100644 --- a/tests/unit/test_collector.py +++ b/tests/unit/test_collector.py @@ -551,44 +551,6 @@ def test_parse_link_handles_deprecated_usage_properly() -> None: assert "pkg1-2.0" in parsed_links[1].url -def test_parse_links_presents_warning_on_missing_doctype( - caplog: pytest.LogCaptureFixture, -) -> None: - html = b'' - url = "https://example.com/simple/" - page = HTMLPage(html, encoding=None, url=url, cache_link_parsing=False) - - with caplog.at_level(logging.WARN): - parsed_links = list(parse_links(page, use_deprecated_html5lib=False)) - - assert len(parsed_links) == 2, parsed_links - assert "pkg1-1.0" in parsed_links[0].url - assert "pkg1-2.0" in parsed_links[1].url - - assert len(caplog.records) == 1 - - -def test_parse_links_presents_warning_on_html4_doctype( - caplog: pytest.LogCaptureFixture, -) -> None: - html = ( - b'' - b'' - ) - url = "https://example.com/simple/" - page = HTMLPage(html, encoding=None, url=url, cache_link_parsing=False) - - with caplog.at_level(logging.WARN): - parsed_links = list(parse_links(page, use_deprecated_html5lib=False)) - - assert len(parsed_links) == 2, parsed_links - assert "pkg1-1.0" in parsed_links[0].url - assert "pkg1-2.0" in parsed_links[1].url - - assert len(caplog.records) == 1 - - @mock.patch("pip._internal.index.collector.raise_for_status") def test_request_http_error( mock_raise_for_status: mock.Mock, caplog: pytest.LogCaptureFixture