diff --git a/.authors.yml b/.authors.yml index 4e6c03865d..e662a35c1a 100644 --- a/.authors.yml +++ b/.authors.yml @@ -611,7 +611,7 @@ first_commit: 2015-08-30 06:44:37 - name: Marcel Bargull email: marcel.bargull@udo.edu - num_commits: 76 + num_commits: 77 first_commit: 2016-09-26 11:45:54 github: mbargull alternate_emails: @@ -1201,7 +1201,7 @@ alternate_emails: - clee@anaconda.com - name: Ken Odegard - num_commits: 158 + num_commits: 159 email: kodegard@anaconda.com first_commit: 2020-09-08 19:53:41 github: kenodegard diff --git a/CHANGELOG.md b/CHANGELOG.md index e376dc1422..62edf32ae8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,17 @@ [//]: # (current developments) +## 3.28.4 (2024-01-17) + +### Bug fixes + +* Fix linking check regressions by restoring pre-3.28 behavior for `conda_build.inspect_pkg.which_package`. (#5141) + +### Contributors + +* @mbargull + + + ## 3.28.3 (2024-01-04) ### Bug fixes diff --git a/conda_build/inspect_pkg.py b/conda_build/inspect_pkg.py index d5650681f6..bb69707456 100644 --- a/conda_build/inspect_pkg.py +++ b/conda_build/inspect_pkg.py @@ -9,7 +9,7 @@ from functools import lru_cache from itertools import groupby from operator import itemgetter -from os.path import abspath, basename, dirname, exists, join +from os.path import abspath, basename, dirname, exists, join, normcase from pathlib import Path from tempfile import TemporaryDirectory from typing import Iterable, Literal @@ -68,43 +68,20 @@ def which_package( Given the path (of a (presumably) conda installed file) iterate over the conda packages the file came from. Usually the iteration yields only one package. - - We use lstat since a symlink doesn't clobber the file it points to. """ - prefix = Path(prefix) - - # historically, path was relative to prefix, just to be safe we append to prefix - # get lstat before calling _file_package_mapping in case path doesn't exist try: - lstat = (prefix / path).lstat() - except FileNotFoundError: - # FileNotFoundError: path doesn't exist - return - else: - yield from _file_package_mapping(prefix).get(lstat, ()) - + path = Path(path).relative_to(prefix) + except ValueError: + # ValueError: path is already relative to prefix + pass + # On Windows, be lenient and allow case-insensitive path comparisons. + # NOTE: On macOS, although case-insensitive filesystem is default, still + # require case-sensitive matches (i.e., normcase on macOS is a no-op). + normcase_path = normcase(path) -@lru_cache(maxsize=None) -def _file_package_mapping(prefix: Path) -> dict[os.stat_result, set[PrefixRecord]]: - """Map paths to package records. - - We use lstat since a symlink doesn't clobber the file it points to. - """ - mapping: dict[os.stat_result, set[PrefixRecord]] = {} for prec in PrefixData(str(prefix)).iter_records(): - for file in prec["files"]: - # packages are capable of removing files installed by other dependencies from - # the build prefix, in those cases lstat will fail, while which_package wont - # return the correct package(s) in such a condition we choose to not worry about - # it since this file to package lookup exists primarily to detect clobbering - try: - lstat = (prefix / file).lstat() - except FileNotFoundError: - # FileNotFoundError: path doesn't exist - continue - else: - mapping.setdefault(lstat, set()).add(prec) - return mapping + if normcase_path in (normcase(file) for file in prec["files"]): + yield prec def print_object_info(info, key): diff --git a/tests/test_inspect_pkg.py b/tests/test_inspect_pkg.py index 97aa9228db..2f35bd3b0e 100644 --- a/tests/test_inspect_pkg.py +++ b/tests/test_inspect_pkg.py @@ -11,6 +11,7 @@ from conda.core.prefix_data import PrefixData from conda_build.inspect_pkg import which_package +from conda_build.utils import on_win def test_which_package(tmp_path: Path): @@ -24,15 +25,27 @@ def test_which_package(tmp_path: Path): (tmp_path / "internal").symlink_to(tmp_path / "hardlinkA") # packageA (tmp_path / "external").symlink_to(tmp_path / "hardlinkB") # packageA (tmp_path / "hardlinkB").touch() # packageB + # Files might be deleted from the prefix during the build, but they should + # still be recognized since they will be present in the run environment. + (tmp_path / "deleted").unlink(missing_ok=True) # packageA + (tmp_path / "deleted_shared").unlink(missing_ok=True) # packageA & packageB - # a dummy package with a hardlink file, shared file, internal softlink, and external softlink + # a dummy package with a hardlink file, shared file, internal softlink, + # external softlink, deleted file, and deleted shared file (tmp_path / "conda-meta" / "packageA-1-0.json").write_text( json.dumps( { "build": "0", "build_number": 0, "channel": "packageA-channel", - "files": ["hardlinkA", "shared", "internal", "external"], + "files": [ + "hardlinkA", + "shared", + "internal", + "external", + "deleted", + "deleted_shared", + ], "name": "packageA", "paths_data": { "paths": [ @@ -56,6 +69,16 @@ def test_which_package(tmp_path: Path): "path_type": "softlink", "size_in_bytes": 0, }, + { + "_path": "deleted", + "path_type": "hardlink", + "size_in_bytes": 0, + }, + { + "_path": "deleted_shared", + "path_type": "hardlink", + "size_in_bytes": 0, + }, ], "paths_version": 1, }, @@ -63,14 +86,14 @@ def test_which_package(tmp_path: Path): } ) ) - # a dummy package with a hardlink file and shared file + # a dummy package with a hardlink file, shared file, and deleted shared file (tmp_path / "conda-meta" / "packageB-1-0.json").write_text( json.dumps( { "build": "0", "build_number": 0, "channel": "packageB-channel", - "files": ["hardlinkB", "shared"], + "files": ["hardlinkB", "shared", "deleted_shared"], "name": "packageB", "paths_data": { "paths": [ @@ -84,6 +107,11 @@ def test_which_package(tmp_path: Path): "path_type": "hardlink", "size_in_bytes": 0, }, + { + "_path": "deleted_shared", + "path_type": "hardlink", + "size_in_bytes": 0, + }, ], "paths_version": 1, }, @@ -101,6 +129,14 @@ def test_which_package(tmp_path: Path): precs_missing = list(which_package(tmp_path / "missing", tmp_path)) assert not precs_missing + precs_Hardlinka = list(which_package(tmp_path / "Hardlinka", tmp_path)) + if on_win: + # On Windows, be lenient and allow case-insensitive path comparisons. + assert len(precs_Hardlinka) == 1 + assert set(precs_Hardlinka) == {precA} + else: + assert not precs_Hardlinka + precs_hardlinkA = list(which_package(tmp_path / "hardlinkA", tmp_path)) assert len(precs_hardlinkA) == 1 assert set(precs_hardlinkA) == {precA} @@ -121,6 +157,52 @@ def test_which_package(tmp_path: Path): assert len(precs_hardlinkB) == 1 assert set(precs_hardlinkB) == {precB} + precs_deleted = list(which_package(tmp_path / "deleted", tmp_path)) + assert len(precs_deleted) == 1 + assert set(precs_deleted) == {precA} + + precs_deleted_shared = list(which_package(tmp_path / "deleted_shared", tmp_path)) + assert len(precs_deleted_shared) == 2 + assert set(precs_deleted_shared) == {precA, precB} + + # reuse environment, regression test for #5136 + (tmp_path / "conda-meta" / "packageA-1-0.json").unlink() + (tmp_path / "conda-meta" / "packageB-1-0.json").unlink() + + # a dummy package with a hardlink file + (tmp_path / "conda-meta" / "packageC-1-0.json").write_text( + json.dumps( + { + "build": "0", + "build_number": 0, + "channel": "packageC-channel", + "files": ["hardlinkA"], + "name": "packageC", + "paths_data": { + "paths": [ + { + "_path": "hardlinkA", + "path_type": "hardlink", + "size_in_bytes": 0, + } + ], + "paths_version": 1, + }, + "version": "1", + } + ) + ) + + # fetch package records + PrefixData._cache_.clear() + pd = PrefixData(tmp_path) + precC = pd.get("packageC") + + # test returned package records given a path + precs_reused = list(which_package(tmp_path / "hardlinkA", tmp_path)) + assert len(precs_reused) == 1 + assert set(precs_reused) == {precC} + @pytest.mark.benchmark def test_which_package_battery(tmp_path: Path): @@ -172,10 +254,12 @@ def test_which_package_battery(tmp_path: Path): assert len(list(which_package(path, tmp_path))) == 1 - # removed files should return no packages - # this occurs when, e.g., a package removes files installed by another package + # removed files should still return a package + # this occurs when, e.g., a build script removes files installed by another package + # (post-install scripts removing files from the run environment is less + # likely and not covered) for file in removed: - assert not len(list(which_package(tmp_path / file, tmp_path))) + assert len(list(which_package(tmp_path / file, tmp_path))) == 1 # missing files should return no packages assert not len(list(which_package(tmp_path / "missing", tmp_path)))