From fc04949abf973ae0aab09b5ec837d775f5c5b0c5 Mon Sep 17 00:00:00 2001 From: Daniel Alley Date: Sun, 24 Mar 2024 23:52:04 -0400 Subject: [PATCH] Fix syncing kickstart repositories with no "root" repo closes #3452 --- CHANGES/3452.bugfix | 1 + pulp_rpm/app/tasks/synchronizing.py | 96 ++++++++++------------ pulp_rpm/tests/functional/api/test_sync.py | 14 ++++ pulp_rpm/tests/functional/constants.py | 2 + 4 files changed, 62 insertions(+), 51 deletions(-) create mode 100644 CHANGES/3452.bugfix diff --git a/CHANGES/3452.bugfix b/CHANGES/3452.bugfix new file mode 100644 index 000000000..4cf106056 --- /dev/null +++ b/CHANGES/3452.bugfix @@ -0,0 +1 @@ +Allow kickstart repositories without any "root" repo to be synced. diff --git a/pulp_rpm/app/tasks/synchronizing.py b/pulp_rpm/app/tasks/synchronizing.py index 57cea6322..568cd98ea 100644 --- a/pulp_rpm/app/tasks/synchronizing.py +++ b/pulp_rpm/app/tasks/synchronizing.py @@ -263,47 +263,6 @@ def fetch_mirror(remote): return None -def fetch_remote_url(remote, custom_url=None): - """Fetch a single remote from which can be content synced.""" - - def normalize_url(url_to_normalize): - return url_to_normalize.rstrip("/") + "/" - - url = custom_url or remote.url - - try: - normalized_remote_url = normalize_url(url) - get_repomd_file(remote, normalized_remote_url) - # just check if the metadata exists - return normalized_remote_url - except ClientResponseError as exc: - # If 'custom_url' is passed it is a call from ACS refresh - # which doesn't support mirror lists. - if custom_url: - raise ValueError( - _( - "ACS remote for url '{}' raised an error '{}: {}'. " - "Please check your ACS remote configuration." - ).format(custom_url, exc.status, exc.message) - ) - log.info( - _("Attempting to resolve a true url from potential mirrolist url '{}'").format(url) - ) - remote_url = fetch_mirror(remote) - if remote_url: - log.info( - _("Using url '{}' from mirrorlist in place of the provided url {}").format( - remote_url, url - ) - ) - return normalize_url(remote_url) - - if exc.status == 404: - raise ValueError(_("An invalid remote URL was provided: {}").format(url)) - - raise exc - - def should_optimize_sync(sync_details, last_sync_details): """ Check whether the sync should be optimized by comparing its parameters with the previous sync. @@ -469,9 +428,10 @@ def get_sync_details(remote, url, sync_policy, repository): def is_subrepo(directory): return directory != PRIMARY_REPO - with tempfile.TemporaryDirectory(dir="."): - remote_url = fetch_remote_url(remote, url) + # normalize + remote_url = (url or remote.url).rstrip("/") + "/" + with tempfile.TemporaryDirectory(dir="."): # Find and set up to deal with any subtrees treeinfo = get_treeinfo_data(remote, remote_url) if treeinfo: @@ -506,14 +466,44 @@ def is_subrepo(directory): "repo": sub_repo, } - # Set up to deal with the primary repository - sync_details = get_sync_details(remote, remote_url, sync_policy, repository) - repo_sync_config[PRIMARY_REPO] = { - "should_skip": should_optimize_sync(sync_details, repository.last_sync_details), - "sync_details": sync_details, - "url": remote_url, - "repo": repository, - } + try: + get_repomd_file(remote, remote_url) + except ClientResponseError as exc: + if not treeinfo: + # If 'custom_url' is passed it is a call from ACS refresh + # which doesn't support mirror lists. + if url: + raise ValueError( + _( + "ACS remote for url '{}' raised an error '{}: {}'. " + "Please check your ACS remote configuration." + ).format(custom_url, exc.status, exc.message) + ) + log.info( + _("Attempting to resolve a true url from potential mirrolist url '{}'").format(url) + ) + remote_url = fetch_mirror(remote) + if remote_url: + log.info( + _("Using url '{}' from mirrorlist in place of the provided url {}").format( + remote_url, url + ) + ) + return normalize_url(remote_url) + + if exc.status == 404: + raise ValueError(_("An invalid remote URL was provided: {}").format(url)) + + raise exc + finally: + # Set up to deal with the primary repository + sync_details = get_sync_details(remote, remote_url, sync_policy, repository) + repo_sync_config[PRIMARY_REPO] = { + "should_skip": should_optimize_sync(sync_details, repository.last_sync_details), + "sync_details": sync_details, + "url": remote_url, + "repo": repository, + } # If all repos are exactly the same, we should skip all further processing, even in # metadata-mirror mode @@ -571,6 +561,10 @@ def is_subrepo(directory): with RpmPublication.create( repo_sync_results[PRIMARY_REPO], pass_through=False ) as publication: + # TODO: how do we create a publication if we have no "root" repository version? we need to pick one... it isn't obvious which one + # Or can we pick an empty / the previous repository version of the current repo?" + + # TODO: no new repository version == no autopublish. that's kind of fundamental to how autopublish works currently, I'm not sure how to work around it gpgcheck = repository.repo_config.get("gpgcheck", 0) has_repomd_signature = ( "repodata/repomd.xml.asc" in metadata_files_for_mirroring[str(repository.pk)].keys() diff --git a/pulp_rpm/tests/functional/api/test_sync.py b/pulp_rpm/tests/functional/api/test_sync.py index 9422a9f37..63189fdb1 100644 --- a/pulp_rpm/tests/functional/api/test_sync.py +++ b/pulp_rpm/tests/functional/api/test_sync.py @@ -45,6 +45,7 @@ RPM_COMPLEX_PACKAGE_DATA, RPM_FIXTURE_SUMMARY, RPM_INVALID_FIXTURE_URL, + RPM_NO_ROOT_REPO_TREEINFO_FIXTURE_URL, RPM_KICKSTART_DATA, RPM_KICKSTART_FIXTURE_SUMMARY, RPM_KICKSTART_FIXTURE_URL, @@ -138,6 +139,19 @@ def test_sync_from_invalid_mirror_list_feed(init_and_sync): assert "An invalid remote URL was provided" in exc.value.task.to_dict()["error"]["description"] +@pytest.mark.parallel +def test_sync_from_treeinfo_repo_with_empty_root(init_and_sync): + """Sync RPM content from a remote that has no root repo, but does have subrepos.""" + # test that it is successful if you're not skipping treeinfo + init_and_sync(url=RPM_NO_ROOT_REPO_TREEINFO_FIXTURE_URL, skip_types=["treeinfo"]) + + # test that if you do skip treeinfo, the repo looks empty, and fails + with pytest.raises(PulpTaskError) as exc: + init_and_sync(url=RPM_NO_ROOT_REPO_TREEINFO_FIXTURE_URL, skip_types=["treeinfo"]) + + assert "An invalid remote URL was provided" in exc.value.task.to_dict()["error"]["description"] + + @pytest.mark.parallel def test_sync_modular(init_and_sync): """Sync RPM modular content.""" diff --git a/pulp_rpm/tests/functional/constants.py b/pulp_rpm/tests/functional/constants.py index 5b866efa9..4fcb188c3 100644 --- a/pulp_rpm/tests/functional/constants.py +++ b/pulp_rpm/tests/functional/constants.py @@ -90,6 +90,8 @@ RPM_INVALID_FIXTURE_URL = urljoin(PULP_FIXTURES_BASE_URL, "rpm-missing-filelists/") +RPM_NO_ROOT_REPO_TREEINFO_FIXTURE_URL = urljoin(PULP_FIXTURES_BASE_URL, "rpm-distribution-tree-empty-root/") + RPM_MIRROR_LIST_GOOD_FIXTURE_URL = urljoin(PULP_FIXTURES_BASE_URL, "rpm-mirrorlist-good") RPM_MIRROR_LIST_BAD_FIXTURE_URL = urljoin(PULP_FIXTURES_BASE_URL, "rpm-mirrorlist-bad")