Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix syncing kickstart repositories with no "root" repo #3481

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES/3452.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Allow kickstart repositories without any "root" repo to be synced.
96 changes: 45 additions & 51 deletions pulp_rpm/app/tasks/synchronizing.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,47 +263,6 @@ def fetch_mirror(remote):
return None


def fetch_remote_url(remote, custom_url=None):
"""Fetch a single remote from which can be content synced."""

def normalize_url(url_to_normalize):
return url_to_normalize.rstrip("/") + "/"

url = custom_url or remote.url

try:
normalized_remote_url = normalize_url(url)
get_repomd_file(remote, normalized_remote_url)
# just check if the metadata exists
return normalized_remote_url
except ClientResponseError as exc:
# If 'custom_url' is passed it is a call from ACS refresh
# which doesn't support mirror lists.
if custom_url:
raise ValueError(
_(
"ACS remote for url '{}' raised an error '{}: {}'. "
"Please check your ACS remote configuration."
).format(custom_url, exc.status, exc.message)
)
log.info(
_("Attempting to resolve a true url from potential mirrolist url '{}'").format(url)
)
remote_url = fetch_mirror(remote)
if remote_url:
log.info(
_("Using url '{}' from mirrorlist in place of the provided url {}").format(
remote_url, url
)
)
return normalize_url(remote_url)

if exc.status == 404:
raise ValueError(_("An invalid remote URL was provided: {}").format(url))

raise exc


def should_optimize_sync(sync_details, last_sync_details):
"""
Check whether the sync should be optimized by comparing its parameters with the previous sync.
Expand Down Expand Up @@ -469,9 +428,10 @@ def get_sync_details(remote, url, sync_policy, repository):
def is_subrepo(directory):
return directory != PRIMARY_REPO

with tempfile.TemporaryDirectory(dir="."):
remote_url = fetch_remote_url(remote, url)
# normalize
remote_url = (url or remote.url).rstrip("/") + "/"

with tempfile.TemporaryDirectory(dir="."):
# Find and set up to deal with any subtrees
treeinfo = get_treeinfo_data(remote, remote_url)
if treeinfo:
Expand Down Expand Up @@ -506,14 +466,44 @@ def is_subrepo(directory):
"repo": sub_repo,
}

# Set up to deal with the primary repository
sync_details = get_sync_details(remote, remote_url, sync_policy, repository)
repo_sync_config[PRIMARY_REPO] = {
"should_skip": should_optimize_sync(sync_details, repository.last_sync_details),
"sync_details": sync_details,
"url": remote_url,
"repo": repository,
}
try:
get_repomd_file(remote, remote_url)
except ClientResponseError as exc:
if not treeinfo:
# If 'custom_url' is passed it is a call from ACS refresh
# which doesn't support mirror lists.
if url:
raise ValueError(
_(
"ACS remote for url '{}' raised an error '{}: {}'. "
"Please check your ACS remote configuration."
).format(custom_url, exc.status, exc.message)
)
log.info(
_("Attempting to resolve a true url from potential mirrolist url '{}'").format(url)
)
remote_url = fetch_mirror(remote)
if remote_url:
log.info(
_("Using url '{}' from mirrorlist in place of the provided url {}").format(
remote_url, url
)
)
return normalize_url(remote_url)

if exc.status == 404:
raise ValueError(_("An invalid remote URL was provided: {}").format(url))

raise exc
finally:
# Set up to deal with the primary repository
sync_details = get_sync_details(remote, remote_url, sync_policy, repository)
repo_sync_config[PRIMARY_REPO] = {
"should_skip": should_optimize_sync(sync_details, repository.last_sync_details),
"sync_details": sync_details,
"url": remote_url,
"repo": repository,
}

# If all repos are exactly the same, we should skip all further processing, even in
# metadata-mirror mode
Expand Down Expand Up @@ -571,6 +561,10 @@ def is_subrepo(directory):
with RpmPublication.create(
repo_sync_results[PRIMARY_REPO], pass_through=False
) as publication:
# TODO: how do we create a publication if we have no "root" repository version? we need to pick one... it isn't obvious which one
# Or can we pick an empty / the previous repository version of the current repo?"

# TODO: no new repository version == no autopublish. that's kind of fundamental to how autopublish works currently, I'm not sure how to work around it
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ipanova @ggainey

What do you think the most reasonable answers are, here. This second question I'm not even sure we can fix without a rework of autopublish which would fall under a separate issue.

gpgcheck = repository.repo_config.get("gpgcheck", 0)
has_repomd_signature = (
"repodata/repomd.xml.asc" in metadata_files_for_mirroring[str(repository.pk)].keys()
Expand Down
14 changes: 14 additions & 0 deletions pulp_rpm/tests/functional/api/test_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
RPM_COMPLEX_PACKAGE_DATA,
RPM_FIXTURE_SUMMARY,
RPM_INVALID_FIXTURE_URL,
RPM_NO_ROOT_REPO_TREEINFO_FIXTURE_URL,
RPM_KICKSTART_DATA,
RPM_KICKSTART_FIXTURE_SUMMARY,
RPM_KICKSTART_FIXTURE_URL,
Expand Down Expand Up @@ -138,6 +139,19 @@ def test_sync_from_invalid_mirror_list_feed(init_and_sync):
assert "An invalid remote URL was provided" in exc.value.task.to_dict()["error"]["description"]


@pytest.mark.parallel
def test_sync_from_treeinfo_repo_with_empty_root(init_and_sync):
"""Sync RPM content from a remote that has no root repo, but does have subrepos."""
# test that it is successful if you're not skipping treeinfo
init_and_sync(url=RPM_NO_ROOT_REPO_TREEINFO_FIXTURE_URL, skip_types=["treeinfo"])

# test that if you do skip treeinfo, the repo looks empty, and fails
with pytest.raises(PulpTaskError) as exc:
init_and_sync(url=RPM_NO_ROOT_REPO_TREEINFO_FIXTURE_URL, skip_types=["treeinfo"])

assert "An invalid remote URL was provided" in exc.value.task.to_dict()["error"]["description"]


@pytest.mark.parallel
def test_sync_modular(init_and_sync):
"""Sync RPM modular content."""
Expand Down
2 changes: 2 additions & 0 deletions pulp_rpm/tests/functional/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@

RPM_INVALID_FIXTURE_URL = urljoin(PULP_FIXTURES_BASE_URL, "rpm-missing-filelists/")

RPM_NO_ROOT_REPO_TREEINFO_FIXTURE_URL = urljoin(PULP_FIXTURES_BASE_URL, "rpm-distribution-tree-empty-root/")

RPM_MIRROR_LIST_GOOD_FIXTURE_URL = urljoin(PULP_FIXTURES_BASE_URL, "rpm-mirrorlist-good")
RPM_MIRROR_LIST_BAD_FIXTURE_URL = urljoin(PULP_FIXTURES_BASE_URL, "rpm-mirrorlist-bad")

Expand Down
Loading