Skip to content

Commit

Permalink
Add mirror option to optimize sync
Browse files Browse the repository at this point in the history
[noissue]
  • Loading branch information
hstct committed Feb 27, 2024
1 parent adabeae commit 9b46441
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 13 deletions.
96 changes: 90 additions & 6 deletions pulp_deb/app/tasks/synchronizing.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,43 @@
log = logging.getLogger(__name__)


def get_distribution_release_file_artifact_set_sha256(distribution, remote):
    """
    Compute one sha256 digest over the Release file artifacts of a distribution.

    Each filename in ``ReleaseFile.SUPPORTED_ARTIFACTS`` is fetched from the remote.
    Files whose download raises ``FileNotFoundError`` are left out. The digest is
    taken over one newline-terminated ``<filename>,<sha256>`` entry per fetched file,
    in the order the filenames were tried.

    Args:
        distribution: The distribution string. A trailing "/" means the Release files
            are looked up under that path itself rather than under
            ``dists/<distribution>``.
        remote: Object providing ``url`` and ``get_downloader(url=...)``.

    Returns:
        The hex digest (str) of the sha256 over the collected entries.
    """
    log.info(_('Downloading Release file for distribution: "{}"').format(distribution))
    release_file_dir = (
        distribution.strip("/")
        if distribution[-1] == "/"
        else os.path.join("dists", distribution)
    )
    base_url = os.path.join(remote.url, release_file_dir)

    # Maps each successfully fetched filename to the sha256 of its content.
    sha256_by_filename = {}
    for filename in ReleaseFile.SUPPORTED_ARTIFACTS:
        url = os.path.join(base_url, filename)
        log.info(f"DOWNLOADER URL: {url}")
        downloader = remote.get_downloader(url=url)
        try:
            result = downloader.fetch()
        except FileNotFoundError:
            # This artifact is not available for the distribution; leave it out.
            continue
        sha256_by_filename[filename] = result.artifact_attributes["sha256"]

    hash_string = "".join(
        filename + "," + sha256 + "\n" for filename, sha256 in sha256_by_filename.items()
    )
    return hashlib.sha256(hash_string.encode("utf-8")).hexdigest()


def get_previous_release_file(previous_version, distribution):
    """
    Return the ReleaseFile of a distribution contained in a repository version.

    Args:
        previous_version: Object whose ``get_content()`` narrows a ReleaseFile
            queryset to the content of that version.
        distribution: The distribution string the ReleaseFile is filtered by.

    Returns:
        The result of ``first()`` on the narrowed queryset.

    Raises:
        Exception: If more than one ReleaseFile matches the distribution.
    """
    candidates = ReleaseFile.objects.filter(distribution=distribution)
    matches = previous_version.get_content(candidates)
    if matches.count() <= 1:
        return matches.first()

    message = "Previous ReleaseFile count: {}. There should only be one."
    raise Exception(message.format(matches.count()))


class NoReleaseFile(Exception):
"""
Exception to signal, that no file representing a release is present.
Expand Down Expand Up @@ -185,6 +222,24 @@ def synchronize(remote_pk, repository_pk, mirror, optimize):
if not remote.url:
raise ValueError(_("A remote must have a url specified to synchronize."))

should_skip = False
for dist in remote.distributions.split():
artifact_set_sha256 = get_distribution_release_file_artifact_set_sha256(dist, remote)
previous_release_file = get_previous_release_file(previous_repo_version, dist)
if previous_release_file and previous_release_file.artifact_set_sha256 == artifact_set_sha256:
should_skip = True
break


if should_skip:
log.info("No change in ReleaseFiles detected. Skipping sync.")
with ProgressReport(
message="Skipping sync (no changes for any ReleaseFile)",
code="sync.complete_skip.was_skipped",
) as pb:
asyncio.run(pb.aincrement())
return

first_stage = DebFirstStage(remote, optimize, mirror, previous_repo_version)
DebDeclarativeVersion(first_stage, repository, mirror=mirror).create()

Expand Down Expand Up @@ -564,12 +619,6 @@ def __init__(self, remote, optimize, mirror, previous_repo_version, *args, **kwa
elif mirror and not previous_sync_info["sync_options"]["mirror"]:
log.info(_("Setting optimize=False since this sync switches to mirror=True."))
self.optimize = False
# TODO: https://github.com/pulp/pulp_deb/issues/631
if mirror:
log.info(_("Falling back to optimize=False behaviour since mirror=True is set!"))
log.info(_("See https://github.com/pulp/pulp_deb/issues/631 for more information."))
self.optimize = False
self.sync_info["sync_options"]["optimize"] = False

async def run(self):
"""
Expand Down Expand Up @@ -612,6 +661,41 @@ def _gen_remote_options(self):
"ignore_missing_package_indices": self.remote.ignore_missing_package_indices,
}

async def _create_release_file(self, distribution):
    """
    Create the ReleaseFile unit for a distribution, unless optimize mode can skip it.

    Args:
        distribution: The distribution string. A trailing "/" means the Release files
            are looked up under that path itself rather than under
            ``dists/<distribution>``.

    Returns:
        The created release file, or ``None`` when ``_create_unit`` returned ``None``
        or when optimize mode found the release file unchanged since the previous
        repository version (its package indices are then re-added instead).
    """
    log.info(_('Downloading Release file for distribution: "{}"').format(distribution))
    # Create release_file
    if distribution[-1] == "/":
        release_file_dir = distribution.strip("/")
    else:
        release_file_dir = os.path.join("dists", distribution)
    release_file_dc = DeclarativeContent(
        content=ReleaseFile(distribution=distribution, relative_path=release_file_dir),
        d_artifacts=[
            self._to_d_artifact(os.path.join(release_file_dir, filename))
            for filename in ReleaseFile.SUPPORTED_ARTIFACTS
        ],
    )
    release_file = await self._create_unit(release_file_dc)
    if release_file is None:
        return
    if self.optimize:
        previous_release_file = await _get_previous_release_file(
            self.previous_repo_version, distribution
        )
        # NOTE(review): assumes the helper yields a falsy value (e.g. None) when the
        # previous version has no ReleaseFile for this distribution; without the
        # guard that case would fail on the attribute access below.
        if (
            previous_release_file
            and previous_release_file.artifact_set_sha256 == release_file.artifact_set_sha256
        ):
            await _readd_previous_package_indices(
                self.previous_repo_version, self.new_version, distribution
            )
            message = 'ReleaseFile has not changed for distribution="{}". Skipping'
            log.info(_(message).format(distribution))
            async with ProgressReport(
                message="Skipping ReleaseFile sync (no change from previous sync)",
                code="sync.release_file.was_skipped",
            ) as pb:
                await pb.aincrement()
            return
    return release_file

async def _handle_distribution(self, distribution):
log.info(_('Downloading Release file for distribution: "{}"').format(distribution))
# Create release_file
Expand Down
35 changes: 28 additions & 7 deletions pulp_deb/tests/functional/api/test_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
DEB_INSTALLER_SOURCE_FIXTURE_SUMMARY,
DEB_REPORT_CODE_SKIP_PACKAGE,
DEB_REPORT_CODE_SKIP_RELEASE,
DEB_REPORT_CODE_SKIP_COMPLETE,
DEB_SIGNING_KEY,
)
from pulp_deb.tests.functional.utils import get_counts_from_content_summary
Expand Down Expand Up @@ -47,7 +48,7 @@ def test_sync(

# Verify latest `repository_version` is 1 and sync was not skipped
assert repo.latest_version_href.endswith("/1/")
assert not is_sync_skipped(task, DEB_REPORT_CODE_SKIP_RELEASE)
assert not is_sync_skipped(task, DEB_REPORT_CODE_SKIP_COMPLETE)

# Verify that the repo content and added content matches the summary
assert get_counts_from_content_summary(summary.present) == fixture_summary
Expand All @@ -60,7 +61,7 @@ def test_sync(

# Verify that the latest `repository_version` is still 1 and sync was skipped
assert repo.latest_version_href.endswith("/1/")
assert is_sync_skipped(task_skip, DEB_REPORT_CODE_SKIP_RELEASE)
assert is_sync_skipped(task_skip, DEB_REPORT_CODE_SKIP_COMPLETE)

# Verify that the repo content still matches the summary
assert get_counts_from_content_summary(summary.present) == fixture_summary
Expand Down Expand Up @@ -119,6 +120,7 @@ def test_sync_missing_package_indices(
"repo_name, remote_args, expected",
[
("http://i-am-an-invalid-url.com/invalid/", {}, ["Cannot connect"]),
# FIXME: the expected message for the case below is now wrong and must be updated.
(
DEB_FIXTURE_STANDARD_REPOSITORY_NAME,
{"distributions": "no_dist"},
Expand Down Expand Up @@ -165,6 +167,7 @@ def test_sync_invalid_cases(

@pytest.mark.parallel
@pytest.mark.parametrize(
# FIRST TWO NOW FAIL -> NO PACKAGE SKIP ONLY POSSIBLE
"remote_name, remote_args, remote_diff_name, remote_diff_args",
[
(
Expand Down Expand Up @@ -311,9 +314,7 @@ def test_sync_optimize_skip_unchanged_package_index(


@pytest.mark.parallel
def test_sync_optimize_switch_to_no_mirror(
deb_init_and_sync,
):
def test_sync_optimize_switch_to_no_mirror(deb_init_and_sync):
"""
Test that when syncing a repo with mirror=True, and then re-syncing that repo with
mirror=False, optimize=True, the releases will be skipped by optimize mode.
Expand All @@ -322,13 +323,33 @@ def test_sync_optimize_switch_to_no_mirror(
sync_args = {"mirror": True}
repo, remote, task = deb_init_and_sync(sync_args=sync_args, return_task=True)
assert repo.latest_version_href.endswith("/1/")
assert not is_sync_skipped(task, DEB_REPORT_CODE_SKIP_RELEASE)
assert not is_sync_skipped(task, DEB_REPORT_CODE_SKIP_COMPLETE)
sync_args = {"optimize": True, "mirror": False}
repo, _, task = deb_init_and_sync(
repository=repo, remote=remote, sync_args=sync_args, return_task=True
)
assert repo.latest_version_href.endswith("/1/")
assert is_sync_skipped(task, DEB_REPORT_CODE_SKIP_RELEASE)
assert is_sync_skipped(task, DEB_REPORT_CODE_SKIP_COMPLETE)


@pytest.mark.parallel
def test_sync_optimize_with_mirror_enabled(deb_init_and_sync):
    """Test that re-syncing with mirror=True is skipped completely by optimize mode."""
    sync_args = {"mirror": True}

    # The initial sync creates version 1 and must not be reported as skipped.
    repo, remote, task = deb_init_and_sync(sync_args=sync_args, return_task=True)
    assert repo.latest_version_href.endswith("/1/")
    assert not is_sync_skipped(task, DEB_REPORT_CODE_SKIP_COMPLETE)

    # Re-syncing the same repository and remote with identical options must leave
    # the repository at version 1 and report a complete skip.
    resync_kwargs = {
        "repository": repo,
        "remote": remote,
        "sync_args": sync_args,
        "return_task": True,
    }
    repo, _, task = deb_init_and_sync(**resync_kwargs)
    assert repo.latest_version_href.endswith("/1/")
    assert is_sync_skipped(task, DEB_REPORT_CODE_SKIP_COMPLETE)


def test_sync_orphan_cleanup_fail(
Expand Down
1 change: 1 addition & 0 deletions pulp_deb/tests/functional/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ def _clean_dict(d):

DEB_FIXTURE_PACKAGE_COUNT = DEB_FIXTURE_SUMMARY.get(DEB_PACKAGE_NAME, 0)

# Progress report codes the functional sync tests look for to detect skipped work.
# Code of the report created when the whole sync is skipped.
DEB_REPORT_CODE_SKIP_COMPLETE = "sync.complete_skip.was_skipped"
# Code of the report created when an unchanged ReleaseFile is skipped.
DEB_REPORT_CODE_SKIP_RELEASE = "sync.release_file.was_skipped"
# Presumably the code reported when a package index is skipped -- TODO confirm.
DEB_REPORT_CODE_SKIP_PACKAGE = "sync.package_index.was_skipped"

Expand Down

0 comments on commit 9b46441

Please sign in to comment.