Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Add support for repository-size command #3313

Merged
merged 1 commit into from
Jan 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES/3312.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added support for ``repository-size`` management command.
59 changes: 59 additions & 0 deletions pulp_rpm/app/models/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,11 @@
Artifact,
AsciiArmoredDetachedSigningService,
Content,
ContentArtifact,
Remote,
RemoteArtifact,
Repository,
RepositoryContent,
RepositoryVersion,
Publication,
Distribution,
Expand Down Expand Up @@ -258,6 +261,62 @@ def on_new_version(self, version):
compression_type=self.compression_type,
)

def all_content_pks(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason these all need to be properties as opposed to functions?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That one could be a function, but disk_size and on_demand_size need to be properties because that is how they are defined in pulpcore.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please make this one and on_demand_artifacts_for_version into normal functions

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

on_demand_artifacts_for_version has to be a static method as that is how it is in pulpcore.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I misread it as @property. It's fine then

"""Returns a list of pks for all content stored across all versions."""
# NOTE: the value returned at the end is a queryset built with values_list("content")
# (no flat=True), not a materialised Python list.
# Step 1: one RepositoryContent row per distinct content unit ever associated with
# this repository, reduced to the content reference only.
all_content = (
RepositoryContent.objects.filter(repository=self)
.distinct("content")
.values_list("content")
)
# Step 2: start from this repository and add every repository reported by the
# DistributionTree units found among that content.
repos = {self.pk}
# NOTE(review): only() is called without field names; presumably meant to limit the
# loaded columns -- confirm it has the intended effect.
for dt in DistributionTree.objects.only().filter(pk__in=all_content):
repos.update(dt.repositories().values_list("pk", flat=True))
# Step 3: repeat the distinct-content query across the widened set of repositories.
return (
RepositoryContent.objects.filter(repository__in=repos)
.distinct("content")
.values_list("content")
)

@property
def disk_size(self):
    """
    Approximate on-disk size of all artifacts stored across all versions.

    Sums the ``size`` field of every distinct Artifact attached to the content
    returned by ``all_content_pks()``; the aggregate defaults to 0 when nothing matches.
    """
    stored_artifacts = Artifact.objects.filter(content__in=self.all_content_pks()).distinct()
    totals = stored_artifacts.aggregate(size=models.Sum("size", default=0))
    return totals["size"]

@property
def on_demand_size(self):
    """
    Approximate size of all on-demand artifacts stored across all versions.

    Only ContentArtifacts without a saved artifact are considered, and for each of
    them a single RemoteArtifact with a known (non-null) size contributes to the total.
    """
    not_downloaded = ContentArtifact.objects.filter(
        content__in=self.all_content_pks(), artifact=None
    )
    # Aggregate does not work with distinct("fields") so sum must be done manually
    one_remote_per_ca = RemoteArtifact.objects.filter(
        content_artifact__in=not_downloaded, size__isnull=False
    ).distinct("content_artifact")
    remote_sizes = one_remote_per_ca.values_list("size", flat=True)
    return sum(remote_sizes)

@staticmethod
def on_demand_artifacts_for_version(version):
    """
    Collect the remote artifacts of on-demand content for a repository version.

    Overrides the default behavior so that content reported by the DistributionTree
    units in the version (i.e. from nested repos) is taken into account as well.
    Note: remote artifacts with a null size are excluded from the result.

    Args:
        version (pulpcore.app.models.RepositoryVersion): version to inspect.
    Returns:
        django.db.models.QuerySet: RemoteArtifacts (with a size) for content in this
            version that has no downloaded artifact.
    """
    pks = set(version.content.values_list("pk", flat=True))
    trees_in_version = DistributionTree.objects.filter(pk__in=pks)
    for distribution_tree in trees_in_version:
        # Widen the pk set with everything the tree itself reports as content.
        pks.update(distribution_tree.content().values_list("pk", flat=True))
    without_artifact = ContentArtifact.objects.filter(content__in=pks, artifact=None)
    return RemoteArtifact.objects.filter(
        content_artifact__in=without_artifact, size__isnull=False
    )

@staticmethod
def artifacts_for_version(version):
"""
Expand Down
3 changes: 3 additions & 0 deletions pulp_rpm/tests/functional/api/test_pulpimport.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,7 @@ def test_create_missing_repos(
gen_object_with_cleanup,
importers_pulp_imports_api_client,
monitor_task_group,
add_to_cleanup,
):
"""
Tests for PulpImporter and create-missing-repos.
Expand Down Expand Up @@ -392,6 +393,8 @@ def test_create_missing_repos(
# Find the repos we just created
rpm_repo = rpm_repository_api.list(name=saved_entities["rpm-repo"].name).results[0]
ks_repo = rpm_repository_api.list(name=saved_entities["ks-repo"].name).results[0]
add_to_cleanup(rpm_repository_api, rpm_repo.pulp_href)
add_to_cleanup(rpm_repository_api, ks_repo.pulp_href)

# 7. Inspect the results
# Step 7a
Expand Down
71 changes: 71 additions & 0 deletions pulp_rpm/tests/functional/api/test_repo_sizes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import json
import subprocess

from pulp_rpm.tests.functional.constants import (
RPM_UNSIGNED_FIXTURE_URL,
RPM_UNSIGNED_FIXTURE_SIZE,
RPM_KICKSTART_FIXTURE_URL,
RPM_KICKSTART_FIXTURE_SIZE,
)


def test_repo_size(init_and_sync, delete_orphans_pre, monitor_task, orphans_cleanup_api_client):
    """Check the ``repository-size`` report of an RPM repo after on-demand and immediate syncs."""
    cleanup_response = orphans_cleanup_api_client.cleanup({"orphan_protection_time": 0})
    monitor_task(cleanup_response.task)
    repo, _ = init_and_sync(url=RPM_UNSIGNED_FIXTURE_URL, policy="on_demand")

    size_command = (
        "pulpcore-manager",
        "repository-size",
        "--repositories",
        repo.pulp_href,
        "--include-on-demand",
    )

    def fetch_reports():
        """Run the management command and decode its JSON output."""
        completed = subprocess.run(size_command, capture_output=True, check=True)
        return json.loads(completed.stdout)

    # Assert basic items of report and test on-demand sizing
    reports = fetch_reports()
    assert len(reports) == 1
    on_demand_report = reports[0]
    assert on_demand_report["name"] == repo.name
    assert on_demand_report["href"] == repo.pulp_href
    assert on_demand_report["disk-size"] == 0
    assert on_demand_report["on-demand-size"] == RPM_UNSIGNED_FIXTURE_SIZE

    # Sync the same repo again with the immediate policy and re-run the report
    _, _ = init_and_sync(repository=repo, url=RPM_UNSIGNED_FIXTURE_URL, policy="immediate")
    immediate_report = fetch_reports()[0]
    assert immediate_report["disk-size"] == RPM_UNSIGNED_FIXTURE_SIZE
    assert immediate_report["on-demand-size"] == 0


def test_kickstart_repo_size(
    init_and_sync, delete_orphans_pre, monitor_task, orphans_cleanup_api_client
):
    """Check the ``repository-size`` report of a kickstart repo after both sync policies."""
    cleanup_response = orphans_cleanup_api_client.cleanup({"orphan_protection_time": 0})
    monitor_task(cleanup_response.task)
    repo, _ = init_and_sync(url=RPM_KICKSTART_FIXTURE_URL, policy="on_demand")

    size_command = (
        "pulpcore-manager",
        "repository-size",
        "--repositories",
        repo.pulp_href,
        "--include-on-demand",
    )

    def fetch_reports():
        """Run the management command and decode its JSON output."""
        completed = subprocess.run(size_command, capture_output=True, check=True)
        return json.loads(completed.stdout)

    # Assert basic items of report and test on-demand sizing
    reports = fetch_reports()
    assert len(reports) == 1
    on_demand_report = reports[0]
    assert on_demand_report["name"] == repo.name
    assert on_demand_report["href"] == repo.pulp_href
    assert on_demand_report["disk-size"] == 2275  # One file is always downloaded
    assert on_demand_report["on-demand-size"] == 133810  # Not all remote artifacts have sizes

    # Sync the same repo again with the immediate policy and re-run the report
    _, _ = init_and_sync(repository=repo, url=RPM_KICKSTART_FIXTURE_URL, policy="immediate")
    immediate_report = fetch_reports()[0]
    assert immediate_report["disk-size"] == RPM_KICKSTART_FIXTURE_SIZE
    assert immediate_report["on-demand-size"] == 0
5 changes: 5 additions & 0 deletions pulp_rpm/tests/functional/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@
"content_summary" field on "../repositories/../versions/../".
"""

RPM_UNSIGNED_FIXTURE_SIZE = 79260
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these numbers manually verified?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I manually added up the sizes on the fixture. I then double checked by performing a sync on an empty system and adding up all the artifact sizes.

"""Size in bytes of all the packages in the :data:`RPM_UNSIGNED_FIXTURE_URL`."""

FEDORA_MIRRORLIST_BASE = "https://mirrors.fedoraproject.org/mirrorlist"
FEDORA_MIRRORLIST_PARAMS = "?repo=epel-modular-8&arch=x86_64&infra=stock&content=centos"
RPM_EPEL_MIRROR_URL = FEDORA_MIRRORLIST_BASE + FEDORA_MIRRORLIST_PARAMS
Expand Down Expand Up @@ -462,6 +465,8 @@
RPM_PACKAGELANGPACKS_CONTENT_NAME: 1,
}

# Expected "disk-size" reported for a repository synced from RPM_KICKSTART_FIXTURE_URL
# with the "immediate" policy (presumably in bytes, like RPM_UNSIGNED_FIXTURE_SIZE --
# verify against the fixture).
RPM_KICKSTART_FIXTURE_SIZE = 9917733

RPM_KICKSTART_REPOSITORY_ROOT_CONTENT = [
".treeinfo",
"Dolphin/",
Expand Down
Loading