Serve previously published artifacts for up to 3 days #925

Merged: 1 commit, Feb 1, 2024
3 changes: 3 additions & 0 deletions CHANGES/911.feature
@@ -0,0 +1,3 @@
Added a feature to serve published artifacts from previous publications for up to 3 days.
This fulfills the apt-by-hash/acquire-by-hash spec by allowing by-hash files to be cached for a
period of 3 days.
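
For context, clients that implement acquire-by-hash fetch index files by their checksum rather than by name, so a path of the following shape (digest shortened and purely illustrative) must keep resolving for a while after a newer publication replaces it:

    dists/stable/main/binary-amd64/by-hash/SHA256/9f86d081884c7d65...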
50 changes: 50 additions & 0 deletions pulp_deb/app/migrations/0029_distributedpublication.py
@@ -0,0 +1,50 @@
# Generated by Django 4.2.2 on 2024-01-03 18:58

from django.db import migrations, models
import django.db.models.deletion
import django_lifecycle.mixins
import pulpcore.app.models.base


class Migration(migrations.Migration):

    dependencies = [
        ("core", "0114_remove_task_args_remove_task_kwargs"),
        ("deb", "0028_sourcepackage_sourcepackagereleasecomponent_and_more"),
    ]

    operations = [
        migrations.CreateModel(
            name="DistributedPublication",
            fields=[
                (
                    "pulp_id",
                    models.UUIDField(
                        default=pulpcore.app.models.base.pulp_uuid,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                ("pulp_created", models.DateTimeField(auto_now_add=True)),
                ("pulp_last_updated", models.DateTimeField(auto_now=True, null=True)),
                ("expires_at", models.DateTimeField(null=True)),
                (
                    "distribution",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE, to="core.distribution"
                    ),
                ),
                (
                    "publication",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE, to="core.publication"
                    ),
                ),
            ],
            options={
                "abstract": False,
            },
            bases=(django_lifecycle.mixins.LifecycleModelMixin, models.Model),
        ),
    ]
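
Assuming a standard pulpcore deployment, the migration is applied with the usual management command (shown for illustration only):

    pulpcore-manager migrate deb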
96 changes: 95 additions & 1 deletion pulp_deb/app/models/publication.py
@@ -1,11 +1,40 @@
from contextlib import suppress
from datetime import timedelta

from django.db import models
from django.utils import timezone
from django_lifecycle import hook, AFTER_CREATE, AFTER_UPDATE

from pulpcore.plugin.models import (
    BaseModel,
    Distribution,
    Publication,
    PublishedArtifact,
    RepositoryVersion,
)

from pulp_deb.app.models.signing_service import AptReleaseSigningService


BOOL_CHOICES = [(True, "yes"), (False, "no")]
PUBLICATION_CACHE_DURATION = timedelta(days=3)


def latest_publication(repo_pk):
    """
    Find the latest publication for a repository.

    This function is based on the logic in pulpcore's content handler.

    https://github.com/pulp/pulpcore/blob/3bfd35c76e29944b622d275be52c0d5ebbdfbf72/pulpcore/content/handler.py#L601-L607
    """
    versions = RepositoryVersion.objects.filter(repository=repo_pk)
    with suppress(Publication.DoesNotExist):
        # Prefer the publication of the newest repository version, breaking
        # ties by the most recently created publication.
        return (
            Publication.objects.filter(repository_version__in=versions, complete=True)
            .latest("repository_version", "pulp_created")
            .cast()
        )


class VerbatimPublication(Publication):
@@ -17,6 +46,12 @@ class VerbatimPublication(Publication):

TYPE = "verbatim-publication"

@hook(AFTER_UPDATE, when="complete", has_changed=True, is_now=True)
def set_distributed_publication(self):
for distro in AptDistribution.objects.filter(repository__pk=self.repository.pk):
if self == latest_publication(self.repository.pk):
DistributedPublication(distribution=distro, publication=self).save()

class Meta:
default_related_name = "%(app_label)s_%(model_name)s"

@@ -36,6 +71,12 @@ class AptPublication(Publication):
        AptReleaseSigningService, on_delete=models.PROTECT, null=True
    )

    @hook(AFTER_UPDATE, when="complete", has_changed=True, is_now=True)
    def set_distributed_publication(self):
        # Same bookkeeping as on VerbatimPublication: record this publication
        # for each distribution serving the repository while it is the latest.
        if self == latest_publication(self.repository.pk):
            for distro in AptDistribution.objects.filter(repository__pk=self.repository.pk):
                DistributedPublication(distribution=distro, publication=self).save()

    class Meta:
        default_related_name = "%(app_label)s_%(model_name)s"

@@ -48,5 +89,58 @@ class AptDistribution(Distribution):
TYPE = "apt-distribution"
SERVE_FROM_PUBLICATION = True

@hook(AFTER_CREATE)
@hook(AFTER_UPDATE, when="publication", has_changed=True, is_not=None)
@hook(AFTER_UPDATE, when="repository", has_changed=True, is_not=None)
def set_distributed_publication(self):
if self.publication:
DistributedPublication(distribution=self, publication=self.publication)
elif self.repository:
if publication := latest_publication(self.repository):
DistributedPublication(distribution=self, publication=publication).save()

def content_handler(self, path):
recent_dp = self.distributedpublication_set.filter(
models.Q(expires_at__gte=timezone.now()) | models.Q(expires_at__isnull=True)
).order_by("pulp_created")
pa = (
PublishedArtifact.objects.filter(
relative_path=path, publication__distributedpublication__pk__in=recent_dp
)
.order_by("-publication__distributedpublication__pulp_created")
.select_related(
"content_artifact",
"content_artifact__artifact",
)
).first()

if pa:
return pa.content_artifact
return

class Meta:
default_related_name = "%(app_label)s_%(model_name)s"


class DistributedPublication(BaseModel):
    """
    Represents the history of distributed publications.

    This allows the content handler to serve a previous Publication's content for a set period of
    time.

    When a Distribution begins serving a new Publication, a new DistributedPublication is created
    and the expires_at field is set on any existing DistributedPublications for that Distribution.
    """

    distribution = models.ForeignKey(Distribution, on_delete=models.CASCADE)
    publication = models.ForeignKey(Publication, on_delete=models.CASCADE)
    expires_at = models.DateTimeField(null=True)

    @hook(AFTER_CREATE)
    def cleanup(self):
        """Set expires_at on any older DistributedPublications and clean up any expired ones."""
        DistributedPublication.objects.filter(expires_at__lt=timezone.now()).delete()
        DistributedPublication.objects.exclude(pk=self.pk).filter(
            distribution=self.distribution, expires_at__isnull=True
        ).update(expires_at=(timezone.now() + PUBLICATION_CACHE_DURATION))
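
The serving-window rule implemented by content_handler and cleanup above can be summarized in a minimal standalone sketch; the function name is illustrative and not part of the plugin:

    from datetime import datetime, timedelta, timezone

    PUBLICATION_CACHE_DURATION = timedelta(days=3)

    def is_servable(expires_at, now=None):
        # Mirrors the content_handler filter: a DistributedPublication is
        # servable while expires_at is unset (it is the current publication)
        # or still lies in the future.
        now = now or datetime.now(timezone.utc)
        return expires_at is None or expires_at >= now

    now = datetime.now(timezone.utc)
    assert is_servable(None)  # current publication, never expires
    assert is_servable(now + PUBLICATION_CACHE_DURATION)  # superseded just now
    assert not is_servable(now - timedelta(seconds=1))  # window has passed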
7 changes: 5 additions & 2 deletions pulp_deb/tests/conftest.py
@@ -59,14 +59,17 @@ def apt_repository_versions_api(apt_client):
def deb_distribution_factory(apt_distribution_api, gen_object_with_cleanup):
    """Fixture that generates a deb distribution with cleanup from a publication or repository."""

    def _deb_distribution_factory(publication=None, repository=None):
        """Create a deb distribution.

        :param publication: An optional publication the distribution is based on.
        :param repository: An optional repository the distribution serves directly.
        :returns: The created distribution.
        """
        body = gen_distribution()
        if publication:
            body["publication"] = publication.pulp_href
        if repository:
            body["repository"] = repository.pulp_href
        return gen_object_with_cleanup(apt_distribution_api, body)

    return _deb_distribution_factory
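
For illustration, the updated factory supports either entry point; the second form is what the new cached-content test below relies on:

    # Distribution bound to one specific publication:
    distribution = deb_distribution_factory(publication=publication)

    # Distribution serving a repository, picking up its latest publication:
    distribution = deb_distribution_factory(repository=repo)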
34 changes: 34 additions & 0 deletions pulp_deb/tests/functional/api/test_download_content.py
@@ -169,3 +169,37 @@ def test_download_content(
        content = download_content_unit(distribution.base_path, unit_path[1])
        pulp_hashes.append(hashlib.sha256(content).hexdigest())
    assert fixtures_hashes == pulp_hashes


@pytest.mark.parallel
def test_download_cached_content(
    deb_init_and_sync,
    deb_distribution_factory,
    deb_publication_factory,
    deb_fixture_server,
    download_content_unit,
    http_get,
    deb_get_content_types,
    deb_modify_repository,
):
    """Verify that previously published content can still be downloaded."""
    # Create and sync a repository, then create a distribution serving it.
    repo, _ = deb_init_and_sync()
    distribution = deb_distribution_factory(repository=repo)
    deb_publication_factory(repo, structured=True, simple=True)

    # Pick a random package and compute its sha256 digest from the fixture server.
    package_content = deb_get_content_types("apt_package_api", DEB_PACKAGE_NAME, repo)
    package = choice(package_content)
    url = deb_fixture_server.make_url(DEB_FIXTURE_STANDARD_REPOSITORY_NAME)
    package_hash = hashlib.sha256(http_get(urljoin(url, package.relative_path))).hexdigest()

    # Remove all content from the repository and publish the now-empty version.
    deb_modify_repository(repo, {"remove_content_units": ["*"]})
    deb_publication_factory(repo, structured=True, simple=True)

    # The package must still be served from the previous, cached publication.
    content = download_content_unit(distribution.base_path, package.relative_path)
    content_hash = hashlib.sha256(content).hexdigest()

    assert package_hash == content_hash
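
Assuming a configured functional test environment, the new test can be run on its own with pytest's -k selector:

    pytest pulp_deb/tests/functional/api/test_download_content.py -k test_download_cached_content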