Skip to content

Commit

Permalink
WIP Implement compact_index and add digest to gem
Browse files Browse the repository at this point in the history
fixes pulp#96
  • Loading branch information
mdellweg committed Jun 15, 2023
1 parent 2735502 commit dd3c564
Show file tree
Hide file tree
Showing 8 changed files with 330 additions and 87 deletions.
1 change: 1 addition & 0 deletions CHANGES/96.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Implement new compact_index format. Add checksum and dependency information to gem content.
3 changes: 3 additions & 0 deletions CHANGES/96.removal
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Disable syncing from sources that do not provide the compact index format. Existing on-demand content will be broken after this release.

TODO: Provide a data repair command.
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Generated by Django 4.2.1 on 2023-06-14 14:53

import django.contrib.postgres.fields.hstore
from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    """Add filter options to ``gemremote`` and create the ``GemContent`` model.

    Three columns are added to ``gemremote`` (``excludes``, ``includes``,
    ``prereleases``) and a new ``GemContent`` model is created as a
    multi-table child of ``core.content``.
    """

    dependencies = [
        ("core", "0107_distribution_hidden"),
        ("gem", "0005_rename_gemcontent_shallowgemcontent"),
    ]

    operations = [
        # Both hstore columns on the remote are nullable.
        migrations.AddField(
            "gemremote",
            "excludes",
            django.contrib.postgres.fields.hstore.HStoreField(null=True),
        ),
        migrations.AddField(
            "gemremote",
            "includes",
            django.contrib.postgres.fields.hstore.HStoreField(null=True),
        ),
        migrations.AddField(
            "gemremote",
            "prereleases",
            models.BooleanField(default=False),
        ),
        migrations.CreateModel(
            name="GemContent",
            fields=[
                # Parent link to core.content; it doubles as the primary key.
                (
                    "content_ptr",
                    models.OneToOneField(
                        to="core.content",
                        on_delete=django.db.models.deletion.CASCADE,
                        parent_link=True,
                        primary_key=True,
                        auto_created=True,
                        serialize=False,
                    ),
                ),
                ("name", models.TextField()),
                ("version", models.TextField()),
                ("checksum", models.CharField(max_length=64, db_index=True)),
                ("dependencies", django.contrib.postgres.fields.hstore.HStoreField(default=dict)),
                ("required_ruby_version", models.TextField(null=True)),
                ("required_rubygems_version", models.TextField(null=True)),
                ("prerelease", models.BooleanField(default=False)),
            ],
            options={
                "default_related_name": "%(app_label)s_%(model_name)s",
                "unique_together": {("name", "version", "checksum")},
            },
            bases=("core.content",),
        ),
    ]
47 changes: 45 additions & 2 deletions pulp_gem/app/models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from logging import getLogger

from django.contrib.postgres.fields import HStoreField
from django.db import models

from pulpcore.plugin.models import (
Expand Down Expand Up @@ -46,7 +47,45 @@ class Meta:
default_related_name = "%(app_label)s_%(model_name)s"
unique_together = ("name", "version")

GemContent = ShallowGemContent

class GemContent(Content):
    """
    The "gem" content type.

    Content of this type represents a ruby gem file
    with its spec data.

    Fields:
        name (str): The name of the gem.
        version (str): The version of the gem.
        checksum (str): Checksum of the gem, at most 64 characters, indexed.
        prerelease (bool): Whether the gem is a prerelease; defaults to False.
        dependencies (dict): Hstore mapping of the gem's dependencies; defaults to empty.
        required_ruby_version (str): Required ruby version, may be null.
        required_rubygems_version (str): Required rubygems version, may be null.
    """

    TYPE = "gem"
    # Within one repository version a gem is identified by name and version only.
    repo_key_fields = ("name", "version")

    name = models.TextField(null=False, blank=False)
    version = models.TextField(null=False, blank=False)
    checksum = models.CharField(db_index=True, null=False, max_length=64)
    prerelease = models.BooleanField(default=False)
    dependencies = HStoreField(default=dict)
    required_ruby_version = models.TextField(null=True)
    required_rubygems_version = models.TextField(null=True)

    @property
    def relative_path(self):
        """Return the path under which the content app serves the gem file."""
        return f"gems/{self.name}-{self.version}.gem"

    @property
    def gemspec_path(self):
        """Return the path under which the content app serves the gemspec."""
        return f"quick/Marshal.4.8/{self.name}-{self.version}.gemspec.rz"

    class Meta:
        default_related_name = "%(app_label)s_%(model_name)s"
        # Globally, the checksum is part of the identity as well.
        unique_together = ("name", "version", "checksum")


class GemDistribution(Distribution):
"""
Expand Down Expand Up @@ -77,6 +116,10 @@ class GemRemote(Remote):

TYPE = "gem"

prereleases = models.BooleanField(default=False)
includes = HStoreField(null=True)
excludes = HStoreField(null=True)

class Meta:
default_related_name = "%(app_label)s_%(model_name)s"

Expand All @@ -87,7 +130,7 @@ class GemRepository(Repository):
"""

TYPE = "gem"
CONTENT_TYPES = [ShallowGemContent]
CONTENT_TYPES = [GemContent, ShallowGemContent]
REMOTE_TYPES = [GemRemote]

class Meta:
Expand Down
30 changes: 21 additions & 9 deletions pulp_gem/app/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
import os

from rest_framework.serializers import (
BooleanField,
CharField,
ChoiceField,
FileField,
HStoreField,
HyperlinkedRelatedField,
ValidationError,
)
Expand Down Expand Up @@ -72,6 +74,10 @@ class GemContentSerializer(MultipleArtifactContentSerializer):
)
name = CharField(help_text=_("Name of the gem"), read_only=True)
version = CharField(help_text=_("Version of the gem"), read_only=True)
prerelease = BooleanField(help_text=_("Whether the gem is a prerelease"), read_only=True)
dependencies = HStoreField(read_only=True)
required_ruby_version = CharField(help_text=_("Required ruby version of the gem"), read_only=True)
required_rubygems_version = CharField(help_text=_("Required rubygems version of the gem"), read_only=True)

def __init__(self, *args, **kwargs):
"""Initializer for GemContentSerializer."""
def deferred_validate(self, data):
    """Validate the GemContent data (deferred).

    Pops the uploaded ``artifact`` from ``data``, analyses the gem file and
    fills ``data`` with the extracted gem information, the gem and gemspec
    artifacts keyed by their relative paths, and the artifact's sha256 as
    ``checksum``.

    Args:
        data (dict): Serializer data; must contain the key ``artifact``.

    Returns:
        dict: The same ``data`` dict, updated in place.

    Raises:
        ValidationError: If a gem content with the same checksum already exists.
    """
    artifact = data.pop("artifact")

    gem_info, spec_data = analyse_gem(artifact.file)
    # Gem file and gemspec share the same "<name>-<version>" stem.
    stem = gem_info["name"] + "-" + gem_info["version"]
    relative_path = os.path.join("gems", stem + ".gem")

    spec_artifact = _artifact_from_data(spec_data)
    spec_relative_path = os.path.join("quick/Marshal.4.8", stem + ".gemspec.rz")

    data.update(gem_info)
    data["artifacts"] = {relative_path: artifact, spec_relative_path: spec_artifact}
    data["checksum"] = artifact.sha256

    # Validate uniqueness
    if GemContent.objects.filter(checksum=data["checksum"]).exists():
        # The message has no placeholders, so it must not be passed through
        # .format() with the no longer existing name/version locals.
        raise ValidationError(_("There is already a gem content with that artifact."))

    return data
Expand Down Expand Up @@ -142,6 +150,10 @@ class Meta:
"repository",
"name",
"version",
"prerelease",
"dependencies",
"required_ruby_version",
"required_rubygems_version",
)
model = GemContent

Expand Down
48 changes: 47 additions & 1 deletion pulp_gem/app/tasks/publishing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import datetime
import logging
import re
import gzip
import os
import shutil

from gettext import gettext as _
Expand Down Expand Up @@ -53,6 +55,21 @@ def _publish_specs(specs, relative_path, publication):
specs_metadata_gz.save()


def _publish_compact_index(lines, relative_path, publication, timestamp=False):
    """Write a compact index file and publish it as metadata.

    The file is written to ``relative_path`` (relative to the current working
    directory; any parent directory must already exist) and then handed to
    ``PublishedMetadata.create_from_file``.

    Args:
        lines: Iterable of strings; each one is written as a single line
            after the ``---`` separator.
        relative_path (str): Path of the file to write and publish.
        publication: The publication the metadata is attached to.
        timestamp (bool): If true, a ``created_at: <UTC time>Z`` header line
            is written before the separator.

    Returns:
        The saved ``PublishedMetadata`` instance.
    """
    # Fixed encoding so the published bytes do not depend on the locale.
    with open(relative_path, "w", encoding="utf-8") as fp:
        if timestamp:
            # Same "YYYY-MM-DDTHH:MM:SSZ" text as before, but taken from an
            # aware UTC datetime instead of the deprecated naive utcnow().
            created_at = datetime.datetime.now(tz=datetime.timezone.utc).strftime(
                "%Y-%m-%dT%H:%M:%SZ"
            )
            fp.write(f"created_at: {created_at}\n")
        fp.write("---\n")
        fp.writelines(line + "\n" for line in lines)

    # Open the file in a context manager so the handle is closed once the
    # metadata has been created, instead of leaking one handle per call.
    with open(relative_path, "rb") as fp:
        metadata = PublishedMetadata.create_from_file(publication=publication, file=File(fp))
        metadata.save()
    return metadata


def _create_index(publication, path="", links=None):
links = links or []
links = (li if li.endswith("/") else str(Path(li).relative_to(path)) for li in links)
Expand Down Expand Up @@ -110,6 +127,30 @@ def publish(repository_version_pk):
_publish_specs(specs, "specs.4.8", publication)
_publish_specs(latest_specs, "latest_specs.4.8", publication)
_publish_specs(prerelease_specs, "prerelease_specs.4.8", publication)

# compact_index
gems_qs = GemContent.objects.filter(pk__in=publication.repository_version.content)
names_qs = gems_qs.order_by("name").values_list("name", flat=True).distinct()
_publish_compact_index(names_qs, "names", publication)

versions_lines = []
os.mkdir("info")
for name in names_qs:
lines = []
for gem in gems_qs.filter(name=name):
deps = ",".join((f"{key}: {value}" for key, value in gem.dependencies.items()))
line = f"{gem.version} {deps}|checksum:{gem.checksum}"
if gem.required_ruby_version:
line += f",ruby:{gem.required_ruby_version}"
if gem.required_rubygems_version:
line += f",rubygems:{gem.required_rubygems_version}"
lines.append(line)
info_metadata = _publish_compact_index(lines, f"info/{name}", publication)
versions = ",".join(gems_qs.filter(name=name).values_list("version", flat=True))
md5_sum = info_metadata._artifacts.first().md5
versions_lines.append(f"{name} {versions} {md5_sum}")
_publish_compact_index(versions_lines, "versions", publication, timestamp=True)

_create_index(
publication,
path="",
Expand All @@ -119,10 +160,15 @@ def publish(repository_version_pk):
"specs.4.8",
"latest_specs.4.8",
"prerelease_specs.4.8",
"names",
"versions",
"info/",
],
)
_create_index(publication, path="gems/", links=gems)
_create_index(publication, path="quick/", links=[])
_create_index(publication, path="quick/", links=["quick/Marshal.4.8/"])
_create_index(publication, path="quick/Marshal.4.8/", links=gemspecs)
_create_index(publication, path="info/", links=(f"info/{name}" for name in names_qs))


log.info(_("Publication: {publication} created").format(publication=publication.pk))
Loading

0 comments on commit dd3c564

Please sign in to comment.