Skip to content

Commit

Permalink
WIP Implement compact_index and add digest to gem
Browse files Browse the repository at this point in the history
fixes pulp#96
  • Loading branch information
mdellweg committed Jun 14, 2023
1 parent cefb651 commit 62926b0
Show file tree
Hide file tree
Showing 6 changed files with 136 additions and 41 deletions.
1 change: 1 addition & 0 deletions CHANGES/96.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Implement new compact_index format. Add checksum and dependency information to gem content.
3 changes: 3 additions & 0 deletions CHANGES/96.removal
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Disable syncing from remotes that do not provide the compact index format. Existing on-demand content will be broken after this release.

TODO: Provide a data repair command.
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Generated by Django 4.2.1 on 2023-06-12 16:50

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add a ``checksum`` column to ``gemcontent`` and make it part of ``unique_together``."""

    dependencies = [
        ("gem", "0004_alter_gemcontent_content_ptr_and_more"),
    ]

    operations = [
        # Clear the existing unique_together before the new column is introduced.
        migrations.AlterUniqueTogether(
            name="gemcontent",
            unique_together=set(),
        ),
        # NOTE(review): the field is not nullable, yet the one-off default used to
        # populate existing rows is None (preserve_default=False). Presumably this
        # fails on a table that already contains gem content -- confirm, and provide
        # a data migration or repair command if so.
        migrations.AddField(
            model_name="gemcontent",
            name="checksum",
            field=models.CharField(db_index=True, default=None, max_length=64),
            preserve_default=False,
        ),
        # Re-create the uniqueness constraint, now including the checksum.
        migrations.AlterUniqueTogether(
            name="gemcontent",
            unique_together={("name", "version", "checksum")},
        ),
    ]
11 changes: 10 additions & 1 deletion pulp_gem/app/models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from logging import getLogger

from django.contrib.postgres.fields import HStoreField
from django.db import models

from pulpcore.plugin.models import (
Expand All @@ -14,6 +15,9 @@
log = getLogger(__name__)


# TODO: Add ShallowGemContent for old content without checksums.
# Decide whether we need to carry it forever or whether we can assume all upstreams will use
# compact index eventually.
class GemContent(Content):
"""
The "gem" content type.
Expand All @@ -28,9 +32,14 @@ class GemContent(Content):
"""

TYPE = "gem"
repo_key_fields = ("name", "version")

name = models.TextField(blank=False, null=False)
version = models.TextField(blank=False, null=False)
checksum = models.CharField(max_length=64, null=False, db_index=True)
dependencies = HStoreField(default=dict)
required_ruby_version = models.TextField(null=True)
required_rubygems_version = models.TextField(null=True)

@property
def relative_path(self):
Expand All @@ -44,7 +53,7 @@ def gemspec_path(self):

class Meta:
default_related_name = "%(app_label)s_%(model_name)s"
unique_together = ("name", "version")
unique_together = ("name", "version", "checksum")


class GemDistribution(Distribution):
Expand Down
88 changes: 48 additions & 40 deletions pulp_gem/app/tasks/synchronizing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
import os

from gettext import gettext as _
from urllib.parse import urlparse, urlunparse
from urllib.parse import urlparse, urlunparse, urljoin

from asgiref.sync import sync_to_async
from django.conf import settings

from pulpcore.plugin.models import Artifact, ProgressReport, Remote, Repository
from pulpcore.plugin.stages import (
Expand All @@ -20,7 +21,7 @@
)

from pulp_gem.app.models import GemContent, GemRemote
from pulp_gem.specs import read_specs
from pulp_gem.specs import read_specs, read_versions, read_info


log = logging.getLogger(__name__)
Expand Down Expand Up @@ -102,44 +103,51 @@ async def run(self):
# Interpret policy to download Artifacts or not
deferred_download = self.remote.policy != Remote.IMMEDIATE

async with ProgressReport(message="Downloading Metadata") as progress:
parsed_url = urlparse(self.remote.url)
root_dir = parsed_url.path
specs_path = os.path.join(root_dir, "specs.4.8.gz")
specs_url = urlunparse(parsed_url._replace(path=specs_path))
downloader = self.remote.get_downloader(url=specs_url)
result = await downloader.run()
await progress.aincrement()

async with ProgressReport(message="Parsing Metadata") as progress:
for key in read_specs(result.path):
relative_path = os.path.join("gems", key.name + "-" + key.version + ".gem")
path = os.path.join(root_dir, relative_path)
url = urlunparse(parsed_url._replace(path=path))

spec_relative_path = os.path.join(
"quick/Marshal.4.8", key.name + "-" + key.version + ".gemspec.rz"
)
spec_path = os.path.join(root_dir, spec_relative_path)
spec_url = urlunparse(parsed_url._replace(path=spec_path))
gem = GemContent(name=key.name, version=key.version)
da_gem = DeclarativeArtifact(
artifact=Artifact(),
url=url,
relative_path=relative_path,
remote=self.remote,
deferred_download=deferred_download,
)
da_spec = DeclarativeArtifact(
artifact=Artifact(),
url=spec_url,
relative_path=spec_relative_path,
remote=self.remote,
deferred_download=deferred_download,
)
dc = DeclarativeContent(content=gem, d_artifacts=[da_gem, da_spec])
await progress.aincrement()
await self.put(dc)
async with ProgressReport(message="Downloading versions list", total=1) as pr_download_versions:
versions_url = urljoin(self.remote.url, "versions")
versions_downloader = self.remote.get_downloader(url=versions_url)
versions_result = await versions_downloader.run()
await pr_download_versions.aincrement()

async with ProgressReport(message="Parsing versions list") as pr_parse_versions:
async with ProgressReport(message="Parsing versions info") as pr_parse_info:
async for name, versions, md5_sum in read_versions(versions_result.path):
info_url = urljoin(urljoin(self.remote.url, "info/"), name)
if "md5" in settings.ALLOWED_CONTENT_CHECKSUMS:
extra_kwargs = {"expected_digests": {"md5": md5_sum}}
else:
extra_kwargs = {}
log.warn("Checksum of info file for '{}' could not be validated.", name)
raise Exception("Not so Yay!")
info_downloader = self.remote.get_downloader(url=info_url, **extra_kwargs)
info_result = await info_downloader.run()
async for version, checksum, deps, platform_deps in read_info(info_result.path):
gem_path = os.path.join("gems", name + "-" + version + ".gem")
gem_url = urljoin(self.remote.url, gem_path)
gemspec_path = os.path.join(
"quick/Marshal.4.8", name + "-" + version + ".gemspec.rz"
)
gemspec_url = urljoin(self.remote.url, gemspec_path)
gem = GemContent(name=name, version=version, checksum=checksum)

da_gem = DeclarativeArtifact(
artifact=Artifact(sha256=checksum),
url=gem_url,
relative_path=gem_path,
remote=self.remote,
deferred_download=deferred_download,
)
da_gemspec = DeclarativeArtifact(
artifact=Artifact(),
url=gemspec_url,
relative_path=gemspec_path,
remote=self.remote,
deferred_download=deferred_download,
)
dc = DeclarativeContent(content=gem, d_artifacts=[da_gem, da_gemspec])
await pr_parse_info.aincrement()
await self.put(dc)
await pr_parse_versions.aincrement()


class GemDeclarativeVersion(DeclarativeVersion):
Expand Down
48 changes: 48 additions & 0 deletions pulp_gem/specs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from collections import namedtuple

import aiofiles
import zlib
import gzip
import yaml
Expand All @@ -14,6 +15,53 @@
Key = namedtuple("Key", ("name", "version"))


async def read_versions(relative_path):
    """
    Read a compact index ``versions`` file and yield one entry per gem name.

    The file starts with a preamble that is terminated by a ``---`` line::

        created_at: <timestamp>
        ---

    Every following line consists of three space separated fields:
    ``<name> <versions> <md5_sum>``, where ``<versions>`` is a comma separated list.
    A name may occur on more than one line; the version lists of all its lines are
    concatenated in file order and the md5 sum of its last line is reported.

    NOTE(review): the compact index format presumably marks yanked versions with a
    leading ``-``; such entries are passed through unchanged -- confirm that callers
    expect this.

    Args:
        relative_path: Path of the ``versions`` file to read.

    Yields:
        Tuples ``(name, versions, md5_sum)`` where ``versions`` is a list of strings.

    Raises:
        ValueError: If a non-empty data line does not consist of three fields.
    """
    results = {}
    async with aiofiles.open(relative_path, mode="r") as fp:
        preamble = True
        async for line in fp:
            line = line.strip()
            if line == "---":
                preamble = False
                continue
            if preamble or not line:
                # Skip the header, and tolerate blank lines instead of failing to
                # unpack them below.
                continue
            name, versions, md5_sum = line.split(" ", maxsplit=2)
            previous_versions, _ = results.get(name, ([], ""))
            results[name] = (previous_versions + versions.split(","), md5_sum)
    # The whole file has been consumed at this point, so it does not need to stay
    # open while the consumer processes the yielded entries.
    for name, (versions, md5_sum) in results.items():
        yield name, versions, md5_sum


async def read_info(relative_path):
    """
    Read a compact index ``info`` file and yield one entry per version line.

    The file starts with a ``---`` line. Every following line has the form
    ``<version> <dependencies>|<key>:<value>,<key>:<value>,...`` and one of the keys
    is expected to be ``checksum``.

    Args:
        relative_path: Path of the ``info`` file to read.

    Yields:
        Tuples ``(version, checksum, dependencies, platform_deps)``.
        ``dependencies`` is the unparsed text between the version and the ``|``;
        ``platform_deps`` is a dict of all key/value pairs other than ``checksum``.

    Raises:
        ValueError: If a line is malformed or does not carry a checksum.
    """
    async with aiofiles.open(relative_path, mode="r") as fp:
        preamble = True
        async for line in fp:
            line = line.strip()
            if line == "---":
                preamble = False
                continue
            if preamble or not line:
                # Skip the header and tolerate blank lines.
                continue
            front, back = line.split("|")
            # A line without any dependencies may have nothing after the version.
            version, _, dependencies = front.partition(" ")
            # Reset per line so a checksum can never leak over from the previous one.
            checksum = None
            platform_deps = {}
            for stmt in back.split(","):
                if not stmt:
                    continue
                # Split on the first colon only, in case a value contains one.
                key, value = stmt.split(":", maxsplit=1)
                if key == "checksum":
                    checksum = value
                else:
                    platform_deps[key] = value
            if checksum is None:
                raise ValueError(
                    f"No checksum found for version '{version}' in '{relative_path}'."
                )
            yield version, checksum, dependencies, platform_deps


def read_specs(relative_path):
"""
Read rubygem specs from file.
Expand Down

0 comments on commit 62926b0

Please sign in to comment.