Skip to content

Commit

Permalink
Taught export to ensure de-duplicated Artifact.json.
Browse files Browse the repository at this point in the history
Along the way taught export to operate on a QuerySet of Artifacts
instead of (prematurely) hydrating all affected Artifacts into
a list.

fixes #4159.

(cherry picked from commit 6178887)
  • Loading branch information
ggainey authored and dralley committed Jul 29, 2023
1 parent 63b3ddd commit 7cf946c
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 6 deletions.
4 changes: 4 additions & 0 deletions CHANGES/4159.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Taught the Artifact.json of an export to hold the minimum unique set of Artifact entries.

In export scenarios with highly duplicated content, this can mean a significant decrease
in export size and a significant improvement in import performance.
5 changes: 3 additions & 2 deletions pulpcore/app/importexport.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,14 @@ def export_artifacts(export, artifacts):
Args:
export (django.db.models.PulpExport): export instance that's doing the export
artifacts (django.db.models.Artifacts): list of artifacts in all repos being exported
artifacts (django.db.models.Artifacts): QuerySet of artifacts in all repos being exported
Raises:
ValidationError: When path is not in the ALLOWED_EXPORT_PATHS setting
"""
data = dict(message="Exporting Artifacts", code="export.artifacts", total=len(artifacts))
with ProgressReport(**data) as pb:
for artifact in pb.iter(artifacts):
for artifact in artifacts.iterator(): # chunk_size= defaults to 2000 at a fetch
dest = artifact.file.name
if settings.DEFAULT_FILE_STORAGE != "pulpcore.app.models.storage.FileSystem":
with tempfile.TemporaryDirectory(dir=".") as temp_dir:
Expand All @@ -112,6 +112,7 @@ def export_artifacts(export, artifacts):
export.tarfile.add(temp_file.name, dest)
else:
export.tarfile.add(artifact.file.path, dest)
pb.increment()

resource = ArtifactResource()
resource.queryset = artifacts
Expand Down
13 changes: 9 additions & 4 deletions pulpcore/app/tasks/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -495,7 +495,7 @@ def _do_export(pulp_exporter, tar, the_export):
starting_versions = _get_starting_versions(do_incremental, pulp_exporter, the_export)
vers_match = _version_match(ending_versions, starting_versions)
# Gather up versions and artifacts
artifacts = []
artifacts = None # Will be a QuerySet selecting the Artifacts that need to be exported
for version in ending_versions:
# Check version-content to make sure we're not being asked to export
# an on_demand repo
Expand All @@ -504,10 +504,15 @@ def _do_export(pulp_exporter, tar, the_export):
raise RuntimeError(_("Remote artifacts cannot be exported."))

if do_incremental:
vers_artifacts = version.artifacts.difference(vers_match[version].artifacts).all()
vers_artifacts = version.artifacts.difference(vers_match[version].artifacts)
else:
vers_artifacts = version.artifacts.all()
artifacts.extend(vers_artifacts)
vers_artifacts = version.artifacts

if artifacts:
artifacts.union(vers_artifacts)
else:
artifacts = vers_artifacts

# export plugin-version-info
export_versions(the_export, plugin_version_info)
# Export the top-level entities (artifacts and repositories)
Expand Down

0 comments on commit 7cf946c

Please sign in to comment.