Skip to content

Commit

Permalink
Add a high-level repository writing API
Browse files Browse the repository at this point in the history
The existing API is frustratingly verbose and low-level. This will make
it trivial to create repositories with only a few lines.
  • Loading branch information
dralley committed Dec 3, 2023
1 parent 0930a82 commit 9a9928d
Show file tree
Hide file tree
Showing 5 changed files with 270 additions and 11 deletions.
12 changes: 6 additions & 6 deletions examples/python/simple_createrepo.py → examples/python/manual_createrepo.py
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import os.path
import createrepo_c as cr

def do_repodata(path):
def manual_method(path):
# Prepare repodata/ directory
repodata_path = os.path.join(path, "repodata")
if os.path.exists(repodata_path):
Expand Down Expand Up @@ -37,10 +37,10 @@ def do_repodata(path):

# List directory and prepare list of files to process
pkg_list = []
for filename in os.listdir(path):
filename = os.path.join(path, filename)
if os.path.isfile(filename) and filename.endswith(".rpm"):
pkg_list.append(filename)
with os.scandir(path) as entries:
for entry in entries:
if entry.is_file() and entry.path.endswith(".rpm"):
pkg_list.append(entry.path)

pri_xml.set_num_of_pkgs(len(pkg_list))
fil_xml.set_num_of_pkgs(len(pkg_list))
Expand Down Expand Up @@ -93,6 +93,6 @@ def do_repodata(path):
print("Usage: %s <directory>" % (sys.argv[0]))
sys.exit(1)

do_repodata(sys.argv[1])
manual_method(sys.argv[1])

print("Repository created in %s" % sys.argv[1])
66 changes: 66 additions & 0 deletions examples/python/simple_repository_writing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#!/usr/bin/env python

import os
import sys
import createrepo_c as cr


def write_repository_v1(path):
    """Write repository metadata for the RPMs in *path* using a context manager.

    Every regular file directly inside *path* whose name ends in ".rpm" is
    added to the repository. The writer is used as a context manager, so
    finish() is called automatically when the "with" block is left.
    """
    # Scan the directory once and keep only the package files
    with os.scandir(path) as dir_entries:
        rpm_paths = [
            item.path
            for item in dir_entries
            if item.is_file() and item.path.endswith(".rpm")
        ]

    # All writer options are left at their defaults here
    with cr.RepositoryWriter(path) as repo_writer:
        repo_writer.repomd.add_repo_tag("Fedora 34")
        repo_writer.repomd.set_revision("1628310033")
        # the package count has to be declared before the first package is added
        repo_writer.set_num_of_pkgs(len(rpm_paths))

        for rpm_path in rpm_paths:
            added = repo_writer.add_pkg_from_file(rpm_path)
            print("Added: %s" % added.nevra())


def write_repository_v2(path):
    """Write repository metadata for the RPMs in *path* without a context manager.

    Same result as using the writer in a "with" block, except that finish()
    has to be called explicitly. A few of the writer defaults are overridden
    to show how that is done.
    """
    # List directory and prepare list of files to process
    with os.scandir(path) as entries:
        pkg_list = [
            entry.path
            for entry in entries
            if entry.is_file() and entry.path.endswith(".rpm")
        ]

    # create a writer without a context manager - you need to manually call finish()
    # change a couple of the defaults too
    writer = cr.RepositoryWriter(
        path,
        unique_md_filenames=False,
        changelog_limit=4,
        checksum_type=cr.SHA512,
        compression=cr.GZ_COMPRESSION,
    )
    # The package count is a property of the writer, while the repo tag and
    # revision live on the repomd object it exposes.
    writer.set_num_of_pkgs(len(pkg_list))
    writer.repomd.add_repo_tag("Fedora 34")
    writer.repomd.set_revision("1628310033")

    for filename in pkg_list:
        pkg = writer.add_pkg_from_file(filename)
        print("Added: %s" % pkg.nevra())

    writer.finish()


if __name__ == "__main__":
    # Expect exactly one argument: an existing directory containing the RPMs
    if len(sys.argv) != 2 or not os.path.isdir(sys.argv[1]):
        print("Usage: %s <directory>" % (sys.argv[0]))
        sys.exit(1)

    # write_repository_v2() demonstrates the same thing without a context manager
    write_repository_v1(sys.argv[1])

    print("Repository created in %s" % sys.argv[1])
4 changes: 2 additions & 2 deletions src/compression_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ typedef enum {
CR_CW_MODE_SENTINEL, /*!< Sentinel of the list */
} cr_OpenMode;

/** Stat build about open content during compression (writting).
/** Stat build about open content during compression (writing).
*/
typedef struct {
gint64 size; /*!< Size of content */
Expand Down Expand Up @@ -87,7 +87,7 @@ typedef struct {
void *INNERFILE; /*!< Pointer to underlying FILE */
cr_OpenMode mode; /*!< Mode */
cr_ContentStat *stat; /*!< Content stats */
cr_ChecksumCtx *checksum_ctx; /*!< Checksum contenxt */
cr_ChecksumCtx *checksum_ctx; /*!< Checksum context */
} CR_FILE;

#define CR_CW_ERR -1 /*!< Return value - Error */
Expand Down
2 changes: 1 addition & 1 deletion src/createrepo_c.c
Original file line number Diff line number Diff line change
Expand Up @@ -1153,7 +1153,7 @@ main(int argc, char **argv)
fex_db_filename = g_strconcat(tmp_out_repo, "/filelists-ext.sqlite", NULL);
oth_db_filename = g_strconcat(tmp_out_repo, "/other.sqlite", NULL);
} else {
g_debug("Creating databases localy");
g_debug("Creating databases locally");
const gchar *tmpdir = g_get_tmp_dir();
pri_db_filename = g_build_filename(tmpdir, "primary.XXXXXX.sqlite", NULL);
fil_db_filename = g_build_filename(tmpdir, "filelists.XXXXXX.sqlite", NULL);
Expand Down
197 changes: 195 additions & 2 deletions src/python/createrepo_c/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@

import collections
import os
from pathlib import Path
import shutil
import subprocess
import sys
import tempfile

from . import _createrepo_c
from ._createrepo_c import *
Expand Down Expand Up @@ -196,7 +199,7 @@ def __init__(self, type=None, path=None):
_createrepo_c.RepomdRecord.__init__(self, type, path)

def compress_and_fill(self, hashtype, compresstype):
rec = RepomdRecord(self.type + "_gz", None)
rec = RepomdRecord(self.type, None)
_createrepo_c.RepomdRecord.compress_and_fill(self,
rec,
hashtype,
Expand Down Expand Up @@ -446,7 +449,7 @@ def package_count(self):
# But there's no way to do that. This gets fuzzy around the topic of duplicates.
# If the same package is listed more than once, is that counted as more than one package?
# Currently, no.
return len(self.parse_packages(only_primary=True))
return len(self.parse_packages(only_primary=True)[0])

def iter_packages(self, warningcb=None):
"""
Expand Down Expand Up @@ -545,6 +548,196 @@ def newpkgcb(pkgId, name, arch):
return packages, warnings


# Pairs a metadata file's path ("path") with the *XmlFile object writing to it ("writer").
# Both need to be tracked together because there's no way to get the path back from the
# *XmlFile objects.
MetadataInfoHolder = collections.namedtuple("MetadataInfoHolder", ["path", "writer"])

class RepositoryWriter:
    """High-level helper for writing the metadata of an RPM repository.

    Creates ``<destination>/repodata`` and opens the primary, filelists and
    other XML files on construction. Packages, advisories and additional
    metadata files are then added, and finish() closes the files and writes
    ``repomd.xml``.

    Can be used as a context manager, in which case finish() is called
    automatically when the "with" block is left. The ``repomd`` attribute is
    the Repomd object that finish() dumps to ``repomd.xml``; callers may
    modify it before finishing.
    """

    _FINISHED_ERR_MSG = "Cannot perform action after the repository has already finished being written"

    def __init__(self,
                 destination,
                 num_packages=None,
                 unique_md_filenames=True,
                 changelog_limit=10,
                 compression=ZSTD_COMPRESSION,
                 checksum_type=SHA256,
                 ):
        """Prepare the destination directory and open the standard metadata files.

        :param destination: root directory of the repository (created if missing)
        :param num_packages: if not None, passed straight to set_num_of_pkgs()
        :param unique_md_filenames: if true, finish() calls rename_file() on every record
        :param changelog_limit: passed to package_from_rpm() by add_pkg_from_file()
        :param compression: compression type used for the generated XML metadata
        :param checksum_type: checksum type used for packages and repomd records
        """
        # TODO: with_zchunk option?

        # Validate any supplied value; a plain truthiness check would let "" or [] slip through
        if changelog_limit is not None:
            assert isinstance(changelog_limit, int) and changelog_limit >= 0, "changelog_limit must be an integer >= 0"

        self.repomd = Repomd()
        self._destination_repo_path = Path(destination)

        self._unique_md_filenames = unique_md_filenames
        self._changelog_limit = changelog_limit
        self._checksum_type = checksum_type

        self._has_set_num_pkgs = False
        self._finished = False

        os.makedirs(self.path, exist_ok=True)
        os.makedirs(self.repodata_dir, exist_ok=True)

        self._compression = compression
        self._compression_suffix = self._suffix_for(compression)

        pri_xml_path = self.repodata_dir / ("primary.xml" + self._compression_suffix)
        fil_xml_path = self.repodata_dir / ("filelists.xml" + self._compression_suffix)
        oth_xml_path = self.repodata_dir / ("other.xml" + self._compression_suffix)

        # Metadata files this writer generates itself, keyed by repomd record name
        self.working_metadata_files = {
            "primary": MetadataInfoHolder(
                pri_xml_path, PrimaryXmlFile(str(pri_xml_path), compressiontype=compression)
            ),
            "filelists": MetadataInfoHolder(
                fil_xml_path, FilelistsXmlFile(str(fil_xml_path), compressiontype=compression)
            ),
            "other": MetadataInfoHolder(
                oth_xml_path, OtherXmlFile(str(oth_xml_path), compressiontype=compression)
            ),
        }
        # Metadata files supplied via add_repomd_metadata(): record name -> path inside repodata/
        self.additional_metadata_files = {}

        if num_packages is not None:
            self.set_num_of_pkgs(num_packages)

    @staticmethod
    def _suffix_for(compressiontype):
        """Return the filename suffix for a compression type, or "" if there is none."""
        suffix = compression_suffix(compressiontype)
        return suffix if suffix else ""

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        # TODO: if there's an error do we actually want to finish()?
        # An explicit finish() inside the "with" block is fine - don't trip the
        # "already finished" assertion on the way out.
        if not self._finished:
            self.finish()

    @property
    def path(self):
        """Root directory of the repository, as a Path."""
        return self._destination_repo_path

    @property
    def repodata_dir(self):
        """The ``repodata`` directory inside the repository, as a Path."""
        return self.path / "repodata"

    def set_num_of_pkgs(self, num):
        """Set the number of packages that will be added - this has to be done before adding any packages."""
        assert not self._has_set_num_pkgs, "The number of packages has already been set"
        self._has_set_num_pkgs = True

        for record_name in ("primary", "filelists", "other"):
            self.working_metadata_files[record_name].writer.set_num_of_pkgs(num)

    def add_pkg_from_file(self, path, output_dir=None):
        """Add a package to the repo from a provided path.

        If *path* is not located underneath the repository directory, the
        package is copied into *output_dir* when one is given; otherwise the
        ValueError from the path computation propagates to the caller.

        Returns the Package object that was added.
        """
        assert self._has_set_num_pkgs, "Must set the number of packages before adding packages"
        assert not self._finished, self._FINISHED_ERR_MSG

        try:
            relative_path = Path(path).relative_to(self.path)  # raises a ValueError if path is not relative
        except ValueError:
            if not output_dir:
                raise
            # NOTE(review): output_dir is used as given, both as the copy destination and as the
            # location_href prefix - confirm whether it is meant to be relative to the repository
            # root, since a relative value is resolved against the current working directory here.
            os.makedirs(output_dir, exist_ok=True)
            relative_path = Path(output_dir) / os.path.basename(path)
            shutil.copy2(path, relative_path)

        pkg = package_from_rpm(
            path,
            checksum_type=self._checksum_type,
            location_href=str(relative_path),
            location_base=None,
            changelog_limit=self._changelog_limit
        )

        self.add_pkg(pkg)
        return pkg

    def add_pkg(self, pkg):
        """Add a package to the repo from a pre-created Package object."""
        assert self._has_set_num_pkgs, "Must set the number of packages before adding packages"
        assert not self._finished, self._FINISHED_ERR_MSG

        for record_name in ("primary", "filelists", "other"):
            self.working_metadata_files[record_name].writer.add_pkg(pkg)

    def add_repomd_metadata(self, name, path, compressiontype=None):
        """Add an additional metadata file to the final repomd.

        The file is copied (or, if *compressiontype* is given, compressed)
        into the repodata directory, and the resulting file is what gets
        recorded under *name* for finish() to describe in repomd.xml.
        """
        assert not self._finished, self._FINISHED_ERR_MSG

        if not compressiontype:
            # copy2() returns the path of the file it created inside repodata/
            dst = Path(shutil.copy2(path, self.repodata_dir))
        else:
            dst = self.repodata_dir / (os.path.basename(path) + self._suffix_for(compressiontype))
            compress_file(path, str(dst), compressiontype=compressiontype)

        # Track the file inside repodata/, not the caller's source file: finish() checksums and
        # possibly renames whatever is recorded here.
        self.additional_metadata_files[name] = dst

    def add_update_record(self, rec):
        """Add an advisory (update record) to the repository."""
        assert not self._finished, self._FINISHED_ERR_MSG

        # lazily create the updateinfo entry
        if "updateinfo" not in self.working_metadata_files:
            upd_xml_path = self.repodata_dir / ("updateinfo.xml" + self._compression_suffix)
            self.working_metadata_files["updateinfo"] = MetadataInfoHolder(
                upd_xml_path, UpdateInfoXmlFile(str(upd_xml_path), compressiontype=self._compression)
            )

        # NOTE(review): confirm UpdateInfoXmlFile exposes append() for update records
        self.working_metadata_files["updateinfo"].writer.append(rec)

    def finish(self):
        """Finish writing metadata.

        Closes every metadata file being written, builds a repomd record for
        each generated and each externally-added file, and writes repomd.xml
        into the repodata directory. May only be called once.
        """
        assert not self._finished, self._FINISHED_ERR_MSG
        self._finished = True

        # if the user hasn't added any packages we can let them skip this step
        if not self._has_set_num_pkgs:
            self.set_num_of_pkgs(0)

        records = {}

        # fail if the user used add_repomd_metadata() for one of "primary", "filelists", "other",
        # "updateinfo" (if updaterecords added also), etc.
        overlapping_records = set(self.working_metadata_files).intersection(self.additional_metadata_files)
        assert not overlapping_records, "Added repomd metadata {} conflicts with created metadata".format(overlapping_records)

        # Create all the repomdrecords for the standard metadata
        for record_name, metadata_info in self.working_metadata_files.items():
            # Close all of the metadata files being actively edited
            metadata_info.writer.close()
            record = RepomdRecord(record_name, str(metadata_info.path))
            record.fill(self._checksum_type)
            records[record_name] = record

        # Create all the repomdrecords for the externally-added metadata
        for record_name, path in self.additional_metadata_files.items():
            # if the user tried to add the same record twice, last one wins I guess?
            record = RepomdRecord(record_name, str(path))
            record.fill(self._checksum_type)
            records[record_name] = record

        # Rename the files (if requested) and then add all the repomdrecords to the repomd.xml
        for record in records.values():
            if self._unique_md_filenames:
                record.rename_file()
            self.repomd.set_record(record)

        # Write repomd.xml with an explicit encoding rather than the locale default
        repomd_path = self.repodata_dir / "repomd.xml"
        with open(repomd_path, "w", encoding="utf-8") as repomd_xml_file:
            repomd_xml_file.write(self.repomd.xml_dump())

# If we have been built as a Python package, e.g. "setup.py", this is where the binaries
# will be located.
_DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')
Expand Down

0 comments on commit 9a9928d

Please sign in to comment.