diff --git a/.bazelci/tests.yml b/.bazelci/tests.yml index 183bd0a5..b5e87743 100644 --- a/.bazelci/tests.yml +++ b/.bazelci/tests.yml @@ -39,5 +39,6 @@ tasks: - "//tests:helpers_test" - "//tests:pkg_deb_test" - "//tests:pkg_tar_test" + - "//tests:test_tar_compression" - "//tests:zip_test" diff --git a/pkg/archive.py b/pkg/archive.py index c379dc15..e21111e9 100644 --- a/pkg/archive.py +++ b/pkg/archive.py @@ -121,6 +121,7 @@ class Error(Exception): def __init__(self, name, compression='', + compressor='', root_directory='.', default_mtime=None, preserve_tar_mtimes=True): @@ -129,27 +130,13 @@ def __init__(self, Args: name: the tar file name. compression: compression type: bzip2, bz2, gz, tgz, xz, lzma. + compressor: custom command to do the compression. root_directory: virtual root to prepend to elements in the archive. default_mtime: default mtime to use for elements in the archive. May be an integer or the value 'portable' to use the date 2000-01-01, which is compatible with non *nix OSes'. preserve_tar_mtimes: if true, keep file mtimes from input tar file. """ - if compression in ['bzip2', 'bz2']: - mode = 'w:bz2' - else: - mode = 'w:' - self.gz = compression in ['tgz', 'gz'] - # Fallback to xz compression through xz. - self.use_xz_tool = False - if compression in ['xz', 'lzma']: - if HAS_LZMA: - mode = 'w:xz' - else: - self.use_xz_tool = True - self.name = name - self.root_directory = root_directory.rstrip('/').rstrip('\\') - self.root_directory = self.root_directory.replace('\\', '/') self.preserve_mtime = preserve_tar_mtimes if default_mtime is None: self.default_mtime = 0 @@ -159,11 +146,37 @@ def __init__(self, self.default_mtime = int(default_mtime) self.fileobj = None - if self.gz: - # The Tarfile class doesn't allow us to specify gzip's mtime attribute. - # Instead, we manually re-implement gzopen from tarfile.py and set mtime. 
- self.fileobj = gzip.GzipFile( - filename=name, mode='w', compresslevel=9, mtime=self.default_mtime) + self.compressor_cmd = (compressor or '').strip() + if self.compressor_cmd: + # Some custom command has been specified: no need for further + # configuration, we're just going to use it. + pass + # Support xz compression through xz... until we can use Py3 + elif compression in ['xz', 'lzma']: + if HAS_LZMA: + mode = 'w:xz' + else: + self.compressor_cmd = 'xz -F {} -'.format(compression) + elif compression in ['bzip2', 'bz2']: + mode = 'w:bz2' + else: + mode = 'w:' + if compression in ['tgz', 'gz']: + # The Tarfile class doesn't allow us to specify gzip's mtime attribute. + # Instead, we manually reimplement gzopen from tarfile.py and set mtime. + self.fileobj = gzip.GzipFile( + filename=name, mode='w', compresslevel=9, mtime=self.default_mtime) + self.compressor_proc = None + if self.compressor_cmd: + mode = 'w|' + self.compressor_proc = subprocess.Popen(self.compressor_cmd.split(), + stdin=subprocess.PIPE, + stdout=open(name, 'wb')) + self.fileobj = self.compressor_proc.stdin + self.name = name + self.root_directory = root_directory.rstrip('/').rstrip('\\') + self.root_directory = self.root_directory.replace('\\', '/') + self.tar = tarfile.open(name=name, mode=mode, fileobj=self.fileobj) self.members = set([]) self.directories = set([]) @@ -419,15 +432,9 @@ def close(self): TarFileWriter.Error: if an error happens when compressing the output file. """ self.tar.close() - # Close the gzip file object if necessary. + # Close the file object if necessary. if self.fileobj: self.fileobj.close() - if self.use_xz_tool: - # Support xz compression through xz... 
until we can use Py3 - if subprocess.call('which xz', shell=True, stdout=subprocess.PIPE): - raise self.Error('Cannot handle .xz and .lzma compression: ' - 'xz not found.') - subprocess.call( - 'mv {0} {0}.d && xz -z {0}.d && mv {0}.d.xz {0}'.format(self.name), - shell=True, - stdout=subprocess.PIPE) + if self.compressor_proc and self.compressor_proc.wait() != 0: + raise self.Error('Custom compression command ' + '"{}" failed'.format(self.compressor_cmd)) diff --git a/pkg/build_tar.py b/pkg/build_tar.py index f861ea3d..f4117748 100644 --- a/pkg/build_tar.py +++ b/pkg/build_tar.py @@ -29,11 +29,12 @@ class TarFile(object): class DebError(Exception): pass - def __init__(self, output, directory, compression, root_directory, + def __init__(self, output, directory, compression, compressor, root_directory, default_mtime): self.directory = directory self.output = output self.compression = compression + self.compressor = compressor self.root_directory = root_directory self.default_mtime = default_mtime @@ -41,6 +42,7 @@ def __enter__(self): self.tarfile = archive.TarFileWriter( self.output, self.compression, + self.compressor, self.root_directory, default_mtime=self.default_mtime) return self @@ -233,8 +235,14 @@ def main(): parser.add_argument( '--directory', help='Directory in which to store the file inside the layer') - parser.add_argument('--compression', - help='Compression (`gz` or `bz2`), default is none.') + + compression = parser.add_mutually_exclusive_group() + compression.add_argument('--compression', + help='Compression (`gz` or `bz2`), default is none.') + compression.add_argument('--compressor', + help='Compressor program and arguments, ' + 'e.g. 
`pigz -p 4`') + parser.add_argument( '--modes', action='append', help='Specific mode to apply to specific file (from the file argument),' @@ -298,7 +306,8 @@ def main(): # Add objects to the tar file with TarFile( options.output, helpers.GetFlagValue(options.directory), - options.compression, options.root_directory, options.mtime) as output: + options.compression, options.compressor, options.root_directory, + options.mtime) as output: def file_attributes(filename): if filename.startswith('/'): diff --git a/pkg/docs/reference.md b/pkg/docs/reference.md index a8af0582..ceed1ae0 100644 --- a/pkg/docs/reference.md +++ b/pkg/docs/reference.md @@ -68,8 +68,9 @@ There are currently no other well-known attributes. ## pkg_tar ```python -pkg_tar(name, extension, strip_prefix, package_dir, srcs, - mode, modes, deps, symlinks, package_file_name, package_variables) +pkg_tar(name, extension, strip_prefix, package_dir, srcs, compressor, + compressor_args, mode, modes, deps, symlinks, package_file_name, + package_variables) ``` Creates a tar file from a list of inputs. @@ -145,6 +146,27 @@ Creates a tar file from a list of inputs.

+ + compressor + + Label, optional +

+ Executable to be built and used as part of a custom compression filter. + For example, to compress with pigz -p 4, use "@pigz" here + (assuming a workspace rule named "pigz" exists). +

+ + + + compressor_args + + String, optional +

+ Arguments to be passed to the compressor. + For example, to compress with pigz -p 4, use "-p 4" here. +

+ + mode diff --git a/pkg/pkg.bzl b/pkg/pkg.bzl index 55f5df67..19c883b5 100644 --- a/pkg/pkg.bzl +++ b/pkg/pkg.bzl @@ -76,6 +76,8 @@ def _pkg_tar_impl(ctx): "--owner=" + ctx.attr.owner, "--owner_name=" + ctx.attr.ownername, ] + if ctx.executable.compressor: + args.append("--compressor=%s %s" % (ctx.executable.compressor.path, ctx.attr.compressor_args)) if ctx.attr.mtime != _DEFAULT_MTIME: if ctx.attr.portable_mtime: fail("You may not set both mtime and portable_mtime") @@ -129,7 +131,7 @@ def _pkg_tar_impl(ctx): args += ["--empty_file=%s" % empty_file for empty_file in ctx.attr.empty_files] if ctx.attr.empty_dirs: args += ["--empty_dir=%s" % empty_dir for empty_dir in ctx.attr.empty_dirs] - if ctx.attr.extension: + if ctx.attr.extension and not ctx.executable.compressor: dotPos = ctx.attr.extension.find(".") if dotPos > 0: dotPos += 1 @@ -149,6 +151,7 @@ def _pkg_tar_impl(ctx): mnemonic = "PackageTar", progress_message = "Writing: %s" % output_file.path, inputs = file_inputs + ctx.files.deps + files, + tools = [ctx.executable.compressor] if ctx.executable.compressor else [], executable = ctx.executable.build_tar, arguments = ["@" + arg_file.path], outputs = [output_file], @@ -351,6 +354,8 @@ pkg_tar_impl = rule( "include_runfiles": attr.bool(), "empty_dirs": attr.string_list(), "remap_paths": attr.string_dict(), + "compressor": attr.label(executable = True, cfg = "exec"), + "compressor_args": attr.string(), # Common attributes "out": attr.output(mandatory = True), diff --git a/pkg/tests/BUILD b/pkg/tests/BUILD index d958ace4..cb28a07b 100644 --- a/pkg/tests/BUILD +++ b/pkg/tests/BUILD @@ -25,6 +25,8 @@ exports_files(glob(["testdata/**"])) filegroup( name = "archive_testdata", srcs = glob(["testdata/**"]) + [ + ":compressor", + ":test_tar_compression", ":test_tar_package_dir", ":test_tar_package_dir_file", ], @@ -88,6 +90,20 @@ copy_file( out = "zipcontent/loremipsum.txt", ) +py_binary( + name = "compressor", + srcs = ["compressor.py"], + python_version = "PY3", + 
srcs_version = "PY3", +) + +pkg_tar( + name = "test_tar_compression", + deps = ["testdata/tar_test.tar"], + compressor = ":compressor", + compressor_args = "-a -b -c" +) + # # Tests for package_file_name # diff --git a/pkg/tests/archive_test.py b/pkg/tests/archive_test.py index af51b70a..c700aa94 100644 --- a/pkg/tests/archive_test.py +++ b/pkg/tests/archive_test.py @@ -19,6 +19,7 @@ from bazel_tools.tools.python.runfiles import runfiles from rules_pkg import archive +from tests import compressor class SimpleArFileTest(unittest.TestCase): @@ -403,6 +404,21 @@ def testPackageDirFileAttribute(self): self.assertTarFileContent(package_dir, expected_content) self.assertTarFileContent(package_dir_file, expected_content) + def testCustomCompression(self): + original = self.data_files.Rlocation( + "rules_pkg/tests/testdata/tar_test.tar") + compressed = self.data_files.Rlocation( + "rules_pkg/tests/test_tar_compression.tar") + expected_content = [ + {"name": "./" + x, "data": x.encode("utf-8")} for x in ["a", "b", "ab"] + ] + with open(compressed, "rb") as f_in, open(self.tempfile, "wb") as f_out: + # "Decompress" by skipping garbage bytes + f_in.seek(len(compressor.GARBAGE)) + f_out.write(f_in.read()) + + self.assertTarFileContent(original, expected_content) + self.assertTarFileContent(self.tempfile, expected_content) if __name__ == "__main__": unittest.main() diff --git a/pkg/tests/compressor.py b/pkg/tests/compressor.py new file mode 100644 index 00000000..3603d0d3 --- /dev/null +++ b/pkg/tests/compressor.py @@ -0,0 +1,10 @@ +'''Fake compressor that just prepends garbage bytes.''' + +import sys + +GARBAGE = b'garbage' + +if __name__ == '__main__': + assert sys.argv[1:] == ['-a', '-b', '-c'] + sys.stdout.buffer.write(GARBAGE) + sys.stdout.buffer.write(sys.stdin.buffer.read())