Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support custom compression program #320

Merged
merged 8 commits into from
Apr 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .bazelci/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,6 @@ tasks:
- "//tests:helpers_test"
- "//tests:pkg_deb_test"
- "//tests:pkg_tar_test"
- "//tests:test_tar_compression"
- "//tests:zip_test"

67 changes: 37 additions & 30 deletions pkg/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ class Error(Exception):
def __init__(self,
name,
compression='',
compressor='',
root_directory='.',
default_mtime=None,
preserve_tar_mtimes=True):
Expand All @@ -129,27 +130,13 @@ def __init__(self,
Args:
name: the tar file name.
compression: compression type: bzip2, bz2, gz, tgz, xz, lzma.
compressor: custom command to do the compression.
root_directory: virtual root to prepend to elements in the archive.
default_mtime: default mtime to use for elements in the archive.
May be an integer or the value 'portable' to use the date
2000-01-01, which is compatible with non-*nix OSes.
preserve_tar_mtimes: if true, keep file mtimes from input tar file.
"""
if compression in ['bzip2', 'bz2']:
mode = 'w:bz2'
else:
mode = 'w:'
self.gz = compression in ['tgz', 'gz']
# Fallback to xz compression through xz.
self.use_xz_tool = False
if compression in ['xz', 'lzma']:
if HAS_LZMA:
mode = 'w:xz'
else:
self.use_xz_tool = True
self.name = name
self.root_directory = root_directory.rstrip('/').rstrip('\\')
self.root_directory = self.root_directory.replace('\\', '/')
self.preserve_mtime = preserve_tar_mtimes
if default_mtime is None:
self.default_mtime = 0
Expand All @@ -159,11 +146,37 @@ def __init__(self,
self.default_mtime = int(default_mtime)

self.fileobj = None
if self.gz:
# The Tarfile class doesn't allow us to specify gzip's mtime attribute.
# Instead, we manually re-implement gzopen from tarfile.py and set mtime.
self.fileobj = gzip.GzipFile(
filename=name, mode='w', compresslevel=9, mtime=self.default_mtime)
self.compressor_cmd = (compressor or '').strip()
if self.compressor_cmd:
# Some custom command has been specified: no need for further
# configuration, we're just going to use it.
pass
HackAttack marked this conversation as resolved.
Show resolved Hide resolved
# Support xz compression through xz... until we can use Py3
elif compression in ['xz', 'lzma']:
if HAS_LZMA:
mode = 'w:xz'
else:
self.compressor_cmd = 'xz -F {} -'.format(compression)
elif compression in ['bzip2', 'bz2']:
mode = 'w:bz2'
else:
mode = 'w:'
if compression in ['tgz', 'gz']:
# The Tarfile class doesn't allow us to specify gzip's mtime attribute.
# Instead, we manually reimplement gzopen from tarfile.py and set mtime.
self.fileobj = gzip.GzipFile(
filename=name, mode='w', compresslevel=9, mtime=self.default_mtime)
self.compressor_proc = None
if self.compressor_cmd:
mode = 'w|'
self.compressor_proc = subprocess.Popen(self.compressor_cmd.split(),
stdin=subprocess.PIPE,
stdout=open(name, 'wb'))
self.fileobj = self.compressor_proc.stdin
self.name = name
self.root_directory = root_directory.rstrip('/').rstrip('\\')
self.root_directory = self.root_directory.replace('\\', '/')

self.tar = tarfile.open(name=name, mode=mode, fileobj=self.fileobj)
self.members = set([])
self.directories = set([])
Expand Down Expand Up @@ -419,15 +432,9 @@ def close(self):
TarFileWriter.Error: if an error happens when compressing the output file.
"""
self.tar.close()
# Close the gzip file object if necessary.
# Close the file object if necessary.
if self.fileobj:
self.fileobj.close()
if self.use_xz_tool:
# Support xz compression through xz... until we can use Py3
if subprocess.call('which xz', shell=True, stdout=subprocess.PIPE):
raise self.Error('Cannot handle .xz and .lzma compression: '
'xz not found.')
subprocess.call(
'mv {0} {0}.d && xz -z {0}.d && mv {0}.d.xz {0}'.format(self.name),
shell=True,
stdout=subprocess.PIPE)
if self.compressor_proc and self.compressor_proc.wait() != 0:
raise self.Error('Custom compression command '
'"{}" failed'.format(self.compressor_cmd))
17 changes: 13 additions & 4 deletions pkg/build_tar.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,20 @@ class TarFile(object):
class DebError(Exception):
pass

def __init__(self, output, directory, compression, root_directory,
def __init__(self, output, directory, compression, compressor, root_directory,
default_mtime):
self.directory = directory
self.output = output
self.compression = compression
self.compressor = compressor
self.root_directory = root_directory
self.default_mtime = default_mtime

def __enter__(self):
    """Open the underlying tar writer and return this wrapper.

    The writer is created from the output path, compression type, custom
    compressor command, root directory and default mtime that were stored
    on this object by the constructor.
    """
    writer = archive.TarFileWriter(
        name=self.output,
        compression=self.compression,
        compressor=self.compressor,
        root_directory=self.root_directory,
        default_mtime=self.default_mtime,
    )
    self.tarfile = writer
    return self
Expand Down Expand Up @@ -233,8 +235,14 @@ def main():
parser.add_argument(
'--directory',
help='Directory in which to store the file inside the layer')
parser.add_argument('--compression',
help='Compression (`gz` or `bz2`), default is none.')

compression = parser.add_mutually_exclusive_group()
compression.add_argument('--compression',
help='Compression (`gz` or `bz2`), default is none.')
compression.add_argument('--compressor',
help='Compressor program and arguments, '
'e.g. `pigz -p 4`')

parser.add_argument(
'--modes', action='append',
help='Specific mode to apply to specific file (from the file argument),'
Expand Down Expand Up @@ -298,7 +306,8 @@ def main():
# Add objects to the tar file
with TarFile(
options.output, helpers.GetFlagValue(options.directory),
options.compression, options.root_directory, options.mtime) as output:
options.compression, options.compressor, options.root_directory,
options.mtime) as output:

def file_attributes(filename):
if filename.startswith('/'):
Expand Down
26 changes: 24 additions & 2 deletions pkg/docs/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,9 @@ There are currently no other well-known attributes.
## pkg_tar

```python
pkg_tar(name, extension, strip_prefix, package_dir, srcs,
mode, modes, deps, symlinks, package_file_name, package_variables)
pkg_tar(name, extension, strip_prefix, package_dir, srcs, compressor,
compressor_args, mode, modes, deps, symlinks, package_file_name,
package_variables)
```

Creates a tar file from a list of inputs.
Expand Down Expand Up @@ -145,6 +146,27 @@ Creates a tar file from a list of inputs.
</p>
</td>
</tr>
<tr>
<td><code>compressor</code></td>
<td>
<code>Label, optional</code>
<p>
Executable to be built and used as part of a custom compression filter.
For example, to compress with <code>pigz -p 4</code>, use <code>"@pigz"</code> here
(assuming a workspace rule named "pigz" exists).
</p>
</td>
</tr>
<tr>
<td><code>compressor_args</code></td>
<td>
<code>String, optional</code>
<p>
Arguments to be passed to <code>compressor</code>.
For example, to compress with <code>pigz -p 4</code>, use <code>"-p 4"</code> here.
</p>
</td>
</tr>
<tr>
<td><code>mode</code></td>
<td>
Expand Down
7 changes: 6 additions & 1 deletion pkg/pkg.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ def _pkg_tar_impl(ctx):
"--owner=" + ctx.attr.owner,
"--owner_name=" + ctx.attr.ownername,
]
if ctx.executable.compressor:
args.append("--compressor=%s %s" % (ctx.executable.compressor.path, ctx.attr.compressor_args))
HackAttack marked this conversation as resolved.
Show resolved Hide resolved
if ctx.attr.mtime != _DEFAULT_MTIME:
if ctx.attr.portable_mtime:
fail("You may not set both mtime and portable_mtime")
Expand Down Expand Up @@ -129,7 +131,7 @@ def _pkg_tar_impl(ctx):
args += ["--empty_file=%s" % empty_file for empty_file in ctx.attr.empty_files]
if ctx.attr.empty_dirs:
args += ["--empty_dir=%s" % empty_dir for empty_dir in ctx.attr.empty_dirs]
if ctx.attr.extension:
if ctx.attr.extension and not ctx.executable.compressor:
dotPos = ctx.attr.extension.find(".")
if dotPos > 0:
dotPos += 1
Expand All @@ -149,6 +151,7 @@ def _pkg_tar_impl(ctx):
mnemonic = "PackageTar",
progress_message = "Writing: %s" % output_file.path,
inputs = file_inputs + ctx.files.deps + files,
tools = [ctx.executable.compressor] if ctx.executable.compressor else [],
executable = ctx.executable.build_tar,
arguments = ["@" + arg_file.path],
outputs = [output_file],
Expand Down Expand Up @@ -351,6 +354,8 @@ pkg_tar_impl = rule(
"include_runfiles": attr.bool(),
"empty_dirs": attr.string_list(),
"remap_paths": attr.string_dict(),
"compressor": attr.label(executable = True, cfg = "exec"),
"compressor_args": attr.string(),

# Common attributes
"out": attr.output(mandatory = True),
Expand Down
16 changes: 16 additions & 0 deletions pkg/tests/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ exports_files(glob(["testdata/**"]))
filegroup(
name = "archive_testdata",
srcs = glob(["testdata/**"]) + [
":compressor",
":test_tar_compression",
":test_tar_package_dir",
":test_tar_package_dir_file",
],
Expand Down Expand Up @@ -88,6 +90,20 @@ copy_file(
out = "zipcontent/loremipsum.txt",
)

py_binary(
name = "compressor",
srcs = ["compressor.py"],
python_version = "PY3",
srcs_version = "PY3",
)

pkg_tar(
name = "test_tar_compression",
deps = ["testdata/tar_test.tar"],
compressor = ":compressor",
compressor_args = "-a -b -c"
)

#
# Tests for package_file_name
#
Expand Down
16 changes: 16 additions & 0 deletions pkg/tests/archive_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

from bazel_tools.tools.python.runfiles import runfiles
from rules_pkg import archive
from tests import compressor


class SimpleArFileTest(unittest.TestCase):
Expand Down Expand Up @@ -403,6 +404,21 @@ def testPackageDirFileAttribute(self):
self.assertTarFileContent(package_dir, expected_content)
self.assertTarFileContent(package_dir_file, expected_content)

def testCustomCompression(self):
    """Verify the archive built by the custom-compressor pkg_tar target.

    The fake compressor only prepends `compressor.GARBAGE` to its input, so
    dropping that prefix from the built file must yield a tar with the same
    content as the original test archive.
    """
    rlocation = self.data_files.Rlocation
    original = rlocation("rules_pkg/tests/testdata/tar_test.tar")
    compressed = rlocation("rules_pkg/tests/test_tar_compression.tar")

    expected_content = []
    for entry in ("a", "b", "ab"):
        expected_content.append(
            {"name": "./" + entry, "data": entry.encode("utf-8")})

    # "Decompress" by skipping the garbage prefix and copying the rest.
    prefix_length = len(compressor.GARBAGE)
    with open(compressed, "rb") as f_in, open(self.tempfile, "wb") as f_out:
        f_in.seek(prefix_length)
        payload = f_in.read()
        f_out.write(payload)

    for tar_path in (original, self.tempfile):
        self.assertTarFileContent(tar_path, expected_content)

if __name__ == "__main__":
unittest.main()
10 changes: 10 additions & 0 deletions pkg/tests/compressor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
'''Fake compressor that just prepends garbage bytes.

Reads the data to "compress" from stdin and writes GARBAGE followed by the
unmodified input to stdout. The command-line arguments must be exactly
`-a -b -c`; anything else makes the program exit with an error.
'''

import shutil
import sys

# Prefix written ahead of the input stream; importers can use its length to
# strip the prefix again.
GARBAGE = b'garbage'

# The only argument list this fake compressor accepts.
_EXPECTED_ARGS = ['-a', '-b', '-c']


def main(argv, stdin, stdout):
    '''Copy `stdin` to `stdout`, prefixed with GARBAGE.

    Args:
      argv: command-line arguments, without the program name.
      stdin: binary file object to read the input from.
      stdout: binary file object to write the output to.

    Raises:
      SystemExit: if `argv` is not exactly the expected argument list.
    '''
    args = list(argv)
    # An explicit check (rather than `assert`) still runs under `python -O`.
    if args != _EXPECTED_ARGS:
        sys.exit('compressor: expected arguments {!r}, got {!r}'.format(
            _EXPECTED_ARGS, args))
    stdout.write(GARBAGE)
    # Stream the payload in chunks instead of reading it all into memory.
    shutil.copyfileobj(stdin, stdout)
    stdout.flush()


if __name__ == '__main__':
    main(sys.argv[1:], sys.stdin.buffer, sys.stdout.buffer)