diff --git a/BUILD.bazel b/BUILD.bazel index fe131fb95..feb0df4e3 100755 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -120,3 +120,14 @@ sh_test( ":pusher.par", ], ) + +py_test( + name = "client_v2_2_unit_tests", + size = "large", + srcs = [ + "client_v2_2_unit_tests.py", + ":containerregistry", + ], + main = "client_v2_2_unit_tests.py", +) + diff --git a/client/v2_2/docker_image_.py b/client/v2_2/docker_image_.py index 53e2488b5..66e69375b 100755 --- a/client/v2_2/docker_image_.py +++ b/client/v2_2/docker_image_.py @@ -798,18 +798,19 @@ def __exit__(self, unused_type, unused_value, unused_traceback): pass -def _in_whiteout_dir(fs, name): +def _in_whiteout_dir(fs, opaque_whiteouts, name): while name: dirname = os.path.dirname(name) if name == dirname: break - if fs.get(dirname): + if fs.get(dirname) or dirname in opaque_whiteouts: return True name = dirname return False _WHITEOUT_PREFIX = '.wh.' +_OPAQUE_WHITEOUT_FILENAME = '.wh..wh..opq' def extract(image, tar): @@ -823,17 +824,27 @@ def extract(image, tar): # to whether they are a tombstone or not. fs = {} + opaque_whiteouts_in_higher_layers = set() + # Walk the layers, topmost first and add files. If we've seen them in a # higher layer then we skip them for layer in image.diff_ids(): buf = io.BytesIO(image.uncompressed_layer(layer)) with tarfile.open(mode='r:', fileobj=buf) as layer_tar: + opaque_whiteouts_in_this_layer = [] for tarinfo in layer_tar: # If we see a whiteout file, then don't add anything to the tarball # but ensure that any lower layers don't add a file with the whited # out name. basename = os.path.basename(tarinfo.name) dirname = os.path.dirname(tarinfo.name) + + # If we see an opaque whiteout file, then don't add anything to the + # tarball but ensure that any lower layers don't add files or + # directories which are siblings of the whiteout file. + if basename == _OPAQUE_WHITEOUT_FILENAME: + opaque_whiteouts_in_this_layer.append(dirname) + tombstone = basename.startswith(_WHITEOUT_PREFIX) if tombstone: basename = basename[len(_WHITEOUT_PREFIX):] @@ -845,7 +856,7 @@ def extract(image, tar): continue # Check for a whited out parent directory - if _in_whiteout_dir(fs, name): + if _in_whiteout_dir(fs, opaque_whiteouts_in_higher_layers, name): continue # Mark this file as handled by adding its name. @@ -857,3 +868,4 @@ def extract(image, tar): tar.addfile(tarinfo, fileobj=layer_tar.extractfile(tarinfo)) else: tar.addfile(tarinfo, fileobj=None) + opaque_whiteouts_in_higher_layers.update(opaque_whiteouts_in_this_layer) diff --git a/client_v2_2_unit_tests.py b/client_v2_2_unit_tests.py new file mode 100644 index 000000000..55792c41d --- /dev/null +++ b/client_v2_2_unit_tests.py @@ -0,0 +1,167 @@ +# Copyright 2018 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from collections import OrderedDict +import io +from StringIO import StringIO +import tarfile +import unittest + +from containerregistry.client.v2_2 import docker_image as v2_2_image + + +class MockImage(object): + """Mock of DockerImage, implementing only the methods called by extract().""" + + def __init__(self): + self._fs_layers = OrderedDict() + + def add_layer(self, filenames): + """Add a layer to the image. + + Args: + filenames: a list of filenames or (filename, content) pairs. Filenames + with trailing slashes become directory entries in the generated tar + """ + buf = io.BytesIO() + with tarfile.open(mode='w:', fileobj=buf) as tf: + for entry in filenames: + if (isinstance(entry, basestring)): + name = entry + content = "" + else: + (name, content) = entry + tarinfo = tarfile.TarInfo(name) + tarinfo.size = len(content) + if name.endswith("/"): + tarinfo.type = tarfile.DIRTYPE + tf.addfile(tarinfo, fileobj=(StringIO(content) if content else None)) + buf.seek(0) + new_layer_id = str(len(self._fs_layers)) + self._fs_layers[new_layer_id] = buf.getvalue() + + def diff_ids(self): + return reversed(self._fs_layers.keys()) + + def uncompressed_layer(self, layer_id): + return self._fs_layers[layer_id] + + +class TestExtract(unittest.TestCase): + + def _test_flatten(self, layer_filenames, expected_flattened_output): + # Construct a mock DockerImage with the specified layers: + img = MockImage() + for filenames in layer_filenames: + img.add_layer(filenames) + buf = io.BytesIO() + + # Run the actual extract logic: + with tarfile.open(mode='w:', fileobj=buf) as tar: + v2_2_image.extract(img, tar) + + # Compare the extract() output to the expected results: + buf.seek(0) + flattened_output = [] + with tarfile.open(mode='r', fileobj=buf) as tar: + for tarinfo in tar: + if tarinfo.isdir(): + flattened_output.append(tarinfo.name + "/") + else: + contents = tar.extractfile(tarinfo).read() + if contents: + flattened_output.append((tarinfo.name, contents)) + else: + flattened_output.append(tarinfo.name) + self.assertEqual(flattened_output, expected_flattened_output) + + def test_single_layer(self): + self._test_flatten( + [["/directory/", "/file"]], + ["/directory/", "/file"] + ) + + def test_purely_additive_layers(self): + self._test_flatten( + [ + ["dir/", "dir/file1", "file"], + ["dir/file2", "file2"] + ], + ["dir/file2", "file2", "dir/", "dir/file1", "file"] + ) + + def test_highest_layer_of_file_takes_precedence(self): + self._test_flatten( + [ + [("file", "a")], + [("file", "b")] + ], + [("file", "b")] + ) + + def test_single_file_whiteout(self): + self._test_flatten( + [ + ["/foo"], + ["/.wh.foo"] + ], + [] + ) + + def test_parent_directory_whiteout(self): + self._test_flatten( + [ + ["/x/a/", "/x/b/", "/x/b/1"], + ["/x/.wh.b"] + ], + ["/x/a/"] + ) + + def test_opaque_whiteout(self): + # Example from https://github.com/opencontainers/image-spec/blob/master/layer.md#whiteouts + self._test_flatten( + [ + ["a/", "a/b/", "a/b/c/", "a/b/c/bar"], + ["a/", "a/.wh..wh..opq", "a/b/", "a/b/c/", "a/b/c/foo"], + ], + ["a/", "a/b/", "a/b/c/", "a/b/c/foo"], + ) + + self._test_flatten( + [ + ["a/", "a/b/", "a/b/c/", "a/b/c/bar"], + ["a/", "a/b/", "a/b/c/", "a/b/c/foo", "a/.wh..wh..opq"], + ], + ["a/", "a/b/", "a/b/c/", "a/b/c/foo"], + ) + + def test_opaque_whiteout_preserves_parent_directory(self): + # Example from https://github.com/opencontainers/image-spec/blob/master/layer.md#whiteouts + self._test_flatten( + [ + [ + "bin/", + "bin/my-app-binary", + "bin/my-app-tools", + "bin/tools/", + "bin/tools/my-app-tool-one" + ], + ["bin/.wh..wh..opq"], + ], + ["bin/"], + ) + + +if __name__ == "__main__": + unittest.main(verbosity=2)