diff --git a/apps/apps_publisher.py b/apps/apps_publisher.py
index 0e260138..74109011 100644
--- a/apps/apps_publisher.py
+++ b/apps/apps_publisher.py
@@ -1,6 +1,7 @@
 # Copyright (c) 2020 Foundries.io
 # SPDX-License-Identifier: Apache-2.0
 
+import json
 import logging
 
 from tempfile import NamedTemporaryFile
@@ -15,11 +16,13 @@ class AppsPublisher:
-    def __init__(self, factory, publish_tool: str, archs: str, registry_host=DockerRegistryClient.DefaultRegistryHost):
+    def __init__(self, factory, publish_tool: str, archs: str,
+                 registry_host=DockerRegistryClient.DefaultRegistryHost, layers_meta: dict = None):
         self._factory = factory
         self._publish_tool = publish_tool
         self._archs = archs
         self._registry_host = registry_host
+        self._layers_meta = layers_meta
         self._image_base_url = '{}/{}'.format(registry_host, self._factory)
         self._allowed_tags = ['${TAG}', 'latest']
@@ -83,6 +86,10 @@ def __publish(self, app: ComposeApps.App, tag: str):
         app_base_url = self._image_base_url + '/' + app.name
         self._app_tagged_url = app_base_url + ':app-' + tag
         # TODO: Consider implementation of the "publish tool" in DockerRegistryClient
-        with NamedTemporaryFile(mode="w+") as f:
-            cmd_exe(self._publish_tool, '-d', f.name, self._app_tagged_url, self._archs, cwd=app.dir)
-            return app_base_url + '@' + f.read().strip()
+        with NamedTemporaryFile(mode="w+") as layers_meta_file:
+            json.dump(self._layers_meta, layers_meta_file)
+            layers_meta_file.flush()
+            with NamedTemporaryFile(mode="w+") as f:
+                cmd_exe(self._publish_tool, '-d', f.name, '-l', layers_meta_file.name,
+                        self._app_tagged_url, self._archs, cwd=app.dir)
+                return app_base_url + '@' + f.read().strip()
diff --git a/apps/build.sh b/apps/build.sh
index 2b09f1be..0c321df6 100755
--- a/apps/build.sh
+++ b/apps/build.sh
@@ -217,3 +217,7 @@ done
 [ -d $HOME/.docker/manifests ] && mv $HOME/.docker/manifests /archive/manifests || echo 'no manifests to archive'
 
 PYTHONPATH=${HERE}/.. python3 ${HERE}/generate_non_factory_sboms.py --arch=$ARCH
+# 1. Parse the local docker store (the one where the built images are stored).
+# 2. Extract layers metadata (size, usage) of all Apps' images
+# 3. Store the gathered layers metadata as a CI artifact
+PYTHONPATH="${HERE}"/.. python3 "${HERE}"/get_layers_meta.py --apps-root "${REPO_ROOT}" --tag "${TAG}-${ARCH}" --out-file "/archive/layers_meta.json"
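For reference, a minimal sketch of the layers-metadata that `AppsPublisher.__publish` dumps into the temporary file passed to the publish tool via `-l`. The shape follows the `get_layers_meta.py` output keyed by architecture, as assembled in `publish.py`; the digests and byte values below are hypothetical.

# Illustration only: approximate content of the '-l' file written by AppsPublisher.__publish().
# Digests and numbers are made up; the structure mirrors get_layers_meta.py / publish.py.
layers_meta_example = {
    "amd64": {
        "fs_block_size": 4096,
        "layers": {
            "sha256:0123abc...": {"size": 31457280, "usage": 32768000},
            "sha256:4567def...": {"size": 1048576, "usage": 1060864},
        },
    },
    "arm64": {
        "fs_block_size": 4096,
        "layers": {},
    },
}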
python3 "${HERE}"/get_layers_meta.py --apps-root "${REPO_ROOT}" --tag "${TAG}-${ARCH}" --out-file "/archive/layers_meta.json" diff --git a/apps/docker_store.py b/apps/docker_store.py new file mode 100644 index 00000000..7d5f57b3 --- /dev/null +++ b/apps/docker_store.py @@ -0,0 +1,142 @@ +import hashlib +import json +import os.path +import subprocess + + +class DockerStore: + _REPO_PATH = "image/overlay2/repositories.json" + + class Layer: + _DISTRIBUTION_DIGEST_PATH = "image/overlay2/distribution/v2metadata-by-diffid/sha256" + _LAYER_DB_PATH = "image/overlay2/layerdb/sha256" + _LAYER_DATA_BASE_PATH = "overlay2" + _SUPPORTED_HASH_TYPE = "sha256:" + + def __init__(self, data_root, layer_diff_id, parent_chain_id): + self._data_root = data_root + self.diff_id = layer_diff_id + self.digest = self._get_layer_archive_digest() + self.chain_id = self._get_chain_id(parent_chain_id) + # Wire/transfer size (in bytes) of unarchived layer + self.size = self._get_layer_size_from_meta() + self.cache_id = self._get_cache_id() + self.data_path = os.path.join(self._data_root, self._LAYER_DATA_BASE_PATH, + self.cache_id) + + # Disk usage (in bytes) of unarchived layer taking into account the volume block size + self.usage = self._get_disk_usage() + # Disk usage (in bytes) of unarchived layer along with metadata + # taking into account the volume block size + self.usage_with_meta = self._get_disk_usage_with_metadata() + # Size of the file containing metadata about the layer's TAR stream. + # It's stored in + # `/image/overlay2/layerdb/sha256//tar-split.json.gz + # It can be used to partially "fsck" layer data/files on disk based on file/dir names + # and their sizes stored in the "tar-split" file. + # E.g. `tar-split a --input \ + # --path /overlay2//diff/> + # --output /dev/null` + self.tar_split_size = self._get_tar_split_size() + self.overall_usage = self.usage_with_meta + self.tar_split_size + + def _get_layer_archive_digest(self): + digest_file_path = os.path.join(self._data_root, self._DISTRIBUTION_DIGEST_PATH, + self.diff_id[len(self._SUPPORTED_HASH_TYPE):]) + if not os.path.exists(digest_file_path): + raise Exception( + f"Image layer diff ID to digest mapping is not found in: {digest_file_path}") + + with open(digest_file_path) as f: + digests = json.load(f) + return digests[0]["Digest"] + + def _get_chain_id(self, parent_chain_id): + if not parent_chain_id: + return self.diff_id + bytes = parent_chain_id + " " + self.diff_id + return self._SUPPORTED_HASH_TYPE + hashlib.sha256(bytes.encode('utf-8')).hexdigest() + + def _get_layer_size_from_meta(self): + size_file = os.path.join(self._data_root, self._LAYER_DB_PATH, + self.chain_id[len(self._SUPPORTED_HASH_TYPE):], "size") + if not os.path.exists(size_file): + raise Exception(f"Layer size file is missing: {size_file}") + with open(size_file) as f: + size_str = f.readline() + return int(size_str) + + def _get_cache_id(self): + cache_id_file = os.path.join(self._data_root, self._LAYER_DB_PATH, + self.chain_id[len(self._SUPPORTED_HASH_TYPE):], "cache-id") + if not os.path.exists(cache_id_file): + raise Exception(f"Layer cache-id file is missing: {cache_id_file}") + with open(cache_id_file) as f: + cache_id = f.readline() + return cache_id + + def _get_disk_usage_with_metadata(self): + # get disk usage of the layer diff/rootfs along with its metadata + # taking into account a block size + du_output = subprocess.check_output("du -sk " + self.data_path, shell=True) + return int(du_output.decode().split()[0])*1024 + + def _get_disk_usage(self): + # get disk 
+        def _get_disk_usage(self):
+            # get disk usage of the layer diff/rootfs taking into account a block size
+            du_output = subprocess.check_output("du -sk " + self.data_path + "/diff", shell=True)
+            return int(du_output.decode().split()[0])*1024
+
+        def _get_tar_split_size(self):
+            tar_split_file = os.path.join(self._data_root, self._LAYER_DB_PATH,
+                                          self.chain_id[len(self._SUPPORTED_HASH_TYPE):],
+                                          "tar-split.json.gz")
+            if not os.path.exists(tar_split_file):
+                raise Exception(f"Layer tar_split file is missing: {tar_split_file}")
+
+            return os.path.getsize(tar_split_file)
+
+    class Image:
+        _IMAGE_DB_ROOT_PATH = "image/overlay2/imagedb"
+        _IMAGE_DB_CONTENT_PATH = "content/sha256"
+        _SUPPORTED_HASH_TYPE = "sha256:"
+
+        def __init__(self, data_root, image_conf_hash):
+            if not image_conf_hash.startswith(self._SUPPORTED_HASH_TYPE):
+                raise Exception(f"Unsupported image config hash type: {image_conf_hash}")
+
+            image_conf_path = os.path.join(data_root,
+                                           self._IMAGE_DB_ROOT_PATH, self._IMAGE_DB_CONTENT_PATH,
+                                           image_conf_hash[len(self._SUPPORTED_HASH_TYPE):])
+            if not os.path.exists(image_conf_path):
+                raise Exception(f"Image config has not been found in: {image_conf_path}")
+
+            self.layers = []
+            self.conf_hash = image_conf_hash
+            with open(image_conf_path) as f:
+                image_conf = json.load(f)
+
+            cur_chain_id = None
+            for layer_diff_id in image_conf["rootfs"]["diff_ids"]:
+                layer = DockerStore.Layer(data_root, layer_diff_id, cur_chain_id)
+                self.layers.append(layer)
+                cur_chain_id = layer.chain_id
+
+    def __init__(self, data_root="/var/lib/docker"):
+        self.data_root = data_root
+        self._cfg_to_image = {}
+        self.images_by_ref = {}
+        self._parse_repositories()
+
+    def _parse_repositories(self):
+        repos_file = os.path.join(self.data_root, self._REPO_PATH)
+        if not os.path.exists(repos_file):
+            raise Exception(f"No `repositories.json` is found in the docker store: {repos_file}")
+        with open(repos_file) as f:
+            repos = json.load(f)
+            fs_stats = os.fstatvfs(f.fileno())
+            self.fs_block_size = fs_stats.f_bsize
+
+        for image_base_ref, image_refs in repos["Repositories"].items():
+            for ref, image_conf_hash in image_refs.items():
+                if image_conf_hash not in self._cfg_to_image:
+                    self._cfg_to_image[image_conf_hash] = self.Image(self.data_root, image_conf_hash)
+                self.images_by_ref[ref] = self._cfg_to_image[image_conf_hash]
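A minimal usage sketch of the class above (illustration only; the image reference is hypothetical and assumes the docker data root already holds a built image):

# Illustration only: walk the layers of one locally stored image.
from apps.docker_store import DockerStore

store = DockerStore("/var/lib/docker")
image = store.images_by_ref.get("hub.foundries.io/my-factory/my-app:my-tag-amd64")  # hypothetical ref
if image:
    for layer in image.layers:
        print(layer.digest, layer.size, layer.overall_usage)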
diff --git a/apps/get_layers_meta.py b/apps/get_layers_meta.py
new file mode 100644
index 00000000..2fd5fe2e
--- /dev/null
+++ b/apps/get_layers_meta.py
@@ -0,0 +1,82 @@
+import argparse
+import json
+import os.path
+
+from helpers import status
+from apps.docker_store import DockerStore
+from apps.compose_apps import ComposeApps
+
+
+def get_args():
+    parser = argparse.ArgumentParser("Parse docker store and obtain layers metadata of all Apps' images")
+    parser.add_argument("-d", "--docker-data-root",
+                        help="Path to the docker data root", default="/var/lib/docker")
+    parser.add_argument("-a", "--apps-root", help="Path to the compose apps root dir", default="./")
+    parser.add_argument("-t", "--tag", help="Expected tag of images")
+    parser.add_argument("-o", "--out-file", help="Json file to output the gathered layers metadata to")
+    a = parser.parse_args()
+    return a
+
+
+def print_layer_details(layer: DockerStore.Layer):
+    title2value = {
+        "DiffID": layer.diff_id,
+        "ChainID": layer.chain_id,
+        "CacheID": layer.cache_id,
+        "DataPath": layer.data_path,
+        "Size": layer.size,
+        "Usage": layer.usage,
+        "UsageWithMeta": layer.usage_with_meta,
+        "TarMetaSize": layer.tar_split_size,
+        "Overall Usage": layer.overall_usage
+    }
+    for t, v in title2value.items():
+        status(f"{' ':<16}{t:<16}{v}", prefix="")
+
+
+if __name__ == '__main__':
+    args = get_args()
+    status(f"Parsing the docker store {args.docker_data_root}...")
+    docker_store = DockerStore(args.docker_data_root)
+    status(f"The docker store has been parsed; fs block size is {docker_store.fs_block_size}")
+
+    status("Processing metadata about each App layer...")
+    apps = ComposeApps(args.apps_root, quiet=True)
+    apps_layers_meta = {
+        "fs_block_size": docker_store.fs_block_size,
+        "layers": {}
+    }
+    for app in apps:
+        status(f"Processing App metadata: {app.name}", prefix="=== ")
+        for img in app.images(expand_env=True):
+            img_uri = img
+            if img_uri.startswith("hub.foundries.io"):
+                img_uri += ":" + args.tag
+
+            image = docker_store.images_by_ref.get(img_uri)
+            if not image:
+                status("Image metadata are not found in local store; "
+                       f"`SKIP_ARCHS` must be set for the image: {img_uri}", prefix="==== ")
+                continue
+
+            status(f"Image: {img_uri}", prefix="==== ")
+            for layer in image.layers:
+                if layer.digest in apps_layers_meta["layers"]:
+                    status(f"Layer has been already processed: {layer.digest}", prefix="\t=====")
+                    continue
+                status(f"Layer: {layer.digest}", prefix="\t=====")
+                apps_layers_meta["layers"][layer.digest] = {
+                    "size": layer.size,
+                    "usage": layer.overall_usage
+                }
+                print_layer_details(layer)
+                status("\n", prefix="")
+            status("Image processing done", prefix="==== ")
+
+        status(f"App metadata has been successfully processed: {app.name}\n", prefix="=== ")
+
+    status(f"Storing App layers metadata; file: {args.out_file}", prefix="=== ")
+    with open(args.out_file, "+w") as f:
+        json.dump(apps_layers_meta, f)
+
+    status("Processing metadata about each App layer has been successfully completed")
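For a rough sense of the artifact this script writes to `--out-file` (archived by `build.sh` as `/archive/layers_meta.json`), a hedged consumer sketch follows; it is not part of this change and the totals are illustrative only:

# Illustration only: read the layers_meta.json artifact and report totals.
import json

with open("/archive/layers_meta.json") as f:  # path used in build.sh
    meta = json.load(f)

total_size = sum(layer["size"] for layer in meta["layers"].values())
total_usage = sum(layer["usage"] for layer in meta["layers"].values())
print(f"fs block size: {meta['fs_block_size']}; "
      f"layers size: {total_size} B; estimated disk usage: {total_usage} B")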
diff --git a/apps/publish.py b/apps/publish.py
index 007089cc..b34b984f 100755
--- a/apps/publish.py
+++ b/apps/publish.py
@@ -3,11 +3,12 @@
 # Copyright (c) 2020 Foundries.io
 # SPDX-License-Identifier: Apache-2.0
 
+import json
 import logging
 import argparse
 
-from helpers import fio_dnsbase, status
+from helpers import fio_dnsbase, status, jobserv_get
 from apps.target_manager import create_target
 from apps.compose_apps import ComposeApps
 from apps.apps_publisher import AppsPublisher
@@ -18,6 +19,16 @@
 logger = logging.getLogger(__name__)
 
 
+def get_layers_metadata(factory: str, ver: str, archs: []) -> dict:
+    layers_meta = {}
+    project = f"{factory}/lmp" if factory != "lmp" else "lmp"
+    for a in archs:
+        run_name = {"amd64": "build-amd64", "arm64": "build-aarch64", "arm": "build-armhf"}[a]
+        status(f"Downloading layers metadata built by `{run_name}` run", prefix="=== ")
+        layers_meta[a] = jobserv_get(f"/projects/{project}/builds/{ver}/runs/{run_name}/layers_meta.json")
+    return layers_meta
+
+
 def main(factory: str, sha: str, targets_json: str, machines: [], platforms: [],
          app_root_dir: str, publish_tool: str, apps_version: str, target_tag: str,
          target_version: str, new_targets_file: str):
     publish_manifest_lists()
@@ -28,9 +39,13 @@ def main(factory: str, sha: str, targets_json: str, machines: [], platforms: [],
     status('Compose Apps has been validated: {}'.format(apps.str))
 
+    status('Downloading Apps\' layers metadata...')
+    layers_meta = get_layers_metadata(factory, target_version, platforms)
+    status('Apps\' layers metadata have been downloaded')
+
     reg_host = "hub." + fio_dnsbase()
     archs = ','.join(platforms) if platforms else ''
-    apps_to_add_to_target = AppsPublisher(factory, publish_tool, archs, reg_host).publish(apps, apps_version)
+    apps_to_add_to_target = AppsPublisher(factory, publish_tool, archs, reg_host, layers_meta).publish(apps, apps_version)
 
     status('Creating Targets that refer to the published Apps; tag: {}, version: {}, machines: {}, platforms: {} '
            .format(target_tag, target_version, ','.join(machines) if machines else '[]',
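Taken together, a hedged sketch of the end-to-end flow this change introduces (factory name and build number below are hypothetical):

# Illustration only:
# - each per-arch build run archives /archive/layers_meta.json (see build.sh above);
# - publish.py fetches those artifacts via jobserv_get, e.g. for factory "acme", build 42:
#     /projects/acme/lmp/builds/42/runs/build-amd64/layers_meta.json
#     /projects/acme/lmp/builds/42/runs/build-aarch64/layers_meta.json
# - the merged {"amd64": {...}, "arm64": {...}} dict is passed to AppsPublisher, which
#   writes it to a temporary file handed to the publish tool via '-l'.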