|
| 1 | +""" |
| 2 | +A TUF hash bin delegation example using the low-level TUF Metadata API. |
| 3 | +
|
| 4 | +As 'repository_tool' and 'repository_lib' are being deprecated, hash bin |
| 5 | +delegation interfaces are no longer available in this implementation. The |
| 6 | +example code in this file demonstrates how to easily implement those |
| 7 | +interfaces, and how to use them together with the TUF metadata API, to perform |
| 8 | +hash bin delegation. |
| 9 | +
|
| 10 | +Contents: |
| 11 | +- Re-usable hash bin delegation helpers |
| 12 | +- Basic hash bin delegation example |
| 13 | +
|
| 14 | +See 'basic_repo.py' for a more comprehensive TUF metadata API example. |
| 15 | +
|
| 16 | +NOTE: Metadata files will be written to a 'tmp*'-directory in CWD. |
| 17 | +
|
| 18 | +""" |
| 19 | +import hashlib |
| 20 | +import os |
| 21 | +import tempfile |
| 22 | +from collections import OrderedDict |
| 23 | +from datetime import datetime, timedelta |
| 24 | +from pathlib import Path |
| 25 | +from typing import Any, Dict, Iterator, List, Tuple |
| 26 | + |
| 27 | +from securesystemslib.keys import generate_ed25519_key |
| 28 | +from securesystemslib.signer import SSlibSigner |
| 29 | + |
| 30 | +from tuf.api.metadata import ( |
| 31 | + DelegatedRole, |
| 32 | + Delegations, |
| 33 | + Key, |
| 34 | + Metadata, |
| 35 | + TargetFile, |
| 36 | + Targets, |
| 37 | +) |
| 38 | +from tuf.api.serialization.json import JSONSerializer |
| 39 | + |
| 40 | + |
| 41 | +def _in(days: float) -> datetime: |
| 42 | + """Adds 'days' to now and returns datetime object w/o microseconds.""" |
| 43 | + return datetime.utcnow().replace(microsecond=0) + timedelta(days=days) |
| 44 | + |
| 45 | + |
# Value passed as 'spec_version' to every Targets object created below.
SPEC_VERSION = "1.0.19"

# In-memory stores that the sections below fill in.
keys: Dict[str, Dict[str, Any]] = {}  # key name -> generated key dict
roles: Dict[str, Metadata] = {}  # role name -> metadata object
| 49 | + |
# Hash bin delegation
# ===================
# Hash bin delegation distributes a large number of target files over multiple
# delegated targets metadata files. Each metadata file stays small, which
# lowers the network overhead of repository-client communication.
#
# Target files are assigned to targets metadata automatically, based on the
# hash of the target file name. Only a prefix of that hash is needed to pick
# the correct hash bin.
#
# The number of bins is the only value that needs to be configured; everything
# else is derived from it below. The right number depends on how many target
# files a repository is expected to hold. This example uses:
NUMBER_OF_BINS = 32

# PREFIX_LEN is the length of the hash prefix considered for bin assignment:
# the number of digits in the hexadecimal representation (see 'x' in Python's
# Format Specification) of the number of bins minus one (counting starts at
# zero). For 32 bins that is 2 ('1f').
PREFIX_LEN = len(format(NUMBER_OF_BINS - 1, "x"))

# NUMBER_OF_PREFIXES is how many hash prefixes all bins cover together.
# Compared to decimal, hexadecimal expresses higher numbers with fewer digits,
# which further decreases metadata sizes. A prefix length of 2 can represent
# at most 256 prefixes, i.e. 00, 01, ..., ff.
NUMBER_OF_PREFIXES = 16**PREFIX_LEN

# BIN_SIZE is how many prefixes each individual bin covers. If the number of
# bins is a power of two, hash prefixes are evenly distributed over all bins,
# so every bin has the same size: here 8, i.e. each bin is responsible for a
# range of 8 prefixes (00-07, 08-0f, ..., f8-ff).
BIN_SIZE = NUMBER_OF_PREFIXES // NUMBER_OF_BINS
| 84 | + |
| 85 | +# Helpers |
| 86 | +# ------- |
def _bin_name(low: int, high: int) -> str:
    """Build the name of the bin that serves hash prefixes 'low' to 'high'.

    A bin serving a single prefix (low == high) is named after that prefix;
    any other bin is named '<low>-<high>'. Each bound is rendered as lowercase
    hex, zero-padded on the left to PREFIX_LEN digits, for uniform bin name
    lengths.
    """
    bounds = [low] if low == high else [low, high]
    return "-".join(f"{bound:0{PREFIX_LEN}x}" for bound in bounds)
| 98 | + |
| 99 | + |
def generate_hash_bins() -> Iterator[Tuple[str, List[str]]]:
    """Yield a '(bin name, hash prefixes)' pair for each bin, in order.

    The hash prefixes are the zero-padded hex strings of PREFIX_LEN digits
    that the bin serves.
    """
    # Walk the full prefix range in steps of one bin; each step is one bin.
    for first in range(0, NUMBER_OF_PREFIXES, BIN_SIZE):
        past_last = first + BIN_SIZE
        served_prefixes = [
            f"{value:0{PREFIX_LEN}x}" for value in range(first, past_last)
        ]
        yield _bin_name(first, past_last - 1), served_prefixes
| 112 | + |
| 113 | + |
def find_hash_bin(path: str) -> str:
    """Return the name of the bin responsible for the target file at 'path'.

    The bin is selected by the first PREFIX_LEN hex digits of the SHA-256
    digest of the UTF-8 encoded target path.
    """
    digest = hashlib.sha256(path.encode("utf-8")).hexdigest()
    prefix_value = int(digest[:PREFIX_LEN], 16)
    # Round the prefix down to a multiple of the bin size to get the lower
    # bound of its bin; the upper bound lies BIN_SIZE - 1 prefixes above it.
    first = (prefix_value // BIN_SIZE) * BIN_SIZE
    return _bin_name(first, first + BIN_SIZE - 1)
| 127 | + |
| 128 | + |
| 129 | +# Keys |
| 130 | +# ---- |
| 131 | +# Given that the primary concern of hash bin delegation is to reduce network |
| 132 | +# overhead, it is acceptable to re-use one signing key for all delegated |
| 133 | +# targets roles (bin-n). However, we do use a different key for the delegating |
| 134 | +# targets role (bins). Considering the high responsibility but also low |
| 135 | +# volatility of the bins role, it is recommended to require signature |
| 136 | +# thresholds and keep the keys offline in a real-world scenario. |
| 137 | + |
| 138 | +# NOTE: See "Targets delegation" and "Signature thresholds" paragraphs in |
| 139 | +# 'basic_repo.py' for more details |
# "bin-n" is the single key shared by all delegated bin roles; "bins" is the
# separate key of the delegating role.
for name in ("bin-n", "bins"):
    keys[name] = generate_ed25519_key()
| 142 | + |
| 143 | + |
| 144 | +# Targets roles |
| 145 | +# ------------- |
| 146 | +# NOTE: See "Targets" and "Targets delegation" paragraphs in 'basic_repo.py' |
| 147 | +# example for more details about the Targets object. |
| 148 | + |
| 149 | +# Create preliminary delegating targets role (bins) and add public key for |
| 150 | +# delegated targets (bin_n) to key store. Delegation details are updated below. |
# Key store of the delegating role: it holds only the public key shared by the
# delegated bin roles. The role table starts empty.
bin_n_key = keys["bin-n"]
bins_delegations = Delegations(
    keys={bin_n_key["keyid"]: Key.from_securesystemslib_key(bin_n_key)},
    roles=OrderedDict(),
)

# The delegating targets role (bins) lists no target files itself.
roles["bins"] = Metadata[Targets](
    signed=Targets(
        version=1,
        spec_version=SPEC_VERSION,
        expires=_in(365),
        targets={},
        delegations=bins_delegations,
    ),
    signatures=OrderedDict(),
)
| 168 | + |
| 169 | +# The hash bin generator yields an ordered list of incremental hash bin names |
| 170 | +# (ranges), plus the hash prefixes each bin is responsible for, e.g.: |
| 171 | +# |
| 172 | +# bin_n_name: 00-07 bin_n_hash_prefixes: 00 01 02 03 04 05 06 07 |
| 173 | +# 08-0f 08 09 0a 0b 0c 0d 0e 0f |
| 174 | +# 10-17 10 11 12 13 14 15 16 17 |
| 175 | +# ... ... |
| 176 | +# f8-ff f8 f9 fa fb fc fd fe ff |
# Look up the role table of the delegating targets role (bins) once; every bin
# is registered in it below.
bins_delegated_roles = roles["bins"].signed.delegations.roles

for bin_n_name, bin_n_hash_prefixes in generate_hash_bins():
    # Create the delegated targets role (bin_n), initially without targets.
    roles[bin_n_name] = Metadata[Targets](
        signed=Targets(
            version=1,
            spec_version=SPEC_VERSION,
            expires=_in(7),
            targets={},
        ),
        signatures=OrderedDict(),
    )

    # Record the delegation details for bin_n in the delegating targets role
    # (bins): the shared bin key and the hash prefixes this bin serves.
    bins_delegated_roles[bin_n_name] = DelegatedRole(
        name=bin_n_name,
        keyids=[keys["bin-n"]["keyid"]],
        threshold=1,
        terminating=False,
        path_hash_prefixes=bin_n_hash_prefixes,
    )
| 195 | + |
| 196 | +# Add target file |
| 197 | +# --------------- |
| 198 | +# For the purpose of this example we will protect the integrity of this very |
| 199 | +# example script by adding its file info to the corresponding bin metadata. |
| 200 | + |
| 201 | +# NOTE: See "Targets" paragraph in 'basic_repo.py' example for more details |
| 202 | +# about adding target file infos to targets metadata. |
# The target path is the last two components of this script's resolved
# location, i.e. '<parent directory>/<file name>'.
local_path = Path(__file__).resolve()
target_path = "/".join(local_path.parts[-2:])
target_file_info = TargetFile.from_file(target_path, str(local_path))

# The 'target_path' hash determines the right bin for a target file, e.g.:
#
#   target_path:                'repo_example/hashed_bin_delegation.py'
#   target_path (hash digest):  '85e1a6c06305bd9c1e15c7ae565fd16ea304bfc...'
#
# --> considered hash prefix '85', falls into bin '80-87'
bin_for_target = find_hash_bin(target_path)
bin_targets = roles[bin_for_target].signed.targets
bin_targets[target_path] = target_file_info
| 215 | + |
| 216 | + |
| 217 | +# Sign and persist |
| 218 | +# ---------------- |
| 219 | +# Sign all metadata and persist to temporary directory at CWD for review |
| 220 | +# (most notably see 'bins.json' and '80-87.json'). |
| 221 | + |
| 222 | +# NOTE: See "Persist metadata" paragraph in 'basic_repo.py' example for more |
| 223 | +# details about serialization formats and metadata file name convention. |
PRETTY = JSONSerializer(compact=False)
TMP_DIR = tempfile.mkdtemp(dir=os.getcwd())

for role_name, role in roles.items():
    # The delegating role (bins) has its own key; every other role is a bin
    # and is signed with the shared "bin-n" key.
    key_name = "bins" if role_name == "bins" else "bin-n"
    role.sign(SSlibSigner(keys[key_name]))

    # Write '<role name>.json' into the temporary directory.
    role.to_file(
        os.path.join(TMP_DIR, f"{role_name}.json"), serializer=PRETTY
    )
0 commit comments