Skip to content

Commit 8209189

Browse files
authored
Merge pull request #1700 from lukpueh/repo-examples-hbd
doc: basic hash bin delegation repo example + test
2 parents 002c828 + ef388da commit 8209189

File tree

3 files changed

+276
-1
lines changed

3 files changed

+276
-1
lines changed

examples/repo_example/basic_repo.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,7 @@ def _in(days: float) -> datetime:
268268
roles["root"].to_file(root_path, serializer=PRETTY)
269269

270270

271-
# Targets Delegation
271+
# Targets delegation
272272
# ==================
273273
# Similar to how the root role delegates responsibilities about integrity,
274274
# consistency and freshness to the corresponding top-level roles, a targets
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
"""
2+
A TUF hash bin delegation example using the low-level TUF Metadata API.
3+
4+
As 'repository_tool' and 'repository_lib' are being deprecated, hash bin
5+
delegation interfaces are no longer available in this implementation. The
6+
example code in this file demonstrates how to easily implement those
7+
interfaces, and how to use them together with the TUF metadata API, to perform
8+
hash bin delegation.
9+
10+
Contents:
11+
- Re-usable hash bin delegation helpers
12+
- Basic hash bin delegation example
13+
14+
See 'basic_repo.py' for a more comprehensive TUF metadata API example.
15+
16+
NOTE: Metadata files will be written to a 'tmp*'-directory in CWD.
17+
18+
"""
19+
import hashlib
20+
import os
21+
import tempfile
22+
from collections import OrderedDict
23+
from datetime import datetime, timedelta
24+
from pathlib import Path
25+
from typing import Any, Dict, Iterator, List, Tuple
26+
27+
from securesystemslib.keys import generate_ed25519_key
28+
from securesystemslib.signer import SSlibSigner
29+
30+
from tuf.api.metadata import (
31+
DelegatedRole,
32+
Delegations,
33+
Key,
34+
Metadata,
35+
TargetFile,
36+
Targets,
37+
)
38+
from tuf.api.serialization.json import JSONSerializer
39+
40+
41+
def _in(days: float) -> datetime:
42+
"""Adds 'days' to now and returns datetime object w/o microseconds."""
43+
return datetime.utcnow().replace(microsecond=0) + timedelta(days=days)
44+
45+
46+
SPEC_VERSION = "1.0.19"
47+
roles: Dict[str, Metadata] = {}
48+
keys: Dict[str, Dict[str, Any]] = {}
49+
50+
# Hash bin delegation
51+
# ===================
52+
# Hash bin delegation allows distributing a large number of target files over
53+
# multiple delegated targets metadata. The consequence is smaller metadata
54+
# files and thus a lower network overhead for repository-client communication.
55+
#
56+
# The assignment of target files to targets metadata is done automatically,
57+
# based on the hash of the target file name. More precisely, only a prefix of
58+
# the target file name hash is needed to assign it to the correct hash bin.
59+
#
60+
# The number of bins is the only number that needs to be configured. Everything
61+
# else is derived using the mathematical operations shown below.
62+
#
63+
# The right number of bins depends on the expected number of target files in a
64+
# repository. For the purpose of this example we choose ...
65+
NUMBER_OF_BINS = 32 # ..., which determines the length of any hash prefix
66+
# considered for bin assignment (PREFIX_LEN), how many hash prefixes are
67+
# covered by all bins (NUMBER_OF_PREFIXES), and how many prefixes are covered
68+
# by each individual bin (BIN_SIZE):
69+
#
70+
# The prefix length is the number of digits in the hexadecimal representation
71+
# (see 'x' in Python Format Specification) of the number of bins minus one
72+
# (counting starts at zero), i.e. ...
73+
PREFIX_LEN = len(f"{(NUMBER_OF_BINS - 1):x}") # ... 2.
74+
#
75+
# Compared to decimal, hexadecimal numbers can express higher numbers with
76+
# fewer digits and thus further decrease metadata sizes. With the above prefix
77+
# length of 2 we can represent at most ...
78+
NUMBER_OF_PREFIXES = 16 ** PREFIX_LEN # ... 256 prefixes, i.e. 00, 01, ..., ff.
79+
#
80+
# If the number of bins is a power of two, hash prefixes are evenly distributed
81+
# over all bins, which allows calculating the uniform size of ...
82+
BIN_SIZE = NUMBER_OF_PREFIXES // NUMBER_OF_BINS # ... 8, where each bin is
83+
# responsible for a range of 8 prefixes, i.e. 00-07, 08-0f, ..., f8-ff.
84+
85+
# Helpers
86+
# -------
87+
def _bin_name(low: int, high: int) -> str:
    """Builds the name of the bin that serves hash prefixes 'low' to 'high'.

    A bin that covers exactly one hash prefix is named after that prefix, any
    other bin is named '<low>-<high>'. Both bounds are written as lowercase
    hex numbers, left-padded with zeros to PREFIX_LEN digits, which gives
    uniform bin name lengths.
    """
    bounds = [low] if low == high else [low, high]
    return "-".join(f"{bound:0{PREFIX_LEN}x}" for bound in bounds)
98+
99+
100+
def generate_hash_bins() -> Iterator[Tuple[str, List[str]]]:
    """Yields a (bin name, hash prefixes) pair for each hash bin, in order."""
    # Each bin starts at a multiple of BIN_SIZE and serves the BIN_SIZE
    # consecutive hash prefixes from there on.
    for start in range(0, NUMBER_OF_PREFIXES, BIN_SIZE):
        stop = start + BIN_SIZE
        name = _bin_name(start, stop - 1)
        served_prefixes = [
            f"{prefix:0{PREFIX_LEN}x}" for prefix in range(start, stop)
        ]

        yield name, served_prefixes
112+
113+
114+
def find_hash_bin(path: str) -> str:
    """Returns the name of the bin responsible for the target at 'path'."""
    # Only the first PREFIX_LEN hex digits of the SHA-256 digest of the UTF-8
    # encoded target path are relevant for the bin assignment.
    digest = hashlib.sha256(path.encode("utf-8")).hexdigest()
    prefix = int(digest[:PREFIX_LEN], 16)
    # Round the numerical prefix value down to a multiple of BIN_SIZE to get
    # the lower bound of its bin; the bin serves BIN_SIZE prefixes in total.
    low = (prefix // BIN_SIZE) * BIN_SIZE
    return _bin_name(low, low + BIN_SIZE - 1)
127+
128+
129+
# Keys
130+
# ----
131+
# Given that the primary concern of hash bin delegation is to reduce network
132+
# overhead, it is acceptable to re-use one signing key for all delegated
133+
# targets roles (bin-n). However, we do use a different key for the delegating
134+
# targets role (bins). Considering the high responsibility but also low
135+
# volatility of the bins role, it is recommended to require signature
136+
# thresholds and keep the keys offline in a real-world scenario.
137+
138+
# NOTE: See "Targets delegation" and "Signature thresholds" paragraphs in
139+
# 'basic_repo.py' for more details
140+
# "bin-n" is the key shared by all delegated bin roles, "bins" is the key of
# the delegating role.
for name in ("bin-n", "bins"):
    keys[name] = generate_ed25519_key()
142+
143+
144+
# Targets roles
145+
# -------------
146+
# NOTE: See "Targets" and "Targets delegation" paragraphs in 'basic_repo.py'
147+
# example for more details about the Targets object.
148+
149+
# Create preliminary delegating targets role (bins) and add public key for
150+
# delegated targets (bin_n) to key store. Delegation details are updated below.
151+
roles["bins"] = Metadata[Targets](
    signed=Targets(
        version=1,
        spec_version=SPEC_VERSION,
        expires=_in(365),  # 365 days from now
        # No target files are listed by 'bins' directly at this point.
        targets={},
        delegations=Delegations(
            # Key store entry for the 'bin-n' key, indexed by its keyid.
            keys={
                keys["bin-n"]["keyid"]: Key.from_securesystemslib_key(
                    keys["bin-n"]
                )
            },
            # Starts out empty; one DelegatedRole per bin is added below.
            roles=OrderedDict(),
        ),
    ),
    # Starts out empty; the metadata is signed further below.
    signatures=OrderedDict(),
)
168+
169+
# The hash bin generator yields an ordered list of incremental hash bin names
170+
# (ranges), plus the hash prefixes each bin is responsible for, e.g.:
171+
#
172+
# bin_n_name: 00-07 bin_n_hash_prefixes: 00 01 02 03 04 05 06 07
173+
# 08-0f 08 09 0a 0b 0c 0d 0e 0f
174+
# 10-17 10 11 12 13 14 15 16 17
175+
# ... ...
176+
# f8-ff f8 f9 fa fb fc fd fe ff
177+
for bin_n_name, bin_n_hash_prefixes in generate_hash_bins():
    # Update delegating targets role (bins) with delegation details for each
    # delegated targets role (bin_n).
    roles["bins"].signed.delegations.roles[bin_n_name] = DelegatedRole(
        name=bin_n_name,
        # All bins reference the same 'bin-n' key, with a threshold of one.
        keyids=[keys["bin-n"]["keyid"]],
        threshold=1,
        terminating=False,
        # The bin is responsible for target paths whose hash starts with one
        # of these prefixes.
        path_hash_prefixes=bin_n_hash_prefixes,
    )

    # Create delegated targets roles (bin_n), initially without any target
    # files or signatures, and expiring 7 days from now.
    roles[bin_n_name] = Metadata[Targets](
        signed=Targets(
            version=1, spec_version=SPEC_VERSION, expires=_in(7), targets={}
        ),
        signatures=OrderedDict(),
    )
195+
196+
# Add target file
197+
# ---------------
198+
# For the purpose of this example we will protect the integrity of this very
199+
# example script by adding its file info to the corresponding bin metadata.
200+
201+
# NOTE: See "Targets" paragraph in 'basic_repo.py' example for more details
202+
# about adding target file infos to targets metadata.
203+
# The target path consists of the name of the directory that contains this
# script, plus the file name of the script itself.
local_path = Path(__file__).resolve()
target_path = "/".join(local_path.parts[-2:])
target_file_info = TargetFile.from_file(target_path, str(local_path))

# The right bin for a target file is determined by the 'target_path' hash, e.g.:
#
#   target_path:                  'repo_example/hashed_bin_delegation.py'
#   target_path (hash digest):    '85e1a6c06305bd9c1e15c7ae565fd16ea304bfc...'
#
# --> considered hash prefix '85', falls into bin '80-87'
bin_for_target = find_hash_bin(target_path)
roles[bin_for_target].signed.targets[target_path] = target_file_info
215+
216+
217+
# Sign and persist
218+
# ----------------
219+
# Sign all metadata and persist to temporary directory at CWD for review
220+
# (most notably see 'bins.json' and '80-87.json').
221+
222+
# NOTE: See "Persist metadata" paragraph in 'basic_repo.py' example for more
223+
# details about serialization formats and metadata file name convention.
224+
PRETTY = JSONSerializer(compact=False)
TMP_DIR = tempfile.mkdtemp(dir=os.getcwd())

for role_name, role in roles.items():
    # The delegating role (bins) has a key of its own, whereas all delegated
    # roles are signed with the shared 'bin-n' key.
    if role_name == "bins":
        key = keys["bins"]
    else:
        key = keys["bin-n"]

    signer = SSlibSigner(key)
    role.sign(signer)

    # Each metadata file is named after its role.
    filename = f"{role_name}.json"
    filepath = os.path.join(TMP_DIR, filename)
    role.to_file(filepath, serializer=PRETTY)

tests/test_examples.py

+41
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,47 @@ def test_basic_repo(self) -> None:
8787
],
8888
)
8989

90+
def test_hashed_bin_delegation(self) -> None:
    """Run 'hashed_bin_delegation.py' and assert creation of metadata files."""
    # Expect one file for the delegating 'bins' role, plus one file for each
    # of the 32 hash bins, each named after the 8 hash prefixes it serves.
    self._run_script_and_assert_files(
        "hashed_bin_delegation.py",
        [
            "bins.json",
            "00-07.json",
            "08-0f.json",
            "10-17.json",
            "18-1f.json",
            "20-27.json",
            "28-2f.json",
            "30-37.json",
            "38-3f.json",
            "40-47.json",
            "48-4f.json",
            "50-57.json",
            "58-5f.json",
            "60-67.json",
            "68-6f.json",
            "70-77.json",
            "78-7f.json",
            "80-87.json",
            "88-8f.json",
            "90-97.json",
            "98-9f.json",
            "a0-a7.json",
            "a8-af.json",
            "b0-b7.json",
            "b8-bf.json",
            "c0-c7.json",
            "c8-cf.json",
            "d0-d7.json",
            "d8-df.json",
            "e0-e7.json",
            "e8-ef.json",
            "f0-f7.json",
            "f8-ff.json",
        ],
    )
130+
90131

91132
if __name__ == "__main__":
92133
utils.configure_test_logging(sys.argv)

0 commit comments

Comments
 (0)