Skip to content

Commit d6a88ed

Browse files
committed
introduce package id and redo hash format again
Introduces the `id` field to `build.zig.zon`. Together with `name`, this represents a globally unique package identifier. This field should be initialized with a 16-bit random number when the package is first created, and then *never change*. This allows Zig to unambiguously detect when one package is an updated version of another. When forking a Zig project, this id should be regenerated with a new random number if the upstream project is still maintained. Otherwise, the fork is *hostile*, attempting to take control over the original project's identity. `0x0000` is invalid because it obviously means a random number wasn't used. `0xffff` is reserved to represent "naked" packages. Tracking issue #14288 Additionally: * Fix bad path in error messages regarding build.zig.zon file. * Manifest validates that the `name` and `version` fields of build.zig.zon are at most 32 bytes. * Introduce an error when the root package has not switched to an enum literal for `name`. * Introduce an error when the root package omits `id`. * Update init template to generate `id`. * Update init template to populate `minimum_zig_version`. * New package hash format changes: - name and version limited to 32 bytes via error rather than truncation - truncate sha256 to 192 bits rather than 40 bits - include the package id This means that, given only the package hashes for a complete dependency tree, it is possible to perform version selection and know the final size on disk, without doing any fetching whatsoever. This prevents wasted bandwidth since package versions not selected do not need to be fetched.
1 parent 9763dd2 commit d6a88ed

File tree

8 files changed

+151
-52
lines changed

8 files changed

+151
-52
lines changed

doc/build.zig.zon.md

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ build.zig.
1010

1111
### `name`
1212

13-
String. Required.
13+
Enum literal. Required.
1414

1515
This is the default name used by packages depending on this one. For example,
1616
when a user runs `zig fetch --save <url>`, this field is used as the key in the
@@ -20,12 +20,31 @@ will stick with this provided value.
2020
It is redundant to include "zig" in this name because it is already within the
2121
Zig package namespace.
2222

23+
Must be a valid bare Zig identifier (don't `@` me), limited to 32 bytes.
24+
25+
### `id`
26+
27+
Together with name, this represents a globally unique package identifier. This
28+
field should be initialized with a 16-bit random number when the package is
29+
first created, and then *never change*. This allows Zig to unambiguously detect
30+
when one package is an updated version of another.
31+
32+
When forking a Zig project, this id should be regenerated with a new random
33+
number if the upstream project is still maintained. Otherwise, the fork is
34+
*hostile*, attempting to take control over the original project's identity.
35+
36+
`0x0000` is invalid because it obviously means a random number wasn't used.
37+
38+
`0xffff` is reserved to represent "naked" packages.
39+
2340
### `version`
2441

2542
String. Required.
2643

2744
[semver](https://semver.org/)
2845

46+
Limited to 32 bytes.
47+
2948
### `minimum_zig_version`
3049

3150
String. Optional.

lib/init/build.zig

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,14 +42,14 @@ pub fn build(b: *std.Build) void {
4242
// Modules can depend on one another using the `std.Build.Module.addImport` function.
4343
// This is what allows Zig source code to use `@import("foo")` where 'foo' is not a
4444
// file path. In this case, we set up `exe_mod` to import `lib_mod`.
45-
exe_mod.addImport("$_lib", lib_mod);
45+
exe_mod.addImport("$n_lib", lib_mod);
4646

4747
// Now, we will create a static library based on the module we created above.
4848
// This creates a `std.Build.Step.Compile`, which is the build step responsible
4949
// for actually invoking the compiler.
5050
const lib = b.addLibrary(.{
5151
.linkage = .static,
52-
.name = "$",
52+
.name = "$n",
5353
.root_module = lib_mod,
5454
});
5555

@@ -61,7 +61,7 @@ pub fn build(b: *std.Build) void {
6161
// This creates another `std.Build.Step.Compile`, but this one builds an executable
6262
// rather than a static library.
6363
const exe = b.addExecutable(.{
64-
.name = "$",
64+
.name = "$n",
6565
.root_module = exe_mod,
6666
});
6767

lib/init/build.zig.zon

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,29 @@
66
//
77
// It is redundant to include "zig" in this name because it is already
88
// within the Zig package namespace.
9-
.name = "$",
9+
.name = .$n,
1010

1111
// This is a [Semantic Version](https://semver.org/).
1212
// In a future version of Zig it will be used for package deduplication.
1313
.version = "0.0.0",
1414

15+
// Together with name, this represents a globally unique package
16+
// identifier. This field should be initialized with a 16-bit random number
17+
// when the package is first created, and then *never change*. This allows
18+
// unambiguous detection when one package is an updated version of another.
19+
//
20+
// When forking a Zig project, this id should be regenerated with a new
21+
// random number if the upstream project is still maintained. Otherwise,
22+
// the fork is *hostile*, attempting to take control over the original
23+
// project's identity. Thus it is recommended to leave the comment on the
24+
// following line intact, so that it shows up in code reviews that modify
25+
// the field.
26+
.id = $i, // Changing this has security and trust implications.
27+
28+
// Tracks the earliest Zig version that the package considers to be a
29+
// supported use case.
30+
.minimum_zig_version = "$v",
31+
1532
// This field is optional.
1633
// This is currently advisory only; Zig does not yet do anything
1734
// with this value.

lib/init/src/main.zig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,4 +43,4 @@ test "fuzz example" {
4343
const std = @import("std");
4444

4545
/// This imports the separate module containing `root.zig`. Take a look in `build.zig` for details.
46-
const lib = @import("$_lib");
46+
const lib = @import("$n_lib");

src/Package.zig

Lines changed: 27 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,17 @@ pub const multihash_len = 1 + 1 + Hash.Algo.digest_length;
1010
pub const multihash_hex_digest_len = 2 * multihash_len;
1111
pub const MultiHashHexDigest = [multihash_hex_digest_len]u8;
1212

13+
pub fn randomId() u16 {
14+
return std.crypto.random.intRangeLessThan(u16, 0x0001, 0xffff);
15+
}
16+
1317
/// A user-readable, file system safe hash that identifies an exact package
1418
/// snapshot, including file contents.
1519
///
20+
/// The hash is not only to prevent collisions but must resist attacks where
21+
/// the adversary fully controls the contents being hashed. Thus, it contains
22+
/// a SHA-256 digest truncated to 192 bits.
23+
///
1624
/// This data structure can be used to store the legacy hash format too. Legacy
1725
/// hash format is scheduled to be removed after 0.14.0 is tagged.
1826
///
@@ -26,7 +34,8 @@ pub const Hash = struct {
2634
pub const Algo = std.crypto.hash.sha2.Sha256;
2735
pub const Digest = [Algo.digest_length]u8;
2836

29-
pub const max_len = 32 + 1 + 32 + 1 + 12;
37+
/// Example: "nnnn-vvvv-hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh"
38+
pub const max_len = 32 + 1 + 32 + 1 + (16 + 32 + 192) / 6;
3039

3140
pub fn fromSlice(s: []const u8) Hash {
3241
assert(s.len <= max_len);
@@ -62,48 +71,35 @@ pub const Hash = struct {
6271
try std.testing.expect(h.isOld());
6372
}
6473

65-
/// Produces "$name-$semver-$sizedhash".
74+
/// Produces "$name-$semver-$hashplus".
6675
/// * name is the name field from build.zig.zon, at most 32 bytes, and must
6776
/// be a valid zig identifier
6877
/// * semver is the version field from build.zig.zon, at most 32 bytes
69-
/// * sizedhash is the following 9-byte array, base64 encoded using -_ to make
78+
/// * hashplus is the following 30-byte array, base64 encoded using -_ to make
7079
/// it filesystem safe:
71-
/// - (4 bytes) LE u32 total decompressed size in bytes
72-
/// - (5 bytes) truncated SHA-256 of hashed files of the package
80+
/// - (2 bytes) LE u16 Package ID
81+
/// - (4 bytes) LE u32 total decompressed size in bytes, overflow saturated
82+
/// - (24 bytes) truncated SHA-256 digest of hashed files of the package
7383
///
74-
/// example: "nasm-2.16.1-2-BWdcABvF_jM1"
75-
pub fn init(digest: Digest, name: []const u8, ver: []const u8, size: u32) Hash {
84+
/// example: "nasm-2.16.1-3-AAD_ZlwACpGU-c3QXp_yNyn07Q5U9Rq-Cb1ur2G1"
85+
pub fn init(digest: Digest, name: []const u8, ver: []const u8, id: u16, size: u32) Hash {
86+
assert(name.len <= 32);
87+
assert(ver.len <= 32);
7688
var result: Hash = undefined;
7789
var buf: std.ArrayListUnmanaged(u8) = .initBuffer(&result.bytes);
78-
buf.appendSliceAssumeCapacity(name[0..@min(name.len, 32)]);
90+
buf.appendSliceAssumeCapacity(name);
7991
buf.appendAssumeCapacity('-');
80-
buf.appendSliceAssumeCapacity(ver[0..@min(ver.len, 32)]);
92+
buf.appendSliceAssumeCapacity(ver);
8193
buf.appendAssumeCapacity('-');
82-
var sizedhash: [9]u8 = undefined;
83-
std.mem.writeInt(u32, sizedhash[0..4], size, .little);
84-
sizedhash[4..].* = digest[0..5].*;
85-
_ = std.base64.url_safe_no_pad.Encoder.encode(buf.addManyAsArrayAssumeCapacity(12), &sizedhash);
94+
var hashplus: [30]u8 = undefined;
95+
std.mem.writeInt(u16, hashplus[0..2], id, .little);
96+
std.mem.writeInt(u32, hashplus[2..6], size, .little);
97+
hashplus[6..].* = digest[0..24].*;
98+
_ = std.base64.url_safe_no_pad.Encoder.encode(buf.addManyAsArrayAssumeCapacity(40), &hashplus);
8699
@memset(buf.unusedCapacitySlice(), 0);
87100
return result;
88101
}
89102

90-
/// Produces "$hashiname-N-$sizedhash". For packages that lack "build.zig.zon" metadata.
91-
/// * hashiname is [5..][0..24] bytes of the SHA-256, urlsafe-base64-encoded, for a total of 32 bytes encoded
92-
/// * the semver section is replaced with a hardcoded N which stands for
93-
/// "naked". It acts as a version number so that any future updates to the
94-
/// hash format can tell this hash format apart. Note that "N" is an
95-
/// invalid semver.
96-
/// * sizedhash is the same as in `init`.
97-
///
98-
/// The hash is broken up this way so that "sizedhash" can be calculated
99-
/// exactly the same way in both cases, and so that "name" and "hashiname" can
100-
/// be used interchangeably in both cases.
101-
pub fn initNaked(digest: Digest, size: u32) Hash {
102-
var name: [32]u8 = undefined;
103-
_ = std.base64.url_safe_no_pad.Encoder.encode(&name, digest[5..][0..24]);
104-
return init(digest, &name, "N", size);
105-
}
106-
107103
/// Produces a unique hash based on the path provided. The result should
108104
/// not be user-visible.
109105
pub fn initPath(sub_path: []const u8, is_global: bool) Hash {
@@ -144,7 +140,7 @@ pub const MultihashFunction = enum(u16) {
144140

145141
pub const multihash_function: MultihashFunction = switch (Hash.Algo) {
146142
std.crypto.hash.sha2.Sha256 => .@"sha2-256",
147-
else => @compileError("unreachable"),
143+
else => unreachable,
148144
};
149145

150146
pub fn multiHashHexDigest(digest: Hash.Digest) MultiHashHexDigest {

src/Package/Fetch.zig

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -586,9 +586,11 @@ pub fn computedPackageHash(f: *const Fetch) Package.Hash {
586586
if (f.manifest) |man| {
587587
var version_buffer: [32]u8 = undefined;
588588
const version: []const u8 = std.fmt.bufPrint(&version_buffer, "{}", .{man.version}) catch &version_buffer;
589-
return .init(f.computed_hash.digest, man.name, version, saturated_size);
589+
return .init(f.computed_hash.digest, man.name, version, man.id, saturated_size);
590590
}
591-
return .initNaked(f.computed_hash.digest, saturated_size);
591+
// In the future build.zig.zon fields will be added to allow overriding these values
592+
// for naked tarballs.
593+
return .init(f.computed_hash.digest, "N", "V", 0xffff, saturated_size);
592594
}
593595

594596
/// `computeHash` gets a free check for the existence of `build.zig`, but when
@@ -645,11 +647,13 @@ fn loadManifest(f: *Fetch, pkg_root: Cache.Path) RunError!void {
645647

646648
f.manifest = try Manifest.parse(arena, ast.*, .{
647649
.allow_missing_paths_field = f.allow_missing_paths_field,
650+
.allow_missing_id = f.allow_missing_paths_field,
651+
.allow_name_string = f.allow_missing_paths_field,
648652
});
649653
const manifest = &f.manifest.?;
650654

651655
if (manifest.errors.len > 0) {
652-
const src_path = try eb.printString("{}{s}", .{ pkg_root, Manifest.basename });
656+
const src_path = try eb.printString("{}" ++ fs.path.sep_str ++ "{s}", .{ pkg_root, Manifest.basename });
653657
try manifest.copyErrorsIntoBundle(ast.*, src_path, eb);
654658
return error.FetchFailed;
655659
}

src/Package/Manifest.zig

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ pub const ErrorMessage = struct {
3636
};
3737

3838
name: []const u8,
39+
id: u16,
3940
version: std.SemanticVersion,
4041
version_node: Ast.Node.Index,
4142
dependencies: std.StringArrayHashMapUnmanaged(Dependency),
@@ -50,6 +51,8 @@ pub const ParseOptions = struct {
5051
allow_missing_paths_field: bool = false,
5152
/// Deprecated, to be removed after 0.14.0 is tagged.
5253
allow_name_string: bool = true,
54+
/// Deprecated, to be removed after 0.14.0 is tagged.
55+
allow_missing_id: bool = true,
5356
};
5457

5558
pub const Error = Allocator.Error;
@@ -70,13 +73,15 @@ pub fn parse(gpa: Allocator, ast: Ast, options: ParseOptions) Error!Manifest {
7073
.errors = .{},
7174

7275
.name = undefined,
76+
.id = 0,
7377
.version = undefined,
7478
.version_node = 0,
7579
.dependencies = .{},
7680
.dependencies_node = 0,
7781
.paths = .{},
7882
.allow_missing_paths_field = options.allow_missing_paths_field,
7983
.allow_name_string = options.allow_name_string,
84+
.allow_missing_id = options.allow_missing_id,
8085
.minimum_zig_version = null,
8186
.buf = .{},
8287
};
@@ -92,6 +97,7 @@ pub fn parse(gpa: Allocator, ast: Ast, options: ParseOptions) Error!Manifest {
9297

9398
return .{
9499
.name = p.name,
100+
.id = p.id,
95101
.version = p.version,
96102
.version_node = p.version_node,
97103
.dependencies = try p.dependencies.clone(p.arena),
@@ -143,13 +149,15 @@ const Parse = struct {
143149
errors: std.ArrayListUnmanaged(ErrorMessage),
144150

145151
name: []const u8,
152+
id: u16,
146153
version: std.SemanticVersion,
147154
version_node: Ast.Node.Index,
148155
dependencies: std.StringArrayHashMapUnmanaged(Dependency),
149156
dependencies_node: Ast.Node.Index,
150157
paths: std.StringArrayHashMapUnmanaged(void),
151158
allow_missing_paths_field: bool,
152159
allow_name_string: bool,
160+
allow_missing_id: bool,
153161
minimum_zig_version: ?std.SemanticVersion,
154162

155163
const InnerError = error{ ParseFailure, OutOfMemory };
@@ -167,6 +175,7 @@ const Parse = struct {
167175
var have_name = false;
168176
var have_version = false;
169177
var have_included_paths = false;
178+
var have_id = false;
170179

171180
for (struct_init.ast.fields) |field_init| {
172181
const name_token = ast.firstToken(field_init) - 2;
@@ -183,6 +192,9 @@ const Parse = struct {
183192
} else if (mem.eql(u8, field_name, "name")) {
184193
p.name = try parseName(p, field_init);
185194
have_name = true;
195+
} else if (mem.eql(u8, field_name, "id")) {
196+
p.id = try parseId(p, field_init);
197+
have_id = true;
186198
} else if (mem.eql(u8, field_name, "version")) {
187199
p.version_node = field_init;
188200
const version_text = try parseString(p, field_init);
@@ -206,6 +218,12 @@ const Parse = struct {
206218
}
207219
}
208220

221+
if (!have_id and !p.allow_missing_id) {
222+
try appendError(p, main_token, "missing top-level 'id' field; suggested value: 0x{x}", .{
223+
Package.randomId(),
224+
});
225+
}
226+
209227
if (!have_name) {
210228
try appendError(p, main_token, "missing top-level 'name' field", .{});
211229
}
@@ -359,6 +377,33 @@ const Parse = struct {
359377
}
360378
}
361379

380+
fn parseId(p: *Parse, node: Ast.Node.Index) !u16 {
381+
const ast = p.ast;
382+
const node_tags = ast.nodes.items(.tag);
383+
const main_tokens = ast.nodes.items(.main_token);
384+
const main_token = main_tokens[node];
385+
if (node_tags[node] != .number_literal) {
386+
return fail(p, main_token, "expected integer literal", .{});
387+
}
388+
const token_bytes = ast.tokenSlice(main_token);
389+
const parsed = std.zig.parseNumberLiteral(token_bytes);
390+
const n = switch (parsed) {
391+
.int => |n| n,
392+
.big_int, .float => return fail(p, main_token, "expected u16 integer literal, found {s}", .{
393+
@tagName(parsed),
394+
}),
395+
.failure => |err| return fail(p, main_token, "bad integer literal: {s}", .{@tagName(err)}),
396+
};
397+
const casted = std.math.cast(u16, n) orelse
398+
return fail(p, main_token, "integer value {d} does not fit into u16", .{n});
399+
switch (casted) {
400+
0x0000, 0xffff => return fail(p, main_token, "id value 0x{x} reserved; use 0x{x} instead", .{
401+
casted, Package.randomId(),
402+
}),
403+
else => return casted,
404+
}
405+
}
406+
362407
fn parseName(p: *Parse, node: Ast.Node.Index) ![]const u8 {
363408
const ast = p.ast;
364409
const node_tags = ast.nodes.items(.tag);
@@ -371,7 +416,7 @@ const Parse = struct {
371416
return fail(p, main_token, "name must be a valid bare zig identifier (hint: switch from string to enum literal)", .{});
372417

373418
if (name.len > max_name_len)
374-
return fail(p, main_token, "name '{s}' exceeds max length of {d}", .{
419+
return fail(p, main_token, "name '{}' exceeds max length of {d}", .{
375420
std.zig.fmtId(name), max_name_len,
376421
});
377422

@@ -386,7 +431,7 @@ const Parse = struct {
386431
return fail(p, main_token, "name must be a valid bare zig identifier", .{});
387432

388433
if (ident_name.len > max_name_len)
389-
return fail(p, main_token, "name '{s}' exceeds max length of {d}", .{
434+
return fail(p, main_token, "name '{}' exceeds max length of {d}", .{
390435
std.zig.fmtId(ident_name), max_name_len,
391436
});
392437

0 commit comments

Comments
 (0)