Skip to content

Commit 12355cf

Browse files
committed
Package: new hash format
legacy format is also supported. closes #20178
1 parent e0129b3 commit 12355cf

File tree

4 files changed

+269
-145
lines changed

4 files changed

+269
-145
lines changed

src/Package.zig

+156
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,164 @@
1+
const std = @import("std");
2+
const assert = std.debug.assert;
3+
14
pub const Module = @import("Package/Module.zig");
25
pub const Fetch = @import("Package/Fetch.zig");
36
pub const build_zig_basename = "build.zig";
47
pub const Manifest = @import("Package/Manifest.zig");
58

9+
/// Length in bytes of a legacy multihash: one byte for the hash function
/// code, one byte for the digest length, followed by the digest itself.
pub const multihash_len = 1 + 1 + Hash.Algo.digest_length;
10+
/// Length of the legacy multihash once hex-encoded (two characters per byte).
pub const multihash_hex_digest_len = 2 * multihash_len;
11+
/// Fixed-size buffer holding a hex-encoded legacy multihash.
pub const MultiHashHexDigest = [multihash_hex_digest_len]u8;
12+
13+
/// A user-readable, file system safe hash that identifies an exact package
14+
/// snapshot, including file contents.
15+
///
16+
/// This data structure can be used to store the legacy hash format too. Legacy
17+
/// hash format is scheduled to be removed after 0.14.0 is tagged.
18+
pub const Hash = struct {
    /// Backing storage for the textual hash. The text occupies the front of
    /// the array; unused bytes at the end are filled with zeroes.
    bytes: [max_len]u8,

    pub const Algo = std.crypto.hash.sha2.Sha256;
    pub const Digest = [Algo.digest_length]u8;

    /// Maximum size of a package hash: up to 32 bytes of name, '-', up to 32
    /// bytes of version, '-', and 12 bytes of base64-encoded sizedhash.
    pub const max_len = 32 + 1 + 32 + 1 + 12;

    /// Stores a copy of `s`, zero-filling the remainder of the buffer.
    /// Asserts that `s.len <= max_len`.
    pub fn fromSlice(s: []const u8) Hash {
        assert(s.len <= max_len);
        var result: Hash = undefined;
        @memcpy(result.bytes[0..s.len], s);
        @memset(result.bytes[s.len..], 0);
        return result;
    }

    /// Returns the stored text with the trailing zero padding removed.
    /// The returned slice points into `ph`. A hash whose bytes are all zero
    /// (for example one created from an empty slice) yields an empty slice.
    pub fn toSlice(ph: *const Hash) []const u8 {
        var end: usize = ph.bytes.len;
        // Walk backwards over the padding. The `end > 0` guard prevents the
        // index from wrapping below zero when every byte is zero.
        while (end > 0 and ph.bytes[end - 1] == 0) end -= 1;
        return ph.bytes[0..end];
    }

    test toSlice {
        const empty: Hash = .fromSlice("");
        try std.testing.expectEqualStrings("", empty.toSlice());

        const short: Hash = .fromSlice("abc");
        try std.testing.expectEqualStrings("abc", short.toSlice());
    }

    /// Compares the full backing buffers, padding included.
    pub fn eql(a: *const Hash, b: *const Hash) bool {
        return std.mem.eql(u8, &a.bytes, &b.bytes);
    }

    /// Distinguishes whether the legacy multihash format is being stored here.
    ///
    /// A value is treated as legacy when its first two characters are the hex
    /// code of `multihash_function`, its text is exactly
    /// `multihash_hex_digest_len` long, and it contains no '-' separator.
    pub fn isOld(h: *const Hash) bool {
        const their_multihash_func = std.fmt.parseInt(u8, h.bytes[0..2], 16) catch return false;
        if (@as(MultihashFunction, @enumFromInt(their_multihash_func)) != multihash_function) return false;
        if (h.toSlice().len != multihash_hex_digest_len) return false;
        return std.mem.indexOfScalar(u8, &h.bytes, '-') == null;
    }

    test isOld {
        const h: Hash = .fromSlice("1220138f4aba0c01e66b68ed9e1e1e74614c06e4743d88bc58af4f1c3dd0aae5fea7");
        try std.testing.expect(h.isOld());
    }

    /// Produces "$name-$semver-$sizedhash".
    /// * name is the name field from build.zig.zon, truncated at 32 bytes and must
    ///   be a valid zig identifier
    /// * semver is the version field from build.zig.zon, truncated at 32 bytes
    /// * sizedhash is the following 9-byte array, base64 encoded using -_ to make
    ///   it filesystem safe:
    ///   - (4 bytes) LE u32 total decompressed size in bytes
    ///   - (5 bytes) truncated SHA-256 of hashed files of the package
    ///
    /// example: "nasm-2.16.1-2-BWdcABvF_jM1"
    pub fn init(digest: Digest, name: []const u8, ver: []const u8, size: u32) Hash {
        var result: Hash = undefined;
        // Write directly into the result buffer; `max_len` is sized so that
        // the truncated fields, separators and sizedhash always fit.
        var buf: std.ArrayListUnmanaged(u8) = .initBuffer(&result.bytes);
        buf.appendSliceAssumeCapacity(name[0..@min(name.len, 32)]);
        buf.appendAssumeCapacity('-');
        buf.appendSliceAssumeCapacity(ver[0..@min(ver.len, 32)]);
        buf.appendAssumeCapacity('-');
        var sizedhash: [9]u8 = undefined;
        std.mem.writeInt(u32, sizedhash[0..4], size, .little);
        sizedhash[4..].* = digest[0..5].*;
        // 9 input bytes encode to exactly 12 unpadded base64 characters.
        _ = std.base64.url_safe_no_pad.Encoder.encode(buf.addManyAsArrayAssumeCapacity(12), &sizedhash);
        // Zero the tail so that `toSlice` and `eql` behave deterministically.
        @memset(buf.unusedCapacitySlice(), 0);
        return result;
    }

    /// Produces "$hashiname-N-$sizedhash". For packages that lack "build.zig.zon" metadata.
    /// * hashiname is [5..][0..24] bytes of the SHA-256, urlsafe-base64-encoded, for a total of 32 bytes encoded
    /// * the semver section is replaced with a hardcoded N which stands for
    ///   "naked". It acts as a version number so that any future updates to the
    ///   hash format can tell this hash format apart. Note that "N" is an
    ///   invalid semver.
    /// * sizedhash is the same as in `init`.
    ///
    /// The hash is broken up this way so that "sizedhash" can be calculated
    /// exactly the same way in both cases, and so that "name" and "hashiname" can
    /// be used interchangeably in both cases.
    pub fn initNaked(digest: Digest, size: u32) Hash {
        var name: [32]u8 = undefined;
        _ = std.base64.url_safe_no_pad.Encoder.encode(&name, digest[5..][0..24]);
        return init(digest, &name, "N", size);
    }
};
104+
105+
/// Hash function codes as they appear in the leading bytes of a multihash.
///
/// The enum is non-exhaustive, so any `u16` code can be represented even if
/// it has no named tag here.
pub const MultihashFunction = enum(u16) {
    identity = 0x00,

    // SHA-1
    sha1 = 0x11,

    // SHA-2 and SHA-3 families with single-byte codes
    @"sha2-256" = 0x12,
    @"sha2-512" = 0x13,
    @"sha3-512" = 0x14,
    @"sha3-384" = 0x15,
    @"sha3-256" = 0x16,
    @"sha3-224" = 0x17,
    @"sha2-384" = 0x20,

    // SHA-2 variants with two-byte codes
    @"sha2-256-trunc254-padded" = 0x1012,
    @"sha2-224" = 0x1013,
    @"sha2-512-224" = 0x1014,
    @"sha2-512-256" = 0x1015,

    // BLAKE2
    @"blake2b-256" = 0xb220,

    _,
};
122+
123+
/// The multihash function code that corresponds to `Hash.Algo`.
/// Compilation fails if `Hash.Algo` is changed to an algorithm that has no
/// mapping here.
pub const multihash_function: MultihashFunction = if (Hash.Algo == std.crypto.hash.sha2.Sha256)
    .@"sha2-256"
else
    @compileError("unreachable");
127+
128+
/// Encodes `digest` in the legacy hex multihash form: the hash function code
/// and the digest length (one byte each), followed by the digest bytes, all
/// written as lowercase hex pairs.
pub fn multiHashHexDigest(digest: Hash.Digest) MultiHashHexDigest {
    const charset = std.fmt.hex_charset;

    // Both header values are known at compile time and fit in a single byte.
    const header = [2]u8{ @intFromEnum(multihash_function), Hash.Algo.digest_length };

    var out: MultiHashHexDigest = undefined;

    for (header, 0..) |byte, pos| {
        out[2 * pos] = charset[byte >> 4];
        out[2 * pos + 1] = charset[byte & 15];
    }

    // Digest bytes follow immediately after the two header bytes.
    for (digest, header.len..) |byte, pos| {
        out[2 * pos] = charset[byte >> 4];
        out[2 * pos + 1] = charset[byte & 15];
    }

    return out;
}
145+
146+
comptime {
    // multiHashHexDigest writes the function code and the digest length as a
    // single byte each instead of running a general uleb128 encoder. Verify
    // at compile time that both values are small enough for that one-byte
    // encoding.
    const one_byte_limit = std.math.maxInt(u7);
    assert(@intFromEnum(multihash_function) < one_byte_limit);
    assert(Hash.Algo.digest_length < one_byte_limit);
}
152+
153+
test Hash {
    // 10 MiB, stored little-endian in the first four bytes of the sizedhash.
    const decompressed_size: u32 = 10 * 1024 * 1024;
    const digest: Hash.Digest = .{
        0xc7, 0xf5, 0x71, 0xb7, 0xb4, 0xe7, 0x6f, 0x3c,
        0xdb, 0x87, 0x7a, 0x7f, 0xdd, 0xf9, 0x77, 0x87,
        0x9d, 0xd3, 0x86, 0xfa, 0x73, 0x57, 0x9a, 0xf7,
        0x9d, 0x1e, 0xdb, 0x8f, 0x3a, 0xd9, 0xbd, 0x9f,
    };

    const hash = Hash.init(digest, "nasm", "2.16.1-2", decompressed_size);

    try std.testing.expectEqualStrings("nasm-2.16.1-2-AACgAMf1cbe0", hash.toSlice());
}
161+
6162
// Reference `Fetch` from a test block so that it is analyzed when this
// file's tests are built.
test {
7163
_ = Fetch;
8164
}

0 commit comments

Comments
 (0)