|
| 1 | +const std = @import("std"); |
| 2 | +const assert = std.debug.assert; |
| 3 | + |
1 | 4 | pub const Module = @import("Package/Module.zig");
|
2 | 5 | pub const Fetch = @import("Package/Fetch.zig");
|
3 | 6 | pub const build_zig_basename = "build.zig";
|
4 | 7 | pub const Manifest = @import("Package/Manifest.zig");
|
5 | 8 |
|
/// Length in bytes of a binary legacy multihash: one byte for the hash
/// function code, one byte for the digest length, then the digest itself.
pub const multihash_len = 1 + 1 + Hash.Algo.digest_length;
/// Length of a legacy multihash once hex-encoded (two characters per byte).
pub const multihash_hex_digest_len = 2 * multihash_len;
/// Fixed-size buffer holding a hex-encoded legacy multihash.
pub const MultiHashHexDigest = [multihash_hex_digest_len]u8;
| 12 | + |
/// A user-readable, file system safe hash that identifies an exact package
/// snapshot, including file contents.
///
/// This data structure can be used to store the legacy hash format too. Legacy
/// hash format is scheduled to be removed after 0.14.0 is tagged.
pub const Hash = struct {
    /// The hash text, stored inline. Unused bytes at the end are filled with
    /// zeroes.
    bytes: [max_len]u8,

    pub const Algo = std.crypto.hash.sha2.Sha256;
    pub const Digest = [Algo.digest_length]u8;

    /// Maximum size of a package hash: name (up to 32 bytes), '-', version
    /// (up to 32 bytes), '-', and the 12-byte base64 "sizedhash".
    pub const max_len = 32 + 1 + 32 + 1 + 12;

    /// Copies `s` into a new `Hash` and zero-fills the remaining bytes.
    /// Asserts that `s.len <= max_len`.
    pub fn fromSlice(s: []const u8) Hash {
        assert(s.len <= max_len);
        var result: Hash = undefined;
        @memcpy(result.bytes[0..s.len], s);
        @memset(result.bytes[s.len..], 0);
        return result;
    }

    /// Returns the stored text with the trailing zero padding removed.
    /// The returned slice points into `ph` and is empty when every byte is
    /// zero (for example a hash created from an empty slice).
    pub fn toSlice(ph: *const Hash) []const u8 {
        var end: usize = ph.bytes.len;
        // Check `end > 0` before looking at `end - 1` so that an all-zero
        // buffer yields an empty slice instead of overflowing `usize`.
        while (end > 0 and ph.bytes[end - 1] == 0) end -= 1;
        return ph.bytes[0..end];
    }

    test toSlice {
        const empty: Hash = .fromSlice("");
        try std.testing.expectEqualStrings("", empty.toSlice());

        const short: Hash = .fromSlice("abc");
        try std.testing.expectEqualStrings("abc", short.toSlice());

        const full: Hash = .fromSlice("a" ** max_len);
        try std.testing.expectEqual(@as(usize, max_len), full.toSlice().len);
    }

    /// Compares the full backing arrays, including the zero padding.
    pub fn eql(a: *const Hash, b: *const Hash) bool {
        return std.mem.eql(u8, &a.bytes, &b.bytes);
    }

    /// Distinguishes whether the legacy multihash format is being stored here.
    ///
    /// The legacy format is recognized when the first two characters parse as
    /// the hex code of `multihash_function`, the text is exactly
    /// `multihash_hex_digest_len` bytes long, and it contains no '-'.
    pub fn isOld(h: *const Hash) bool {
        const their_multihash_func = std.fmt.parseInt(u8, h.bytes[0..2], 16) catch return false;
        if (@as(MultihashFunction, @enumFromInt(their_multihash_func)) != multihash_function) return false;
        if (h.toSlice().len != multihash_hex_digest_len) return false;
        return std.mem.indexOfScalar(u8, &h.bytes, '-') == null;
    }

    test isOld {
        const h: Hash = .fromSlice("1220138f4aba0c01e66b68ed9e1e1e74614c06e4743d88bc58af4f1c3dd0aae5fea7");
        try std.testing.expect(h.isOld());
    }

    /// Produces "$name-$semver-$sizedhash".
    /// * name is the name field from build.zig.zon, truncated at 32 bytes and must
    ///   be a valid zig identifier
    /// * semver is the version field from build.zig.zon, truncated at 32 bytes
    /// * sizedhash is the following 9-byte array, base64 encoded using -_ to make
    ///   it filesystem safe:
    ///   - (4 bytes) LE u32 total decompressed size in bytes
    ///   - (5 bytes) truncated SHA-256 of hashed files of the package
    ///
    /// example: "nasm-2.16.1-2-BWdcABvF_jM1"
    pub fn init(digest: Digest, name: []const u8, ver: []const u8, size: u32) Hash {
        var result: Hash = undefined;
        // The list writes directly into `result.bytes`; the truncation below
        // keeps the total within `max_len`, so the AssumeCapacity calls hold.
        var buf: std.ArrayListUnmanaged(u8) = .initBuffer(&result.bytes);
        buf.appendSliceAssumeCapacity(name[0..@min(name.len, 32)]);
        buf.appendAssumeCapacity('-');
        buf.appendSliceAssumeCapacity(ver[0..@min(ver.len, 32)]);
        buf.appendAssumeCapacity('-');
        var sizedhash: [9]u8 = undefined;
        std.mem.writeInt(u32, sizedhash[0..4], size, .little);
        sizedhash[4..].* = digest[0..5].*;
        // 9 raw bytes encode to exactly 12 base64 characters without padding.
        _ = std.base64.url_safe_no_pad.Encoder.encode(buf.addManyAsArrayAssumeCapacity(12), &sizedhash);
        // Zero the tail so that `toSlice` and `eql` see deterministic padding.
        @memset(buf.unusedCapacitySlice(), 0);
        return result;
    }

    /// Produces "$hashiname-N-$sizedhash". For packages that lack "build.zig.zon" metadata.
    /// * hashiname is [5..][0..24] bytes of the SHA-256, urlsafe-base64-encoded, for a total of 32 bytes encoded
    /// * the semver section is replaced with a hardcoded N which stands for
    ///   "naked". It acts as a version number so that any future updates to the
    ///   hash format can tell this hash format apart. Note that "N" is an
    ///   invalid semver.
    /// * sizedhash is the same as in `init`.
    ///
    /// The hash is broken up this way so that "sizedhash" can be calculated
    /// exactly the same way in both cases, and so that "name" and "hashiname" can
    /// be used interchangeably in both cases.
    pub fn initNaked(digest: Digest, size: u32) Hash {
        var name: [32]u8 = undefined;
        _ = std.base64.url_safe_no_pad.Encoder.encode(&name, digest[5..][0..24]);
        return init(digest, &name, "N", size);
    }
};
| 104 | + |
/// Hash function codes that can identify the algorithm of a multihash.
/// The tag type is `u16` and the enum is non-exhaustive (`_`), so codes that
/// are not listed here can still be represented.
/// NOTE(review): the values appear to follow the multiformats multicodec
/// table — confirm against the upstream table before adding entries.
pub const MultihashFunction = enum(u16) {
    identity = 0x00,
    sha1 = 0x11,
    @"sha2-256" = 0x12,
    @"sha2-512" = 0x13,
    @"sha3-512" = 0x14,
    @"sha3-384" = 0x15,
    @"sha3-256" = 0x16,
    @"sha3-224" = 0x17,
    @"sha2-384" = 0x20,
    @"sha2-256-trunc254-padded" = 0x1012,
    @"sha2-224" = 0x1013,
    @"sha2-512-224" = 0x1014,
    @"sha2-512-256" = 0x1015,
    @"blake2b-256" = 0xb220,
    // Non-exhaustive: any other u16 code is a valid (unnamed) value.
    _,
};
| 122 | + |
/// The multihash function code that corresponds to `Hash.Algo`.
/// Compilation fails if `Hash.Algo` is changed to an algorithm that has no
/// mapping here.
pub const multihash_function: MultihashFunction = if (Hash.Algo == std.crypto.hash.sha2.Sha256)
    .@"sha2-256"
else
    @compileError("unreachable");
| 127 | + |
/// Encodes `digest` as a lowercase hex multihash string: the function code,
/// then the digest length, then the digest bytes, each byte written as two
/// hex characters.
pub fn multiHashHexDigest(digest: Hash.Digest) MultiHashHexDigest {
    const charset = std.fmt.hex_charset;

    // Assemble the binary form first: [function code][digest length][digest].
    var raw: [multihash_len]u8 = undefined;
    raw[0] = @intFromEnum(multihash_function);
    raw[1] = Hash.Algo.digest_length;
    raw[2..].* = digest;

    // Then expand every byte into its high and low nibble characters.
    var hex: MultiHashHexDigest = undefined;
    for (raw, 0..) |byte, index| {
        hex[2 * index] = charset[byte >> 4];
        hex[2 * index + 1] = charset[byte & 15];
    }
    return hex;
}
| 145 | + |
comptime {
    // multiHashHexDigest writes the function code and the digest length as a
    // single byte each rather than running a uleb128 encoder. Verify at
    // compile time that both values are small enough for that one-byte
    // encoding.
    const one_byte_limit = 127;
    assert(Hash.Algo.digest_length < one_byte_limit);
    assert(@intFromEnum(multihash_function) < one_byte_limit);
}
| 152 | + |
test Hash {
    // Only the first five digest bytes end up in the "sizedhash" suffix.
    const sample_digest: Hash.Digest = .{
        0xc7, 0xf5, 0x71, 0xb7, 0xb4, 0xe7, 0x6f, 0x3c, 0xdb, 0x87, 0x7a, 0x7f, 0xdd, 0xf9, 0x77, 0x87,
        0x9d, 0xd3, 0x86, 0xfa, 0x73, 0x57, 0x9a, 0xf7, 0x9d, 0x1e, 0xdb, 0x8f, 0x3a, 0xd9, 0xbd, 0x9f,
    };
    const ten_mebibytes: u32 = 10 * 1024 * 1024;

    const hash = Hash.init(sample_digest, "nasm", "2.16.1-2", ten_mebibytes);
    const text = hash.toSlice();
    try std.testing.expectEqualStrings("nasm-2.16.1-2-AACgAMf1cbe0", text);
}
| 161 | + |
// Reference `Fetch` so that the test declarations in Package/Fetch.zig are
// analyzed and run together with this file's tests.
test {
    _ = Fetch;
}
|
0 commit comments