Skip to content

Commit 6a7ee1f

Browse files
committed
std.compress.xz public API cleanup
* add xz to std.compress
* prefer importing std.zig by file name, to reduce reliance on the standard library being a special case.
* extract some types from inside generic functions. These types are the same regardless of the generic parameters.
* expose some more types in the std.compress.xz namespace.
* rename xz.stream to xz.decompress
* rename check.Kind to Check
* use std.leb for LEB instead of a redundant implementation
1 parent ad20732 commit 6a7ee1f

File tree

8 files changed

+157
-186
lines changed

8 files changed

+157
-186
lines changed

lib/std/compress.zig

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ const std = @import("std.zig");
33
pub const deflate = @import("compress/deflate.zig");
44
pub const gzip = @import("compress/gzip.zig");
55
pub const zlib = @import("compress/zlib.zig");
6+
pub const xz = @import("compress/xz.zig");
67

78
pub fn HashedReader(
89
comptime ReaderType: anytype,
@@ -38,4 +39,5 @@ test {
3839
_ = deflate;
3940
_ = gzip;
4041
_ = zlib;
42+
_ = xz;
4143
}

lib/std/compress/xz.zig

Lines changed: 139 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,142 @@
1-
pub usingnamespace @import("xz/stream.zig");
1+
const std = @import("std");
2+
const block = @import("xz/block.zig");
3+
const Allocator = std.mem.Allocator;
4+
const Crc32 = std.hash.Crc32;
5+
6+
/// 16-bit flags field embedded in both `Header` and `Footer`.
/// `Decompress` rejects input whose reserved bits are non-zero and passes
/// `check_kind` on to the block decoder.
pub const Flags = packed struct(u16) {
7+
/// Reserved bits; `Decompress` requires them to be zero.
reserved1: u8,
8+
/// Which integrity check the stream declares (see `Check`).
check_kind: Check,
9+
/// Reserved bits; `Decompress` requires them to be zero.
reserved2: u4,
10+
};
11+
12+
/// Fixed-size record read from the very start of the input with `readStruct`.
pub const Header = extern struct {
13+
/// Expected to equal the bytes 0xFD '7' 'z' 'X' 'Z' 0x00.
magic: [6]u8,
14+
/// Stream flags; the bytes of this field are what `crc32` covers.
flags: Flags,
15+
/// CRC32 of the bytes of `flags`, compared against a freshly computed value.
crc32: u32,
16+
};
17+
18+
/// Fixed-size record read with `readStruct` after the index has been consumed.
pub const Footer = extern struct {
19+
/// CRC32 over the bytes of `backward_size` followed by the bytes of `flags`.
crc32: u32,
20+
/// Encoded index size: `(backward_size + 1) * 4` must equal the number of
/// index bytes counted while reading the index.
backward_size: u32,
21+
/// Stream flags; reserved bits must be zero here as well.
flags: Flags,
22+
/// Expected to equal the bytes 'Y' 'Z'.
magic: [2]u8,
23+
};
24+
25+
/// Integrity check kind stored in the 4-bit `check_kind` field of `Flags`.
/// The enum is non-exhaustive (`_`), so tag values not named here can still
/// be represented.
pub const Check = enum(u4) {
26+
/// No integrity check.
none = 0x00,
27+
/// CRC32 check.
crc32 = 0x01,
28+
/// CRC64 check.
crc64 = 0x04,
29+
/// SHA-256 check.
sha256 = 0x0A,
30+
_,
31+
};
32+
33+
/// Creates an XZ decompressor that pulls compressed bytes from `reader`.
///
/// The stream header is read and validated before this function returns, so
/// any error produced by `Decompress(...).init` is propagated to the caller.
/// `allocator` is stored in the returned value and forwarded to the block
/// decoder. Call `deinit` on the result when finished with it.
pub fn decompress(allocator: Allocator, reader: anytype) !Decompress(@TypeOf(reader)) {
    const Stream = Decompress(@TypeOf(reader));
    return Stream.init(allocator, reader);
}
36+
37+
/// Returns the streaming XZ decompressor type for input of type `ReaderType`.
///
/// Values are created with `decompress`, which validates the stream header.
/// Decompressed bytes are then obtained through `read` (or the `std.io.Reader`
/// returned by `reader`). Once the block decoder reports that it has no more
/// data, `read` validates the index and the stream footer and returns 0.
pub fn Decompress(comptime ReaderType: type) type {
    return struct {
        const Self = @This();

        /// Errors `read` can return: those of the underlying reader plus
        /// those of the block decoder.
        pub const Error = ReaderType.Error || block.Decoder(ReaderType).Error;
        pub const Reader = std.io.Reader(*Self, Error, read);

        allocator: Allocator,
        block_decoder: block.Decoder(ReaderType),
        in_reader: ReaderType,

        /// Reads the stream header from `source`, validates it, and sets up
        /// the block decoder with the check kind announced in the header.
        ///
        /// Returns `error.BadHeader` when the magic bytes are wrong or a
        /// reserved flag bit is set, and `error.WrongChecksum` when the CRC32
        /// stored in the header does not match the flags.
        fn init(allocator: Allocator, source: ReaderType) !Self {
            const header = try source.readStruct(Header);

            if (!std.mem.eql(u8, &header.magic, &.{ 0xFD, '7', 'z', 'X', 'Z', 0x00 }))
                return error.BadHeader;

            if (header.flags.reserved1 != 0 or header.flags.reserved2 != 0)
                return error.BadHeader;

            // The header CRC covers only the flags field.
            const hash = Crc32.hash(std.mem.asBytes(&header.flags));
            if (hash != header.crc32)
                return error.WrongChecksum;

            return Self{
                .allocator = allocator,
                .block_decoder = try block.decoder(allocator, source, header.flags.check_kind),
                .in_reader = source,
            };
        }

        /// Releases the resources held by the block decoder.
        pub fn deinit(self: *Self) void {
            self.block_decoder.deinit();
        }

        /// Returns a `std.io.Reader` whose reads are served by `read`.
        pub fn reader(self: *Self) Reader {
            return .{ .context = self };
        }

        /// Fills `buffer` with decompressed bytes and returns how many were
        /// written.
        ///
        /// Returns 0 for an empty `buffer`. Also returns 0 once the block
        /// decoder yields no more data and the index and footer that follow
        /// have been validated. Structural problems in the index or footer are
        /// reported as `error.CorruptInput`; CRC mismatches are reported as
        /// `error.WrongChecksum`.
        pub fn read(self: *Self, buffer: []u8) Error!usize {
            if (buffer.len == 0)
                return 0;

            const r = try self.block_decoder.read(buffer);
            if (r != 0)
                return r;

            // No more block data: parse the index, remembering its total size.
            const index_size = blk: {
                var hasher = std.compress.hashedReader(self.in_reader, Crc32.init());
                // One 0x00 byte is fed to the hash and to the byte count by hand
                // instead of being read here.
                // NOTE(review): presumably this is the index indicator byte that
                // the block decoder has already consumed; confirm in block.zig.
                hasher.hasher.update(&[1]u8{0x00});

                var counter = std.io.countingReader(hasher.reader());
                counter.bytes_read += 1;

                const counting_reader = counter.reader();

                const record_count = try std.leb.readULEB128(u64, counting_reader);
                if (record_count != self.block_decoder.block_count)
                    return error.CorruptInput;

                var i: usize = 0;
                while (i < record_count) : (i += 1) {
                    // TODO: validate records
                    // Each record consists of two ULEB128 values, which are
                    // currently read and discarded.
                    _ = try std.leb.readULEB128(u64, counting_reader);
                    _ = try std.leb.readULEB128(u64, counting_reader);
                }

                // The index is padded with zero bytes to a multiple of four.
                while (counter.bytes_read % 4 != 0) {
                    if (try counting_reader.readByte() != 0)
                        return error.CorruptInput;
                }

                // Take the running CRC before reading the stored CRC, so the
                // stored value is not part of what it is compared against.
                const hash_a = hasher.hasher.final();
                const hash_b = try counting_reader.readIntLittle(u32);
                if (hash_a != hash_b)
                    return error.WrongChecksum;

                break :blk counter.bytes_read;
            };

            const footer = try self.in_reader.readStruct(Footer);
            // `footer.backward_size` comes straight from the input. Widen it
            // before the arithmetic so that a value close to maxInt(u32) ends
            // up as a size mismatch (`error.CorruptInput`) rather than as an
            // integer overflow.
            const backward_size = (@as(u64, footer.backward_size) + 1) * 4;
            if (backward_size != index_size)
                return error.CorruptInput;

            if (footer.flags.reserved1 != 0 or footer.flags.reserved2 != 0)
                return error.CorruptInput;

            // The footer CRC covers backward_size followed by the flags.
            var hasher = Crc32.init();
            hasher.update(std.mem.asBytes(&footer.backward_size));
            hasher.update(std.mem.asBytes(&footer.flags));
            const hash = hasher.final();
            if (hash != footer.crc32)
                return error.WrongChecksum;

            if (!std.mem.eql(u8, &footer.magic, &.{ 'Y', 'Z' }))
                return error.CorruptInput;

            return 0;
        }
    };
}
2139

3140
test {
4-
_ = @import("xz/stream.zig");
141+
_ = @import("xz/test.zig");
5142
}

lib/std/compress/xz/block.zig

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
1-
const std = @import("std");
2-
const check = @import("check.zig");
1+
const std = @import("../../std.zig");
32
const lzma = @import("lzma.zig");
4-
const multibyte = @import("multibyte.zig");
53
const Allocator = std.mem.Allocator;
64
const Crc32 = std.hash.Crc32;
75
const Crc64 = std.hash.crc.Crc64Xz;
86
const Sha256 = std.crypto.hash.sha2.Sha256;
7+
const xz = std.compress.xz;
98

109
const DecodeError = error{
1110
CorruptInput,
@@ -16,8 +15,8 @@ const DecodeError = error{
1615
Overflow,
1716
};
1817

19-
pub fn decoder(allocator: Allocator, reader: anytype, check_kind: check.Kind) !Decoder(@TypeOf(reader)) {
20-
return Decoder(@TypeOf(reader)).init(allocator, reader, check_kind);
18+
/// Creates a block decoder that reads from `reader` and uses `check` as the
/// integrity check kind. Errors from `Decoder(...).init` are propagated.
pub fn decoder(allocator: Allocator, reader: anytype, check: xz.Check) !Decoder(@TypeOf(reader)) {
    const BlockDecoder = Decoder(@TypeOf(reader));
    return BlockDecoder.init(allocator, reader, check);
}
2221

2322
pub fn Decoder(comptime ReaderType: type) type {
@@ -31,17 +30,17 @@ pub fn Decoder(comptime ReaderType: type) type {
3130

3231
allocator: Allocator,
3332
inner_reader: ReaderType,
34-
check_kind: check.Kind,
33+
check: xz.Check,
3534
err: ?Error,
3635
accum: lzma.LzAccumBuffer,
3736
lzma_state: lzma.DecoderState,
3837
block_count: usize,
3938

40-
fn init(allocator: Allocator, in_reader: ReaderType, check_kind: check.Kind) !Self {
39+
fn init(allocator: Allocator, in_reader: ReaderType, check: xz.Check) !Self {
4140
return Self{
4241
.allocator = allocator,
4342
.inner_reader = in_reader,
44-
.check_kind = check_kind,
43+
.check = check,
4544
.err = null,
4645
.accum = .{},
4746
.lzma_state = try lzma.DecoderState.init(allocator),
@@ -116,10 +115,10 @@ pub fn Decoder(comptime ReaderType: type) type {
116115
return error.Unsupported;
117116

118117
if (flags.has_packed_size)
119-
packed_size = try multibyte.readInt(header_reader);
118+
packed_size = try std.leb.readULEB128(u64, header_reader);
120119

121120
if (flags.has_unpacked_size)
122-
unpacked_size = try multibyte.readInt(header_reader);
121+
unpacked_size = try std.leb.readULEB128(u64, header_reader);
123122

124123
const FilterId = enum(u64) {
125124
lzma2 = 0x21,
@@ -128,7 +127,7 @@ pub fn Decoder(comptime ReaderType: type) type {
128127

129128
const filter_id = @intToEnum(
130129
FilterId,
131-
try multibyte.readInt(header_reader),
130+
try std.leb.readULEB128(u64, header_reader),
132131
);
133132

134133
if (@enumToInt(filter_id) >= 0x4000_0000_0000_0000)
@@ -137,7 +136,7 @@ pub fn Decoder(comptime ReaderType: type) type {
137136
if (filter_id != .lzma2)
138137
return error.Unsupported;
139138

140-
const properties_size = try multibyte.readInt(header_reader);
139+
const properties_size = try std.leb.readULEB128(u64, header_reader);
141140
if (properties_size != 1)
142141
return error.CorruptInput;
143142

@@ -177,8 +176,7 @@ pub fn Decoder(comptime ReaderType: type) type {
177176
return error.CorruptInput;
178177
}
179178

180-
// Check
181-
switch (self.check_kind) {
179+
switch (self.check) {
182180
.none => {},
183181
.crc32 => {
184182
const hash_a = Crc32.hash(unpacked_bytes);

lib/std/compress/xz/check.zig

Lines changed: 0 additions & 7 deletions
This file was deleted.

lib/std/compress/xz/lzma.zig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// Ported from https://github.com/gendx/lzma-rs
22

3-
const std = @import("std");
3+
const std = @import("../../std.zig");
44
const assert = std.debug.assert;
55
const Allocator = std.mem.Allocator;
66
const ArrayListUnmanaged = std.ArrayListUnmanaged;

lib/std/compress/xz/multibyte.zig

Lines changed: 0 additions & 23 deletions
This file was deleted.

0 commit comments

Comments
 (0)