Skip to content

Commit 96a55f6

Browse files
authored
Merge pull request #14434 from FnControlOption/xz
Add xz decoder closes #14300 closes #2851
2 parents fcef728 + d0dedef commit 96a55f6

32 files changed

+1230
-16
lines changed

build.zig

+2
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,8 @@ pub fn build(b: *Builder) !void {
122122
"compress-gettysburg.txt",
123123
"compress-pi.txt",
124124
"rfc1951.txt",
125+
// exclude files from lib/std/compress/xz/testdata
126+
".xz",
125127
// exclude files from lib/std/tz/
126128
".tzif",
127129
// others

lib/std/compress.zig

+2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ const std = @import("std.zig");
33
pub const deflate = @import("compress/deflate.zig");
44
pub const gzip = @import("compress/gzip.zig");
55
pub const zlib = @import("compress/zlib.zig");
6+
pub const xz = @import("compress/xz.zig");
67

78
pub fn HashedReader(
89
comptime ReaderType: anytype,
@@ -38,4 +39,5 @@ test {
3839
_ = deflate;
3940
_ = gzip;
4041
_ = zlib;
42+
_ = xz;
4143
}

lib/std/compress/gzip.zig

+5-8
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//
22
// Decompressor for GZIP data streams (RFC1952)
33

4-
const std = @import("std");
4+
const std = @import("../std.zig");
55
const io = std.io;
66
const fs = std.fs;
77
const testing = std.testing;
@@ -17,10 +17,7 @@ const FCOMMENT = 1 << 4;
1717

1818
const max_string_len = 1024;
1919

20-
/// TODO: the fully qualified namespace to this declaration is
21-
/// std.compress.gzip.GzipStream which has a redundant "gzip" in the name.
22-
/// Instead, it should be `std.compress.gzip.Stream`.
23-
pub fn GzipStream(comptime ReaderType: type) type {
20+
pub fn Decompress(comptime ReaderType: type) type {
2421
return struct {
2522
const Self = @This();
2623

@@ -154,14 +151,14 @@ pub fn GzipStream(comptime ReaderType: type) type {
154151
};
155152
}
156153

157-
pub fn gzipStream(allocator: mem.Allocator, reader: anytype) !GzipStream(@TypeOf(reader)) {
158-
return GzipStream(@TypeOf(reader)).init(allocator, reader);
154+
pub fn decompress(allocator: mem.Allocator, reader: anytype) !Decompress(@TypeOf(reader)) {
155+
return Decompress(@TypeOf(reader)).init(allocator, reader);
159156
}
160157

161158
fn testReader(data: []const u8, comptime expected: []const u8) !void {
162159
var in_stream = io.fixedBufferStream(data);
163160

164-
var gzip_stream = try gzipStream(testing.allocator, in_stream.reader());
161+
var gzip_stream = try decompress(testing.allocator, in_stream.reader());
165162
defer gzip_stream.deinit();
166163

167164
// Read and decompress the whole file

lib/std/compress/xz.zig

+145
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
const std = @import("std");
2+
const block = @import("xz/block.zig");
3+
const Allocator = std.mem.Allocator;
4+
const Crc32 = std.hash.Crc32;
5+
6+
/// Identifier of the integrity check announced in the stream flags.
/// `readStreamFlags` decodes it from a 4-bit field, hence the `u4` tag type.
/// The enum is non-exhaustive, so values without a named tag are still
/// representable.
pub const Check = enum(u4) {
    none = 0x00,
    crc32 = 0x01,
    crc64 = 0x04,
    sha256 = 0x0A,
    _,
};
13+
14+
/// Reads the 16 bits of stream flags from `reader` and stores the decoded
/// check type in `check`.
///
/// Layout, as consumed here with a little-endian bit reader: 8 reserved bits,
/// then the 4-bit check identifier, then 4 more reserved bits. Any non-zero
/// reserved field yields `error.CorruptInput`. Errors from the underlying
/// reader are propagated.
fn readStreamFlags(reader: anytype, check: *Check) !void {
    var bits = std.io.bitReader(.Little, reader);

    // Leading reserved byte must be zero.
    if ((try bits.readBitsNoEof(u8, 8)) != 0) return error.CorruptInput;

    // `check` is written before the trailing reserved bits are validated.
    const check_id = try bits.readBitsNoEof(u4, 4);
    check.* = @intToEnum(Check, check_id);

    // Trailing reserved nibble must be zero as well.
    if ((try bits.readBitsNoEof(u4, 4)) != 0) return error.CorruptInput;
}
27+
28+
/// Creates a decompressor over `reader`, using `allocator` for the block
/// decoder. The stream header is read and validated as part of this call, so
/// any error from `Decompress(...).init` is returned here.
pub fn decompress(allocator: Allocator, reader: anytype) !Decompress(@TypeOf(reader)) {
    const Stream = Decompress(@TypeOf(reader));
    return Stream.init(allocator, reader);
}
31+
32+
/// Builds a streaming decompressor type over `ReaderType`.
///
/// The returned struct validates the stream header in `init`, delegates the
/// block payload to `block.Decoder(ReaderType)`, and validates the index and
/// the stream footer once the block decoder reports that no more data is
/// available.
pub fn Decompress(comptime ReaderType: type) type {
    return struct {
        const Self = @This();

        pub const Error = ReaderType.Error || block.Decoder(ReaderType).Error;
        pub const Reader = std.io.Reader(*Self, Error, read);

        allocator: Allocator,
        block_decoder: block.Decoder(ReaderType),
        in_reader: ReaderType,

        /// Reads and validates the stream header from `source`: six magic
        /// bytes, the stream flags, and the little-endian CRC32 of those flags.
        ///
        /// Returns `error.BadHeader` when the magic bytes do not match and
        /// `error.WrongChecksum` when the stored CRC32 differs from the one
        /// computed over the flags.
        fn init(allocator: Allocator, source: ReaderType) !Self {
            const magic = try source.readBytesNoEof(6);
            if (!std.mem.eql(u8, &magic, &.{ 0xFD, '7', 'z', 'X', 'Z', 0x00 }))
                return error.BadHeader;

            var check: Check = undefined;
            const hash_a = blk: {
                // Hash exactly the bytes consumed by readStreamFlags.
                var hasher = std.compress.hashedReader(source, Crc32.init());
                try readStreamFlags(hasher.reader(), &check);
                break :blk hasher.hasher.final();
            };

            const hash_b = try source.readIntLittle(u32);
            if (hash_a != hash_b)
                return error.WrongChecksum;

            return Self{
                .allocator = allocator,
                .block_decoder = try block.decoder(allocator, source, check),
                .in_reader = source,
            };
        }

        /// Deinitializes the block decoder.
        pub fn deinit(self: *Self) void {
            self.block_decoder.deinit();
        }

        /// Returns a `std.io.Reader` whose reads go through `read`.
        pub fn reader(self: *Self) Reader {
            return .{ .context = self };
        }

        /// Reads decompressed bytes into `buffer` and returns how many were
        /// written.
        ///
        /// Returns 0 for an empty `buffer`. When the block decoder returns 0,
        /// the index and the stream footer are read from the underlying reader
        /// and validated, and 0 is returned on success. Validation failures
        /// are reported as `error.CorruptInput` or `error.WrongChecksum`.
        ///
        /// NOTE(review): a further call after this function has returned 0 for
        /// a non-empty buffer will attempt to parse the index again if the
        /// block decoder keeps returning 0 — confirm callers stop at the first
        /// 0.
        pub fn read(self: *Self, buffer: []u8) Error!usize {
            if (buffer.len == 0)
                return 0;

            const r = try self.block_decoder.read(buffer);
            if (r != 0)
                return r;

            const index_size = blk: {
                var hasher = std.compress.hashedReader(self.in_reader, Crc32.init());
                // A single 0x00 byte is folded into both the hash and the byte
                // count without being read here; presumably it is the index
                // indicator already consumed by the block decoder — confirm in
                // xz/block.zig.
                hasher.hasher.update(&[1]u8{0x00});

                var counter = std.io.countingReader(hasher.reader());
                counter.bytes_read += 1;

                const counting_reader = counter.reader();

                const record_count = try std.leb.readULEB128(u64, counting_reader);
                if (record_count != self.block_decoder.block_count)
                    return error.CorruptInput;

                var i: usize = 0;
                while (i < record_count) : (i += 1) {
                    // TODO: validate records
                    _ = try std.leb.readULEB128(u64, counting_reader);
                    _ = try std.leb.readULEB128(u64, counting_reader);
                }

                // The index is padded with zero bytes to a multiple of four.
                while (counter.bytes_read % 4 != 0) {
                    if (try counting_reader.readByte() != 0)
                        return error.CorruptInput;
                }

                // Finalize before reading the stored CRC32 so that the
                // checksum bytes themselves are not part of the hash.
                const hash_a = hasher.hasher.final();
                const hash_b = try counting_reader.readIntLittle(u32);
                if (hash_a != hash_b)
                    return error.WrongChecksum;

                // Includes the four CRC32 bytes read just above.
                break :blk counter.bytes_read;
            };

            // Footer: stored CRC32 first, then the fields it covers.
            const hash_a = try self.in_reader.readIntLittle(u32);

            const hash_b = blk: {
                var hasher = std.compress.hashedReader(self.in_reader, Crc32.init());
                const hashed_reader = hasher.reader();

                // Widen before the arithmetic: the stored value comes from the
                // input, and `(x + 1) * 4` evaluated in u32 overflows for any
                // x >= 0x3FFFFFFF, which panics in safe builds and is
                // undefined behavior otherwise.
                const stored_backward_size = try hashed_reader.readIntLittle(u32);
                const backward_size = (@as(u64, stored_backward_size) + 1) * 4;
                if (backward_size != index_size)
                    return error.CorruptInput;

                // The footer flags are parsed (and hashed) but the decoded
                // check type is not compared against anything here.
                var check: Check = undefined;
                try readStreamFlags(hashed_reader, &check);

                break :blk hasher.hasher.final();
            };

            if (hash_a != hash_b)
                return error.WrongChecksum;

            const magic = try self.in_reader.readBytesNoEof(2);
            if (!std.mem.eql(u8, &magic, &.{ 'Y', 'Z' }))
                return error.CorruptInput;

            return 0;
        }
    };
}
142+
143+
test {
    // Reference xz/test.zig so that its test declarations are included when
    // this file is tested.
    _ = @import("xz/test.zig");
}

0 commit comments

Comments
 (0)