Skip to content

Commit 8ce0690

Browse files
committed
std.tar: writer optimizations
Init Header with file defaults. Writing file is most common case. Conversion to octal without bufPrint. Checksum calculation without branching.
1 parent 60ce389 commit 8ce0690

File tree

1 file changed

+84
-86
lines changed

1 file changed

+84
-86
lines changed

lib/std/tar/writer.zig

+84-86
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,18 @@ const testing = std.testing;
66
/// Use setRoot to nest all following entries under a single root. If a file doesn't
77
/// fit into the posix header (name+prefix: 100+155 bytes), a gnu extended header will
88
/// be used for long names. Options enables setting file permission mode and
9-
/// mtime. Default is to use current time for mtime and
10-
/// `default_mode`.file/dir/sym_link for mode.
9+
/// mtime. Default is to use current time for mtime and 0o664 for file mode.
1110
pub fn writer(underlying_writer: anytype) Writer(@TypeOf(underlying_writer)) {
1211
return .{ .underlying_writer = underlying_writer };
1312
}
1413

1514
pub fn Writer(comptime WriterType: type) type {
1615
return struct {
1716
const block_size = @sizeOf(Header);
17+
const empty_block: [block_size]u8 = [_]u8{0} ** block_size;
1818

19-
/// Options for writing file/dir/link. If left empty
20-
/// `default_mode.file`/`default_mode.dir`/`default_mode.sym_link` is
21-
/// used for mode and current time for mtime.
19+
/// Options for writing file/dir/link. If left empty 0o664 is used for
20+
/// file mode and current time for mtime.
2221
pub const Options = struct {
2322
/// File system permission mode.
2423
mode: u32 = 0,
@@ -31,16 +30,15 @@ pub fn Writer(comptime WriterType: type) type {
3130
prefix: []const u8 = "",
3231
mtime_now: u64 = 0,
3332

34-
/// Sets prefix for all other add* method paths.
33+
/// Sets prefix for all other write* method paths.
3534
pub fn setRoot(self: *Self, root: []const u8) !void {
3635
if (root.len > 0)
3736
try self.writeDir(root, .{});
3837

3938
self.prefix = root;
4039
}
4140

42-
/// Writes directory. If options are omitted `default_mode.dir` is
43-
/// used for mode and current time for `mtime`.
41+
/// Writes directory.
4442
pub fn writeDir(self: *Self, sub_path: []const u8, opt: Options) !void {
4543
try self.writeHeader(.directory, sub_path, "", 0, opt);
4644
}
@@ -49,14 +47,19 @@ pub fn Writer(comptime WriterType: type) type {
4947
pub fn writeFile(self: *Self, sub_path: []const u8, file: std.fs.File) !void {
5048
const stat = try file.stat();
5149
const mtime: u64 = @intCast(@divFloor(stat.mtime, std.time.ns_per_s));
52-
try self.writeHeader(.regular, sub_path, "", stat.size, .{ .mtime = mtime });
50+
51+
var header = Header{};
52+
try self.setPath(&header, sub_path);
53+
try header.setSize(stat.size);
54+
try header.setMtime(mtime);
55+
try header.write(self.underlying_writer);
56+
5357
try self.underlying_writer.writeFile(file);
54-
try self.writePadding(@intCast(stat.size));
58+
try self.writePadding(stat.size);
5559
}
5660

5761
/// Writes file reading file content from `reader`. Number of bytes in
58-
/// reader must be equal to `size`. If options are omitted `default_mode.file` is
59-
/// used for mode and current time for `mtime`.
62+
/// reader must be equal to `size`.
6063
pub fn writeFileStream(self: *Self, sub_path: []const u8, size: usize, reader: anytype, opt: Options) !void {
6164
try self.writeHeader(.regular, sub_path, "", @intCast(size), opt);
6265

@@ -68,22 +71,19 @@ pub fn Writer(comptime WriterType: type) type {
6871
}
6972

7073
/// Writes file using bytes buffer `content` for size and file content.
71-
/// If options are omitted `default_mode.file` is used for mode and
72-
/// current time for `mtime`.
7374
pub fn writeFileBytes(self: *Self, sub_path: []const u8, content: []const u8, opt: Options) !void {
7475
try self.writeHeader(.regular, sub_path, "", @intCast(content.len), opt);
7576
try self.underlying_writer.writeAll(content);
7677
try self.writePadding(content.len);
7778
}
7879

79-
/// Writes symlink. If options are omitted `default_mode.sym_link` is
80-
/// used for mode and current time for `mtime`.
80+
/// Writes symlink.
8181
pub fn writeLink(self: *Self, sub_path: []const u8, link_name: []const u8, opt: Options) !void {
8282
try self.writeHeader(.symbolic_link, sub_path, link_name, 0, opt);
8383
}
8484

8585
/// Writes fs.Dir.WalkerEntry. Uses `mtime` from file system entry and
86-
/// default from `default_mode` for entry mode .
86+
/// the default for the entry mode.
8787
pub fn writeEntry(self: *Self, entry: std.fs.Dir.Walker.WalkerEntry) !void {
8888
switch (entry.kind) {
8989
.directory => {
@@ -115,9 +115,10 @@ pub fn Writer(comptime WriterType: type) type {
115115
) !void {
116116
var header = Header.init(typeflag);
117117
try self.setPath(&header, sub_path);
118-
try self.setMtime(&header, opt.mtime);
119118
try header.setSize(size);
120-
try header.setMode(opt.mode);
119+
try header.setMtime(if (opt.mtime != 0) opt.mtime else self.mtimeNow());
120+
if (opt.mode != 0)
121+
try header.setMode(opt.mode);
121122
if (typeflag == .symbolic_link)
122123
header.setLinkname(link_name) catch |err| switch (err) {
123124
error.NameTooLong => try self.writeExtendedHeader(.gnu_long_link, &.{link_name}),
@@ -126,17 +127,10 @@ pub fn Writer(comptime WriterType: type) type {
126127
try header.write(self.underlying_writer);
127128
}
128129

129-
fn setMtime(self: *Self, header: *Header, mtime: u64) !void {
130-
const mt = blk: {
131-
if (mtime == 0) {
132-
// use time now
133-
if (self.mtime_now == 0)
134-
self.mtime_now = @intCast(std.time.timestamp());
135-
break :blk self.mtime_now;
136-
}
137-
break :blk mtime;
138-
};
139-
try header.setMtime(mt);
130+
fn mtimeNow(self: *Self) u64 {
131+
if (self.mtime_now == 0)
132+
self.mtime_now = @intCast(std.time.timestamp());
133+
return self.mtime_now;
140134
}
141135

142136
fn entryMtime(entry: std.fs.Dir.Walker.WalkerEntry) !u64 {
@@ -174,28 +168,22 @@ pub fn Writer(comptime WriterType: type) type {
174168
try self.writePadding(len);
175169
}
176170

177-
fn writePadding(self: *Self, bytes: usize) !void {
178-
const remainder = bytes % block_size;
179-
if (remainder == 0) return;
180-
const padding = block_size - remainder;
181-
try self.underlying_writer.writeByteNTimes(0, padding);
171+
fn writePadding(self: *Self, bytes: u64) !void {
172+
const pos: usize = @intCast(bytes % block_size);
173+
if (pos == 0) return;
174+
try self.underlying_writer.writeAll(empty_block[pos..]);
182175
}
183176

184177
/// Tar should finish with two zero blocks, but 'reasonable system must
185178
/// not assume that such a block exists when reading an archive' (from
186179
/// reference). In practice it is safe to skip this finish.
187180
pub fn finish(self: *Self) !void {
188-
try self.underlying_writer.writeByteNTimes(0, block_size * 2);
181+
try self.underlying_writer.writeAll(&empty_block);
182+
try self.underlying_writer.writeAll(&empty_block);
189183
}
190184
};
191185
}
192186

193-
const default_mode = struct {
194-
const file = 0o664;
195-
const dir = 0o775;
196-
const sym_link = 0o777;
197-
};
198-
199187
/// A struct that is exactly 512 bytes and matches tar file format. This is
200188
/// intended to be used for outputting tar files; for parsing there is
201189
/// `std.tar.Header`.
@@ -208,24 +196,24 @@ const Header = extern struct {
208196
// strings. All other fields are zero-filled octal numbers in ASCII. Each
209197
// numeric field of width w contains w minus 1 digits, and a null.
210198
// Reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html
211-
// POSIX header: byte offset
212-
name: [100]u8, // 0
213-
mode: [7:0]u8, // 100
214-
uid: [7:0]u8, // 108
215-
gid: [7:0]u8, // 116
216-
size: [11:0]u8, // 124
217-
mtime: [11:0]u8, // 136
218-
checksum: [7:0]u8, // 148
219-
typeflag: FileType, // 156
220-
linkname: [100]u8, // 157
221-
magic: [6]u8, // 257
222-
version: [2]u8, // 263
223-
uname: [32]u8, // 265
224-
gname: [32]u8, // 297
225-
devmajor: [7:0]u8, // 329
226-
devminor: [7:0]u8, // 337
227-
prefix: [155]u8, // 345
228-
pad: [12]u8, // 500
199+
// POSIX header: byte offset
200+
name: [100]u8 = [_]u8{0} ** 100, // 0
201+
mode: [7:0]u8 = default_mode.file, // 100
202+
uid: [7:0]u8 = [_:0]u8{0} ** 7, // unused 108
203+
gid: [7:0]u8 = [_:0]u8{0} ** 7, // unused 116
204+
size: [11:0]u8 = [_:0]u8{'0'} ** 11, // 124
205+
mtime: [11:0]u8 = [_:0]u8{'0'} ** 11, // 136
206+
checksum: [7:0]u8 = [_:0]u8{' '} ** 7, // 148
207+
typeflag: FileType = .regular, // 156
208+
linkname: [100]u8 = [_]u8{0} ** 100, // 157
209+
magic: [6]u8 = [_]u8{ 'u', 's', 't', 'a', 'r', 0 }, // 257
210+
version: [2]u8 = [_]u8{ '0', '0' }, // 263
211+
uname: [32]u8 = [_]u8{0} ** 32, // unused 265
212+
gname: [32]u8 = [_]u8{0} ** 32, // unused 297
213+
devmajor: [7:0]u8 = [_:0]u8{0} ** 7, // unused 329
214+
devminor: [7:0]u8 = [_:0]u8{0} ** 7, // unused 337
215+
prefix: [155]u8 = [_]u8{0} ** 155, // 345
216+
pad: [12]u8 = [_]u8{0} ** 12, // unused 500
229217

230218
pub const FileType = enum(u8) {
231219
regular = '0',
@@ -235,47 +223,56 @@ const Header = extern struct {
235223
gnu_long_link = 'K',
236224
};
237225

226+
const default_mode = struct {
227+
const file = [_:0]u8{ '0', '0', '0', '0', '6', '6', '4' }; // 0o664
228+
const dir = [_:0]u8{ '0', '0', '0', '0', '7', '7', '5' }; // 0o775
229+
const sym_link = [_:0]u8{ '0', '0', '0', '0', '7', '7', '7' }; // 0o777
230+
const other = [_:0]u8{ '0', '0', '0', '0', '0', '0', '0' }; // 0o000
231+
};
232+
238233
pub fn init(typeflag: FileType) Header {
239-
var header = std.mem.zeroes(Header);
240-
header.magic = [_]u8{ 'u', 's', 't', 'a', 'r', 0 };
241-
header.version = [_]u8{ '0', '0' };
242-
header.typeflag = typeflag;
243-
return header;
234+
return .{
235+
.typeflag = typeflag,
236+
.mode = switch (typeflag) {
237+
.directory => default_mode.dir,
238+
.symbolic_link => default_mode.sym_link,
239+
.regular => default_mode.file,
240+
else => default_mode.other,
241+
},
242+
};
244243
}
245244

246245
pub fn setSize(self: *Header, size: u64) !void {
247-
_ = try std.fmt.bufPrint(&self.size, "{o:0>11}", .{size});
246+
try octal(&self.size, size);
247+
}
248+
249+
/// Writes `value` into `buf` as right-aligned ASCII octal digits.
///
/// Every position of `buf` is overwritten: positions not needed for the
/// digits of `value` are filled with '0', so bytes previously stored in the
/// field (a default mode, the checksum's space placeholder) never leak into
/// the result. A `value` of 0 yields a field of all '0'.
///
/// Returns `error.OctalOverflow` when `value` needs more than `buf.len`
/// octal digits; `buf` then holds the lowest `buf.len` digits.
fn octal(buf: []u8, value: u64) !void {
    var remainder: u64 = value;
    var pos: usize = buf.len;
    while (pos > 0) {
        pos -= 1;
        // Once `remainder` reaches 0 this keeps emitting '0', which is what
        // zero-fills the leading part of the field.
        buf[pos] = @as(u8, @intCast(remainder % 8)) + '0';
        remainder /= 8;
    }
    // Digits left over after the buffer is full do not fit in the field.
    if (remainder > 0) return error.OctalOverflow;
}
249260

250261
pub fn setMode(self: *Header, mode: u32) !void {
251-
const m: u32 = if (mode == 0)
252-
switch (self.typeflag) {
253-
.directory => default_mode.dir,
254-
.symbolic_link => default_mode.sym_link,
255-
else => default_mode.file,
256-
}
257-
else
258-
mode;
259-
_ = try std.fmt.bufPrint(&self.mode, "{o:0>7}", .{m});
262+
try octal(&self.mode, mode);
260263
}
261264

262265
// Integer number of seconds since January 1, 1970, 00:00 Coordinated Universal Time.
263266
// The value is written as given; falling back to the current time when no mtime is supplied is done by the caller.
264267
pub fn setMtime(self: *Header, mtime: u64) !void {
265-
_ = try std.fmt.bufPrint(&self.mtime, "{o:0>11}", .{mtime});
268+
try octal(&self.mtime, mtime);
266269
}
267270

268271
pub fn updateChecksum(self: *Header) !void {
269-
const offset = @offsetOf(Header, "checksum");
270-
var checksum: usize = 0;
271-
for (std.mem.asBytes(self), 0..) |val, i| {
272-
checksum += if (i >= offset and i < offset + @sizeOf(@TypeOf(self.checksum)))
273-
' '
274-
else
275-
val;
276-
}
277-
278-
_ = try std.fmt.bufPrint(&self.checksum, "{o:0>7}", .{checksum});
272+
var checksum: usize = ' '; // other 7 self.checksum bytes are initialized to ' '
273+
for (std.mem.asBytes(self)) |val|
274+
checksum += val;
275+
try octal(&self.checksum, checksum);
279276
}
280277

281278
pub fn write(self: *Header, output_writer: anytype) !void {
@@ -495,5 +492,6 @@ test "write files" {
495492
try actual.writeAll(content.writer());
496493
try testing.expectEqualSlices(u8, expected.content, content.items);
497494
}
495+
try wrt.finish();
498496
}
499497
}

0 commit comments

Comments
 (0)