Skip to content

Commit

Permalink
Optimization passes (#55)
Browse files Browse the repository at this point in the history
* New zig build asm command to generate assembly for perf analysis
* Gate the debug trace and trap features behind a compile-time option. Most users probably don't care about this and it has a non-trivial impact on perf.
* Fix instruction immediates regressing to 32 bytes. When they were changed to be extern structs, the ordering started to matter. Now there's a comptime check to make sure they're always 16 bytes.
* Embed param/return count in function instance to avoid an extra pointer hop.
* Minor optimization to use `@memset` to initialize locals, since the default values are always 0 anyway.
* Some optimization notes for the future.
  • Loading branch information
rdunnington authored Jul 1, 2024
1 parent 2ff4f4b commit 5931f4f
Show file tree
Hide file tree
Showing 9 changed files with 89 additions and 94 deletions.
18 changes: 12 additions & 6 deletions build.zig
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,19 @@ const ExeOpts = struct {
step_name: []const u8,
description: []const u8,
step_dependencies: ?[]*Build.Step = null,
should_emit_asm: bool = false,
emit_asm_step: ?*Build.Step = null,
options: *Build.Step.Options,
};

pub fn build(b: *Build) void {
const should_emit_asm = b.option(bool, "asm", "Emit asm for the bytebox binaries") orelse false;

const enable_metering = b.option(bool, "meter", "Enable metering") orelse false;
const enable_debug_trace = b.option(bool, "debug_trace", "Enable debug tracing feature") orelse false;
const enable_debug_trap = b.option(bool, "debug_trap", "Enable debug trap features") orelse false;

const options = b.addOptions();
options.addOption(bool, "enable_metering", enable_metering);
options.addOption(bool, "enable_debug_trace", enable_debug_trace);
options.addOption(bool, "enable_debug_trap", enable_debug_trap);

const target = b.standardTargetOptions(.{});
const optimize = b.standardOptimizeOption(.{});
Expand All @@ -45,7 +47,7 @@ pub fn build(b: *Build) void {

bytebox_module.addOptions("config", options);

// exe.root_module.addImport(import.name, import.module);
const emit_asm_step: *Build.Step = b.step("asm", "Emit assembly");

const imports = [_]ModuleImport{
.{ .name = "bytebox", .module = bytebox_module },
Expand All @@ -57,7 +59,7 @@ pub fn build(b: *Build) void {
.root_src = "run/main.zig",
.step_name = "run",
.description = "Run a wasm program",
.should_emit_asm = should_emit_asm,
.emit_asm_step = emit_asm_step,
.options = options,
});

Expand Down Expand Up @@ -146,7 +148,11 @@ fn buildExeWithRunStep(b: *Build, target: Build.ResolvedTarget, optimize: std.bu
}
exe.root_module.addOptions("config", opts.options);

// exe.emit_asm = if (opts.should_emit_asm) .emit else .default;
if (opts.emit_asm_step) |asm_step| {
const asm_filename = std.fmt.allocPrint(b.allocator, "{s}.asm", .{opts.exe_name}) catch unreachable;
asm_step.dependOn(&b.addInstallFile(exe.getEmittedAsm(), asm_filename).step);
}

b.installArtifact(exe);

if (opts.step_dependencies) |steps| {
Expand Down
13 changes: 13 additions & 0 deletions optimize.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Failed Optimizations

* Giving locals their own stack space separate from values. The idea here was to save
some perf on push/pop of call frames so that we wouldn't have to copy the return values
back to the appropriate place. But since the wasm calling convention is to pass params
via the stack, you'd have to copy them elsewhere anyway, which defeats the point of
the optimization: avoiding copying values around.

* Instruction stream. Instead of having an array of structs that contain opcode + immediates,
have a byte stream of opcodes and immediates where you don't have to pay for the extra memory
of the immediates if you don't need them. But it turns out that a lot of instructions
use immediates anyway and the overhead of fetching them out of the stream is more
expensive than just paying for the cache misses. Overall memory is
10 changes: 9 additions & 1 deletion run/main.zig
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
const std = @import("std");
const bytebox = @import("bytebox");
const config = bytebox.config;
const wasi = bytebox.wasi;

const Val = bytebox.Val;
Expand Down Expand Up @@ -105,7 +106,12 @@ fn parseCmdOpts(args: [][]const u8, env_buffer: *std.ArrayList([]const u8), dir_
arg_index += 1;
if (getArgSafe(arg_index, args)) |mode_str| {
if (bytebox.DebugTrace.parseMode(mode_str)) |mode| {
opts.trace = mode;
if (config.enable_debug_trace == false) {
log.err("Bytebox was not compiled with -Ddebug_trace=true. Enable this compile time flag if you want to enable tracing at runtime.", .{});
opts.invalid_arg = mode_str;
} else {
opts.trace = mode;
}
} else {
opts.invalid_arg = mode_str;
}
Expand Down Expand Up @@ -169,6 +175,8 @@ fn printHelp(args: [][]const u8) void {
\\ * none (default)
\\ * function
\\ * instruction
\\ Note that this requires bytebox to be compiled with the flag -Ddebug_trace=true,
\\ which is off by default for performance reasons.
\\
\\
;
Expand Down
1 change: 1 addition & 0 deletions src/core.zig
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ const def = @import("definition.zig");
const inst = @import("instance.zig");
const vm_stack = @import("vm_stack.zig");
const vm_register = @import("vm_register.zig");
pub const config = @import("config");
pub const wasi = @import("wasi.zig");

pub const LogLevel = common.LogLevel;
Expand Down
34 changes: 7 additions & 27 deletions src/definition.zig
Original file line number Diff line number Diff line change
Expand Up @@ -809,43 +809,19 @@ pub const TablePairImmediates = extern struct {

pub const BlockImmediates = extern struct {
block_type: BlockType,
num_returns: u16,
block_value: BlockTypeValue,
num_returns: u32,
continuation: u32,
};

pub const IfImmediates = extern struct {
block_type: BlockType,
num_returns: u16,
block_value: BlockTypeValue,
num_returns: u32,
else_continuation: u32,
end_continuation: u32,
};

// const InstructionImmediatesTypes = enum(u8) {
// Void,
// ValType,
// ValueI32,
// ValueF32,
// ValueI64,
// ValueF64,
// ValueVec,
// Index,
// LabelId,
// MemoryOffset,
// MemoryOffsetAndLane,
// Block,
// CallIndirect,
// TablePair,
// If,
// VecShuffle16,
// };

/// Pairs a byte slice with an alignment value.
/// NOTE(review): presumably `alignment` is the alignment the bytes are meant to
/// be interpreted with; no use of this type is visible here, so confirm against callers.
pub const AlignedBytes = struct {
/// Read-only byte view; the slice type itself only guarantees 1-byte alignment.
bytes: []align(1) const u8,
/// Alignment value carried alongside `bytes`.
alignment: usize,
};

pub const InstructionImmediates = extern union {
Void: void,
ValType: ValType,
Expand All @@ -865,6 +841,10 @@ pub const InstructionImmediates = extern union {
VecShuffle16: [16]u8,
};

// Compile-time guard: the build fails unless the immediates union is exactly
// 16 bytes, so an accidental size regression is caught at compile time.
comptime {
    const immediates_size = @sizeOf(InstructionImmediates);
    std.debug.assert(immediates_size == 16);
}

pub const Instruction = struct {
opcode: Opcode,
immediate: InstructionImmediates,
Expand Down Expand Up @@ -901,7 +881,7 @@ pub const Instruction = struct {
block_value = BlockTypeValue{ .ValType = valtype };
}

const num_returns: u32 = @as(u32, @intCast(block_value.getBlocktypeReturnTypes(block_type, _module).len));
const num_returns: u16 = @intCast(block_value.getBlocktypeReturnTypes(block_type, _module).len);

return InstructionImmediates{
.Block = BlockImmediates{
Expand Down
6 changes: 4 additions & 2 deletions src/instance.zig
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ const AllocError = std.mem.Allocator.Error;

const builtin = @import("builtin");

const config = @import("config");
const metering = @import("metering.zig");

const common = @import("common.zig");
Expand Down Expand Up @@ -70,6 +71,7 @@ pub const DebugTrace = struct {
};

/// Makes `new_mode` the active trace mode.
/// Asserts that the `enable_debug_trace` build option is on.
pub fn setMode(new_mode: Mode) void {
    // Changing the mode is only valid in builds where tracing is compiled in.
    std.debug.assert(config.enable_debug_trace);
    mode = new_mode;
}

Expand All @@ -86,11 +88,11 @@ pub const DebugTrace = struct {
}

pub fn shouldTraceFunctions() bool {
return mode == .Function;
return config.enable_debug_trace and mode == .Function;
}

pub fn shouldTraceInstructions() bool {
return mode == .Instruction;
return config.enable_debug_trace and mode == .Instruction;
}

pub fn printIndent(indent: u32) void {
Expand Down
Loading

0 comments on commit 5931f4f

Please sign in to comment.