From 3bc3561a8ed14ede7e601cd22366ef2a638251cd Mon Sep 17 00:00:00 2001 From: Tom Read Cutting Date: Thu, 19 Oct 2023 09:15:24 +0100 Subject: [PATCH 1/2] Hack together a very rough archiver/zar intergration --- src/archive/archive/Archive.zig | 1611 +++++++ src/archive/archive/zld/Coff.zig | 126 + src/archive/archive/zld/Coff/Object.zig | 187 + src/archive/archive/zld/Coff/Options.zig | 91 + src/archive/archive/zld/Elf.zig | 1523 ++++++ src/archive/archive/zld/Elf/Archive.zig | 211 + src/archive/archive/zld/Elf/Atom.zig | 381 ++ src/archive/archive/zld/Elf/Object.zig | 494 ++ src/archive/archive/zld/Elf/Options.zig | 143 + src/archive/archive/zld/Elf/gc.zig | 204 + src/archive/archive/zld/MachO.zig | 4081 +++++++++++++++++ src/archive/archive/zld/MachO/Archive.zig | 232 + src/archive/archive/zld/MachO/Atom.zig | 1052 +++++ .../archive/zld/MachO/CodeSignature.zig | 445 ++ src/archive/archive/zld/MachO/DwarfInfo.zig | 512 +++ src/archive/archive/zld/MachO/Dylib.zig | 534 +++ src/archive/archive/zld/MachO/Object.zig | 1054 +++++ src/archive/archive/zld/MachO/Options.zig | 495 ++ src/archive/archive/zld/MachO/Trie.zig | 612 +++ src/archive/archive/zld/MachO/UnwindInfo.zig | 835 ++++ src/archive/archive/zld/MachO/dead_strip.zig | 472 ++ .../archive/zld/MachO/dyld_info/Rebase.zig | 574 +++ .../archive/zld/MachO/dyld_info/bind.zig | 740 +++ src/archive/archive/zld/MachO/eh_frame.zig | 621 +++ src/archive/archive/zld/MachO/fat.zig | 55 + .../archive/zld/MachO/load_commands.zig | 312 ++ src/archive/archive/zld/MachO/thunks.zig | 355 ++ src/archive/archive/zld/ThreadPool.zig | 155 + src/archive/archive/zld/WaitGroup.zig | 46 + src/archive/archive/zld/Wasm.zig | 1957 ++++++++ src/archive/archive/zld/Wasm/Archive.zig | 223 + src/archive/archive/zld/Wasm/Atom.zig | 197 + src/archive/archive/zld/Wasm/Object.zig | 1010 ++++ src/archive/archive/zld/Wasm/Options.zig | 224 + src/archive/archive/zld/Wasm/Symbol.zig | 173 + src/archive/archive/zld/Wasm/emit_wasm.zig | 667 +++ src/archive/archive/zld/Wasm/sections.zig | 372 ++ src/archive/archive/zld/Wasm/types.zig | 304 ++ src/archive/archive/zld/Zld.zig | 142 + src/archive/archive/zld/aarch64.zig | 1943 ++++++++ src/archive/archive/zld/main.zig | 83 + src/archive/archive/zld/strtab.zig | 113 + src/archive/archive/zld/tapi.zig | 168 + src/archive/archive/zld/tapi/Tokenizer.zig | 460 ++ src/archive/archive/zld/tapi/parse.zig | 690 +++ src/archive/archive/zld/tapi/parse/test.zig | 558 +++ src/archive/archive/zld/tapi/yaml.zig | 727 +++ src/archive/archive/zld/test.zig | 394 ++ src/archive/archive/zld/tracy.zig | 308 ++ src/archive/main.zig | 649 +++ src/archive/test.zig | 620 +++ src/archive/tracking_buffered_writer.zig | 32 + src/archive/tracy.zig | 308 ++ src/link.zig | 15 +- 54 files changed, 30488 insertions(+), 2 deletions(-) create mode 100644 src/archive/archive/Archive.zig create mode 100644 src/archive/archive/zld/Coff.zig create mode 100644 src/archive/archive/zld/Coff/Object.zig create mode 100644 src/archive/archive/zld/Coff/Options.zig create mode 100644 src/archive/archive/zld/Elf.zig create mode 100644 src/archive/archive/zld/Elf/Archive.zig create mode 100644 src/archive/archive/zld/Elf/Atom.zig create mode 100644 src/archive/archive/zld/Elf/Object.zig create mode 100644 src/archive/archive/zld/Elf/Options.zig create mode 100644 src/archive/archive/zld/Elf/gc.zig create mode 100644 src/archive/archive/zld/MachO.zig create mode 100644 src/archive/archive/zld/MachO/Archive.zig create mode 100644 src/archive/archive/zld/MachO/Atom.zig create mode 
100644 src/archive/archive/zld/MachO/CodeSignature.zig create mode 100644 src/archive/archive/zld/MachO/DwarfInfo.zig create mode 100644 src/archive/archive/zld/MachO/Dylib.zig create mode 100644 src/archive/archive/zld/MachO/Object.zig create mode 100644 src/archive/archive/zld/MachO/Options.zig create mode 100644 src/archive/archive/zld/MachO/Trie.zig create mode 100644 src/archive/archive/zld/MachO/UnwindInfo.zig create mode 100644 src/archive/archive/zld/MachO/dead_strip.zig create mode 100644 src/archive/archive/zld/MachO/dyld_info/Rebase.zig create mode 100644 src/archive/archive/zld/MachO/dyld_info/bind.zig create mode 100644 src/archive/archive/zld/MachO/eh_frame.zig create mode 100644 src/archive/archive/zld/MachO/fat.zig create mode 100644 src/archive/archive/zld/MachO/load_commands.zig create mode 100644 src/archive/archive/zld/MachO/thunks.zig create mode 100644 src/archive/archive/zld/ThreadPool.zig create mode 100644 src/archive/archive/zld/WaitGroup.zig create mode 100644 src/archive/archive/zld/Wasm.zig create mode 100644 src/archive/archive/zld/Wasm/Archive.zig create mode 100644 src/archive/archive/zld/Wasm/Atom.zig create mode 100644 src/archive/archive/zld/Wasm/Object.zig create mode 100644 src/archive/archive/zld/Wasm/Options.zig create mode 100644 src/archive/archive/zld/Wasm/Symbol.zig create mode 100644 src/archive/archive/zld/Wasm/emit_wasm.zig create mode 100644 src/archive/archive/zld/Wasm/sections.zig create mode 100644 src/archive/archive/zld/Wasm/types.zig create mode 100644 src/archive/archive/zld/Zld.zig create mode 100644 src/archive/archive/zld/aarch64.zig create mode 100644 src/archive/archive/zld/main.zig create mode 100644 src/archive/archive/zld/strtab.zig create mode 100644 src/archive/archive/zld/tapi.zig create mode 100644 src/archive/archive/zld/tapi/Tokenizer.zig create mode 100644 src/archive/archive/zld/tapi/parse.zig create mode 100644 src/archive/archive/zld/tapi/parse/test.zig create mode 100644 src/archive/archive/zld/tapi/yaml.zig create mode 100644 src/archive/archive/zld/test.zig create mode 100644 src/archive/archive/zld/tracy.zig create mode 100644 src/archive/main.zig create mode 100644 src/archive/test.zig create mode 100644 src/archive/tracking_buffered_writer.zig create mode 100644 src/archive/tracy.zig diff --git a/src/archive/archive/Archive.zig b/src/archive/archive/Archive.zig new file mode 100644 index 000000000000..23e290600c73 --- /dev/null +++ b/src/archive/archive/Archive.zig @@ -0,0 +1,1611 @@ +const Archive = @This(); + +const builtin = @import("builtin"); +const std = @import("std"); +const build_options = @import("build_options"); +const trace = @import("../tracy.zig").trace; +const traceNamed = @import("../tracy.zig").traceNamed; +const fmt = std.fmt; +const fs = std.fs; +const mem = std.mem; +const logger = std.log.scoped(.archive); +const elf = std.elf; +const Elf = @import("zld/Zld.zig").Elf; +const MachO = @import("zld/Zld.zig").MachO; +const macho = std.macho; +const Coff = @import("zld/Zld.zig").Coff; +// We don't have any kind of bitcode parsing support at the moment, but we need +// to report dealing with bitcode files as an error. So embed magic like this +// matching the format of the actual zld package for now. 
+const Bitcode = struct { + const magic = "BC\xC0\xDE"; +}; +const coff = std.coff; +const tracking_buffered_writer = @import("../tracking_buffered_writer.zig"); + +const Allocator = std.mem.Allocator; + +dir: fs.Dir, +file: fs.File, +name: []const u8, +created: bool, + +// We need to differentiate between inferred and output archive type, as other ar +// programs "just handle" any valid archive for parsing, regarldess of what a +// user has specified - the user specification should only matter for writing +// archives. +inferred_archive_type: ArchiveType, +output_archive_type: ArchiveType, + +files: std.ArrayListUnmanaged(ArchivedFile), +symbols: std.ArrayListUnmanaged(Symbol), + +// Use it so we can easily lookup files indices when inserting! +// TODO: A trie is probably a lot better here +file_name_to_index: std.StringArrayHashMapUnmanaged(u64), + +modifiers: Modifiers, + +stat: fs.File.Stat, + +pub const ArchiveType = enum { + ambiguous, + gnu, + gnuthin, + gnu64, + bsd, + darwin, // *mostly* like BSD, with some differences in limited contexts when writing for determinism reasons + darwin64, + coff, // (windows) + + pub fn getAlignment(self: ArchiveType) u32 { + // See: https://github.com/llvm-mirror/llvm/blob/2c4ca6832fa6b306ee6a7010bfb80a3f2596f824/lib/Object/ArchiveWriter.cpp#L311 + return switch (self) { + .ambiguous => unreachable, + else => if (self.isBsdLike()) @as(u32, 8) else @as(u32, 2), + }; + } + + pub fn getFileAlignment(self: ArchiveType) u32 { + // In this context, bsd like archives get 2 byte alignment but darwin + // stick to 8 byte alignment + return switch (self) { + .ambiguous => unreachable, + else => if (self.isDarwin()) @as(u32, 8) else @as(u32, 2), + }; + } + + pub fn isBsdLike(self: ArchiveType) bool { + return switch (self) { + .bsd, .darwin, .darwin64 => true, + else => false, + }; + } + + pub fn isDarwin(self: ArchiveType) bool { + return switch (self) { + .darwin, .darwin64 => true, + else => false, + }; + } +}; + +pub const Operation = enum { + insert, + delete, + move, + print_contents, + quick_append, + ranlib, + print_names, + extract, + print_symbols, + undefined, +}; + +// We seperate errors into two classes, "handled" and "unhandled". +// The reason for this is that "handled" errors log appropriate error +// messages at the point they are created, whereas unhandled errors do +// not so the caller will need to print appropriate error messages +// themselves (if needed at all). +pub const UnhandledError = CreateError || ParseError || InsertError || DeleteError || FinalizeError || CriticalError; +pub const HandledError = HandledIoError || error{ + UnknownFormat, +}; + +// We can set this to true just to make Handled errors are actually handled at +// comptime! 
+pub const test_errors_handled = true; // build_options.test_errors_handled;
+
+pub const HandledIoError = if (test_errors_handled) error{Handled} else IoError;
+
+pub const CreateError = error{};
+
+pub const ParseError = error{
+    NotArchive,
+    MalformedArchive,
+    Overflow,
+    InvalidCharacter,
+};
+
+pub const InsertError = error{};
+pub const DeleteError = error{};
+pub const FinalizeError = error{};
+
+pub const CriticalError = error{
+    OutOfMemory,
+    TODO,
+};
+
+pub const IoError = error{
+    AccessDenied,
+    BrokenPipe,
+    ConnectionResetByPeer,
+    ConnectionTimedOut,
+    DiskQuota,
+    InputOutput,
+    IsDir,
+    NotOpenForReading,
+    InvalidArgument,
+    InvalidHandle,
+    OperationAborted,
+    SystemResources,
+    Unexpected,
+    Unseekable,
+    WouldBlock,
+    EndOfStream,
+    BadPathName,
+    DeviceBusy,
+    FileBusy,
+    FileLocksNotSupported,
+    FileNotFound,
+    FileTooBig,
+    InvalidUtf8,
+    LockViolation,
+    NameTooLong,
+    NetNameDeleted,
+    NetworkNotFound,
+    NoDevice,
+    NoSpaceLeft,
+    NotDir,
+    NotOpenForWriting,
+    PathAlreadyExists,
+    PipeBusy,
+    ProcessFdQuotaExceeded,
+    SharingViolation,
+    SymLinkLoop,
+    SystemFdQuotaExceeded,
+};
+
+// All archive files start with one of these magic strings
+pub const magic_string = "!<arch>\n";
+pub const magic_thin = "!<thin>\n";
+
+// GNU constants
+pub const gnu_first_line_buffer_length = 60;
+pub const gnu_string_table_seek_pos = magic_string.len + gnu_first_line_buffer_length;
+
+// BSD constants
+pub const bsd_name_length_signifier = "#1/";
+pub const bsd_symdef_magic = "__.SYMDEF";
+pub const bsd_symdef_64_magic = "__.SYMDEF_64";
+pub const bsd_symdef_sorted_magic = "__.SYMDEF SORTED";
+
+pub const bsd_symdef_longest_magic = @max(@max(bsd_symdef_magic.len, bsd_symdef_64_magic.len), bsd_symdef_sorted_magic.len);
+
+pub const invalid_file_index = std.math.maxInt(u64);
+
+// The format (unparsed) of the archive per-file header
+// NOTE: The reality is more complex than this as different mechanisms
+// have been devised for storing the names of files which exceed 16 bytes!
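[Editor's note: to make the fixed-width layout of the Header struct below concrete, here is a small illustrative example that is not part of the patch. The file name "hello.o" and the 2248-byte size are hypothetical; the field widths and the closing "`\n" magic follow the classic ar member header described by the struct.]

    // Hypothetical example, for illustration only: a 60-byte GNU-style member
    // header for a 2248-byte object named "hello.o", with deterministic
    // (zeroed) metadata. Every field is space-padded ASCII decimal.
    const example_member_header =
        "hello.o/        " ++ // ar_name, 16 bytes (GNU appends a trailing '/')
        "0           " ++ //     ar_date, 12 bytes
        "0     " ++ //           ar_uid, 6 bytes
        "0     " ++ //           ar_gid, 6 bytes
        "0       " ++ //         ar_mode, 8 bytes
        "2248      " ++ //       ar_size, 10 bytes
        "`\n"; //                ar_fmag, 2 bytes
    comptime {
        if (example_member_header.len != 60) @compileError("ar member headers are always 60 bytes");
    }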
+pub const Header = extern struct { + ar_name: [16]u8, + ar_date: [12]u8, + ar_uid: [6]u8, + ar_gid: [6]u8, + ar_mode: [8]u8, + ar_size: [10]u8, + ar_fmag: [2]u8, + + pub const format_string = "{s: <16}{: <12}{: <6}{: <6}{: <8}{: <10}`\n"; +}; + +pub const ExplicitBooleanSetting = enum { ambiguous, set_true, set_false }; + +pub const MoveSetting = union(enum) { + end, + before: ?[]const u8, + after: ?[]const u8, +}; + +pub const Modifiers = struct { + // Supress warning for file creation + create: bool = false, + // Only insert files with more recent timestamps than archive + update_only: bool = false, + use_real_timestamps_and_ids: bool = false, + build_symbol_table: bool = true, + sort_symbol_table: ExplicitBooleanSetting = .ambiguous, + verbose: bool = false, + move_setting: MoveSetting = .end, + show_version: bool = false, + help: bool = false, + quick_append_members: bool = false, + instance_to_delete: u32 = 1, + preserve_original_dates: bool = false, + use_full_paths_when_matching: bool = false, + thin_archives: bool = false, +}; + +pub const Contents = struct { + bytes: []align(8) u8, + length: u64, + mode: u64, + timestamp: u128, // file modified time + uid: u32, + gid: u32, + + // TODO: deallocation + + pub fn write(self: *const Contents, out_stream: anytype, stderr: anytype) !void { + try out_stream.writeAll(self.bytes); + _ = stderr; + } +}; + +// An internal represantion of files being archived +pub const ArchivedFile = struct { + name: []const u8, + contents: Contents, + + const Self = @This(); +}; + +pub const Symbol = struct { + name: []const u8, + file_index: u64, +}; + +// TODO: BSD symbol table interpretation is architecture dependent, +// is there a way we can interpret this? (will be needed for +// cross-compilation etc. could possibly take it as a spec?) +// Using harcoding this information here is a bit of a hacky +// workaround in the short term - even though it is part of +// the spec. +const IntType = i32; + +// TODO: This name is confusing because ranlib is also the name of the ranlib +// program - but also what this struct is traditionally called within archives. +// :/ +// type of ranlib used depends on the archive storage format +fn Ranlib(comptime storage: type) type { + return extern struct { + ran_strx: storage, // offset of symbol name in symbol table + ran_off: storage, // offset of file header in archive + }; +} + +const ErrorContext = enum { + accessing, + creating, + opening, + reading, + seeking, + stat, + writing, +}; + +pub fn printFileIoError(comptime context: ErrorContext, file_name: []const u8, err: IoError) HandledIoError { + const context_str = @tagName(context); + + switch (err) { + error.AccessDenied => logger.err("Error " ++ context_str ++ " '{s}', access denied.", .{file_name}), + error.FileNotFound => logger.err("Error " ++ context_str ++ " '{s}', file not found.", .{file_name}), + else => logger.err("Error " ++ context_str ++ " '{s}'.", .{file_name}), + } + if (test_errors_handled) return error.Handled; + return err; +} + +// The weird return type is so that we can distinguish between handled and unhandled IO errors, +// i.e. 
if test_errors_handled is set to true, and raw calls to io operations will return in a compile failure +pub fn handleFileIoError(comptime context: ErrorContext, file_name: []const u8, err_result: anytype) HandledIoError!@typeInfo(@TypeOf(err_result)).ErrorUnion.payload { + const unwrapped_result = err_result catch |err| { + return printFileIoError(context, file_name, err); + }; + return unwrapped_result; +} + +// These are the defaults llvm ar uses (excepting windows) +// https://github.com/llvm-mirror/llvm/blob/master/tools/llvm-ar/llvm-ar.cpp +pub fn getDefaultArchiveTypeFromHost() ArchiveType { + // if (build_options.mimmick_broken_cross_compiled_llvm_ar_behaviour) { + // return .gnu; + // } + + if (builtin.os.tag.isDarwin()) return .darwin; + return .gnu; +} + +pub fn create( + dir: fs.Dir, + file: fs.File, + name: []const u8, + output_archive_type: ArchiveType, + modifiers: Modifiers, + created: bool, +) (CreateError || HandledIoError)!Archive { + return Archive{ + .dir = dir, + .file = file, + .name = name, + .inferred_archive_type = .ambiguous, + .output_archive_type = output_archive_type, + .files = .{}, + .symbols = .{}, + .file_name_to_index = .{}, + .modifiers = modifiers, + .stat = try handleFileIoError(.stat, name, file.stat()), + .created = created, + }; +} + +const SymbolStringTableAndOffsets = struct { + unpadded_symbol_table_length: i32, + symbol_table: []u8, + symbol_offsets: []i32, + + pub fn deinit(self: *const SymbolStringTableAndOffsets, allocator: Allocator) void { + allocator.free(self.symbol_offsets); + allocator.free(self.symbol_table); + } +}; + +pub fn buildSymbolTable( + self: *Archive, + allocator: Allocator, +) !SymbolStringTableAndOffsets { + const tracy = trace(@src()); + defer tracy.end(); + var symbol_table_size: usize = 0; + const symbol_offsets = try allocator.alloc(i32, self.symbols.items.len); + errdefer allocator.free(symbol_offsets); + + for (self.symbols.items, 0..) |symbol, idx| { + symbol_offsets[idx] = @intCast(symbol_table_size); + symbol_table_size += symbol.name.len + 1; + } + + const unpadded_symbol_table_length = symbol_table_size; + + while (symbol_table_size % self.output_archive_type.getAlignment() != 0) { + symbol_table_size += 1; + } + + const symbol_table = try allocator.alloc(u8, symbol_table_size); + symbol_table_size = 0; + + for (self.symbols.items) |symbol| { + mem.copy(u8, symbol_table[symbol_table_size..(symbol.name.len + symbol_table_size)], symbol.name); + symbol_table[symbol_table_size + symbol.name.len] = 0; + symbol_table_size += symbol.name.len + 1; + } + + while (symbol_table_size % self.output_archive_type.getAlignment() != 0) { + symbol_table[symbol_table_size] = 0; + symbol_table_size += 1; + } + + const result: SymbolStringTableAndOffsets = .{ + .unpadded_symbol_table_length = @as(i32, @intCast(unpadded_symbol_table_length)), + .symbol_table = symbol_table, + .symbol_offsets = symbol_offsets, + }; + return result; +} + +fn calculatePadding(self: *Archive, file_pos: usize) usize { + var padding = file_pos % self.output_archive_type.getAlignment(); + padding = (self.output_archive_type.getAlignment() - padding) % self.output_archive_type.getAlignment(); + return padding; +} + +const TrackingBufferedWriter = tracking_buffered_writer.TrackingBufferedWriter(std.io.BufferedWriter(4096, std.fs.File.Writer)); + +// TODO: This needs to be integrated into the workflow +// used for parsing. (use same error handling workflow etc.) +/// Use same naming scheme for objects (as found elsewhere in the file). 
+pub fn finalize(self: *Archive, allocator: Allocator) (FinalizeError || HandledIoError || CriticalError)!void { + const tracy = trace(@src()); + defer tracy.end(); + if (self.output_archive_type == .ambiguous) { + // if output archive type is still ambiguous (none was inferred, and + // none was set) then we need to infer it from the host platform! + self.output_archive_type = getDefaultArchiveTypeFromHost(); + } + + // Overwrite all contents + try handleFileIoError(.seeking, self.name, self.file.seekTo(0)); + + // We wrap the buffered writer so that can we can track file position more easily + var buffered_writer = TrackingBufferedWriter{ .buffered_writer = std.io.bufferedWriter(self.file.writer()) }; + const writer = buffered_writer.writer(); + + try handleFileIoError(.writing, self.name, writer.writeAll(if (self.output_archive_type == .gnuthin) magic_thin else magic_string)); + + const header_names = try allocator.alloc([16]u8, self.files.items.len); + + const SortContext = struct { + files: std.ArrayListUnmanaged(ArchivedFile), + }; + const SortFn = struct { + fn sorter(context: *const SortContext, x: Symbol, y: Symbol) bool { + const x_file_name = context.files.items[x.file_index].name; + const y_file_name = context.files.items[y.file_index].name; + // we only sort symbol names internally within file, but maintain + // the order within which they are added. + if (x.file_index < y.file_index) { + return true; + } else if (x.file_index > y.file_index) { + return false; + } + const order = std.mem.order(u8, x_file_name, y_file_name); + if (order == .eq) { + return std.mem.lessThan(u8, x.name, y.name); + } + return order == .lt; + } + }; + + // Sort the symbols + const sort_symbol_table = switch (self.modifiers.sort_symbol_table) { + .ambiguous => self.output_archive_type.isDarwin(), + .set_true => true, + .set_false => false, + }; + + const sort_context: SortContext = .{ .files = self.files }; + if (sort_symbol_table) { + const tracy_scope = traceNamed(@src(), "Sort Symbol Table"); + defer tracy_scope.end(); + std.sort.block(Symbol, self.symbols.items, &sort_context, SortFn.sorter); + } + + // Calculate the offset of file independent of string table and symbol table itself. + // It is basically magic size + file size from position 0 + + const relative_file_offsets = try allocator.alloc(i32, self.files.items.len); + defer allocator.free(relative_file_offsets); + + { + var offset: u32 = 0; + for (self.files.items, 0..) |file, idx| { + relative_file_offsets[idx] = @as(i32, @intCast(offset)); + offset += @as(u32, @intCast(@sizeOf(Header) + file.contents.bytes.len)); + + // BSD also keeps the name in its data section + if (self.output_archive_type.isBsdLike()) { + offset += @as(u32, @intCast(file.name.len)); + + // Add padding + while (offset % self.output_archive_type.getAlignment() != 0) { + offset += 1; + } + } + } + } + + // Set the mtime of symbol table to now seconds in non-deterministic mode + const symtab_time: u64 = (if (self.modifiers.use_real_timestamps_and_ids) @as(u64, @intCast(std.time.milliTimestamp())) else 0) / 1000; + + switch (self.output_archive_type) { + .gnu, .gnuthin, .gnu64 => { + // GNU format: Create string table + var string_table = std.ArrayList(u8).init(allocator); + defer string_table.deinit(); + + // Generate the complete string table + for (self.files.items, 0..) 
|file, index| { + const is_the_name_allowed = (file.name.len < 16) and (self.output_archive_type != .gnuthin); + + // If the file is small enough to fit in header, then just write it there + // Otherwise, add it to string table and add a reference to its location + const name = if (is_the_name_allowed) try mem.concat(allocator, u8, &.{ file.name, "/" }) else try std.fmt.allocPrint(allocator, "/{}", .{blk: { + // Get the position of the file in string table + const pos = string_table.items.len; + + // Now add the file name to string table + try string_table.appendSlice(file.name); + try string_table.appendSlice("/\n"); + + break :blk pos; + }}); + defer allocator.free(name); + + // Edit the header + _ = std.fmt.bufPrint(&(header_names[index]), "{s: <16}", .{name}) catch |e| switch (e) { + // Should be unreachable as the buffer should already definetely be large enough... + error.NoSpaceLeft => unreachable, + }; + } + + // Write the symbol table itself + if (self.modifiers.build_symbol_table and self.symbols.items.len != 0) { + const tracy_scope = traceNamed(@src(), "Write Symbol Table"); + defer tracy_scope.end(); + const symbol_string_table_and_offsets = try self.buildSymbolTable(allocator); + defer symbol_string_table_and_offsets.deinit(allocator); + + const symbol_table = symbol_string_table_and_offsets.symbol_table; + + const format = self.output_archive_type; + const int_size: usize = if (format == .gnu64) @sizeOf(u64) else @sizeOf(u32); + + const symbol_table_size = + symbol_table.len + // The string of symbols + (self.symbols.items.len * int_size) + // Size of all symbol offsets + int_size; // Value denoting the length of symbol table + + const magic: []const u8 = if (format == .gnu64) "/SYM64/" else "/"; + + try handleFileIoError(.writing, self.name, writer.print(Header.format_string, .{ magic, symtab_time, 0, 0, 0, symbol_table_size })); + + { + const tracy_scope_inner = traceNamed(@src(), "Write Symbol Count"); + defer tracy_scope_inner.end(); + if (format == .gnu64) { + try handleFileIoError(.writing, self.name, writer.writeIntBig(u64, @as(u64, @intCast(self.symbols.items.len)))); + } else { + try handleFileIoError(.writing, self.name, writer.writeIntBig(u32, @as(u32, @intCast(self.symbols.items.len)))); + } + } + + // magic_string.len == magic_thin.len, so its not a problem + var offset_to_files = magic_string.len; + // Size of symbol table itself + offset_to_files += @sizeOf(Header) + symbol_table_size; + + // Add padding + while (offset_to_files % self.output_archive_type.getAlignment() != 0) { + offset_to_files += 1; + } + + // Size of string table + if (string_table.items.len != 0) { + offset_to_files += @sizeOf(Header) + string_table.items.len; + } + + // Add further padding + while (offset_to_files % self.output_archive_type.getAlignment() != 0) { + offset_to_files += 1; + } + + { + const tracy_scope_inner = traceNamed(@src(), "Write Symbol File Offsets"); + defer tracy_scope_inner.end(); + + for (self.symbols.items) |symbol| { + if (format == .gnu64) { + try handleFileIoError(.writing, self.name, writer.writeIntBig(i64, relative_file_offsets[symbol.file_index] + @as(i64, @intCast(offset_to_files)))); + } else { + try handleFileIoError(.writing, self.name, writer.writeIntBig(i32, relative_file_offsets[symbol.file_index] + @as(i32, @intCast(offset_to_files)))); + } + } + } + + try handleFileIoError(.writing, self.name, writer.writeAll(symbol_table)); + } + + // Write the string table itself + { + const tracy_scope = traceNamed(@src(), "Write String Table"); + defer 
tracy_scope.end(); + if (string_table.items.len != 0) { + while (string_table.items.len % self.output_archive_type.getAlignment() != 0) { + try string_table.append('\n'); + } + try handleFileIoError(.writing, self.name, writer.print("//{s}{: <10}`\n{s}", .{ " " ** 46, string_table.items.len, string_table.items })); + } + } + }, + .bsd, .darwin, .darwin64 => { + // BSD format: Write the symbol table + // In darwin if symbol table writing is enabled the expected behaviour + // is that we write an empty symbol table! + // However - there is one exception to this, which is that llvm ar + // does not generate the symbol table if we haven't just created + // the archive *and* we aren't running from a darwing host. + // WHAT ?! + const write_symbol_table = write_symbol_table: { + var result = self.modifiers.build_symbol_table; + if (!builtin.os.tag.isDarwin() and !self.created) { + result = result and self.symbols.items.len != 0; + } + break :write_symbol_table result; + }; + if (write_symbol_table) { + const tracy_scope = traceNamed(@src(), "Write Symbol Table"); + defer tracy_scope.end(); + const symbol_string_table_and_offsets = try self.buildSymbolTable(allocator); + defer symbol_string_table_and_offsets.deinit(allocator); + + const symbol_table = symbol_string_table_and_offsets.symbol_table; + + const format = self.output_archive_type; + const int_size: usize = if (format == .darwin64) @sizeOf(u64) else @sizeOf(u32); + + const num_ranlib_bytes = self.symbols.items.len * @sizeOf(Ranlib(IntType)); + + const symbol_table_size = + bsd_symdef_64_magic.len + // Length of name + int_size + // Int describing the size of ranlib + num_ranlib_bytes + // Size of ranlib structs + int_size + // Int describing size of symbol table's strings + symbol_table.len; // The lengths of strings themselves + + try handleFileIoError(.writing, self.name, writer.print(Header.format_string, .{ "#1/12", symtab_time, 0, 0, 0, symbol_table_size })); + + const endian = builtin.cpu.arch.endian(); + + if (format == .darwin64) { + try handleFileIoError(.writing, self.name, writer.writeAll(bsd_symdef_64_magic)); + try handleFileIoError(.writing, self.name, writer.writeInt(u64, @as(u64, @intCast(num_ranlib_bytes)), endian)); + } else { + try handleFileIoError(.writing, self.name, writer.writeAll(bsd_symdef_magic ++ "\x00\x00\x00")); + try handleFileIoError(.writing, self.name, writer.writeInt(u32, @as(u32, @intCast(num_ranlib_bytes)), endian)); + } + + const ranlibs = try allocator.alloc(Ranlib(IntType), self.symbols.items.len); + defer allocator.free(ranlibs); + + var offset_to_files: usize = magic_string.len + @sizeOf(Header) + symbol_table_size; + + // Add padding + while (offset_to_files % self.output_archive_type.getAlignment() != 0) { + offset_to_files += 1; + } + + for (self.symbols.items, 0..) 
|symbol, idx| { + ranlibs[idx].ran_strx = symbol_string_table_and_offsets.symbol_offsets[idx]; + ranlibs[idx].ran_off = relative_file_offsets[symbol.file_index] + @as(i32, @intCast(offset_to_files)); + } + + try handleFileIoError(.writing, self.name, writer.writeAll(mem.sliceAsBytes(ranlibs))); + + if (format == .darwin64) { + try handleFileIoError(.writing, self.name, writer.writeInt(u64, @as(u64, @intCast(symbol_string_table_and_offsets.unpadded_symbol_table_length)), endian)); + } else { + try handleFileIoError(.writing, self.name, writer.writeInt(u32, @as(u32, @intCast(symbol_string_table_and_offsets.unpadded_symbol_table_length)), endian)); + } + + try handleFileIoError(.writing, self.name, writer.writeAll(symbol_table)); + } + }, + // This needs to be able to tell whatsupp. + else => unreachable, + } + + // Write the files + + const tracy_scope = traceNamed(@src(), "Write Files To Archive"); + defer tracy_scope.end(); + for (self.files.items, 0..) |file, index| { + var header_buffer: [@sizeOf(Header)]u8 = undefined; + + const file_length = file_length_calculation: { + if (!self.output_archive_type.isBsdLike()) { + break :file_length_calculation file.contents.length; + } else { + const padding = self.calculatePadding(buffered_writer.file_pos + header_buffer.len + file.name.len); + + // BSD format: Just write the length of the name in header + _ = std.fmt.bufPrint(&(header_names[index]), "#1/{: <13}", .{file.name.len + padding}) catch |e| switch (e) { + // Should be unreachable as the buffer should already definetely be large enough... + error.NoSpaceLeft => unreachable, + }; + if (self.output_archive_type.isDarwin()) { + var file_padding = file.contents.length % self.output_archive_type.getFileAlignment(); + file_padding = (self.output_archive_type.getFileAlignment() - file_padding) % self.output_archive_type.getFileAlignment(); + break :file_length_calculation file.contents.length + file.name.len + padding + file_padding; + } else { + break :file_length_calculation file.contents.length + file.name.len + padding; + } + } + }; + + _ = std.fmt.bufPrint( + &header_buffer, + Header.format_string, + .{ &header_names[index], file.contents.timestamp, file.contents.uid, file.contents.gid, file.contents.mode, file_length }, + ) catch |e| switch (e) { + // Should be unreachable as the buffer should already definetely be large enough... 
+ error.NoSpaceLeft => unreachable, + }; + + // TODO: handle errors + _ = try handleFileIoError(.writing, self.name, writer.write(&header_buffer)); + + // Write the name of the file in the data section + if (self.output_archive_type.isBsdLike()) { + try handleFileIoError(.writing, self.name, writer.writeAll(file.name)); + try handleFileIoError(.writing, self.name, writer.writeByteNTimes(0, self.calculatePadding(buffered_writer.file_pos))); + } + + if (self.output_archive_type != .gnuthin) { + try handleFileIoError(.writing, self.name, file.contents.write(writer, null)); + try handleFileIoError(.writing, self.name, writer.writeByteNTimes('\n', self.calculatePadding(buffered_writer.file_pos))); + } + } + + try handleFileIoError(.writing, self.name, buffered_writer.flush()); + + // Truncate the file size + try handleFileIoError(.writing, self.name, self.file.setEndPos(buffered_writer.file_pos)); +} + +pub fn deleteFiles(self: *Archive, file_names: []const []const u8) (DeleteError || HandledIoError || CriticalError)!void { + const tracy = trace(@src()); + defer tracy.end(); + // For the list of given file names, find the entry in self.files + // and remove it from self.files. + for (file_names) |file_name| { + for (self.files.items, 0..) |file, index| { + if (std.mem.eql(u8, file.name, file_name)) { + // Remove all the symbols associated with the file + // when file is deleted + var idx: usize = 0; + while (idx < self.symbols.items.len) { + const sym = self.symbols.items[idx]; + if (sym.file_index == index) { + _ = self.symbols.orderedRemove(idx); + continue; + } + idx += 1; + } + + // Reset the index for all future symbols + for (self.symbols.items) |*sym| { + if (sym.file_index > index) { + sym.file_index -= 1; + } + } + + _ = self.files.orderedRemove(index); + break; + } + } + } +} + +pub fn moveFiles(self: *Archive, file_names: []const []const u8) !void { + switch (self.modifiers.move_setting) { + .end => { + // TODO: find files, move them, deal with all boundary cases! + _ = file_names; + }, + .before, .after => { + // TODO: bounds check! + // const relpos = file_names[0]; + // const other_files = file_names[1..file_names.len]; + }, + } + logger.err("Move operation still needs to be implemented!\n", .{}); + return error.TODO; +} + +pub fn extract(self: *Archive, file_names: []const []const u8) !void { + if (self.inferred_archive_type == .gnuthin) { + // TODO: better error + return error.ExtractingFromThin; + } + + for (self.files.items) |archived_file| { + for (file_names) |file_name| { + if (std.mem.eql(u8, archived_file.name, file_name)) { + const file = try self.dir.createFile(archived_file.name, .{}); + defer file.close(); + + try file.writeAll(archived_file.contents.bytes); + break; + } + } + } +} + +pub fn addToSymbolTable(self: *Archive, allocator: Allocator, archived_file: *const ArchivedFile, file_index: usize, file: fs.File, file_offset: u32) (CriticalError || HandledIoError)!void { + // TODO: make this read directly from the file contents buffer! + + // Get the file magic + try handleFileIoError(.seeking, archived_file.name, file.seekTo(file_offset)); + + var magic: [4]u8 = undefined; + _ = try handleFileIoError(.reading, archived_file.name, file.reader().read(&magic)); + + try handleFileIoError(.seeking, archived_file.name, file.seekTo(file_offset)); + + blk: { + // TODO: Load object from memory (upstream zld) + // TODO(TRC):Now this should assert that the magic number is what we expect it to be + // based on the parsed archive type! 
Not inferring what we should do based on it. + // switch(self.output_archive_type) + // { + + // } + if (mem.eql(u8, magic[0..elf.MAGIC.len], elf.MAGIC)) { + if (self.output_archive_type == .ambiguous) { + // TODO: double check that this is the correct inference + self.output_archive_type = .gnu; + } + var elf_file = Elf.Object{ .name = archived_file.name, .data = archived_file.contents.bytes }; + defer elf_file.deinit(allocator); + + // TODO: Do not use builtin.target like this, be more flexible! + elf_file.parse(allocator, builtin.cpu.arch) catch |err| switch (err) { + error.NotObject => break :blk, + error.OutOfMemory => return error.OutOfMemory, + error.TODOBigEndianSupport, error.TODOElf32bitSupport, error.EndOfStream => return error.TODO, + }; + + for (elf_file.symtab.items) |sym| { + switch (sym.st_info >> 4) { + elf.STB_WEAK, elf.STB_GLOBAL => { + if (!(elf.SHN_LORESERVE <= sym.st_shndx and sym.st_shndx < elf.SHN_HIRESERVE and sym.st_shndx == elf.SHN_UNDEF)) { + const symbol = Symbol{ + .name = try allocator.dupe(u8, elf_file.getString(sym.st_name)), + .file_index = file_index, + }; + + try self.symbols.append(allocator, symbol); + } + }, + else => {}, + } + } + } else if (mem.eql(u8, magic[0..Bitcode.magic.len], Bitcode.magic)) { + logger.warn("Zig ar does not currently support bitcode files, so no symbol table will be constructed for {s}.", .{archived_file.name}); + break :blk; + + // var bitcode_file = Bitcode{ .file = file, .name = archived_file.name }; + // defer bitcode_file.deinit(allocator); + + // // TODO: Do not use builtin.target like this, be more flexible! + // bitcode_file.parse(allocator, builtin.target) catch |err| switch (err) { + // error.NotObject => break :blk, + // else => |e| return e, + //}; + } else { + // TODO(TRC):Now this should assert that the magic number is what we expect it to be + // based on the parsed archive type! Not inferring what we should do based on it. + // TODO: Should be based on target cpu arch! + const magic_num = mem.readInt(u32, magic[0..], builtin.cpu.arch.endian()); + + if (magic_num == macho.MH_MAGIC or magic_num == macho.MH_MAGIC_64) { + if (self.output_archive_type == .ambiguous) { + self.output_archive_type = .darwin; + } + const mtime: u64 = mtime: { + const stat = file.stat() catch break :mtime 0; + break :mtime @as(u64, @intCast(@divFloor(stat.mtime, 1_000_000_000))); + }; + + var macho_file = MachO.Object{ .name = archived_file.name, .mtime = mtime, .contents = archived_file.contents.bytes }; + defer macho_file.deinit(allocator); + + // TODO: Should be based on target cpu arch! + macho_file.parse(allocator, builtin.cpu.arch) catch |err| switch (err) { + error.NotObject => break :blk, + error.OutOfMemory => return error.OutOfMemory, + error.UnsupportedCpuArchitecture, error.EndOfStream => return error.TODO, + }; + + if (macho_file.in_symtab) |in_symtab| { + for (in_symtab, 0..) |_, sym_index| { + const sym = macho_file.getSourceSymbol(@as(u32, @intCast(sym_index))); + if (sym != null and sym.?.ext() and sym.?.sect()) { + const symbol = Symbol{ + .name = try allocator.dupe(u8, macho_file.getSymbolName(@as(u32, @intCast(sym_index)))), + .file_index = file_index, + }; + + try self.symbols.append(allocator, symbol); + } + } + } + } else if (false) { + // TODO: Figure out the condition under which a file is a coff + // file. This was originally just an else clause - but a file + // might not contain any symbols! 
+ var coff_file = Coff.Object{ .file = file, .name = archived_file.name }; + defer coff_file.deinit(allocator); + + coff_file.parse(allocator, builtin.target) catch |err| return err; + + for (coff_file.symtab.items) |sym| { + if (sym.storage_class == Coff.Object.IMAGE_SYM_CLASS_EXTERNAL) { + const symbol = Symbol{ + .name = try allocator.dupe(u8, sym.getName(&coff_file)), + .file_index = file_index, + }; + try self.symbols.append(allocator, symbol); + } + } + } + } + } +} + +pub fn insertFiles(self: *Archive, allocator: Allocator, file_names: []const []const u8) (InsertError || HandledIoError || CriticalError)!void { + const tracy = trace(@src()); + defer tracy.end(); + + // TODO: distribute this across n jobs in different chunks? + for (file_names) |file_name| { + // Open the file and read all of its contents + const file = try handleFileIoError(.opening, file_name, self.dir.openFile(file_name, .{ .mode = .read_only })); + defer file.close(); + + // We only need to do this because file stats don't include + // guid and uid - maybe the right solution is to integrate that into + // the std so we can call file.stat() on all platforms. + var gid: u32 = 0; + var uid: u32 = 0; + var mtime: i128 = 0; + var size: u64 = undefined; + var mode: u64 = undefined; + + // FIXME: Currently windows doesnt support the Stat struct + if (builtin.os.tag == .windows) { + const file_stats = try handleFileIoError(.stat, file_name, file.stat()); + // Convert timestamp from ns to s + mtime = file_stats.mtime; + size = file_stats.size; + mode = file_stats.mode; + } else { + const file_stats = try handleFileIoError(.stat, file_name, std.os.fstat(file.handle)); + + gid = file_stats.gid; + uid = file_stats.uid; + const mtime_full = file_stats.mtime(); + mtime = mtime_full.tv_sec * std.time.ns_per_s + mtime_full.tv_nsec; + size = @as(u64, @intCast(file_stats.size)); + mode = file_stats.mode; + } + + if (self.modifiers.update_only) { + // TODO: Write a test that checks for this functionality still working! + // TODO: Is this even correct? Shouldn't it be comparing to mtime in archive already? 
+ if (self.stat.mtime >= mtime and !self.created) { + continue; + } + } + + if (!self.modifiers.use_real_timestamps_and_ids) { + gid = 0; + uid = 0; + mtime = 0; + // Even though it's not documented - in deterministic mode permissions are always set to: + // https://github.com/llvm-mirror/llvm/blob/2c4ca6832fa6b306ee6a7010bfb80a3f2596f824/include/llvm/Object/ArchiveWriter.h#L27 + // https://github.com/llvm-mirror/llvm/blob/2c4ca6832fa6b306ee6a7010bfb80a3f2596f824/lib/Object/ArchiveWriter.cpp#L105 + mode = 644; + } + + const timestamp = @as(u128, @intCast(@divFloor(mtime, std.time.ns_per_s))); + + // Extract critical error from error set - so IO errors can be handled seperately + const bytes_or_io_error = file.readToEndAllocOptions(allocator, std.math.maxInt(usize), size, @alignOf(u64), null) catch |e| switch (e) { + error.OutOfMemory => return error.OutOfMemory, + else => @as(IoError, @errorCast(e)), + }; + var archived_file = ArchivedFile{ + .name = try allocator.dupe(u8, fs.path.basename(file_name)), + .contents = Contents{ + .bytes = try handleFileIoError(.reading, file_name, bytes_or_io_error), + .length = size, + .mode = mode, + .timestamp = timestamp, + .gid = gid, + .uid = uid, + }, + }; + + const file_index = if (self.file_name_to_index.get(file_name)) |file_id| file_id else self.files.items.len; + + // Read symbols + if (self.modifiers.build_symbol_table) { + try self.addToSymbolTable(allocator, &archived_file, file_index, file, 0); + } + + // A trie-based datastructure would be better for this! + const getOrPutResult = try self.file_name_to_index.getOrPut(allocator, archived_file.name); + if (getOrPutResult.found_existing) { + const existing_index = getOrPutResult.value_ptr.*; + self.files.items[existing_index] = archived_file; + } else { + getOrPutResult.value_ptr.* = self.files.items.len; + try self.files.append(allocator, archived_file); + } + } +} + +pub fn parse(self: *Archive, allocator: Allocator) (ParseError || HandledIoError || CriticalError)!void { + const tracy = trace(@src()); + defer tracy.end(); + const reader = self.file.reader(); + { + // Is the magic header found at the start of the archive? + var magic: [magic_string.len]u8 = undefined; + const bytes_read = try handleFileIoError(.reading, self.name, reader.read(&magic)); + + if (bytes_read == 0) { + // Archive is empty and that is ok! + return; + } + + if (bytes_read < magic_string.len) { + return ParseError.NotArchive; + } + + const is_thin_archive = mem.eql(u8, &magic, magic_thin); + + if (is_thin_archive) + self.inferred_archive_type = .gnuthin; + + if (!(mem.eql(u8, &magic, magic_string) or is_thin_archive)) { + return ParseError.NotArchive; + } + } + + var gnu_symbol_table_contents: []u8 = undefined; + var string_table_contents: []u8 = undefined; + var has_gnu_symbol_table = false; + { + // https://www.freebsd.org/cgi/man.cgi?query=ar&sektion=5 + // Process string/symbol tables and/or try to infer archive type! 
+ var starting_seek_pos = magic_string.len; + while (true) { + var first_line_buffer: [gnu_first_line_buffer_length]u8 = undefined; + + const has_line_to_process = result: { + const chars_read = try handleFileIoError(.reading, self.name, reader.read(&first_line_buffer)); + + if (chars_read < first_line_buffer.len) { + break :result false; + } + + break :result true; + }; + + if (!has_line_to_process) { + try handleFileIoError(.seeking, self.name, reader.context.seekTo(starting_seek_pos)); + break; + } + + if (mem.eql(u8, first_line_buffer[0..2], "//"[0..2])) { + switch (self.inferred_archive_type) { + .ambiguous => self.inferred_archive_type = .gnu, + .gnu, .gnuthin, .gnu64 => {}, + else => { + return ParseError.MalformedArchive; + }, + } + + const string_table_num_bytes_string = first_line_buffer[48..58]; + const string_table_num_bytes = try fmt.parseInt(u32, mem.trim(u8, string_table_num_bytes_string, " "), 10); + + string_table_contents = try allocator.alloc(u8, string_table_num_bytes); + + try handleFileIoError(.reading, self.name, reader.readNoEof(string_table_contents)); + break; + } else if (!has_gnu_symbol_table and first_line_buffer[0] == '/') { + has_gnu_symbol_table = true; + switch (self.inferred_archive_type) { + .ambiguous => self.inferred_archive_type = .gnu, + .gnu, .gnuthin, .gnu64 => {}, + else => { + return ParseError.MalformedArchive; + }, + } + + const symbol_table_num_bytes_string = first_line_buffer[48..58]; + const symbol_table_num_bytes = try fmt.parseInt(u32, mem.trim(u8, symbol_table_num_bytes_string, " "), 10); + + const num_symbols = try handleFileIoError(.reading, self.name, reader.readInt(u32, .Big)); + + var num_bytes_remaining = symbol_table_num_bytes - @sizeOf(u32); + + const number_array = try allocator.alloc(u32, num_symbols); + for (number_array, 0..) |_, number_index| { + number_array[number_index] = try handleFileIoError(.reading, self.name, reader.readInt(u32, .Big)); + } + defer allocator.free(number_array); + + num_bytes_remaining = num_bytes_remaining - (@sizeOf(u32) * num_symbols); + + gnu_symbol_table_contents = try allocator.alloc(u8, num_bytes_remaining); + + const contents_read = try handleFileIoError(.reading, self.name, reader.read(gnu_symbol_table_contents)); + if (contents_read < gnu_symbol_table_contents.len) { + return ParseError.MalformedArchive; + } + + var current_symbol_string = gnu_symbol_table_contents; + var current_byte: u32 = 0; + while (current_byte < gnu_symbol_table_contents.len) { + var symbol_length: u32 = 0; + var skip_length: u32 = 0; + + var found_zero = false; + for (current_symbol_string) |byte| { + if (found_zero and byte != 0) { + break; + } + + current_byte = current_byte + 1; + + if (byte == 0) { + found_zero = true; + } + + skip_length = skip_length + 1; + + if (!found_zero) { + symbol_length = symbol_length + 1; + } + } + + if (!self.modifiers.build_symbol_table) { + const symbol = Symbol{ + .name = current_symbol_string[0..symbol_length], + // Note - we don't set the final file-index here, + // we recalculate and override that later in parsing + // when we know what they are! 
+ .file_index = number_array[self.symbols.items.len], + }; + + try self.symbols.append(allocator, symbol); + } + current_symbol_string = current_symbol_string[skip_length..]; + } + + starting_seek_pos = starting_seek_pos + first_line_buffer.len + symbol_table_num_bytes; + } else { + try handleFileIoError(.seeking, self.name, reader.context.seekTo(starting_seek_pos)); + break; + } + } + } + + var is_first = true; + + var file_offset_to_index: std.AutoArrayHashMapUnmanaged(u64, u64) = .{}; + defer file_offset_to_index.clearAndFree(allocator); + + while (true) { + const file_offset = file_offset_result: { + var current_file_offset = try handleFileIoError(.accessing, self.name, reader.context.getPos()); + // Archived files must start on even byte boundaries! + // https://www.unix.com/man-page/opensolaris/3head/ar.h/ + if (@mod(current_file_offset, 2) == 1) { + try handleFileIoError(.accessing, self.name, reader.skipBytes(1, .{})); + current_file_offset = current_file_offset + 1; + } + break :file_offset_result current_file_offset; + }; + + const archive_header = reader.readStruct(Header) catch |err| switch (err) { + error.EndOfStream => break, + else => { + return printFileIoError(.reading, self.name, err); + }, + }; + + // the lifetime of the archive headers will matched that of the parsed files (for now) + // so we can take a reference to the strings stored there directly! + var trimmed_archive_name = mem.trim(u8, &archive_header.ar_name, " "); + + // Check against gnu naming properties + const ends_with_gnu_slash = (trimmed_archive_name[trimmed_archive_name.len - 1] == '/'); + var gnu_offset_value: u32 = 0; + const starts_with_gnu_offset = trimmed_archive_name[0] == '/'; + if (starts_with_gnu_offset) { + gnu_offset_value = try fmt.parseInt(u32, trimmed_archive_name[1..trimmed_archive_name.len], 10); + } + + const must_be_gnu = ends_with_gnu_slash or starts_with_gnu_offset or has_gnu_symbol_table; + + // TODO: if modifiers.use_real_timestamps_and_ids is disabled, do we ignore this from existing archive? + // Check against llvm ar + const timestamp = try fmt.parseInt(u128, mem.trim(u8, &archive_header.ar_date, " "), 10); + const uid = try fmt.parseInt(u32, mem.trim(u8, &archive_header.ar_uid, " "), 10); + const gid = try fmt.parseInt(u32, mem.trim(u8, &archive_header.ar_gid, " "), 10); + + // Check against bsd naming properties + const starts_with_bsd_name_length = (trimmed_archive_name.len >= 2) and mem.eql(u8, trimmed_archive_name[0..2], bsd_name_length_signifier[0..2]); + const could_be_bsd = starts_with_bsd_name_length; + + // TODO: Have a proper mechanism for erroring on the wrong types of archive. + switch (self.inferred_archive_type) { + .ambiguous => { + if (must_be_gnu) { + self.inferred_archive_type = .gnu; + } else if (could_be_bsd) { + self.inferred_archive_type = .bsd; + } else { + return error.TODO; + } + }, + .gnu, .gnuthin, .gnu64 => { + if (!must_be_gnu) { + return ParseError.MalformedArchive; + } + }, + .bsd, .darwin, .darwin64 => { + if (must_be_gnu) { + return ParseError.MalformedArchive; + } + }, + else => { + if (must_be_gnu) { + return error.TODO; + } + + return error.TODO; + }, + } + + if (ends_with_gnu_slash) { + // slice-off the slash + trimmed_archive_name = trimmed_archive_name[0 .. 
trimmed_archive_name.len - 1]; + } + + if (starts_with_gnu_offset) { + const name_offset_in_string_table = try fmt.parseInt(u32, mem.trim(u8, trimmed_archive_name[1..trimmed_archive_name.len], " "), 10); + + // Navigate to the start of the string in the string table + const string_start = string_table_contents[name_offset_in_string_table..string_table_contents.len]; + + // Find the end of the string (which is always a newline) + const end_string_index = mem.indexOf(u8, string_start, "\n"); + if (end_string_index == null) { + return ParseError.MalformedArchive; + } + const string_full = string_start[0..end_string_index.?]; + + // String must have a forward slash before the newline, so check that + // is there and remove it as well! + if (string_full[string_full.len - 1] != '/') { + return ParseError.MalformedArchive; + } + + // Referencing the slice directly is fine as same bumb allocator is + // used as for the rest of the datastructure! + trimmed_archive_name = string_full[0 .. string_full.len - 1]; + } + + var seek_forward_amount = try fmt.parseInt(u32, mem.trim(u8, &archive_header.ar_size, " "), 10); + + // Make sure that these allocations get properly disposed of later! + if (starts_with_bsd_name_length) { + trimmed_archive_name = trimmed_archive_name[bsd_name_length_signifier.len..trimmed_archive_name.len]; + const archive_name_length = try fmt.parseInt(u32, trimmed_archive_name, 10); + + // TODO: go through int casts & don't assume that they will just work, add defensive error checking + // for them. (an internal checked cast or similar). + + if (is_first) { + // TODO: make sure this does a check on self.inferred_archive_type! + + // This could be the symbol table! So parse that here! + const current_seek_pos = try handleFileIoError(.accessing, self.name, reader.context.getPos()); + var symbol_magic_check_buffer: [bsd_symdef_longest_magic]u8 = undefined; + + // TODO: handle not reading enough characters! 
+ const chars_read = try handleFileIoError(.reading, self.name, reader.read(&symbol_magic_check_buffer)); + + var sorted = false; + + const magic_match = magic_match_result: { + if (chars_read >= bsd_symdef_magic.len and mem.eql(u8, bsd_symdef_magic, symbol_magic_check_buffer[0..bsd_symdef_magic.len])) { + var magic_len = bsd_symdef_magic.len; + + if (chars_read >= bsd_symdef_64_magic.len and mem.eql(u8, bsd_symdef_64_magic[bsd_symdef_magic.len..], symbol_magic_check_buffer[bsd_symdef_magic.len..])) { + magic_len = bsd_symdef_64_magic.len; + } else if (chars_read >= bsd_symdef_sorted_magic.len and mem.eql(u8, bsd_symdef_sorted_magic[bsd_symdef_magic.len..], symbol_magic_check_buffer[bsd_symdef_magic.len..])) { + magic_len = bsd_symdef_sorted_magic.len; + sorted = true; + } + + if (chars_read - magic_len > 0) { + try handleFileIoError(.seeking, self.name, reader.context.seekBy(@as(i64, @intCast(magic_len)) - @as(i64, @intCast(chars_read)))); + } + + seek_forward_amount = seek_forward_amount - @as(u32, @intCast(magic_len)); + + break :magic_match_result true; + } + + break :magic_match_result false; + }; + + if (magic_match) { + // TODO: make this target arch endianess + const endianess = .Little; + + { + const current_pos = try handleFileIoError(.accessing, self.name, reader.context.getPos()); + const remainder = @as(u32, @intCast((self.inferred_archive_type.getAlignment() - current_pos % self.inferred_archive_type.getAlignment()) % self.inferred_archive_type.getAlignment())); + seek_forward_amount = seek_forward_amount - remainder; + try handleFileIoError(.accessing, self.name, reader.context.seekBy(remainder)); + } + + // TODO: error if negative (because spec defines this as a long, so should never be that large?) + const num_ranlib_bytes = try handleFileIoError(.reading, self.name, reader.readInt(IntType, endianess)); + seek_forward_amount = seek_forward_amount - @as(u32, @sizeOf(IntType)); + + // TODO: error if this doesn't divide properly? + // const num_symbols = @divExact(num_ranlib_bytes, @sizeOf(Ranlib(IntType))); + + var ranlib_bytes = try allocator.alloc(u8, @as(u32, @intCast(num_ranlib_bytes))); + + // TODO: error handling + _ = try handleFileIoError(.reading, self.name, reader.read(ranlib_bytes)); + seek_forward_amount = seek_forward_amount - @as(u32, @intCast(num_ranlib_bytes)); + + var ranlibs = mem.bytesAsSlice(Ranlib(IntType), ranlib_bytes); + + const symbol_strings_length = try handleFileIoError(.reading, self.name, reader.readInt(u32, endianess)); + // TODO: We don't really need this information, but maybe it could come in handy + // later? + _ = symbol_strings_length; + + seek_forward_amount = seek_forward_amount - @as(u32, @sizeOf(IntType)); + + const symbol_string_bytes = try allocator.alloc(u8, seek_forward_amount); + seek_forward_amount = 0; + _ = try handleFileIoError(.reading, self.name, reader.read(symbol_string_bytes)); + + if (!self.modifiers.build_symbol_table) { + for (ranlibs) |ranlib| { + const symbol_string = mem.sliceTo(symbol_string_bytes[@as(u64, @intCast(ranlib.ran_strx))..], 0); + + const symbol = Symbol{ + .name = symbol_string, + // Note - we don't set the final file-index here, + // we recalculate and override that later in parsing + // when we know what they are! + .file_index = @as(u64, @intCast(ranlib.ran_off)), + }; + + try self.symbols.append(allocator, symbol); + } + + // We have a symbol table! 
+ } + try handleFileIoError(.seeking, self.name, reader.context.seekBy(seek_forward_amount)); + continue; + } + + try handleFileIoError(.seeking, self.name, reader.context.seekTo(current_seek_pos)); + } + + const archive_name_buffer = try allocator.alloc(u8, archive_name_length); + + try handleFileIoError(.reading, self.name, reader.readNoEof(archive_name_buffer)); + + seek_forward_amount = seek_forward_amount - archive_name_length; + + // strip null characters from name - TODO find documentation on this + // could not find documentation on this being needed, but some archivers + // seems to insert these (for alignment reasons?) + trimmed_archive_name = mem.trim(u8, archive_name_buffer, "\x00"); + } else { + const archive_name_buffer = try allocator.alloc(u8, trimmed_archive_name.len); + mem.copy(u8, archive_name_buffer, trimmed_archive_name); + trimmed_archive_name = archive_name_buffer; + } + + const parsed_file = ArchivedFile{ + .name = trimmed_archive_name, + .contents = Contents{ + .bytes = try allocator.alignedAlloc(u8, @alignOf(u64), seek_forward_amount), + .length = seek_forward_amount, + .mode = try fmt.parseInt(u32, mem.trim(u8, &archive_header.ar_mode, " "), 10), + .timestamp = timestamp, + .uid = uid, + .gid = gid, + }, + }; + + const offset_hack = try handleFileIoError(.seeking, self.name, reader.context.getPos()); + + if (self.inferred_archive_type == .gnuthin) { + var thin_file = try handleFileIoError(.opening, trimmed_archive_name, self.dir.openFile(trimmed_archive_name, .{})); + defer thin_file.close(); + + try handleFileIoError(.reading, trimmed_archive_name, thin_file.reader().readNoEof(parsed_file.contents.bytes)); + } else { + try handleFileIoError(.reading, self.name, reader.readNoEof(parsed_file.contents.bytes)); + } + + if (self.modifiers.build_symbol_table) { + const post_offset_hack = try handleFileIoError(.seeking, self.name, reader.context.getPos()); + // TODO: Actually handle these errors! + self.addToSymbolTable(allocator, &parsed_file, self.files.items.len, reader.context, @as(u32, @intCast(offset_hack))) catch { + return error.TODO; + }; + + try handleFileIoError(.seeking, self.name, reader.context.seekTo(post_offset_hack)); + } + + try self.file_name_to_index.put(allocator, trimmed_archive_name, self.files.items.len); + try file_offset_to_index.put(allocator, file_offset, self.files.items.len); + try self.files.append(allocator, parsed_file); + + is_first = false; + } + + if (is_first) { + const current_position = try handleFileIoError(.accessing, self.name, reader.context.getPos()); + if (current_position > magic_string.len) { + return ParseError.MalformedArchive; + } + } + + if (!self.modifiers.build_symbol_table) { + for (self.symbols.items) |*symbol| { + if (file_offset_to_index.get(symbol.file_index)) |file_index| { + symbol.file_index = file_index; + } else { + symbol.file_index = invalid_file_index; + } + } + } + + // Set output archive type based on inference or current os if necessary + if (self.output_archive_type == .ambiguous) { + // Set output archive type of one we might just have parsed... 
+ self.output_archive_type = self.inferred_archive_type; + } +} + +pub const MRIParser = struct { + script: []const u8, + archive: ?Archive, + file_name: ?[]const u8, + + const CommandType = enum { + open, + create, + createthin, + addmod, + list, + delete, + extract, + save, + clear, + end, + }; + + const Self = @This(); + + pub fn init(allocator: Allocator, file: fs.File) !Self { + const self = Self{ + .script = try file.readToEndAlloc(allocator, std.math.maxInt(usize)), + .archive = null, + .file_name = null, + }; + + return self; + } + + // Returns the next token + fn getToken(iter: *mem.SplitIterator(u8)) ?[]const u8 { + while (iter.next()) |tok| { + if (mem.startsWith(u8, tok, "*")) break; + if (mem.startsWith(u8, tok, ";")) break; + return tok; + } + return null; + } + + // Returns a slice of tokens + fn getTokenLine(allocator: Allocator, iter: *mem.SplitIterator(u8)) ![]const []const u8 { + var list = std.ArrayList([]const u8).init(allocator); + errdefer list.deinit(); + + while (getToken(iter)) |tok| { + try list.append(tok); + } + return list.toOwnedSlice(); + } + + pub fn execute(self: *Self, allocator: Allocator, stdout: fs.File.Writer, stderr: fs.File.Writer) !void { + // Split the file into lines + var parser = mem.split(u8, self.script, "\n"); + + while (parser.next()) |line| { + // Split the line by spaces + var line_parser = mem.split(u8, line, " "); + + if (getToken(&line_parser)) |tok| { + var command_name = try allocator.dupe(u8, tok); + defer allocator.free(command_name); + + _ = std.ascii.lowerString(command_name, tok); + + if (std.meta.stringToEnum(CommandType, command_name)) |command| { + if (self.archive) |archive| { + switch (command) { + .addmod => { + const file_names = try getTokenLine(allocator, &line_parser); + defer allocator.free(file_names); + + try self.archive.?.insertFiles(allocator, file_names); + }, + .list => { + // TODO: verbose output + for (archive.files.items) |parsed_file| { + try stdout.print("{s}\n", .{parsed_file.name}); + } + }, + .delete => { + const file_names = try getTokenLine(allocator, &line_parser); + try self.archive.?.deleteFiles(file_names); + }, + .extract => { + const file_names = try getTokenLine(allocator, &line_parser); + try self.archive.?.extract(file_names); + }, + .save => { + try self.archive.?.finalize(allocator); + }, + .clear => { + // This is a bit of a hack but its reliable. + // Instead of clearing out unsaved changes, we re-open the current file, which overwrites the changes. 
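+                            // (Any changes made since the last `save` are therefore lost.)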
+ const file = try handleFileIoError(.opening, self.file_name, self.dir.openFile(self.file_name.?, .{ .mode = .read_write })); + self.archive = Archive.create(file, self.file_name.?); + + try self.archive.?.parse(allocator, stderr); + }, + .end => return, + else => { + try stderr.print( + "Archive `{s}` is currently open.\nThe command `{s}` can only be executed when no current archive is active.\n", + .{ self.file_name.?, command_name }, + ); + return error.ArchiveAlreadyOpen; + }, + } + } else { + switch (command) { + .open => { + const file_name = getToken(&line_parser).?; + + const file = try handleFileIoError(.opening, file_name, self.dir.openFile(file_name, .{ .mode = .read_write })); + self.archive = Archive.create(file, file_name); + self.file_name = file_name; + + try self.archive.?.parse(allocator, stderr); + }, + .create, .createthin => { + // TODO: Thin archives creation + const file_name = getToken(&line_parser).?; + + const file = try self.dir.createFile(file_name, .{ .read = true }); + self.archive = Archive.create(file, file_name); + self.file_name = file_name; + + try self.archive.?.parse(allocator, stderr); + }, + .end => return, + else => { + try stderr.print("No currently active archive found.\nThe command `{s}` can only be executed when there is an opened archive.\n", .{command_name}); + return error.NoCurrentArchive; + }, + } + } + } + } + } + } +}; diff --git a/src/archive/archive/zld/Coff.zig b/src/archive/archive/zld/Coff.zig new file mode 100644 index 000000000000..dae359e1e4f0 --- /dev/null +++ b/src/archive/archive/zld/Coff.zig @@ -0,0 +1,126 @@ +const Coff = @This(); + +const std = @import("std"); +const builtin = @import("builtin"); +const assert = std.debug.assert; +const coff = std.coff; +const fs = std.fs; +const log = std.log.scoped(.coff); +const mem = std.mem; + +const Allocator = mem.Allocator; +pub const Object = @import("Coff/Object.zig"); +pub const Options = @import("Coff/Options.zig"); +const ThreadPool = @import("ThreadPool.zig"); +const Zld = @import("Zld.zig"); + +pub const base_tag = Zld.Tag.coff; + +base: Zld, +options: Options, + +objects: std.ArrayListUnmanaged(Object) = .{}, + +pub fn openPath(allocator: Allocator, options: Options, thread_pool: *ThreadPool) !*Coff { + const file = try options.emit.directory.createFile(options.emit.sub_path, .{ + .truncate = true, + .read = true, + .mode = if (builtin.os.tag == .windows) 0 else 0o777, + }); + errdefer file.close(); + + const self = try createEmpty(allocator, options, thread_pool); + errdefer allocator.destroy(self); + + self.base.file = file; + + return self; +} + +fn createEmpty(gpa: Allocator, options: Options, thread_pool: *ThreadPool) !*Coff { + const self = try gpa.create(Coff); + + self.* = .{ + .base = .{ + .tag = .coff, + .allocator = gpa, + .file = undefined, + .thread_pool = thread_pool, + }, + .options = options, + }; + + return self; +} + +pub fn deinit(self: *Coff) void { + for (self.objects.items) |*object| { + object.deinit(self.base.allocator); + } + + self.objects.deinit(self.base.allocator); +} + +pub fn closeFiles(self: *const Coff) void { + for (self.objects.items) |object| { + object.file.close(); + } +} + +pub fn flush(self: *Coff) !void { + const gpa = self.base.allocator; + + var positionals = std.ArrayList([]const u8).init(gpa); + defer positionals.deinit(); + try positionals.ensureTotalCapacity(self.options.positionals.len); + + for (self.options.positionals) |obj| { + positionals.appendAssumeCapacity(obj.path); + } + + try self.parsePositionals(positionals.items); 
+} + +fn parsePositionals(self: *Coff, files: []const []const u8) !void { + for (files) |file_name| { + const full_path = full_path: { + var buffer: [fs.MAX_PATH_BYTES]u8 = undefined; + const path = try std.fs.realpath(file_name, &buffer); + break :full_path try self.base.allocator.dupe(u8, path); + }; + defer self.base.allocator.free(full_path); + log.debug("parsing input file path '{s}'", .{full_path}); + + if (try self.parseObject(full_path)) continue; + + log.warn("unknown filetype for positional input file: '{s}'", .{file_name}); + } +} + +fn parseObject(self: *Coff, path: []const u8) !bool { + const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { + error.FileNotFound => return false, + else => |e| return e, + }; + errdefer file.close(); + + const name = try self.base.allocator.dupe(u8, path); + errdefer self.base.allocator.free(name); + + var object = Object{ + .name = name, + .file = file, + }; + + object.parse(self.base.allocator, self.options.target.cpu_arch.?) catch |err| switch (err) { + error.EndOfStream => { + object.deinit(self.base.allocator); + return false; + }, + else => |e| return e, + }; + + try self.objects.append(self.base.allocator, object); + + return true; +} diff --git a/src/archive/archive/zld/Coff/Object.zig b/src/archive/archive/zld/Coff/Object.zig new file mode 100644 index 000000000000..1c07c3508c4c --- /dev/null +++ b/src/archive/archive/zld/Coff/Object.zig @@ -0,0 +1,187 @@ +const Object = @This(); + +const std = @import("std"); +const coff = std.coff; +const mem = std.mem; +const fs = std.fs; +const assert = std.debug.assert; +const log = std.log.scoped(.coff); + +const Allocator = mem.Allocator; + +file: fs.File, +name: []const u8, + +header: CoffHeader = undefined, + +symtab: std.ArrayListUnmanaged(Symbol) = .{}, +shdrtab: std.ArrayListUnmanaged(SectionHeader) = .{}, +strtab: []u8 = undefined, + +// TODO: Make these public in std.coff +const CoffHeader = extern struct { + machine: u16, + number_of_sections: u16, + timedate_stamp: u32, + pointer_to_symbol_table: u32, + number_of_symbols: u32, + size_of_optional_header: u16, + characteristics: u16, +}; + +const IMAGE_FILE_MACHINE_I386 = 0x014c; +const IMAGE_FILE_MACHINE_IA64 = 0x0200; +const IMAGE_FILE_MACHINE_AMD64 = 0x8664; + +const SectionHeader = extern struct { + const Misc = packed union { + physical_address: u32, + virtual_size: u32, + }; + + name: [8]u8, + misc: Misc, + size_of_raw_data: u32, + pointer_to_raw_data: u32, + pointer_to_relocations: u32, + pointer_to_line_numbers: u32, + number_of_relocations: u16, + number_of_line_numbers: u16, + characteristics: u32, +}; + +const Symbol = extern struct { + name: [8]u8, + value: u32, + sect_num: i16, + type: u16, + storage_class: i8, + num_aux: u8, + + pub fn getName(self: Symbol, object: *Object) []const u8 { + if (mem.readIntNative(u32, self.name[0..4]) == 0x0) { + const offset = mem.readIntNative(u32, self.name[4..]); + return object.getString(offset); + } else { + return mem.span(@ptrCast(&self.name)); + } + } +}; + +pub const IMAGE_SYM_CLASS_END_OF_FUNCTION = 0xff; +pub const IMAGE_SYM_CLASS_NULL = 0; +pub const IMAGE_SYM_CLASS_AUTOMATIC = 1; +pub const IMAGE_SYM_CLASS_EXTERNAL = 2; +pub const IMAGE_SYM_CLASS_STATIC = 3; +pub const IMAGE_SYM_CLASS_REGISTER = 4; +pub const IMAGE_SYM_CLASS_EXTERNAL_DEF = 5; +pub const IMAGE_SYM_CLASS_LABEL = 6; +pub const IMAGE_SYM_CLASS_UNDEFINED_LABEL = 7; +pub const IMAGE_SYM_CLASS_MEMBER_OF_STRUCT = 8; +pub const IMAGE_SYM_CLASS_ARGUMENT = 9; +pub const IMAGE_SYM_CLASS_STRUCT_TAG = 10; +pub const 
IMAGE_SYM_CLASS_MEMBER_OF_UNION = 11;
+pub const IMAGE_SYM_CLASS_UNION_TAG = 12;
+pub const IMAGE_SYM_CLASS_TYPE_DEFINITION = 13;
+pub const IMAGE_SYM_CLASS_UNDEFINED_STATIC = 14;
+pub const IMAGE_SYM_CLASS_ENUM_TAG = 15;
+pub const IMAGE_SYM_CLASS_MEMBER_OF_ENUM = 16;
+pub const IMAGE_SYM_CLASS_REGISTER_PARAM = 17;
+pub const IMAGE_SYM_CLASS_BIT_FIELD = 18;
+pub const IMAGE_SYM_CLASS_BLOCK = 100;
+pub const IMAGE_SYM_CLASS_FUNCTION = 101;
+pub const IMAGE_SYM_CLASS_END_OF_STRUCT = 102;
+pub const IMAGE_SYM_CLASS_FILE = 103;
+pub const IMAGE_SYM_CLASS_SECTION = 104;
+pub const IMAGE_SYM_CLASS_WEAK_EXTERNAL = 105;
+pub const IMAGE_SYM_CLASS_CLR_TOKEN = 107;
+
+// TODO
+// comptime {
+//     assert(@sizeOf(Symbol) == 18);
+//     assert(@sizeOf(CoffHeader) == 20);
+// }
+
+pub fn deinit(self: *Object, allocator: Allocator) void {
+    self.symtab.deinit(allocator);
+    self.shdrtab.deinit(allocator);
+    allocator.free(self.strtab);
+    allocator.free(self.name);
+}
+
+pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) !void {
+    const reader = self.file.reader();
+    const header = try reader.readStruct(CoffHeader);
+
+    if (header.size_of_optional_header != 0) {
+        log.debug("Optional header not expected in an object file", .{});
+        return error.NotObject;
+    }
+
+    if (header.machine != @intFromEnum(cpu_arch.toCoffMachine())) {
+        log.debug("Invalid architecture {any}, expected {any}", .{
+            header.machine,
+            cpu_arch.toCoffMachine(),
+        });
+        return error.InvalidCpuArch;
+    }
+    self.header = header;
+
+    try self.parseShdrs(allocator);
+    try self.parseSymtab(allocator);
+    try self.parseStrtab(allocator);
+}
+
+fn parseShdrs(self: *Object, allocator: Allocator) !void {
+    try self.shdrtab.ensureTotalCapacity(allocator, self.header.number_of_sections);
+
+    var i: usize = 0;
+    while (i < self.header.number_of_sections) : (i += 1) {
+        const section = try self.file.reader().readStruct(SectionHeader);
+        self.shdrtab.appendAssumeCapacity(section);
+    }
+}
+
+fn parseSymtab(self: *Object, allocator: Allocator) !void {
+    const offset = self.header.pointer_to_symbol_table;
+    try self.file.seekTo(offset);
+
+    try self.symtab.ensureTotalCapacity(allocator, self.header.number_of_symbols);
+
+    var i: usize = 0;
+    var num_aux: usize = 0;
+    while (i < self.header.number_of_symbols) : (i += 1) {
+        const symbol = try self.file.reader().readStruct(Symbol);
+
+        // Ignore symbol if it has invalid section number
+        if (symbol.sect_num < 1 or symbol.sect_num > self.shdrtab.items.len) {
+            continue;
+        }
+
+        // Ignore auxiliary symbols
+        if (num_aux > 0) {
+            num_aux -= 1;
+            continue;
+        }
+
+        // Check for upcoming auxiliary symbols
+        if (symbol.num_aux != 0) {
+            num_aux = symbol.num_aux;
+        }
+
+        self.symtab.appendAssumeCapacity(symbol);
+    }
+}
+
+fn parseStrtab(self: *Object, allocator: Allocator) !void {
+    const string_table_size = (try self.file.reader().readIntNative(u32)) - @sizeOf(u32);
+
+    self.strtab = try allocator.alloc(u8, string_table_size);
+    _ = try self.file.reader().read(self.strtab);
+}
+
+pub fn getString(self: *Object, off: u32) []const u8 {
+    const local_offset = off - @sizeOf(u32);
+    assert(local_offset < self.symtab.items.len);
+    return mem.span(@as([*:0]const u8, @ptrCast(self.strtab.ptr + local_offset)));
+}
diff --git a/src/archive/archive/zld/Coff/Options.zig b/src/archive/archive/zld/Coff/Options.zig
new file mode 100644
index 000000000000..c01da531e1dc
--- /dev/null
+++ b/src/archive/archive/zld/Coff/Options.zig
@@ -0,0 +1,91 @@
+const Options = @This();
+
+const std = @import("std");
+const builtin = @import("builtin"); +const io = std.io; +const mem = std.mem; +const process = std.process; + +const Allocator = mem.Allocator; +const CrossTarget = std.zig.CrossTarget; +const Coff = @import("../Coff.zig"); +const Zld = @import("../Zld.zig"); + +const usage = + \\Usage: {s} [files...] + \\ + \\General Options: + \\-l[name] Specify library to link against + \\-L[path] Specify library search dir + \\-o [path] Specify output path for the final artifact + \\-h, --help Print this help and exit + \\--debug-log [scope] Turn on debugging logs for [scope] (requires zld compiled with -Dlog) +; + +emit: Zld.Emit, +output_mode: Zld.OutputMode, +target: CrossTarget, +positionals: []const Zld.LinkObject, +libs: std.StringArrayHashMap(Zld.SystemLib), +lib_dirs: []const []const u8, + +pub fn parseArgs(arena: Allocator, ctx: Zld.MainCtx) !Options { + if (ctx.args.len == 0) { + ctx.printSuccess(usage, .{ctx.cmd}); + } + + var positionals = std.ArrayList(Zld.LinkObject).init(arena); + var libs = std.StringArrayHashMap(Zld.SystemLib).init(arena); + var lib_dirs = std.ArrayList([]const u8).init(arena); + var out_path: ?[]const u8 = null; + + const Iterator = struct { + args: []const []const u8, + i: usize = 0, + fn next(it: *@This()) ?[]const u8 { + if (it.i >= it.args.len) { + return null; + } + defer it.i += 1; + return it.args[it.i]; + } + }; + var args_iter = Iterator{ .args = ctx.args }; + + while (args_iter.next()) |arg| { + if (mem.eql(u8, arg, "--help") or mem.eql(u8, arg, "-h")) { + ctx.printSuccess(usage, .{ctx.cmd}); + } else if (mem.eql(u8, arg, "--debug-log")) { + const scope = args_iter.next() orelse ctx.printFailure("Expected log scope after {s}", .{arg}); + try ctx.log_scopes.append(scope); + } else if (mem.startsWith(u8, arg, "-l")) { + try libs.put(arg[2..], .{}); + } else if (mem.startsWith(u8, arg, "-L")) { + try lib_dirs.append(arg[2..]); + } else if (mem.eql(u8, arg, "-o")) { + out_path = args_iter.next() orelse + ctx.printFailure("Expected output path after {s}", .{arg}); + } else { + try positionals.append(.{ + .path = arg, + .must_link = true, + }); + } + } + + if (positionals.items.len == 0) { + ctx.printFailure("Expected at least one input .o file", .{}); + } + + return Options{ + .emit = .{ + .directory = std.fs.cwd(), + .sub_path = out_path orelse "a.out", + }, + .target = CrossTarget.fromTarget(builtin.target), + .output_mode = .exe, + .positionals = positionals.items, + .libs = libs, + .lib_dirs = lib_dirs.items, + }; +} diff --git a/src/archive/archive/zld/Elf.zig b/src/archive/archive/zld/Elf.zig new file mode 100644 index 000000000000..c89d66f3b03b --- /dev/null +++ b/src/archive/archive/zld/Elf.zig @@ -0,0 +1,1523 @@ +const Elf = @This(); + +const std = @import("std"); +const build_options = @import("build_options"); +const builtin = @import("builtin"); +const assert = std.debug.assert; +const elf = std.elf; +const fs = std.fs; +const gc = @import("Elf/gc.zig"); +const log = std.log.scoped(.elf); +const math = std.math; +const mem = std.mem; + +const Allocator = mem.Allocator; +const Archive = @import("Elf/Archive.zig"); +const Atom = @import("Elf/Atom.zig"); +pub const Object = @import("Elf/Object.zig"); +pub const Options = @import("Elf/Options.zig"); +const StringTable = @import("strtab.zig").StringTable; +const ThreadPool = @import("ThreadPool.zig"); +const Zld = @import("Zld.zig"); + +pub const base_tag = Zld.Tag.elf; + +base: Zld, +options: Options, + +archives: std.ArrayListUnmanaged(Archive) = .{}, +objects: std.ArrayListUnmanaged(Object) = .{}, + 
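+// Output file state: the ELF header and program headers below are created in
+// populateMetadata() and finalized during flush().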
+header: ?elf.Elf64_Ehdr = null, +phdrs: std.ArrayListUnmanaged(elf.Elf64_Phdr) = .{}, + +sections: std.MultiArrayList(Section) = .{}, + +strtab: StringTable(.strtab) = .{}, +shstrtab: StringTable(.shstrtab) = .{}, + +phdr_seg_index: ?u16 = null, +load_r_seg_index: ?u16 = null, +load_re_seg_index: ?u16 = null, +load_rw_seg_index: ?u16 = null, +tls_seg_index: ?u16 = null, +gnu_stack_phdr_index: ?u16 = null, + +text_sect_index: ?u16 = null, +got_sect_index: ?u16 = null, +symtab_sect_index: ?u16 = null, +strtab_sect_index: ?u16 = null, +shstrtab_sect_index: ?u16 = null, + +locals: std.ArrayListUnmanaged(elf.Elf64_Sym) = .{}, +globals: std.StringArrayHashMapUnmanaged(SymbolWithLoc) = .{}, +unresolved: std.AutoArrayHashMapUnmanaged(u32, void) = .{}, + +got_entries_map: std.AutoArrayHashMapUnmanaged(SymbolWithLoc, *Atom) = .{}, + +managed_atoms: std.ArrayListUnmanaged(*Atom) = .{}, +atom_table: std.AutoHashMapUnmanaged(u32, *Atom) = .{}, + +const Section = struct { + shdr: elf.Elf64_Shdr, + last_atom: ?*Atom, +}; + +/// Special st_other value used internally by zld to mark symbol +/// as GCed. +pub const STV_GC: u8 = std.math.maxInt(u8); + +pub const SymbolWithLoc = struct { + /// Index in the respective symbol table. + sym_index: u32, + + /// null means it's a synthetic global. + file: ?u32, +}; + +const default_base_addr: u64 = 0x200000; + +pub fn openPath(allocator: Allocator, options: Options, thread_pool: *ThreadPool) !*Elf { + const file = try options.emit.directory.createFile(options.emit.sub_path, .{ + .truncate = true, + .read = true, + .mode = if (builtin.os.tag == .windows) 0 else 0o777, + }); + errdefer file.close(); + + const self = try createEmpty(allocator, options, thread_pool); + errdefer allocator.destroy(self); + + self.base.file = file; + + try self.populateMetadata(); + + return self; +} + +fn createEmpty(gpa: Allocator, options: Options, thread_pool: *ThreadPool) !*Elf { + const self = try gpa.create(Elf); + + self.* = .{ + .base = .{ + .tag = .elf, + .allocator = gpa, + .file = undefined, + .thread_pool = thread_pool, + }, + .options = options, + }; + + return self; +} + +pub fn deinit(self: *Elf) void { + for (self.managed_atoms.items) |atom| { + atom.deinit(self.base.allocator); + self.base.allocator.destroy(atom); + } + self.managed_atoms.deinit(self.base.allocator); + for (self.globals.keys()) |key| { + self.base.allocator.free(key); + } + self.atom_table.deinit(self.base.allocator); + self.got_entries_map.deinit(self.base.allocator); + self.unresolved.deinit(self.base.allocator); + self.globals.deinit(self.base.allocator); + self.locals.deinit(self.base.allocator); + self.shstrtab.deinit(self.base.allocator); + self.strtab.deinit(self.base.allocator); + self.phdrs.deinit(self.base.allocator); + self.sections.deinit(self.base.allocator); + for (self.objects.items) |*object| { + object.deinit(self.base.allocator); + } + self.objects.deinit(self.base.allocator); + for (self.archives.items) |*archive| { + archive.deinit(self.base.allocator); + } + self.archives.deinit(self.base.allocator); +} + +pub fn closeFiles(self: *const Elf) void { + for (self.archives.items) |archive| { + archive.file.close(); + } +} + +fn resolveLib( + arena: Allocator, + search_dirs: []const []const u8, + name: []const u8, + ext: []const u8, +) !?[]const u8 { + const search_name = try std.fmt.allocPrint(arena, "lib{s}{s}", .{ name, ext }); + + for (search_dirs) |dir| { + const full_path = try fs.path.join(arena, &[_][]const u8{ dir, search_name }); + + // Check if the file exists. 
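+        // (The file is not kept open here; a matching library is opened again later when parsed.)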
+ const tmp = fs.cwd().openFile(full_path, .{}) catch |err| switch (err) { + error.FileNotFound => continue, + else => |e| return e, + }; + defer tmp.close(); + + return full_path; + } + + return null; +} + +pub fn flush(self: *Elf) !void { + var arena_allocator = std.heap.ArenaAllocator.init(self.base.allocator); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + var lib_dirs = std.ArrayList([]const u8).init(arena); + for (self.options.lib_dirs) |dir| { + // Verify that search path actually exists + var tmp = fs.cwd().openDir(dir, .{}) catch |err| switch (err) { + error.FileNotFound => continue, + else => |e| return e, + }; + defer tmp.close(); + + try lib_dirs.append(dir); + } + + var libs = std.StringArrayHashMap(Zld.SystemLib).init(arena); + var lib_not_found = false; + for (self.options.libs.keys()) |lib_name| { + for (&[_][]const u8{ ".dylib", ".a" }) |ext| { + if (try resolveLib(arena, lib_dirs.items, lib_name, ext)) |full_path| { + try libs.put(full_path, self.options.libs.get(lib_name).?); + break; + } + } else { + log.warn("library not found for '-l{s}'", .{lib_name}); + lib_not_found = true; + } + } + if (lib_not_found) { + log.warn("Library search paths:", .{}); + for (lib_dirs.items) |dir| { + log.warn(" {s}", .{dir}); + } + } + + var positionals = std.ArrayList([]const u8).init(arena); + try positionals.ensureTotalCapacity(self.options.positionals.len); + for (self.options.positionals) |obj| { + positionals.appendAssumeCapacity(obj.path); + } + + try self.parsePositionals(positionals.items); + try self.parseLibs(libs.keys()); + + for (self.objects.items, 0..) |_, object_id| { + try self.resolveSymbolsInObject(@intCast(object_id)); + } + try self.resolveSymbolsInArchives(); + try self.resolveSpecialSymbols(); + + for (self.unresolved.keys()) |ndx| { + const global = self.globals.values()[ndx]; + const object = self.objects.items[global.file.?]; + const sym_name = self.getSymbolName(global); + log.err("undefined reference to symbol '{s}'", .{sym_name}); + log.err(" first referenced in '{s}'", .{object.name}); + } + if (self.unresolved.count() > 0) { + return error.UndefinedSymbolReference; + } + + for (self.objects.items) |*object| { + try object.scanInputSections(self); + } + + for (self.objects.items, 0..) |*object, object_id| { + try object.splitIntoAtoms(self.base.allocator, @as(u16, @intCast(object_id)), self); + } + + if (self.options.gc_sections) { + try gc.gcAtoms(self); + } + + try self.setStackSize(); + try self.allocateLoadRSeg(); + try self.allocateLoadRESeg(); + try self.allocateLoadRWSeg(); + try self.allocateNonAllocSections(); + try self.allocateAtoms(); + try self.setEntryPoint(); + + { + // TODO this should be put in its own logic but probably is linked to + // C++ handling so leaving it here until I gather more knowledge on + // those special symbols. 
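+        // If the corresponding section is missing, the __init_array_* / __fini_array_*
+        // symbols are pointed at the entry point so that they still resolve to a valid address.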
+ if (self.getSectionByName(".init_array") == null) { + if (self.globals.get("__init_array_start")) |global| { + assert(global.file == null); + const sym = &self.locals.items[global.sym_index]; + sym.st_value = self.header.?.e_entry; + sym.st_shndx = self.text_sect_index.?; + } + if (self.globals.get("__init_array_end")) |global| { + assert(global.file == null); + const sym = &self.locals.items[global.sym_index]; + sym.st_value = self.header.?.e_entry; + sym.st_shndx = self.text_sect_index.?; + } + } + if (self.getSectionByName(".fini_array") == null) { + if (self.globals.get("__fini_array_start")) |global| { + assert(global.file == null); + const sym = &self.locals.items[global.sym_index]; + sym.st_value = self.header.?.e_entry; + sym.st_shndx = self.text_sect_index.?; + } + if (self.globals.get("__fini_array_end")) |global| { + assert(global.file == null); + const sym = &self.locals.items[global.sym_index]; + sym.st_value = self.header.?.e_entry; + sym.st_shndx = self.text_sect_index.?; + } + } + } + + if (build_options.enable_logging) { + self.logSymtab(); + self.logSections(); + self.logAtoms(); + } + + try self.writeAtoms(); + try self.writePhdrs(); + try self.writeSymtab(); + try self.writeStrtab(); + try self.writeShStrtab(); + try self.writeShdrs(); + try self.writeHeader(); +} + +fn populateMetadata(self: *Elf) !void { + const gpa = self.base.allocator; + if (self.header == null) { + var header = elf.Elf64_Ehdr{ + .e_ident = undefined, + .e_type = switch (self.options.output_mode) { + .exe => elf.ET.EXEC, + .lib => elf.ET.DYN, + }, + .e_machine = self.options.target.cpu_arch.?.toElfMachine(), + .e_version = 1, + .e_entry = 0, + .e_phoff = @sizeOf(elf.Elf64_Ehdr), + .e_shoff = 0, + .e_flags = 0, + .e_ehsize = @sizeOf(elf.Elf64_Ehdr), + .e_phentsize = @sizeOf(elf.Elf64_Phdr), + .e_phnum = 0, + .e_shentsize = @sizeOf(elf.Elf64_Shdr), + .e_shnum = 0, + .e_shstrndx = 0, + }; + // Magic + mem.copy(u8, header.e_ident[0..4], "\x7fELF"); + // Class + header.e_ident[4] = elf.ELFCLASS64; + // Endianness + header.e_ident[5] = elf.ELFDATA2LSB; + // ELF version + header.e_ident[6] = 1; + // OS ABI, often set to 0 regardless of target platform + // ABI Version, possibly used by glibc but not by static executables + // padding + mem.set(u8, header.e_ident[7..][0..9], 0); + self.header = header; + } + if (self.phdr_seg_index == null) { + const offset = @sizeOf(elf.Elf64_Ehdr); + const size = @sizeOf(elf.Elf64_Phdr); + self.phdr_seg_index = @as(u16, @intCast(self.phdrs.items.len)); + try self.phdrs.append(gpa, .{ + .p_type = elf.PT_PHDR, + .p_flags = elf.PF_R, + .p_offset = offset, + .p_vaddr = offset + default_base_addr, + .p_paddr = offset + default_base_addr, + .p_filesz = size, + .p_memsz = size, + .p_align = @alignOf(elf.Elf64_Phdr), + }); + } + if (self.load_r_seg_index == null) { + self.load_r_seg_index = @as(u16, @intCast(self.phdrs.items.len)); + try self.phdrs.append(gpa, .{ + .p_type = elf.PT_LOAD, + .p_flags = elf.PF_R, + .p_offset = 0, + .p_vaddr = default_base_addr, + .p_paddr = default_base_addr, + .p_filesz = @sizeOf(elf.Elf64_Ehdr), + .p_memsz = @sizeOf(elf.Elf64_Ehdr), + .p_align = 0x1000, + }); + { + const phdr = &self.phdrs.items[self.phdr_seg_index.?]; + phdr.p_filesz += @sizeOf(elf.Elf64_Phdr); + phdr.p_memsz += @sizeOf(elf.Elf64_Phdr); + } + } + if (self.load_re_seg_index == null) { + self.load_re_seg_index = @as(u16, @intCast(self.phdrs.items.len)); + try self.phdrs.append(gpa, .{ + .p_type = elf.PT_LOAD, + .p_flags = elf.PF_R | elf.PF_X, + .p_offset = 0, + .p_vaddr = 
default_base_addr, + .p_paddr = default_base_addr, + .p_filesz = 0, + .p_memsz = 0, + .p_align = 0x1000, + }); + { + const phdr = &self.phdrs.items[self.phdr_seg_index.?]; + phdr.p_filesz += @sizeOf(elf.Elf64_Phdr); + phdr.p_memsz += @sizeOf(elf.Elf64_Phdr); + } + } + if (self.load_rw_seg_index == null) { + self.load_rw_seg_index = @as(u16, @intCast(self.phdrs.items.len)); + try self.phdrs.append(gpa, .{ + .p_type = elf.PT_LOAD, + .p_flags = elf.PF_R | elf.PF_W, + .p_offset = 0, + .p_vaddr = default_base_addr, + .p_paddr = default_base_addr, + .p_filesz = 0, + .p_memsz = 0, + .p_align = 0x1000, + }); + { + const phdr = &self.phdrs.items[self.phdr_seg_index.?]; + phdr.p_filesz += @sizeOf(elf.Elf64_Phdr); + phdr.p_memsz += @sizeOf(elf.Elf64_Phdr); + } + } + { + _ = try self.insertSection(.{ + .sh_name = 0, + .sh_type = elf.SHT_NULL, + .sh_flags = 0, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = 0, + .sh_entsize = 0, + }, ""); + } + // TODO remove this once GC is done prior to creating synthetic sections + if (self.got_sect_index == null) { + self.got_sect_index = try self.insertSection(.{ + .sh_name = 0, + .sh_type = elf.SHT_PROGBITS, + .sh_flags = elf.SHF_WRITE | elf.SHF_ALLOC, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = @alignOf(u64), + .sh_entsize = 0, + }, ".got"); + } + if (self.symtab_sect_index == null) { + self.symtab_sect_index = try self.insertSection(.{ + .sh_name = 0, + .sh_type = elf.SHT_SYMTAB, + .sh_flags = 0, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = @alignOf(elf.Elf64_Sym), + .sh_entsize = @sizeOf(elf.Elf64_Sym), + }, ".symtab"); + } + if (self.strtab_sect_index == null) { + try self.strtab.buffer.append(gpa, 0); + self.strtab_sect_index = try self.insertSection(.{ + .sh_name = 0, + .sh_type = elf.SHT_STRTAB, + .sh_flags = 0, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = 1, + .sh_entsize = 0, + }, ".strtab"); + } + if (self.shstrtab_sect_index == null) { + try self.shstrtab.buffer.append(gpa, 0); + self.shstrtab_sect_index = try self.insertSection(.{ + .sh_name = 0, + .sh_type = elf.SHT_STRTAB, + .sh_flags = 0, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = 1, + .sh_entsize = 0, + }, ".shstrtab"); + } +} + +fn getSectionPrecedence(shdr: elf.Elf64_Shdr, shdr_name: []const u8) u4 { + const flags = shdr.sh_flags; + switch (shdr.sh_type) { + elf.SHT_NULL => return 0, + elf.SHT_PREINIT_ARRAY, + elf.SHT_INIT_ARRAY, + elf.SHT_FINI_ARRAY, + => return 2, + elf.SHT_PROGBITS => if (flags & elf.SHF_ALLOC != 0) { + if (flags & elf.SHF_EXECINSTR != 0) { + return 2; + } else if (flags & elf.SHF_WRITE != 0) { + return if (flags & elf.SHF_TLS != 0) 3 else 5; + } else { + return 1; + } + } else { + if (mem.startsWith(u8, shdr_name, ".debug")) { + return 7; + } else { + return 8; + } + }, + elf.SHT_NOBITS => return if (flags & elf.SHF_TLS != 0) 4 else 6, + elf.SHT_SYMTAB => return 0xa, + elf.SHT_STRTAB => return 0xb, + else => return 0xf, + } +} + +fn insertSection(self: *Elf, shdr: elf.Elf64_Shdr, shdr_name: []const u8) !u16 { + const precedence = getSectionPrecedence(shdr, shdr_name); + // Actually, the order doesn't really matter as long as the sections are correctly + // allocated within each respective segment. 
Of course, it is good practice to have + // the sections sorted, but it's a useful hack we can use for the debug builds in + // self-hosted Zig compiler. + const insertion_index = for (self.sections.items(.shdr), 0..) |oshdr, i| { + const oshdr_name = self.shstrtab.getAssumeExists(oshdr.sh_name); + if (getSectionPrecedence(oshdr, oshdr_name) > precedence) break @as(u16, @intCast(i)); + } else @as(u16, @intCast(self.sections.items(.shdr).len)); + log.debug("inserting section '{s}' at index {d}", .{ + shdr_name, + insertion_index, + }); + for (&[_]*?u16{ + &self.text_sect_index, + &self.got_sect_index, + &self.symtab_sect_index, + &self.strtab_sect_index, + &self.shstrtab_sect_index, + }) |maybe_index| { + const index = maybe_index.* orelse continue; + if (insertion_index <= index) maybe_index.* = index + 1; + } + try self.sections.insert(self.base.allocator, insertion_index, .{ + .shdr = .{ + .sh_name = try self.shstrtab.insert(self.base.allocator, shdr_name), + .sh_type = shdr.sh_type, + .sh_flags = shdr.sh_flags, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 0, + .sh_info = shdr.sh_info, + .sh_addralign = shdr.sh_addralign, + .sh_entsize = shdr.sh_entsize, + }, + .last_atom = null, + }); + return insertion_index; +} + +pub fn getOutputSection(self: *Elf, shdr: elf.Elf64_Shdr, shdr_name: []const u8) !?u16 { + const flags = shdr.sh_flags; + const res: ?u16 = blk: { + if (flags & elf.SHF_EXCLUDE != 0) break :blk null; + const out_name: []const u8 = name: { + switch (shdr.sh_type) { + elf.SHT_NULL => break :blk 0, + elf.SHT_PROGBITS => { + if (flags & elf.SHF_ALLOC == 0) break :name shdr_name; + if (flags & elf.SHF_EXECINSTR != 0) { + if (mem.startsWith(u8, shdr_name, ".init")) { + break :name ".init"; + } else if (mem.startsWith(u8, shdr_name, ".fini")) { + break :name ".fini"; + } else if (mem.startsWith(u8, shdr_name, ".init_array")) { + break :name ".init_array"; + } else if (mem.startsWith(u8, shdr_name, ".fini_array")) { + break :name ".fini_array"; + } else { + break :name ".text"; + } + } + if (flags & elf.SHF_WRITE != 0) { + if (flags & elf.SHF_TLS != 0) { + if (self.tls_seg_index == null) { + self.tls_seg_index = @as(u16, @intCast(self.phdrs.items.len)); + try self.phdrs.append(self.base.allocator, .{ + .p_type = elf.PT_TLS, + .p_flags = elf.PF_R, + .p_offset = 0, + .p_vaddr = default_base_addr, + .p_paddr = default_base_addr, + .p_filesz = 0, + .p_memsz = 0, + .p_align = 0, + }); + } + break :name ".tdata"; + } else if (mem.startsWith(u8, shdr_name, ".data.rel.ro")) { + break :name ".data.rel.ro"; + } else { + break :name ".data"; + } + } + break :name ".rodata"; + }, + elf.SHT_NOBITS => { + if (flags & elf.SHF_TLS != 0) { + if (self.tls_seg_index == null) { + self.tls_seg_index = @as(u16, @intCast(self.phdrs.items.len)); + try self.phdrs.append(self.base.allocator, .{ + .p_type = elf.PT_TLS, + .p_flags = elf.PF_R, + .p_offset = 0, + .p_vaddr = default_base_addr, + .p_paddr = default_base_addr, + .p_filesz = 0, + .p_memsz = 0, + .p_align = 0, + }); + } + break :name ".tbss"; + } else { + break :name ".bss"; + } + }, + else => break :name shdr_name, + } + }; + const res = self.getSectionByName(out_name) orelse try self.insertSection(shdr, out_name); + if (mem.eql(u8, out_name, ".text")) { + if (self.text_sect_index == null) { + self.text_sect_index = res; + } + } + break :blk res; + }; + return res; +} + +fn parsePositionals(self: *Elf, files: []const []const u8) !void { + for (files) |file_name| { + const full_path = full_path: { + var buffer: [fs.MAX_PATH_BYTES]u8 
= undefined; + const path = try std.fs.realpath(file_name, &buffer); + break :full_path try self.base.allocator.dupe(u8, path); + }; + defer self.base.allocator.free(full_path); + log.debug("parsing input file path '{s}'", .{full_path}); + + if (try self.parseObject(full_path)) continue; + if (try self.parseArchive(full_path)) continue; + + log.warn("unknown filetype for positional input file: '{s}'", .{file_name}); + } +} + +fn parseLibs(self: *Elf, libs: []const []const u8) !void { + for (libs) |lib| { + log.debug("parsing lib path '{s}'", .{lib}); + if (try self.parseArchive(lib)) continue; + + log.warn("unknown filetype for a library: '{s}'", .{lib}); + } +} + +fn parseObject(self: *Elf, path: []const u8) !bool { + const gpa = self.base.allocator; + const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { + error.FileNotFound => return false, + else => |e| return e, + }; + defer file.close(); + + const name = try gpa.dupe(u8, path); + const cpu_arch = self.options.target.cpu_arch.?; + const file_stat = try file.stat(); + const file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; + const data = try file.readToEndAllocOptions(gpa, file_size, file_size, @alignOf(u64), null); + + var object = Object{ + .name = name, + .data = data, + }; + + object.parse(gpa, cpu_arch) catch |err| switch (err) { + error.EndOfStream, error.NotObject => { + object.deinit(self.base.allocator); + return false; + }, + else => |e| return e, + }; + + try self.objects.append(gpa, object); + + return true; +} + +fn parseArchive(self: *Elf, path: []const u8) !bool { + const gpa = self.base.allocator; + const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { + error.FileNotFound => return false, + else => |e| return e, + }; + errdefer file.close(); + + const name = try gpa.dupe(u8, path); + const reader = file.reader(); + + var archive = Archive{ + .name = name, + .file = file, + }; + + archive.parse(gpa, reader) catch |err| switch (err) { + error.EndOfStream, error.NotArchive => { + archive.deinit(gpa); + return false; + }, + else => |e| return e, + }; + + try self.archives.append(gpa, archive); + + return true; +} + +fn resolveSymbolsInObject(self: *Elf, object_id: u16) !void { + const object = self.objects.items[object_id]; + + log.debug("resolving symbols in {s}", .{object.name}); + + for (object.symtab.items, 0..) 
|sym, i| { + const sym_id = @as(u32, @intCast(i)); + const sym_name = self.getSymbolName(.{ .sym_index = sym_id, .file = object_id }); + const st_bind = sym.st_info >> 4; + const st_type = sym.st_info & 0xf; + + switch (st_bind) { + elf.STB_LOCAL => { + log.debug(" (symbol '{s}' local to object; skipping...)", .{sym_name}); + continue; + }, + elf.STB_WEAK, elf.STB_GLOBAL => { + const name = try self.base.allocator.dupe(u8, sym_name); + const glob_ndx = @as(u32, @intCast(self.globals.values().len)); + const res = try self.globals.getOrPut(self.base.allocator, name); + defer if (res.found_existing) self.base.allocator.free(name); + + if (!res.found_existing) { + res.value_ptr.* = .{ + .sym_index = sym_id, + .file = object_id, + }; + if (sym.st_shndx == elf.SHN_UNDEF and st_type == elf.STT_NOTYPE) { + try self.unresolved.putNoClobber(self.base.allocator, glob_ndx, {}); + } + continue; + } + + const global = res.value_ptr.*; + const linked_obj = self.objects.items[global.file.?]; + const linked_sym = linked_obj.symtab.items[global.sym_index]; + const linked_sym_bind = linked_sym.st_info >> 4; + + if (sym.st_shndx == elf.SHN_UNDEF and st_type == elf.STT_NOTYPE) { + log.debug(" (symbol '{s}' already defined; skipping...)", .{sym_name}); + continue; + } + + if (linked_sym.st_shndx != elf.SHN_UNDEF) { + if (linked_sym_bind == elf.STB_GLOBAL and st_bind == elf.STB_GLOBAL) { + log.err("symbol '{s}' defined multiple times", .{sym_name}); + log.err(" first definition in '{s}'", .{linked_obj.name}); + log.err(" next definition in '{s}'", .{object.name}); + return error.MultipleSymbolDefinitions; + } + + if (st_bind == elf.STB_WEAK) { + log.debug(" (symbol '{s}' already defined; skipping...)", .{sym_name}); + continue; + } + } + _ = self.unresolved.fetchSwapRemove(@as(u32, @intCast(self.globals.getIndex(name).?))); + + res.value_ptr.* = .{ + .sym_index = sym_id, + .file = object_id, + }; + }, + else => { + log.err("unhandled symbol binding type: {}", .{st_bind}); + log.err(" symbol '{s}'", .{sym_name}); + log.err(" first definition in '{s}'", .{object.name}); + return error.UnhandledSymbolBindType; + }, + } + } +} + +fn resolveSymbolsInArchives(self: *Elf) !void { + if (self.archives.items.len == 0) return; + + var next_sym: usize = 0; + loop: while (next_sym < self.unresolved.count()) { + const global = self.globals.values()[self.unresolved.keys()[next_sym]]; + const sym_name = self.getSymbolName(global); + + for (self.archives.items) |archive| { + // Check if the entry exists in a static archive. + const offsets = archive.toc.get(sym_name) orelse { + // No hit. 
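+                // (This archive does not define the symbol, so try the next one.)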
+ continue; + }; + assert(offsets.items.len > 0); + + const object_id = @as(u16, @intCast(self.objects.items.len)); + const object = try archive.parseObject( + self.base.allocator, + self.options.target.cpu_arch.?, + offsets.items[0], + ); + try self.objects.append(self.base.allocator, object); + try self.resolveSymbolsInObject(object_id); + + continue :loop; + } + + next_sym += 1; + } +} + +fn resolveSpecialSymbols(self: *Elf) !void { + var next_sym: usize = 0; + loop: while (next_sym < self.unresolved.count()) { + const global = &self.globals.values()[self.unresolved.keys()[next_sym]]; + const sym_name = self.getSymbolName(global.*); + + if (mem.eql(u8, sym_name, "__init_array_start") or + mem.eql(u8, sym_name, "__init_array_end") or + mem.eql(u8, sym_name, "__fini_array_start") or + mem.eql(u8, sym_name, "__fini_array_end") or + mem.eql(u8, sym_name, "_DYNAMIC")) + { + const local: elf.Elf64_Sym = if (mem.eql(u8, sym_name, "_DYNAMIC")) .{ + .st_name = try self.strtab.insert(self.base.allocator, sym_name), + .st_info = elf.STB_WEAK << 4, + .st_other = 0, + .st_shndx = 0, + .st_value = 0, + .st_size = 0, + } else .{ + .st_name = try self.strtab.insert(self.base.allocator, sym_name), + .st_info = 0, + .st_other = 0, + .st_shndx = 1, // TODO should this be hardcoded? + .st_value = 0, + .st_size = 0, + }; + const sym_index = @as(u32, @intCast(self.locals.items.len)); + try self.locals.append(self.base.allocator, local); + global.* = .{ + .sym_index = sym_index, + .file = null, + }; + _ = self.unresolved.fetchSwapRemove(@as(u32, @intCast(self.globals.getIndex(sym_name).?))); + + continue :loop; + } + + next_sym += 1; + } +} + +pub fn createGotAtom(self: *Elf, target: SymbolWithLoc) !*Atom { + if (self.got_sect_index == null) { + self.got_sect_index = try self.insertSection(.{ + .sh_name = 0, + .sh_type = elf.SHT_PROGBITS, + .sh_flags = elf.SHF_WRITE | elf.SHF_ALLOC, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = @alignOf(u64), + .sh_entsize = 0, + }, ".got"); + } + + log.debug("creating GOT atom for target {}", .{target}); + + const atom = try Atom.createEmpty(self.base.allocator); + errdefer { + atom.deinit(self.base.allocator); + self.base.allocator.destroy(atom); + } + try self.managed_atoms.append(self.base.allocator, atom); + + atom.file = null; + atom.size = @sizeOf(u64); + atom.alignment = @alignOf(u64); + + var code = try self.base.allocator.alloc(u8, @sizeOf(u64)); + defer self.base.allocator.free(code); + mem.set(u8, code, 0); + try atom.code.appendSlice(self.base.allocator, code); + + const tsym_name = self.getSymbolName(target); + const r_sym = @as(u64, @intCast(target.sym_index)) << 32; + const r_addend: i64 = target.file orelse -1; + const r_info = r_sym | elf.R_X86_64_64; + try atom.relocs.append(self.base.allocator, .{ + .r_offset = 0, + .r_info = r_info, + .r_addend = r_addend, + }); + + const tmp_name = try std.fmt.allocPrint(self.base.allocator, ".got.{s}", .{tsym_name}); + defer self.base.allocator.free(tmp_name); + const sym_index = @as(u32, @intCast(self.locals.items.len)); + try self.locals.append(self.base.allocator, .{ + .st_name = try self.strtab.insert(self.base.allocator, tmp_name), + .st_info = (elf.STB_LOCAL << 4) | elf.STT_OBJECT, + .st_other = 1, + .st_shndx = 0, + .st_value = 0, + .st_size = @sizeOf(u64), + }); + atom.sym_index = sym_index; + + try self.atom_table.putNoClobber(self.base.allocator, atom.sym_index, atom); + try self.addAtomToSection(atom, self.got_sect_index.?); + + return atom; +} + +pub fn 
addAtomToSection(self: *Elf, atom: *Atom, sect_id: u16) !void { + const sym = atom.getSymbolPtr(self); + sym.st_shndx = sect_id; + var section = self.sections.get(sect_id); + if (section.shdr.sh_size > 0) { + section.last_atom.?.next = atom; + atom.prev = section.last_atom.?; + } + section.last_atom = atom; + const aligned_end_addr = mem.alignForwardGeneric(u64, section.shdr.sh_size, atom.alignment); + const padding = aligned_end_addr - section.shdr.sh_size; + section.shdr.sh_size += padding + atom.size; + section.shdr.sh_addralign = @max(section.shdr.sh_addralign, atom.alignment); + self.sections.set(sect_id, section); +} + +fn allocateSection(self: *Elf, shdr: *elf.Elf64_Shdr, phdr: *elf.Elf64_Phdr) !void { + const base_addr = phdr.p_vaddr + phdr.p_memsz; + shdr.sh_addr = mem.alignForwardGeneric(u64, base_addr, shdr.sh_addralign); + const p_memsz = shdr.sh_addr + shdr.sh_size - base_addr; + + const base_offset = phdr.p_offset + phdr.p_filesz; + shdr.sh_offset = mem.alignForwardGeneric(u64, base_offset, shdr.sh_addralign); + const p_filesz = shdr.sh_offset + shdr.sh_size - base_offset; + + if (shdr.sh_type == elf.SHT_NOBITS) { + log.debug("allocating section '{s}' from 0x{x} to 0x{x} (no physical size)", .{ + self.shstrtab.getAssumeExists(shdr.sh_name), + shdr.sh_addr, + shdr.sh_addr + shdr.sh_size, + }); + } else { + log.debug("allocating section '{s}' from 0x{x} to 0x{x} (0x{x} - 0x{x})", .{ + self.shstrtab.getAssumeExists(shdr.sh_name), + shdr.sh_addr, + shdr.sh_addr + shdr.sh_size, + shdr.sh_offset, + shdr.sh_offset + shdr.sh_size, + }); + phdr.p_filesz += p_filesz; + } + + phdr.p_memsz += p_memsz; +} + +const SegmentBase = struct { + offset: u64, + vaddr: u64, + init_size: u64 = 0, + alignment: ?u32 = null, +}; + +fn allocateSegment(self: *Elf, phdr_ndx: u16, shdr_ndxs: []const ?u16, base: SegmentBase) !void { + const phdr = &self.phdrs.items[phdr_ndx]; + + var min_align: u64 = 0; + for (shdr_ndxs) |maybe_shdr_ndx| { + const shdr_ndx = maybe_shdr_ndx orelse continue; + const shdr = self.sections.items(.shdr)[shdr_ndx]; + min_align = @max(min_align, shdr.sh_addralign); + } + + const p_align = base.alignment orelse min_align; + const p_offset = mem.alignForwardGeneric(u64, base.offset, min_align); + const p_vaddr = mem.alignForwardGeneric(u64, base.vaddr, p_align) + @rem(p_offset, p_align); + + phdr.p_offset = p_offset; + phdr.p_vaddr = p_vaddr; + phdr.p_paddr = p_vaddr; + phdr.p_filesz = base.init_size; + phdr.p_memsz = base.init_size; + phdr.p_align = p_align; + + // This assumes ordering of section headers matches ordering of sections in file + // so that the segments are contiguous in memory. 
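+    // Each listed section is placed at the aligned end of the segment, growing the
+    // segment's file and memory sizes as needed (see allocateSection above).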
+ for (shdr_ndxs) |maybe_shdr_ndx| { + const shdr_ndx = maybe_shdr_ndx orelse continue; + const shdr = &self.sections.items(.shdr)[shdr_ndx]; + try self.allocateSection(shdr, phdr); + } + + log.debug("allocating segment of type {x} and flags {x}:", .{ phdr.p_type, phdr.p_flags }); + log.debug(" in file from 0x{x} to 0x{x}", .{ phdr.p_offset, phdr.p_offset + phdr.p_filesz }); + log.debug(" in memory from 0x{x} to 0x{x}", .{ phdr.p_vaddr, phdr.p_vaddr + phdr.p_memsz }); +} + +fn allocateLoadRSeg(self: *Elf) !void { + const init_size = @sizeOf(elf.Elf64_Ehdr) + self.phdrs.items.len * @sizeOf(elf.Elf64_Phdr); + try self.allocateSegment(self.load_r_seg_index.?, &.{ + self.getSectionByName(".rodata"), + }, .{ + .offset = 0, + .vaddr = default_base_addr, + .init_size = init_size, + .alignment = 0x1000, + }); +} + +fn allocateLoadRESeg(self: *Elf) !void { + const prev_seg = self.phdrs.items[self.load_r_seg_index.?]; + try self.allocateSegment(self.load_re_seg_index.?, &.{ + self.getSectionByName(".text"), + self.getSectionByName(".init"), + self.getSectionByName(".init_array"), + self.getSectionByName(".fini"), + self.getSectionByName(".fini_array"), + }, .{ + .offset = prev_seg.p_offset + prev_seg.p_filesz, + .vaddr = prev_seg.p_vaddr + prev_seg.p_memsz, + .alignment = 0x1000, + }); + + if (self.tls_seg_index) |tls_seg_index| blk: { + if (self.getSectionByName(".tdata")) |_| break :blk; // TLS segment contains tdata section, hence it will be part of RW + const phdr = self.phdrs.items[self.load_re_seg_index.?]; + try self.allocateSegment(tls_seg_index, &.{ + self.getSectionByName(".tdata"), + self.getSectionByName(".tbss"), + }, .{ + .offset = phdr.p_offset + phdr.p_filesz, + .vaddr = phdr.p_vaddr + phdr.p_memsz, + }); + } +} + +fn allocateLoadRWSeg(self: *Elf) !void { + const base: SegmentBase = base: { + if (self.tls_seg_index) |tls_seg_index| blk: { + if (self.getSectionByName(".tdata")) |_| break :blk; + const prev_seg = self.phdrs.items[tls_seg_index]; + break :base .{ + .offset = prev_seg.p_offset + prev_seg.p_filesz, + .vaddr = prev_seg.p_vaddr + prev_seg.p_memsz, + .alignment = 0x1000, + }; + } + const prev_seg = self.phdrs.items[self.load_re_seg_index.?]; + break :base .{ + .offset = prev_seg.p_offset + prev_seg.p_filesz, + .vaddr = prev_seg.p_vaddr + prev_seg.p_memsz, + .alignment = 0x1000, + }; + }; + try self.allocateSegment(self.load_rw_seg_index.?, &.{ + self.getSectionByName(".tdata"), + self.getSectionByName(".data.rel.ro"), + self.getSectionByName(".got"), + self.getSectionByName(".data"), + self.getSectionByName(".bss"), + }, base); + + const phdr = self.phdrs.items[self.load_rw_seg_index.?]; + + if (self.getSectionByName(".tdata")) |_| { + try self.allocateSegment(self.tls_seg_index.?, &.{ + self.getSectionByName(".tdata"), + self.getSectionByName(".tbss"), + }, .{ + .offset = phdr.p_offset, + .vaddr = phdr.p_vaddr, + }); + } +} + +fn allocateNonAllocSections(self: *Elf) !void { + var offset: u64 = 0; + for (self.sections.items(.shdr)) |*shdr| { + defer { + offset = shdr.sh_offset + shdr.sh_size; + } + + if (shdr.sh_type == elf.SHT_NULL) continue; + if (shdr.sh_flags & elf.SHF_ALLOC != 0) continue; + + shdr.sh_offset = mem.alignForwardGeneric(u64, offset, shdr.sh_addralign); + log.debug("setting '{s}' non-alloc section's offsets from 0x{x} to 0x{x}", .{ + self.shstrtab.getAssumeExists(shdr.sh_name), + shdr.sh_offset, + shdr.sh_offset + shdr.sh_size, + }); + } +} + +fn allocateAtoms(self: *Elf) !void { + const slice = self.sections.slice(); + for (slice.items(.last_atom), 0..) 
|last_atom, i| { + var atom = last_atom orelse continue; + const shdr_ndx = @as(u16, @intCast(i)); + const shdr = slice.items(.shdr)[shdr_ndx]; + + // Find the first atom + while (atom.prev) |prev| { + atom = prev; + } + + log.debug("allocating atoms in '{s}' section", .{self.shstrtab.getAssumeExists(shdr.sh_name)}); + + var base_addr: u64 = shdr.sh_addr; + while (true) { + base_addr = mem.alignForwardGeneric(u64, base_addr, atom.alignment); + + const sym = atom.getSymbolPtr(self); + sym.st_value = base_addr; + sym.st_shndx = shdr_ndx; + sym.st_size = atom.size; + + log.debug(" atom '{s}' allocated from 0x{x} to 0x{x}", .{ + atom.getName(self), + base_addr, + base_addr + atom.size, + }); + + // Update each symbol contained within the TextBlock + for (atom.contained.items) |sym_at_off| { + const contained_sym = self.getSymbolPtr(.{ + .sym_index = sym_at_off.sym_index, + .file = atom.file, + }); + contained_sym.st_value = base_addr + sym_at_off.offset; + contained_sym.st_shndx = shdr_ndx; + } + + base_addr += atom.size; + + if (atom.next) |next| { + atom = next; + } else break; + } + } +} + +pub fn logAtom(self: *Elf, atom: *const Atom, comptime logger: anytype) void { + const sym = atom.getSymbol(self); + const sym_name = atom.getName(self); + logger.debug(" ATOM(%{d}, '{s}') @ {x} (sizeof({x}), alignof({x})) in object({?}) in sect({d})", .{ + atom.sym_index, + sym_name, + sym.st_value, + sym.st_size, + atom.alignment, + atom.file, + sym.st_shndx, + }); + + for (atom.contained.items) |sym_off| { + const inner_sym = self.getSymbol(.{ + .sym_index = sym_off.sym_index, + .file = atom.file, + }); + const inner_sym_name = self.getSymbolName(.{ + .sym_index = sym_off.sym_index, + .file = atom.file, + }); + logger.debug(" (%{d}, '{s}') @ {x} ({x})", .{ + sym_off.sym_index, + inner_sym_name, + inner_sym.st_value, + sym_off.offset, + }); + } +} + +fn logAtoms(self: *Elf) void { + const slice = self.sections.slice(); + for (slice.items(.last_atom), 0..) |last_atom, i| { + var atom = last_atom orelse continue; + const ndx = @as(u16, @intCast(i)); + const shdr = slice.items(.shdr)[ndx]; + + log.debug(">>> {s}", .{self.shstrtab.getAssumeExists(shdr.sh_name)}); + + while (atom.prev) |prev| { + atom = prev; + } + + while (true) { + self.logAtom(atom, log); + if (atom.next) |next| { + atom = next; + } else break; + } + } +} + +fn writeAtoms(self: *Elf) !void { + const slice = self.sections.slice(); + for (slice.items(.last_atom), 0..) 
|last_atom, i| { + var atom = last_atom orelse continue; + const shdr_ndx = @as(u16, @intCast(i)); + const shdr = slice.items(.shdr)[shdr_ndx]; + + // TODO zero prefill .bss and .tbss if have presence in file + if (shdr.sh_type == elf.SHT_NOBITS) continue; + + // Find the first atom + while (atom.prev) |prev| { + atom = prev; + } + + log.debug("writing atoms in '{s}' section", .{self.shstrtab.getAssumeExists(shdr.sh_name)}); + + var buffer = try self.base.allocator.alloc(u8, shdr.sh_size); + defer self.base.allocator.free(buffer); + mem.set(u8, buffer, 0); + + while (true) { + const sym = atom.getSymbol(self); + try atom.resolveRelocs(self); + const off = sym.st_value - shdr.sh_addr; + + log.debug(" writing atom '{s}' at offset 0x{x}", .{ atom.getName(self), shdr.sh_offset + off }); + + mem.copy(u8, buffer[off..][0..atom.size], atom.code.items); + + if (atom.next) |next| { + atom = next; + } else break; + } + + try self.base.file.pwriteAll(buffer, shdr.sh_offset); + } +} + +fn setEntryPoint(self: *Elf) !void { + if (self.options.output_mode != .exe) return; + const global = try self.getEntryPoint(); + const sym = self.getSymbol(global); + self.header.?.e_entry = sym.st_value; +} + +fn setStackSize(self: *Elf) !void { + const stack_size = self.options.stack_size orelse return; + const gnu_stack_phdr_index = self.gnu_stack_phdr_index orelse blk: { + const gnu_stack_phdr_index = @as(u16, @intCast(self.phdrs.items.len)); + try self.phdrs.append(self.base.allocator, .{ + .p_type = elf.PT_GNU_STACK, + .p_flags = elf.PF_R | elf.PF_W, + .p_offset = 0, + .p_vaddr = 0, + .p_paddr = 0, + .p_filesz = 0, + .p_memsz = 0, + .p_align = 0, + }); + self.gnu_stack_phdr_index = gnu_stack_phdr_index; + break :blk gnu_stack_phdr_index; + }; + const phdr = &self.phdrs.items[gnu_stack_phdr_index]; + phdr.p_memsz = stack_size; +} + +fn writeSymtab(self: *Elf) !void { + const offset: u64 = blk: { + const shdr = self.sections.items(.shdr)[self.symtab_sect_index.? - 1]; + break :blk shdr.sh_offset + shdr.sh_size; + }; + const shdr = &self.sections.items(.shdr)[self.symtab_sect_index.?]; + + var symtab = std.ArrayList(elf.Elf64_Sym).init(self.base.allocator); + defer symtab.deinit(); + try symtab.ensureUnusedCapacity(1); + symtab.appendAssumeCapacity(.{ + .st_name = 0, + .st_info = 0, + .st_other = 0, + .st_shndx = 0, + .st_value = 0, + .st_size = 0, + }); + + for (self.objects.items) |object| { + for (object.symtab.items, 0..) 
|sym, sym_id| { + if (sym.st_name == 0) continue; + const st_bind = sym.st_info >> 4; + const st_type = sym.st_info & 0xf; + if (st_bind != elf.STB_LOCAL) continue; + if (st_type == elf.STT_SECTION) continue; + if (st_type == elf.STT_NOTYPE) continue; + if (sym.st_other == @intFromEnum(elf.STV.INTERNAL)) continue; + if (sym.st_other == @intFromEnum(elf.STV.HIDDEN)) continue; + if (sym.st_other == STV_GC) continue; + + const sym_name = object.getSymbolName(@as(u32, @intCast(sym_id))); + var out_sym = sym; + out_sym.st_name = try self.strtab.insert(self.base.allocator, sym_name); + try symtab.append(out_sym); + } + } + + for (self.locals.items) |sym| { + const st_bind = sym.st_info >> 4; + if (st_bind != elf.STB_LOCAL) continue; + if (sym.st_other == @intFromEnum(elf.STV.INTERNAL)) continue; + if (sym.st_other == @intFromEnum(elf.STV.HIDDEN)) continue; + if (sym.st_other == STV_GC) continue; + try symtab.append(sym); + } + + // Denote start of globals + shdr.sh_info = @as(u32, @intCast(symtab.items.len)); + try symtab.ensureUnusedCapacity(self.globals.count()); + for (self.globals.values()) |global| { + var sym = self.getSymbol(global); + assert(sym.st_name > 0); + if (sym.st_other == STV_GC) continue; + // TODO refactor + if (sym.st_info >> 4 == elf.STB_LOCAL) continue; + const sym_name = self.getSymbolName(global); + sym.st_name = try self.strtab.insert(self.base.allocator, sym_name); + symtab.appendAssumeCapacity(sym); + } + + shdr.sh_offset = mem.alignForwardGeneric(u64, offset, @alignOf(elf.Elf64_Sym)); + shdr.sh_size = symtab.items.len * @sizeOf(elf.Elf64_Sym); + log.debug("writing '{s}' contents from 0x{x} to 0x{x}", .{ + self.shstrtab.getAssumeExists(shdr.sh_name), + shdr.sh_offset, + shdr.sh_offset + shdr.sh_size, + }); + try self.base.file.pwriteAll(mem.sliceAsBytes(symtab.items), shdr.sh_offset); +} + +fn writeStrtab(self: *Elf) !void { + const offset: u64 = blk: { + const shdr = self.sections.items(.shdr)[self.strtab_sect_index.? - 1]; + break :blk shdr.sh_offset + shdr.sh_size; + }; + const buffer = try self.strtab.toOwnedSlice(self.base.allocator); + defer self.base.allocator.free(buffer); + const shdr = &self.sections.items(.shdr)[self.strtab_sect_index.?]; + shdr.sh_offset = offset; + shdr.sh_size = buffer.len; + log.debug("writing '{s}' contents from 0x{x} to 0x{x}", .{ + self.shstrtab.getAssumeExists(shdr.sh_name), + shdr.sh_offset, + shdr.sh_offset + shdr.sh_size, + }); + try self.base.file.pwriteAll(buffer, shdr.sh_offset); +} + +fn writeShStrtab(self: *Elf) !void { + const offset: u64 = blk: { + const shdr = self.sections.items(.shdr)[self.shstrtab_sect_index.? 
- 1]; + break :blk shdr.sh_offset + shdr.sh_size; + }; + const buffer = try self.shstrtab.toOwnedSlice(self.base.allocator); + defer self.base.allocator.free(buffer); + const shdr = &self.sections.items(.shdr)[self.shstrtab_sect_index.?]; + shdr.sh_offset = offset; + shdr.sh_size = buffer.len; + log.debug("writing '.shstrtab' contents from 0x{x} to 0x{x}", .{ + shdr.sh_offset, + shdr.sh_offset + shdr.sh_size, + }); + try self.base.file.pwriteAll(buffer, shdr.sh_offset); +} + +fn writePhdrs(self: *Elf) !void { + const phdrs_size = self.phdrs.items.len * @sizeOf(elf.Elf64_Phdr); + log.debug("writing program headers from 0x{x} to 0x{x}", .{ + self.header.?.e_phoff, + self.header.?.e_phoff + phdrs_size, + }); + try self.base.file.pwriteAll(mem.sliceAsBytes(self.phdrs.items), self.header.?.e_phoff); +} + +fn writeShdrs(self: *Elf) !void { + self.sections.items(.shdr)[self.symtab_sect_index.?].sh_link = self.strtab_sect_index.?; + const offset: u64 = blk: { + const shdr = self.sections.items(.shdr)[self.sections.len - 1]; + break :blk shdr.sh_offset + shdr.sh_size; + }; + const shdrs_size = self.sections.items(.shdr).len * @sizeOf(elf.Elf64_Shdr); + const e_shoff = mem.alignForwardGeneric(u64, offset, @alignOf(elf.Elf64_Shdr)); + log.debug("writing section headers from 0x{x} to 0x{x}", .{ + e_shoff, + e_shoff + shdrs_size, + }); + try self.base.file.pwriteAll(mem.sliceAsBytes(self.sections.items(.shdr)), e_shoff); + self.header.?.e_shoff = e_shoff; +} + +fn writeHeader(self: *Elf) !void { + self.header.?.e_shstrndx = self.shstrtab_sect_index.?; + self.header.?.e_phnum = @as(u16, @intCast(self.phdrs.items.len)); + self.header.?.e_shnum = @as(u16, @intCast(self.sections.items(.shdr).len)); + log.debug("writing ELF header {} at 0x{x}", .{ self.header.?, 0 }); + try self.base.file.pwriteAll(mem.asBytes(&self.header.?), 0); +} + +pub fn getSectionByName(self: *Elf, name: []const u8) ?u16 { + for (self.sections.items(.shdr), 0..) |shdr, i| { + const this_name = self.shstrtab.getAssumeExists(shdr.sh_name); + if (mem.eql(u8, this_name, name)) return @as(u16, @intCast(i)); + } else return null; +} + +/// Returns pointer-to-symbol described by `sym_with_loc` descriptor. +pub fn getSymbolPtr(self: *Elf, sym_with_loc: SymbolWithLoc) *elf.Elf64_Sym { + if (sym_with_loc.file) |file| { + const object = &self.objects.items[file]; + return &object.symtab.items[sym_with_loc.sym_index]; + } else { + return &self.locals.items[sym_with_loc.sym_index]; + } +} + +/// Returns symbol described by `sym_with_loc` descriptor. +pub fn getSymbol(self: *Elf, sym_with_loc: SymbolWithLoc) elf.Elf64_Sym { + return self.getSymbolPtr(sym_with_loc).*; +} + +/// Returns name of the symbol described by `sym_with_loc` descriptor. +pub fn getSymbolName(self: *Elf, sym_with_loc: SymbolWithLoc) []const u8 { + if (sym_with_loc.file) |file| { + const object = self.objects.items[file]; + return object.getSymbolName(sym_with_loc.sym_index); + } else { + const sym = self.locals.items[sym_with_loc.sym_index]; + return self.strtab.getAssumeExists(sym.st_name); + } +} + +/// Returns atom if there is an atom referenced by the symbol described by `sym_with_loc` descriptor. +/// Returns null on failure. +pub fn getAtomForSymbol(self: *Elf, sym_with_loc: SymbolWithLoc) ?*Atom { + if (sym_with_loc.file) |file| { + const object = self.objects.items[file]; + return object.getAtomForSymbol(sym_with_loc.sym_index); + } else { + return self.atom_table.get(sym_with_loc.sym_index); + } +} + +/// Returns symbol localtion corresponding to the set entry point. 
+/// Asserts output mode is executable. +pub fn getEntryPoint(self: Elf) error{EntrypointNotFound}!SymbolWithLoc { + assert(self.options.output_mode == .exe); + const entry_name = self.options.entry orelse "_start"; + const global = self.globals.get(entry_name) orelse { + log.err("entrypoint '{s}' not found", .{entry_name}); + return error.EntrypointNotFound; + }; + return global; +} + +fn logSections(self: Elf) void { + log.debug("sections:", .{}); + for (self.sections.items(.shdr), 0..) |shdr, i| { + log.debug(" sect({d}): {s} @{x}, sizeof({x})", .{ + i, + self.shstrtab.getAssumeExists(shdr.sh_name), + shdr.sh_offset, + shdr.sh_size, + }); + } +} + +fn logSymtab(self: Elf) void { + for (self.objects.items) |object| { + log.debug("locals in {s}", .{object.name}); + for (object.symtab.items, 0..) |sym, i| { + // const st_type = sym.st_info & 0xf; + const st_bind = sym.st_info >> 4; + // if (st_bind != elf.STB_LOCAL or st_type != elf.STT_SECTION) continue; + if (st_bind != elf.STB_LOCAL) continue; + log.debug(" {d}: {s}: {}", .{ i, object.getSymbolName(@as(u32, @intCast(i))), sym }); + } + } + + log.debug("globals:", .{}); + for (self.globals.values()) |global| { + if (global.file) |file| { + const object = self.objects.items[file]; + const sym = object.symtab.items[global.sym_index]; + log.debug(" {d}: {s}: 0x{x}, {s}", .{ + global.sym_index, + object.getSymbolName(global.sym_index), + sym.st_value, + object.name, + }); + } else { + const sym = self.locals.items[global.sym_index]; + log.debug(" {d}: {s}: 0x{x}", .{ + global.sym_index, + self.strtab.getAssumeExists(sym.st_name), + sym.st_value, + }); + } + } +} diff --git a/src/archive/archive/zld/Elf/Archive.zig b/src/archive/archive/zld/Elf/Archive.zig new file mode 100644 index 000000000000..5d7c28221935 --- /dev/null +++ b/src/archive/archive/zld/Elf/Archive.zig @@ -0,0 +1,211 @@ +const Archive = @This(); + +const std = @import("std"); +const assert = std.debug.assert; +const elf = std.elf; +const fs = std.fs; +const log = std.log.scoped(.elf); +const mem = std.mem; + +const Allocator = mem.Allocator; +const Object = @import("Object.zig"); + +file: fs.File, +name: []const u8, + +/// Parsed table of contents. +/// Each symbol name points to a list of all definition +/// sites within the current static archive. +toc: std.StringArrayHashMapUnmanaged(std.ArrayListUnmanaged(u32)) = .{}, + +extnames_strtab: std.ArrayListUnmanaged(u8) = .{}, + +// Archive files start with the ARMAG identifying string. Then follows a +// `struct ar_hdr', and as many bytes of member file data as its `ar_size' +// member indicates, for each member file. +/// String that begins an archive file. +const ARMAG: *const [SARMAG:0]u8 = "!<arch>\n"; +/// Size of that string. +const SARMAG: u4 = 8; +/// String in ar_fmag at the end of each header. +const ARFMAG: *const [2:0]u8 = "`\n"; + +const SYM64NAME: *const [7:0]u8 = "/SYM64/"; + +const ar_hdr = extern struct { + /// Member file name, sometimes / terminated. + ar_name: [16]u8, + + /// File date, decimal seconds since Epoch. + ar_date: [12]u8, + + /// User ID, in ASCII format. + ar_uid: [6]u8, + + /// Group ID, in ASCII format. + ar_gid: [6]u8, + + /// File mode, in ASCII octal. + ar_mode: [8]u8, + + /// File size, in ASCII decimal. + ar_size: [10]u8, + + /// Always contains ARFMAG.
+ ar_fmag: [2]u8, + + fn date(self: ar_hdr) !u64 { + const value = getValue(&self.ar_date); + return std.fmt.parseInt(u64, value, 10); + } + + fn size(self: ar_hdr) !u32 { + const value = getValue(&self.ar_size); + return std.fmt.parseInt(u32, value, 10); + } + + fn getValue(raw: []const u8) []const u8 { + return mem.trimRight(u8, raw, &[_]u8{@as(u8, 0x20)}); + } + + fn read(reader: anytype) !ar_hdr { + const header = try reader.readStruct(ar_hdr); + if (!mem.eql(u8, &header.ar_fmag, ARFMAG)) { + log.debug("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, header.ar_fmag }); + return error.NotArchive; + } + return header; + } +}; + +pub fn deinit(self: *Archive, allocator: Allocator) void { + self.extnames_strtab.deinit(allocator); + for (self.toc.keys()) |*key| { + allocator.free(key.*); + } + for (self.toc.values()) |*value| { + value.deinit(allocator); + } + self.toc.deinit(allocator); + allocator.free(self.name); +} + +pub fn parse(self: *Archive, allocator: Allocator, reader: anytype) !void { + const magic = try reader.readBytesNoEof(SARMAG); + if (!mem.eql(u8, &magic, ARMAG)) { + log.debug("invalid magic: expected '{s}', found '{s}'", .{ ARMAG, magic }); + return error.NotArchive; + } + + { + // Parse lookup table + const hdr = try ar_hdr.read(reader); + const size = try hdr.size(); + const ar_name = ar_hdr.getValue(&hdr.ar_name); + + if (!mem.eql(u8, ar_name, "/")) { + log.err("expected symbol lookup table as first data section; instead found {s}", .{&hdr.ar_name}); + return error.NoSymbolLookupTableInArchive; + } + + var buffer = try allocator.alloc(u8, size); + defer allocator.free(buffer); + + try reader.readNoEof(buffer); + + var inner_stream = std.io.fixedBufferStream(buffer); + var inner_reader = inner_stream.reader(); + + const nsyms = try inner_reader.readIntBig(u32); + + var offsets = std.ArrayList(u32).init(allocator); + defer offsets.deinit(); + try offsets.ensureTotalCapacity(nsyms); + + var i: usize = 0; + while (i < nsyms) : (i += 1) { + const offset = try inner_reader.readIntBig(u32); + offsets.appendAssumeCapacity(offset); + } + + i = 0; + var pos: usize = try inner_stream.getPos(); + while (i < nsyms) : (i += 1) { + const sym_name = mem.sliceTo(@as([*:0]const u8, @ptrCast(buffer.ptr + pos)), 0); + const owned_name = try allocator.dupe(u8, sym_name); + const res = try self.toc.getOrPut(allocator, owned_name); + defer if (res.found_existing) allocator.free(owned_name); + + if (!res.found_existing) { + res.value_ptr.* = .{}; + } + + try res.value_ptr.append(allocator, offsets.items[i]); + pos += sym_name.len + 1; + } + } + + blk: { + // Try parsing extended names table + const hdr = try ar_hdr.read(reader); + const size = try hdr.size(); + const name = ar_hdr.getValue(&hdr.ar_name); + + if (!mem.eql(u8, name, "//")) { + break :blk; + } + + var buffer = try allocator.alloc(u8, size); + defer allocator.free(buffer); + + try reader.readNoEof(buffer); + try self.extnames_strtab.appendSlice(allocator, buffer); + } + + try reader.context.seekTo(0); +} + +fn getExtName(self: Archive, off: u32) []const u8 { + assert(off < self.extnames_strtab.items.len); + return mem.sliceTo(@as([*:'\n']const u8, @ptrCast(self.extnames_strtab.items.ptr + off)), 0); +} + +pub fn parseObject(self: Archive, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch, offset: u32) !Object { + const reader = self.file.reader(); + try reader.context.seekTo(offset); + + const hdr = try ar_hdr.read(reader); + const name = blk: { + const name = ar_hdr.getValue(&hdr.ar_name); + if (name[0] == 
'/') { + const off = try std.fmt.parseInt(u32, name[1..], 10); + break :blk self.getExtName(off); + } + break :blk name; + }; + const object_name = name[0 .. name.len - 1]; // to account for trailing '/' + + log.debug("extracting object '{s}' from archive '{s}'", .{ object_name, self.name }); + + const full_name = blk: { + var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; + const path = try std.os.realpath(self.name, &buffer); + break :blk try std.fmt.allocPrint(allocator, "{s}({s})", .{ path, object_name }); + }; + const object_size = try hdr.size(); + const data = try allocator.allocWithOptions(u8, object_size, @alignOf(u64), null); + const amt = try reader.readAll(data); + if (amt != object_size) { + return error.Io; + } + + var object = Object{ + .name = full_name, + .data = data, + }; + + try object.parse(allocator, cpu_arch); + + return object; +} diff --git a/src/archive/archive/zld/Elf/Atom.zig b/src/archive/archive/zld/Elf/Atom.zig new file mode 100644 index 000000000000..4e14bafed9b5 --- /dev/null +++ b/src/archive/archive/zld/Elf/Atom.zig @@ -0,0 +1,381 @@ +const Atom = @This(); + +const std = @import("std"); +const assert = std.debug.assert; +const elf = std.elf; +const log = std.log.scoped(.elf); +const math = std.math; +const mem = std.mem; + +const Allocator = mem.Allocator; +const Elf = @import("../Elf.zig"); + +/// Each decl always gets a local symbol with the fully qualified name. +/// The vaddr and size are found here directly. +/// The file offset is found by computing the vaddr offset from the section vaddr +/// the symbol references, and adding that to the file offset of the section. +/// If this field is 0, it means the codegen size = 0 and there is no symbol or +/// offset table entry. +sym_index: u32, + +/// null means global synthetic symbol table. +file: ?u32, + +/// List of symbols contained within this atom +contained: std.ArrayListUnmanaged(SymbolAtOffset) = .{}, + +/// Code (may be non-relocated) this atom represents +code: std.ArrayListUnmanaged(u8) = .{}, + +/// Size of this atom +/// TODO is this really needed given that size is a field of a symbol? +size: u32, + +/// Alignment of this atom. Unlike in MachO, minimum alignment is 1. +alignment: u32, + +/// List of relocations belonging to this atom. 
+relocs: std.ArrayListUnmanaged(elf.Elf64_Rela) = .{}, + +/// Points to the previous and next neighbours +next: ?*Atom, +prev: ?*Atom, + +pub const SymbolAtOffset = struct { + sym_index: u32, + offset: u64, + + pub fn format( + self: SymbolAtOffset, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "{{ {d}: .offset = {d} }}", .{ self.sym_index, self.offset }); + } +}; + +pub fn createEmpty(allocator: Allocator) !*Atom { + const self = try allocator.create(Atom); + self.* = .{ + .sym_index = 0, + .file = undefined, + .size = 0, + .alignment = 0, + .prev = null, + .next = null, + }; + return self; +} + +pub fn deinit(self: *Atom, allocator: Allocator) void { + self.relocs.deinit(allocator); + self.code.deinit(allocator); + self.contained.deinit(allocator); +} + +pub fn format(self: Atom, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "Atom {{ ", .{}); + try std.fmt.format(writer, " .sym_index = {d}, ", .{self.sym_index}); + try std.fmt.format(writer, " .file = {d}, ", .{self.file}); + try std.fmt.format(writer, " .contained = {any}, ", .{self.contained.items}); + try std.fmt.format(writer, " .code = {x}, ", .{std.fmt.fmtSliceHexLower(if (self.code.items.len > 64) + self.code.items[0..64] + else + self.code.items)}); + try std.fmt.format(writer, " .size = {d}, ", .{self.size}); + try std.fmt.format(writer, " .alignment = {d}, ", .{self.alignment}); + try std.fmt.format(writer, " .relocs = {any}, ", .{self.relocs.items}); + try std.fmt.format(writer, "}}", .{}); +} + +pub fn getSymbol(self: Atom, elf_file: *Elf) elf.Elf64_Sym { + return self.getSymbolPtr(elf_file).*; +} + +pub fn getSymbolPtr(self: Atom, elf_file: *Elf) *elf.Elf64_Sym { + return elf_file.getSymbolPtr(.{ + .sym_index = self.sym_index, + .file = self.file, + }); +} + +pub fn getSymbolWithLoc(self: Atom) Elf.SymbolWithLoc { + return .{ .sym_index = self.sym_index, .file = self.file }; +} + +pub fn getName(self: Atom, elf_file: *Elf) []const u8 { + return elf_file.getSymbolName(.{ + .sym_index = self.sym_index, + .file = self.file, + }); +} + +pub fn getTargetAtom(self: Atom, elf_file: *Elf, rel: elf.Elf64_Rela) ?*Atom { + const sym = self.getSymbol(elf_file); + const is_got_atom = if (elf_file.got_sect_index) |ndx| ndx == sym.st_shndx else false; + + const r_sym = rel.r_sym(); + const r_type = rel.r_type(); + + if (r_type == elf.R_X86_64_64 and is_got_atom) { + // Special handling as we have repurposed r_addend for out GOT atoms. + // Now, r_addend in those cases contains the index to the object file where + // the target symbol is defined. 
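+ // A negative r_addend selects the global synthetic symbol table (file == null); a non-negative value is the index of the defining object file.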
+ const file: ?u32 = if (rel.r_addend > -1) @as(u32, @intCast(rel.r_addend)) else null; + return elf_file.getAtomForSymbol(.{ + .sym_index = r_sym, + .file = file, + }); + } + + const tsym_name = elf_file.getSymbolName(.{ + .sym_index = r_sym, + .file = self.file, + }); + log.debug(" (getTargetAtom: %{d}: {s}, r_type={d})", .{ r_sym, tsym_name, r_type }); + + switch (r_type) { + elf.R_X86_64_REX_GOTPCRELX, elf.R_X86_64_GOTPCRELX, elf.R_X86_64_GOTPCREL => { + const global = elf_file.globals.get(tsym_name).?; + const got_atom = elf_file.got_entries_map.get(global).?; + return got_atom; + }, + else => { + const tsym = elf_file.getSymbol(.{ + .sym_index = r_sym, + .file = self.file, + }); + const tsym_st_bind = tsym.st_info >> 4; + const tsym_st_type = tsym.st_info & 0xf; + const is_section = tsym_st_type == elf.STT_SECTION; + const is_local = is_section or tsym_st_bind == elf.STB_LOCAL; + + if (!is_local) { + const global = elf_file.globals.get(tsym_name).?; + return elf_file.getAtomForSymbol(global); + } + + return elf_file.getAtomForSymbol(.{ + .sym_index = r_sym, + .file = self.file, + }); + }, + } +} + +fn getTargetAddress(self: Atom, r_sym: u32, elf_file: *Elf) u64 { + const tsym = elf_file.getSymbol(.{ + .sym_index = r_sym, + .file = self.file, + }); + const tsym_name = elf_file.getSymbolName(.{ + .sym_index = r_sym, + .file = self.file, + }); + const tsym_st_bind = tsym.st_info >> 4; + const tsym_st_type = tsym.st_info & 0xf; + const is_section = tsym_st_type == elf.STT_SECTION; + const is_local = is_section or tsym_st_bind == elf.STB_LOCAL; + log.debug(" (getTargetAddress: %{d}: {s}, local? {})", .{ r_sym, tsym_name, is_local }); + + if (!is_local) { + const global = elf_file.globals.get(tsym_name).?; + const sym = elf_file.getSymbol(global); + return sym.st_value; + } + + return tsym.st_value; +} + +pub fn resolveRelocs(self: *Atom, elf_file: *Elf) !void { + const sym = self.getSymbol(elf_file); + const sym_name = self.getName(elf_file); + log.debug("resolving relocs in atom '{s}'", .{sym_name}); + + const is_got_atom = if (elf_file.got_sect_index) |ndx| ndx == sym.st_shndx else false; + + for (self.relocs.items) |rel| { + const r_sym = rel.r_sym(); + const r_type = rel.r_type(); + + if (r_type == elf.R_X86_64_64 and is_got_atom) { + // Special handling as we have repurposed r_addend for out GOT atoms. + // Now, r_addend in those cases contains the index to the object file where + // the target symbol is defined. 
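+ // This atom is itself a GOT entry, so the target symbol's absolute address is written straight into the entry's 8-byte slot.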
+ const file: ?u32 = if (rel.r_addend > -1) @as(u32, @intCast(rel.r_addend)) else null; + const tsym = elf_file.getSymbol(.{ + .sym_index = r_sym, + .file = file, + }); + const target = tsym.st_value; + const tsym_name = elf_file.getSymbolName(.{ + .sym_index = r_sym, + .file = file, + }); + log.debug("R_X86_64_64: (GOT) {x}: [() => 0x{x}] ({s})", .{ rel.r_offset, target, tsym_name }); + mem.writeIntLittle(u64, self.code.items[rel.r_offset..][0..8], target); + continue; + } + + const tsym = elf_file.getSymbol(.{ + .sym_index = r_sym, + .file = self.file, + }); + const tsym_name = elf_file.getSymbolName(.{ + .sym_index = r_sym, + .file = self.file, + }); + const tsym_st_type = tsym.st_info & 0xf; + + switch (r_type) { + elf.R_X86_64_NONE => {}, + elf.R_X86_64_64 => { + const target = @as(i64, @intCast(self.getTargetAddress(r_sym, elf_file))) + rel.r_addend; + log.debug("R_X86_64_64: {x}: [() => 0x{x}] ({s})", .{ rel.r_offset, target, tsym_name }); + mem.writeIntLittle(i64, self.code.items[rel.r_offset..][0..8], target); + }, + elf.R_X86_64_PC32 => { + const source = @as(i64, @intCast(sym.st_value + rel.r_offset)); + const target = @as(i64, @intCast(self.getTargetAddress(r_sym, elf_file))); + const displacement = @as(i32, @intCast(target - source + rel.r_addend)); + log.debug("R_X86_64_PC32: {x}: [0x{x} => 0x{x}] ({s})", .{ + rel.r_offset, + source, + target, + tsym_name, + }); + mem.writeIntLittle(i32, self.code.items[rel.r_offset..][0..4], displacement); + }, + elf.R_X86_64_PLT32 => { + const source = @as(i64, @intCast(sym.st_value + rel.r_offset)); + const target = @as(i64, @intCast(self.getTargetAddress(r_sym, elf_file))); + const displacement = @as(i32, @intCast(target - source + rel.r_addend)); + log.debug("R_X86_64_PLT32: {x}: [0x{x} => 0x{x}] ({s})", .{ + rel.r_offset, + source, + target, + tsym_name, + }); + mem.writeIntLittle(i32, self.code.items[rel.r_offset..][0..4], displacement); + }, + elf.R_X86_64_32 => { + const target = self.getTargetAddress(r_sym, elf_file); + const scaled = math.cast(u32, @as(i64, @intCast(target)) + rel.r_addend) orelse { + log.err("R_X86_64_32: target value overflows 32bits", .{}); + log.err(" target value 0x{x}", .{@as(i64, @intCast(target)) + rel.r_addend}); + log.err(" target symbol {s}", .{tsym_name}); + return error.RelocationOverflow; + }; + log.debug("R_X86_64_32: {x}: [() => 0x{x}] ({s})", .{ rel.r_offset, scaled, tsym_name }); + mem.writeIntLittle(u32, self.code.items[rel.r_offset..][0..4], scaled); + }, + elf.R_X86_64_32S => { + const target = self.getTargetAddress(r_sym, elf_file); + const scaled = math.cast(i32, @as(i64, @intCast(target)) + rel.r_addend) orelse { + log.err("R_X86_64_32: target value overflows 32bits", .{}); + log.err(" target value 0x{x}", .{@as(i64, @intCast(target)) + rel.r_addend}); + log.err(" target symbol {s}", .{tsym_name}); + return error.RelocationOverflow; + }; + log.debug("R_X86_64_32S: {x}: [() => 0x{x}] ({s})", .{ rel.r_offset, scaled, tsym_name }); + mem.writeIntLittle(i32, self.code.items[rel.r_offset..][0..4], scaled); + }, + elf.R_X86_64_REX_GOTPCRELX, elf.R_X86_64_GOTPCREL => outer: { + const source = @as(i64, @intCast(sym.st_value + rel.r_offset)); + const global = elf_file.globals.get(tsym_name).?; + const got_atom = elf_file.got_entries_map.get(global) orelse { + log.debug("TODO R_X86_64_REX_GOTPCRELX unhandled: no GOT entry found", .{}); + log.debug("TODO R_X86_64_REX_GOTPCRELX: {x}: [0x{x} => 0x{x}] ({s})", .{ + rel.r_offset, + source, + tsym.st_value, + tsym_name, + }); + break :outer; + }; + const 
target: i64 = blk: { + if (got_atom.file) |file| { + const actual_object = elf_file.objects.items[file]; + const actual_tsym = actual_object.symtab.items[got_atom.sym_index]; + break :blk @as(i64, @intCast(actual_tsym.st_value)); + } + const actual_tsym = elf_file.locals.items[got_atom.sym_index]; + break :blk @as(i64, @intCast(actual_tsym.st_value)); + }; + log.debug("R_X86_64_REX_GOTPCRELX: {x}: [0x{x} => 0x{x}] ({s})", .{ + rel.r_offset, + source, + target, + tsym_name, + }); + const displacement = @as(i32, @intCast(target - source + rel.r_addend)); + mem.writeIntLittle(i32, self.code.items[rel.r_offset..][0..4], displacement); + }, + elf.R_X86_64_TPOFF32 => { + assert(tsym_st_type == elf.STT_TLS); + const source = sym.st_value + rel.r_offset; + const target = self.getTargetAddress(r_sym, elf_file); + const base_addr: u64 = base_addr: { + const index = if (elf_file.getSectionByName(".tbss")) |index| + index + else + elf_file.getSectionByName(".tdata").?; + const shdr = elf_file.sections.items(.shdr)[index]; + break :base_addr shdr.sh_addr + shdr.sh_size; + }; + const tls_offset = @as(u32, @truncate(@as(u64, @bitCast(-@as(i64, @intCast(base_addr - target)) + rel.r_addend)))); + log.debug("R_X86_64_TPOFF32: {x}: [0x{x} => 0x{x} (TLS)] ({s})", .{ + rel.r_offset, + source, + tls_offset, + tsym_name, + }); + mem.writeIntLittle(u32, self.code.items[rel.r_offset..][0..4], tls_offset); + }, + elf.R_X86_64_DTPOFF64 => { + const source = sym.st_value + rel.r_offset; + // TODO I believe here we should emit a dynamic relocation pointing + // at a GOT cell. + log.debug("TODO R_X86_64_DTPOFF64: {x}: [0x{x} => 0x{x}] ({s})", .{ + rel.r_offset, + source, + tsym.st_value, + tsym_name, + }); + }, + elf.R_X86_64_GOTTPOFF => { + const source = sym.st_value + rel.r_offset; + log.debug("TODO R_X86_64_GOTTPOFF: {x}: [0x{x} => 0x{x}] ({s})", .{ + rel.r_offset, + source, + tsym.st_value, + tsym_name, + }); + }, + elf.R_X86_64_TLSGD => { + const source = sym.st_value + rel.r_offset; + log.debug("TODO R_X86_64_TLSGD: {x}: [0x{x} => 0x{x}] ({s})", .{ + rel.r_offset, + source, + tsym.st_value, + tsym_name, + }); + }, + else => { + const source = sym.st_value + rel.r_offset; + log.debug("TODO {d}: {x}: [0x{x} => 0x{x}] ({s})", .{ + r_type, + rel.r_offset, + source, + tsym.st_value, + tsym_name, + }); + }, + } + } +} diff --git a/src/archive/archive/zld/Elf/Object.zig b/src/archive/archive/zld/Elf/Object.zig new file mode 100644 index 000000000000..fc35d5259c5e --- /dev/null +++ b/src/archive/archive/zld/Elf/Object.zig @@ -0,0 +1,494 @@ +const Object = @This(); + +const std = @import("std"); +const assert = std.debug.assert; +const elf = std.elf; +const fs = std.fs; +const log = std.log.scoped(.elf); +const math = std.math; +const mem = std.mem; + +const Allocator = mem.Allocator; +const Atom = @import("Atom.zig"); +const Elf = @import("../Elf.zig"); + +const dis_x86_64 = @import("dis_x86_64"); +const Disassembler = dis_x86_64.Disassembler; +const Instruction = dis_x86_64.Instruction; +const RegisterOrMemory = dis_x86_64.RegisterOrMemory; + +name: []const u8, +data: []align(@alignOf(u64)) const u8, +header: elf.Elf64_Ehdr = undefined, +symtab_index: ?u16 = null, + +symtab: std.ArrayListUnmanaged(elf.Elf64_Sym) = .{}, + +managed_atoms: std.ArrayListUnmanaged(*Atom) = .{}, +atom_table: std.AutoHashMapUnmanaged(u32, *Atom) = .{}, + +pub fn deinit(self: *Object, allocator: Allocator) void { + self.symtab.deinit(allocator); + for (self.managed_atoms.items) |atom| { + atom.deinit(allocator); + allocator.destroy(atom); + } 
+ self.managed_atoms.deinit(allocator); + self.atom_table.deinit(allocator); + + // ZAR MODIFICATION: + // We manage memory of file ourselves in zar - so + // freeing this here for that does not make much sense. + // allocator.free(self.name); + // allocator.free(self.data); +} + +pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) !void { + var stream = std.io.fixedBufferStream(self.data); + const reader = stream.reader(); + + self.header = try reader.readStruct(elf.Elf64_Ehdr); + + if (!mem.eql(u8, self.header.e_ident[0..4], "\x7fELF")) { + log.debug("Invalid ELF magic {s}, expected \x7fELF", .{self.header.e_ident[0..4]}); + return error.NotObject; + } + if (self.header.e_ident[elf.EI_VERSION] != 1) { + log.debug("Unknown ELF version {d}, expected 1", .{self.header.e_ident[elf.EI_VERSION]}); + return error.NotObject; + } + if (self.header.e_ident[elf.EI_DATA] != elf.ELFDATA2LSB) { + log.err("TODO big endian support", .{}); + return error.TODOBigEndianSupport; + } + if (self.header.e_ident[elf.EI_CLASS] != elf.ELFCLASS64) { + log.err("TODO 32bit support", .{}); + return error.TODOElf32bitSupport; + } + if (self.header.e_type != elf.ET.REL) { + log.debug("Invalid file type {any}, expected ET.REL", .{self.header.e_type}); + return error.NotObject; + } + // ZAR MODIFICATION: This check doesn't serve any purpose for the needs of + // zar. + _ = cpu_arch; + // if (self.header.e_machine != cpu_arch.toElfMachine()) { + // log.debug("Invalid architecture {any}, expected {any}", .{ + // self.header.e_machine, + // cpu_arch.toElfMachine(), + // }); + // return error.InvalidCpuArch; + // } + if (self.header.e_version != 1) { + log.debug("Invalid ELF version {d}, expected 1", .{self.header.e_version}); + return error.NotObject; + } + + assert(self.header.e_entry == 0); + assert(self.header.e_phoff == 0); + assert(self.header.e_phnum == 0); + + if (self.header.e_shnum == 0) return; + + for (self.getShdrs(), 0..) |shdr, i| switch (shdr.sh_type) { + elf.SHT_SYMTAB => { + self.symtab_index = @intCast(i); + const nsyms = @divExact(shdr.sh_size, @sizeOf(elf.Elf64_Sym)); + try self.symtab.appendSlice(allocator, @as( + [*]const elf.Elf64_Sym, + @ptrCast(@alignCast(&self.data[shdr.sh_offset])), + )[0..nsyms]); + }, + else => {}, + }; +} + +pub fn scanInputSections(self: *Object, elf_file: *Elf) !void { + for (self.getShdrs()) |shdr| switch (shdr.sh_type) { + elf.SHT_PROGBITS, elf.SHT_NOBITS => { + const shdr_name = self.getShString(shdr.sh_name); + if (shdr.sh_flags & elf.SHF_GROUP != 0) { + log.err("section '{s}' is part of a section group", .{shdr_name}); + return error.HandleSectionGroups; + } + + const tshdr_ndx = (try elf_file.getOutputSection(shdr, shdr_name)) orelse { + log.debug("unhandled section", .{}); + continue; + }; + const out_shdr = elf_file.sections.items(.shdr)[tshdr_ndx]; + log.debug("mapping '{s}' into output sect({d}, '{s}')", .{ + shdr_name, + tshdr_ndx, + elf_file.shstrtab.getAssumeExists(out_shdr.sh_name), + }); + }, + else => {}, + }; +} + +pub fn splitIntoAtoms(self: *Object, allocator: Allocator, object_id: u16, elf_file: *Elf) !void { + log.debug("parsing '{s}' into atoms", .{self.name}); + + var symbols_by_shndx = std.AutoHashMap(u16, std.ArrayList(u32)).init(allocator); + defer { + var it = symbols_by_shndx.valueIterator(); + while (it.next()) |value| { + value.deinit(); + } + symbols_by_shndx.deinit(); + } + + const shdrs = self.getShdrs(); + + var rel_shdrs = std.AutoHashMap(u16, u16).init(allocator); + defer rel_shdrs.deinit(); + + for (shdrs, 0..) 
|shdr, i| switch (shdr.sh_type) { + elf.SHT_REL, elf.SHT_RELA => { + try rel_shdrs.putNoClobber(@as(u16, @intCast(shdr.sh_info)), @as(u16, @intCast(i))); + }, + else => {}, + }; + + for (shdrs, 0..) |shdr, i| switch (shdr.sh_type) { + elf.SHT_PROGBITS, elf.SHT_NOBITS => { + try symbols_by_shndx.putNoClobber(@as(u16, @intCast(i)), std.ArrayList(u32).init(allocator)); + }, + else => {}, + }; + + for (self.getSourceSymtab(), 0..) |sym, sym_id| { + if (sym.st_shndx == elf.SHN_UNDEF) continue; + if (elf.SHN_LORESERVE <= sym.st_shndx and sym.st_shndx < elf.SHN_HIRESERVE) continue; + const map = symbols_by_shndx.getPtr(sym.st_shndx) orelse continue; + try map.append(@as(u32, @intCast(sym_id))); + } + + for (shdrs, 0..) |shdr, i| switch (shdr.sh_type) { + elf.SHT_PROGBITS, elf.SHT_NOBITS => { + const ndx = @as(u16, @intCast(i)); + const shdr_name = self.getShString(shdr.sh_name); + + log.debug(" parsing section '{s}'", .{shdr_name}); + + const tshdr_ndx = (try elf_file.getOutputSection(shdr, shdr_name)) orelse { + log.debug("unhandled section", .{}); + continue; + }; + + const syms = symbols_by_shndx.get(ndx).?; + + const atom = try Atom.createEmpty(allocator); + errdefer { + atom.deinit(allocator); + allocator.destroy(atom); + } + try self.managed_atoms.append(allocator, atom); + + atom.file = object_id; + atom.size = @as(u32, @intCast(shdr.sh_size)); + atom.alignment = @as(u32, @intCast(shdr.sh_addralign)); + + // TODO if --gc-sections and there is exactly one contained symbol, + // we can prune the main one. For example, in this situation we + // get something like this: + // + // .text.__udivti3 + // => __udivti3 + // + // which can be pruned to: + // + // __udivti3 + var sym_index: ?u32 = null; + + for (syms.items) |sym_id| { + const sym = self.getSourceSymbol(sym_id).?; + const is_sect_sym = sym.st_info & 0xf == elf.STT_SECTION; + if (is_sect_sym) { + const osym = self.getSymbolPtr(sym_id); + osym.* = .{ + .st_name = 0, + .st_info = (elf.STB_LOCAL << 4) | elf.STT_OBJECT, + .st_other = 0, + .st_shndx = 0, + .st_value = 0, + .st_size = sym.st_size, + }; + sym_index = sym_id; + continue; + } + try atom.contained.append(allocator, .{ + .sym_index = sym_id, + .offset = sym.st_value, + }); + try self.atom_table.putNoClobber(allocator, sym_id, atom); + } + + atom.sym_index = sym_index orelse blk: { + const index = @as(u32, @intCast(self.symtab.items.len)); + try self.symtab.append(allocator, .{ + .st_name = 0, + .st_info = (elf.STB_LOCAL << 4) | elf.STT_OBJECT, + .st_other = 0, + .st_shndx = 0, + .st_value = 0, + .st_size = atom.size, + }); + break :blk index; + }; + try self.atom_table.putNoClobber(allocator, atom.sym_index, atom); + + var code = if (shdr.sh_type == elf.SHT_NOBITS) blk: { + var code = try allocator.alloc(u8, atom.size); + mem.set(u8, code, 0); + break :blk code; + } else try allocator.dupe(u8, self.getShdrContents(ndx)); + defer allocator.free(code); + + if (rel_shdrs.get(ndx)) |rel_ndx| { + const rel_shdr = shdrs[rel_ndx]; + const raw_relocs = self.getShdrContents(rel_ndx); + + const nrelocs = @divExact(rel_shdr.sh_size, rel_shdr.sh_entsize); + try atom.relocs.ensureTotalCapacityPrecise(allocator, nrelocs); + + var count: usize = 0; + while (count < nrelocs) : (count += 1) { + const bytes = raw_relocs[count * rel_shdr.sh_entsize ..][0..rel_shdr.sh_entsize]; + var rel = blk: { + if (rel_shdr.sh_type == elf.SHT_REL) { + const rel = @as(*const elf.Elf64_Rel, @ptrCast(@alignCast(bytes))).*; + // TODO parse addend from the placeholder + // const addend = mem.readIntLittle(i32, 
code[rel.r_offset..][0..4]); + // break :blk .{ + // .r_offset = rel.r_offset, + // .r_info = rel.r_info, + // .r_addend = addend, + // }; + log.err("TODO need to parse addend embedded in the relocation placeholder for SHT_REL", .{}); + log.err(" for relocation {}", .{rel}); + return error.TODOParseAddendFromPlaceholder; + } + + break :blk @as(*const elf.Elf64_Rela, @ptrCast(@alignCast(bytes))).*; + }; + + // While traversing relocations, synthesize any missing atom. + // TODO synthesize PLT atoms, GOT atoms, etc. + const tsym_name = self.getSourceSymbolName(rel.r_sym()); + switch (rel.r_type()) { + elf.R_X86_64_REX_GOTPCRELX => blk: { + const global = elf_file.globals.get(tsym_name).?; + if (isDefinitionAvailable(elf_file, global)) opt: { + // Link-time constant, try to optimize it away. + var disassembler = Disassembler.init(code[rel.r_offset - 3 ..]); + const maybe_inst = disassembler.next() catch break :opt; + const inst = maybe_inst orelse break :opt; + + // TODO can we optimise anything that isn't an RM encoding? + if (inst.enc != .rm) break :opt; + const rm = inst.data.rm; + if (rm.reg_or_mem != .mem) break :opt; + if (rm.reg_or_mem.mem.base != .rip) break :opt; + const dst = rm.reg; + const src = rm.reg_or_mem; + + var stream = std.io.fixedBufferStream(code[rel.r_offset - 3 ..][0..7]); + const writer = stream.writer(); + + switch (inst.tag) { + .mov => { + // rewrite to LEA + const new_inst = Instruction{ + .tag = .lea, + .enc = .rm, + .data = Instruction.Data.rm(dst, src), + }; + try new_inst.encode(writer); + + const r_sym = rel.r_sym(); + rel.r_info = (@as(u64, @intCast(r_sym)) << 32) | elf.R_X86_64_PC32; + log.debug("rewriting R_X86_64_REX_GOTPCRELX -> R_X86_64_PC32: MOV -> LEA", .{}); + break :blk; + }, + .cmp => { + // rewrite to CMP MI encoding + const new_inst = Instruction{ + .tag = .cmp, + .enc = .mi, + .data = Instruction.Data.mi(RegisterOrMemory.reg(dst), 0x0), + }; + try new_inst.encode(writer); + + const r_sym = rel.r_sym(); + rel.r_info = (@as(u64, @intCast(r_sym)) << 32) | elf.R_X86_64_32; + rel.r_addend = 0; + log.debug("rewriting R_X86_64_REX_GOTPCRELX -> R_X86_64_32: CMP r64, r/m64 -> CMP r/m64, imm32", .{}); + + break :blk; + }, + else => {}, + } + } + + if (elf_file.got_entries_map.contains(global)) break :blk; + log.debug("R_X86_64_REX_GOTPCRELX: creating GOT atom: [() -> {s}]", .{ + tsym_name, + }); + const got_atom = try elf_file.createGotAtom(global); + try elf_file.got_entries_map.putNoClobber(allocator, global, got_atom); + }, + elf.R_X86_64_GOTPCREL => blk: { + const global = elf_file.globals.get(tsym_name).?; + if (elf_file.got_entries_map.contains(global)) break :blk; + log.debug("R_X86_64_GOTPCREL: creating GOT atom: [() -> {s}]", .{ + tsym_name, + }); + const got_atom = try elf_file.createGotAtom(global); + try elf_file.got_entries_map.putNoClobber(allocator, global, got_atom); + }, + elf.R_X86_64_GOTTPOFF => blk: { + const global = elf_file.globals.get(tsym_name).?; + if (isDefinitionAvailable(elf_file, global)) { + // Link-time constant, try to optimize it away. 
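+ // The rewrite below relaxes the TLS initial-exec access: the RIP-relative MOV that loads the TLS offset through the GOT becomes a MOV of an immediate, and the relocation is downgraded to R_X86_64_TPOFF32.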
+ var disassembler = Disassembler.init(code[rel.r_offset - 3 ..]); + const maybe_inst = disassembler.next() catch break :blk; + const inst = maybe_inst orelse break :blk; + + if (inst.enc != .rm) break :blk; + const rm = inst.data.rm; + if (rm.reg_or_mem != .mem) break :blk; + if (rm.reg_or_mem.mem.base != .rip) break :blk; + const dst = rm.reg; + + var stream = std.io.fixedBufferStream(code[rel.r_offset - 3 ..][0..7]); + const writer = stream.writer(); + + switch (inst.tag) { + .mov => { + // rewrite to MOV MI encoding + const new_inst = Instruction{ + .tag = .mov, + .enc = .mi, + .data = Instruction.Data.mi(RegisterOrMemory.reg(dst), 0x0), + }; + try new_inst.encode(writer); + + const r_sym = rel.r_sym(); + rel.r_info = (@as(u64, @intCast(r_sym)) << 32) | elf.R_X86_64_TPOFF32; + rel.r_addend = 0; + log.debug("rewriting R_X86_64_GOTTPOFF -> R_X86_64_TPOFF32: MOV r64, r/m64 -> MOV r/m64, imm32", .{}); + }, + else => {}, + } + } + }, + elf.R_X86_64_DTPOFF64 => { + const global = elf_file.globals.get(tsym_name).?; + if (isDefinitionAvailable(elf_file, global)) { + // rewrite into TPOFF32 + const r_sym = rel.r_sym(); + rel.r_info = (@as(u64, @intCast(r_sym)) << 32) | elf.R_X86_64_TPOFF32; + rel.r_addend = 0; + log.debug("rewriting R_X86_64_DTPOFF64 -> R_X86_64_TPOFF32", .{}); + } + }, + else => {}, + } + + atom.relocs.appendAssumeCapacity(rel); + } + } + + try atom.code.appendSlice(allocator, code); + try elf_file.addAtomToSection(atom, tshdr_ndx); + }, + else => {}, + }; +} + +pub inline fn getShdrs(self: Object) []const elf.Elf64_Shdr { + return @as( + [*]const elf.Elf64_Shdr, + @ptrCast(@alignCast(&self.data[self.header.e_shoff])), + )[0..self.header.e_shnum]; +} + +inline fn getShdrContents(self: Object, index: u16) []const u8 { + const shdr = self.getShdrs()[index]; + return self.data[shdr.sh_offset..][0..shdr.sh_size]; +} + +pub fn getSourceSymtab(self: Object) []const elf.Elf64_Sym { + const index = self.symtab_index orelse return &[0]elf.Elf64_Sym{}; + const shdr = self.getShdrs()[index]; + const nsyms = @divExact(shdr.sh_size, @sizeOf(elf.Elf64_Sym)); + return @as( + [*]const elf.Elf64_Sym, + @ptrCast(@alignCast(&self.data[shdr.sh_offset])), + )[0..nsyms]; +} + +pub fn getSourceStrtab(self: Object) []const u8 { + const index = self.symtab_index orelse return &[0]u8{}; + const shdr = self.getShdrs()[index]; + return self.getShdrContents(@as(u16, @intCast(shdr.sh_link))); +} + +pub fn getSourceShstrtab(self: Object) []const u8 { + return self.getShdrContents(self.header.e_shstrndx); +} + +pub fn getSourceSymbol(self: Object, index: u32) ?elf.Elf64_Sym { + const symtab = self.getSourceSymtab(); + if (index >= symtab.len) return null; + return symtab[index]; +} + +pub fn getSourceSymbolName(self: Object, index: u32) []const u8 { + const sym = self.getSourceSymtab()[index]; + if (sym.st_info & 0xf == elf.STT_SECTION) { + const shdr = self.getShdrs()[sym.st_shndx]; + return self.getShString(shdr.sh_name); + } else { + return self.getString(sym.st_name); + } +} + +pub fn getSymbolPtr(self: *Object, index: u32) *elf.Elf64_Sym { + return &self.symtab.items[index]; +} + +pub fn getSymbol(self: Object, index: u32) elf.Elf64_Sym { + return self.symtab.items[index]; +} + +pub fn getSymbolName(self: Object, index: u32) []const u8 { + const sym = self.getSymbol(index); + return self.getString(sym.st_name); +} + +pub fn getAtomForSymbol(self: Object, sym_index: u32) ?*Atom { + return self.atom_table.get(sym_index); +} + +pub fn getString(self: Object, off: u32) []const u8 { + const strtab = 
self.getSourceStrtab(); + assert(off < strtab.len); + return mem.sliceTo(@as([*:0]const u8, @ptrCast(strtab.ptr + off)), 0); +} + +pub fn getShString(self: Object, off: u32) []const u8 { + const shstrtab = self.getSourceShstrtab(); + assert(off < shstrtab.len); + return mem.sliceTo(@as([*:0]const u8, @ptrCast(shstrtab.ptr + off)), 0); +} + +fn isDefinitionAvailable(elf_file: *Elf, global: Elf.SymbolWithLoc) bool { + const sym = if (global.file) |file| sym: { + const object = elf_file.objects.items[file]; + break :sym object.symtab.items[global.sym_index]; + } else elf_file.locals.items[global.sym_index]; + return sym.st_info & 0xf != elf.STT_NOTYPE or sym.st_shndx != elf.SHN_UNDEF; +} diff --git a/src/archive/archive/zld/Elf/Options.zig b/src/archive/archive/zld/Elf/Options.zig new file mode 100644 index 000000000000..561b452e40c4 --- /dev/null +++ b/src/archive/archive/zld/Elf/Options.zig @@ -0,0 +1,143 @@ +const Options = @This(); + +const std = @import("std"); +const builtin = @import("builtin"); +const io = std.io; +const mem = std.mem; +const process = std.process; + +const Allocator = mem.Allocator; +const CrossTarget = std.zig.CrossTarget; +const Elf = @import("../Elf.zig"); +const Zld = @import("../Zld.zig"); + +const usage = + \\Usage: {s} [files...] + \\ + \\General Options: + \\--entry=[name], -e [name] Set name of the entry point symbol + \\--gc-sections Force removal of functions and data that are unreachable by the entry point or exported symbols + \\-l[name] Specify library to link against + \\-L[path] Specify library search dir + \\--rpath=[path], -R [path] Specify runtime path + \\--shared Create dynamic library + \\-o [path] Specify output path for the final artifact + \\-z [arg] Set linker extension flags + \\ stack-size=[value] Override default stack size + \\-h, --help Print this help and exit + \\--debug-log [scope] Turn on debugging logs for [scope] (requires zld compiled with -Dlog) + \\ + \\ld.zld: supported targets: elf64-x86-64 +; + +emit: Zld.Emit, +output_mode: Zld.OutputMode, +target: CrossTarget, +positionals: []const Zld.LinkObject, +libs: std.StringArrayHashMap(Zld.SystemLib), +lib_dirs: []const []const u8, +rpath_list: []const []const u8, +stack_size: ?u64 = null, +strip: bool = false, +entry: ?[]const u8 = null, +gc_sections: bool = false, + +pub fn parseArgs(arena: Allocator, ctx: Zld.MainCtx) !Options { + if (ctx.args.len == 0) { + ctx.printSuccess(usage, .{ctx.cmd}); + } + + var positionals = std.ArrayList(Zld.LinkObject).init(arena); + var libs = std.StringArrayHashMap(Zld.SystemLib).init(arena); + var lib_dirs = std.ArrayList([]const u8).init(arena); + var rpath_list = std.ArrayList([]const u8).init(arena); + var out_path: ?[]const u8 = null; + var stack_size: ?u64 = null; + var shared: bool = false; + var gc_sections: bool = false; + var entry: ?[]const u8 = null; + + const Iterator = struct { + args: []const []const u8, + i: usize = 0, + fn next(it: *@This()) ?[]const u8 { + if (it.i >= it.args.len) { + return null; + } + defer it.i += 1; + return it.args[it.i]; + } + }; + var args_iter = Iterator{ .args = ctx.args }; + + while (args_iter.next()) |arg| { + if (mem.eql(u8, arg, "--help") or mem.eql(u8, arg, "-h")) { + ctx.printSuccess(usage, .{ctx.cmd}); + } else if (mem.eql(u8, arg, "--debug-log")) { + const scope = args_iter.next() orelse ctx.printFailure("Expected log scope after {s}", .{arg}); + try ctx.log_scopes.append(scope); + } else if (mem.startsWith(u8, arg, "-l")) { + try libs.put(arg[2..], .{}); + } else if (mem.startsWith(u8, arg, 
"-L")) { + try lib_dirs.append(arg[2..]); + } else if (mem.eql(u8, arg, "-o")) { + out_path = args_iter.next() orelse + ctx.printFailure("Expected output path after {s}", .{arg}); + } else if (mem.eql(u8, arg, "-z")) { + const z_arg = args_iter.next() orelse + ctx.printFailure("Expected another argument after {s}", .{arg}); + if (mem.startsWith(u8, z_arg, "stack-size=")) { + stack_size = try std.fmt.parseInt(u64, z_arg["stack-size=".len..], 10); + } else { + std.log.warn("TODO unhandled argument '-z {s}'", .{z_arg}); + } + } else if (mem.startsWith(u8, arg, "-z")) { + std.log.warn("TODO unhandled argument '-z {s}'", .{arg["-z".len..]}); + } else if (mem.eql(u8, arg, "--gc-sections")) { + gc_sections = true; + } else if (mem.eql(u8, arg, "--as-needed")) { + std.log.warn("TODO unhandled argument '--as-needed'", .{}); + } else if (mem.eql(u8, arg, "--allow-shlib-undefined")) { + std.log.warn("TODO unhandled argument '--allow-shlib-undefined'", .{}); + } else if (mem.startsWith(u8, arg, "-O")) { + std.log.warn("TODO unhandled argument '-O{s}'", .{arg["-O".len..]}); + } else if (mem.eql(u8, arg, "--shared")) { + shared = true; + } else if (mem.startsWith(u8, arg, "--rpath=")) { + try rpath_list.append(arg["--rpath=".len..]); + } else if (mem.eql(u8, arg, "-R")) { + const rpath = args_iter.next() orelse + ctx.printFailure("Expected path after {s}", .{arg}); + try rpath_list.append(rpath); + } else if (mem.startsWith(u8, arg, "--entry=")) { + entry = arg["--entry=".len..]; + } else if (mem.eql(u8, arg, "-e")) { + entry = args_iter.next() orelse ctx.printFailure("Expected name after {s}", .{arg}); + } else { + try positionals.append(.{ + .path = arg, + .must_link = true, + }); + } + } + + if (positionals.items.len == 0) { + ctx.printFailure("Expected at least one input .o file", .{}); + } + + return Options{ + .emit = .{ + .directory = std.fs.cwd(), + .sub_path = out_path orelse "a.out", + }, + .target = CrossTarget.fromTarget(builtin.target), + .output_mode = if (shared) .lib else .exe, + .positionals = positionals.items, + .libs = libs, + .lib_dirs = lib_dirs.items, + .rpath_list = rpath_list.items, + .stack_size = stack_size, + .gc_sections = gc_sections, + .entry = entry, + }; +} diff --git a/src/archive/archive/zld/Elf/gc.zig b/src/archive/archive/zld/Elf/gc.zig new file mode 100644 index 000000000000..1ca75071e3fc --- /dev/null +++ b/src/archive/archive/zld/Elf/gc.zig @@ -0,0 +1,204 @@ +const std = @import("std"); +const assert = std.debug.assert; +const elf = std.elf; +const log = std.log.scoped(.gc); +const mem = std.mem; + +const Allocator = mem.Allocator; +const Atom = @import("Atom.zig"); +const Elf = @import("../Elf.zig"); + +pub fn gcAtoms(elf_file: *Elf) !void { + const gpa = elf_file.base.allocator; + var arena_allocator = std.heap.ArenaAllocator.init(gpa); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + var roots = std.AutoHashMap(*Atom, void).init(arena); + try collectRoots(&roots, elf_file); + + var alive = std.AutoHashMap(*Atom, void).init(arena); + try mark(roots, &alive, elf_file); + + try prune(arena, alive, elf_file); +} + +fn removeAtomFromSection(atom: *Atom, match: u16, elf_file: *Elf) void { + var section = elf_file.sections.get(match); + + // If we want to enable GC for incremental codepath, we need to take into + // account any padding that might have been left here. 
+ section.shdr.sh_size -= atom.size; + + if (atom.prev) |prev| { + prev.next = atom.next; + } + if (atom.next) |next| { + next.prev = atom.prev; + } else { + if (atom.prev) |prev| { + section.last_atom = prev; + } else { + // The section will be GCed in the next step. + section.last_atom = null; + section.shdr.sh_size = 0; + } + } + + elf_file.sections.set(match, section); +} + +fn collectRoots(roots: *std.AutoHashMap(*Atom, void), elf_file: *Elf) !void { + const output_mode = elf_file.options.output_mode; + + switch (output_mode) { + .exe => { + for (&[_][]const u8{ "_init", "_fini" }) |sym_name| { + const global = elf_file.globals.get(sym_name) orelse continue; + const atom = elf_file.getAtomForSymbol(global).?; + _ = try roots.getOrPut(atom); + } + const global = try elf_file.getEntryPoint(); + const atom = elf_file.getAtomForSymbol(global).?; + _ = try roots.getOrPut(atom); + }, + else => |other| { + assert(other == .lib); + for (elf_file.globals.values()) |global| { + const sym = elf_file.getSymbol(global); + if (sym.st_shndx == elf.SHN_UNDEF) continue; + const atom = elf_file.getAtomForSymbol(global).?; + _ = try roots.getOrPut(atom); + } + }, + } + + for (elf_file.objects.items) |object| { + const shdrs = object.getShdrs(); + + for (object.managed_atoms.items) |atom| { + const sym = object.getSourceSymbol(atom.sym_index) orelse continue; + const shdr = shdrs[sym.st_shndx]; + const sh_name = object.getShString(shdr.sh_name); + const is_gc_root = blk: { + if (shdr.sh_type == elf.SHT_PREINIT_ARRAY) break :blk true; + if (shdr.sh_type == elf.SHT_INIT_ARRAY) break :blk true; + if (shdr.sh_type == elf.SHT_FINI_ARRAY) break :blk true; + if (mem.startsWith(u8, ".ctors", sh_name)) break :blk true; + if (mem.startsWith(u8, ".dtors", sh_name)) break :blk true; + if (mem.startsWith(u8, ".init", sh_name)) break :blk true; + if (mem.startsWith(u8, ".fini", sh_name)) break :blk true; + if (mem.startsWith(u8, ".jcr", sh_name)) break :blk true; + if (mem.indexOf(u8, sh_name, "KEEP") != null) break :blk true; + break :blk false; + }; + if (is_gc_root) { + _ = try roots.getOrPut(atom); + } + } + } +} + +fn markLive(atom: *Atom, alive: *std.AutoHashMap(*Atom, void), elf_file: *Elf) anyerror!void { + const gop = try alive.getOrPut(atom); + if (gop.found_existing) return; + + log.debug("marking live", .{}); + elf_file.logAtom(atom, log); + + for (atom.relocs.items) |rel| { + const target_atom = atom.getTargetAtom(elf_file, rel) orelse continue; + try markLive(target_atom, alive, elf_file); + } +} + +fn mark(roots: std.AutoHashMap(*Atom, void), alive: *std.AutoHashMap(*Atom, void), elf_file: *Elf) !void { + try alive.ensureUnusedCapacity(roots.count()); + + var it = roots.keyIterator(); + while (it.next()) |root| { + try markLive(root.*, alive, elf_file); + } +} + +fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), elf_file: *Elf) !void { + // Any section that ends up here will be updated, that is, + // its size and alignment recalculated. 
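+ // Dead atoms are marked with STV_GC and detached from their sections; the affected section indices are collected in gc_sections and re-laid-out at the end.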
+ var gc_sections = std.AutoHashMap(u16, void).init(arena); + + for (elf_file.objects.items) |object| { + for (object.managed_atoms.items) |atom| { + if (alive.contains(atom)) continue; + + const global = atom.getSymbolWithLoc(); + const sym = atom.getSymbolPtr(elf_file); + const tshdr = elf_file.sections.items(.shdr)[sym.st_shndx]; + const tshdr_name = elf_file.shstrtab.getAssumeExists(tshdr.sh_name); + + if (sym.st_other == Elf.STV_GC) continue; + if (mem.startsWith(u8, tshdr_name, ".debug")) continue; + if (mem.startsWith(u8, tshdr_name, ".comment")) continue; + + log.debug("pruning:", .{}); + elf_file.logAtom(atom, log); + sym.st_other = Elf.STV_GC; + removeAtomFromSection(atom, sym.st_shndx, elf_file); + _ = try gc_sections.put(sym.st_shndx, {}); + + for (atom.contained.items) |sym_off| { + const inner = elf_file.getSymbolPtr(.{ + .sym_index = sym_off.sym_index, + .file = atom.file, + }); + inner.st_other = Elf.STV_GC; + } + + if (elf_file.got_entries_map.contains(global)) { + const got_atom = elf_file.got_entries_map.get(global).?; + const got_sym = got_atom.getSymbolPtr(elf_file); + got_sym.st_other = Elf.STV_GC; + } + } + + for (elf_file.got_entries_map.keys()) |sym_loc| { + const sym = elf_file.getSymbol(sym_loc); + if (sym.st_other != Elf.STV_GC) continue; + + // TODO tombstone + const atom = elf_file.got_entries_map.get(sym_loc).?; + removeAtomFromSection(atom, sym.st_shndx, elf_file); + _ = try gc_sections.put(sym.st_shndx, {}); + } + } + + var gc_sections_it = gc_sections.iterator(); + while (gc_sections_it.next()) |entry| { + const match = entry.key_ptr.*; + var section = elf_file.sections.get(match); + if (section.shdr.sh_size == 0) continue; // Pruning happens automatically in next step. + + section.shdr.sh_addralign = 0; + section.shdr.sh_size = 0; + + var atom = section.last_atom.?; + + while (atom.prev) |prev| { + atom = prev; + } + + while (true) { + const aligned_end_addr = mem.alignForwardGeneric(u64, section.shdr.sh_size, atom.alignment); + const padding = aligned_end_addr - section.shdr.sh_size; + section.shdr.sh_size += padding + atom.size; + section.shdr.sh_addralign = @max(section.shdr.sh_addralign, atom.alignment); + + if (atom.next) |next| { + atom = next; + } else break; + } + + elf_file.sections.set(match, section); + } + + // TODO we might want to prune empty sections next +} diff --git a/src/archive/archive/zld/MachO.zig b/src/archive/archive/zld/MachO.zig new file mode 100644 index 000000000000..38ffb0eba1bc --- /dev/null +++ b/src/archive/archive/zld/MachO.zig @@ -0,0 +1,4081 @@ +const MachO = @This(); + +const std = @import("std"); +const build_options = @import("build_options"); +const builtin = @import("builtin"); +const assert = std.debug.assert; +const dwarf = std.dwarf; +const fmt = std.fmt; +const fs = std.fs; +const log = std.log.scoped(.macho); +const macho = std.macho; +const math = std.math; +const mem = std.mem; +const meta = std.meta; + +const aarch64 = @import("aarch64.zig"); +const dead_strip = @import("MachO/dead_strip.zig"); +const eh_frame = @import("MachO/eh_frame.zig"); +const fat = @import("MachO/fat.zig"); +const load_commands = @import("MachO/load_commands.zig"); +const thunks = @import("MachO/thunks.zig"); +const trace = @import("tracy.zig").trace; + +const Allocator = mem.Allocator; +const ArenaAllocator = std.heap.ArenaAllocator; +const Archive = @import("MachO/Archive.zig"); +const Atom = @import("MachO/Atom.zig"); +const CodeSignature = @import("MachO/CodeSignature.zig"); +const Dylib = @import("MachO/Dylib.zig"); +const 
DwarfInfo = @import("MachO/DwarfInfo.zig"); +const Md5 = std.crypto.hash.Md5; +pub const Object = @import("MachO/Object.zig"); +pub const Options = @import("MachO/Options.zig"); +const LibStub = @import("tapi.zig").LibStub; +const StringTable = @import("strtab.zig").StringTable; +const ThreadPool = @import("ThreadPool.zig"); +const Trie = @import("MachO/Trie.zig"); +const UnwindInfo = @import("MachO/UnwindInfo.zig"); +const Zld = @import("Zld.zig"); + +const Bind = @import("MachO/dyld_info/bind.zig").Bind(*const MachO, MachO.SymbolWithLoc); +const LazyBind = @import("MachO/dyld_info/bind.zig").LazyBind(*const MachO, MachO.SymbolWithLoc); +const Rebase = @import("MachO/dyld_info/Rebase.zig"); + +pub const base_tag = Zld.Tag.macho; + +const Section = struct { + header: macho.section_64, + segment_index: u8, + first_atom_index: AtomIndex, + last_atom_index: AtomIndex, +}; + +base: Zld, +options: Options, + +dyld_info_cmd: macho.dyld_info_command = .{}, +symtab_cmd: macho.symtab_command = .{}, +dysymtab_cmd: macho.dysymtab_command = .{}, +function_starts_cmd: macho.linkedit_data_command = .{ .cmd = .FUNCTION_STARTS }, +data_in_code_cmd: macho.linkedit_data_command = .{ .cmd = .DATA_IN_CODE }, +uuid_cmd: macho.uuid_command = .{ + .uuid = [_]u8{0} ** 16, +}, +codesig_cmd: macho.linkedit_data_command = .{ .cmd = .CODE_SIGNATURE }, + +/// Page size is dependent on the target cpu architecture. +/// For x86_64 that's 4KB, whereas for aarch64, that's 16KB. +page_size: u16, + +objects: std.ArrayListUnmanaged(Object) = .{}, +archives: std.ArrayListUnmanaged(Archive) = .{}, +dylibs: std.ArrayListUnmanaged(Dylib) = .{}, +dylibs_map: std.StringHashMapUnmanaged(u16) = .{}, +referenced_dylibs: std.AutoArrayHashMapUnmanaged(u16, void) = .{}, + +segments: std.ArrayListUnmanaged(macho.segment_command_64) = .{}, +sections: std.MultiArrayList(Section) = .{}, + +locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, +globals: std.ArrayListUnmanaged(SymbolWithLoc) = .{}, + +entry_index: ?u32 = null, +mh_execute_header_index: ?u32 = null, +dso_handle_index: ?u32 = null, +dyld_stub_binder_index: ?u32 = null, +dyld_private_sym_index: ?u32 = null, +stub_helper_preamble_sym_index: ?u32 = null, + +strtab: StringTable(.strtab) = .{}, + +tlv_ptr_entries: std.ArrayListUnmanaged(IndirectPointer) = .{}, +tlv_ptr_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, + +got_entries: std.ArrayListUnmanaged(IndirectPointer) = .{}, +got_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, + +stubs: std.ArrayListUnmanaged(IndirectPointer) = .{}, +stubs_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, + +thunk_table: std.AutoHashMapUnmanaged(AtomIndex, thunks.ThunkIndex) = .{}, +thunks: std.ArrayListUnmanaged(thunks.Thunk) = .{}, + +atoms: std.ArrayListUnmanaged(Atom) = .{}, + +pub const AtomIndex = u32; + +pub const IndirectPointer = struct { + target: SymbolWithLoc, + atom_index: AtomIndex, + + pub fn getTargetSymbol(self: @This(), macho_file: *MachO) macho.nlist_64 { + return macho_file.getSymbol(self.target); + } + + pub fn getTargetSymbolName(self: @This(), macho_file: *MachO) []const u8 { + return macho_file.getSymbolName(self.target); + } + + pub fn getAtomSymbol(self: @This(), macho_file: *MachO) macho.nlist_64 { + const atom = macho_file.getAtom(self.atom_index); + return macho_file.getSymbol(atom.getSymbolWithLoc()); + } +}; + +pub const SymbolWithLoc = extern struct { + // Index into the respective symbol table. + sym_index: u32, + + // 0 means it's a synthetic global. 
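+ // A non-zero value is the object file index offset by one; getFile() below performs the translation.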
+ file: u32 = 0, + + pub fn getFile(self: SymbolWithLoc) ?u32 { + if (self.file == 0) return null; + return self.file - 1; + } + + pub fn eql(self: SymbolWithLoc, other: SymbolWithLoc) bool { + return self.file == other.file and self.sym_index == other.sym_index; + } +}; + +const SymbolResolver = struct { + arena: Allocator, + table: std.StringHashMap(u32), + unresolved: std.AutoArrayHashMap(u32, void), +}; + +/// Default path to dyld +const default_dyld_path: [*:0]const u8 = "/usr/lib/dyld"; + +/// Default virtual memory offset corresponds to the size of __PAGEZERO segment and +/// start of __TEXT segment. +const default_pagezero_vmsize: u64 = 0x100000000; + +/// We commit 0x1000 = 4096 bytes of space to the header and +/// the table of load commands. This should be plenty for any +/// potential future extensions. +const default_headerpad_size: u32 = 0x1000; + +pub const N_DEAD: u16 = @as(u16, @bitCast(@as(i16, -1))); + +pub fn openPath(allocator: Allocator, options: Options, thread_pool: *ThreadPool) !*MachO { + const file = try options.emit.directory.createFile(options.emit.sub_path, .{ + .truncate = true, + .read = true, + .mode = if (builtin.os.tag == .windows) 0 else 0o777, + }); + errdefer file.close(); + + const self = try createEmpty(allocator, options, thread_pool); + errdefer self.base.destroy(); + + self.base.file = file; + + return self; +} + +fn createEmpty(gpa: Allocator, options: Options, thread_pool: *ThreadPool) !*MachO { + const self = try gpa.create(MachO); + const cpu_arch = options.target.cpu_arch.?; + const page_size: u16 = if (cpu_arch == .aarch64) 0x4000 else 0x1000; + + self.* = .{ + .base = .{ + .tag = .macho, + .allocator = gpa, + .file = undefined, + .thread_pool = thread_pool, + }, + .options = options, + .page_size = page_size, + }; + + return self; +} + +pub fn flush(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = self.base.allocator; + var arena_allocator = ArenaAllocator.init(gpa); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + const syslibroot = self.options.syslibroot; + const cpu_arch = self.options.target.cpu_arch.?; + const os_tag = self.options.target.os_tag.?; + const abi = self.options.target.abi.?; + + try self.atoms.append(gpa, Atom.empty); // AtomIndex at 0 is reserved as null atom + try self.strtab.buffer.append(gpa, 0); + + var lib_not_found = false; + var framework_not_found = false; + + // Positional arguments to the linker such as object files and static archives. + var positionals = std.ArrayList([]const u8).init(arena); + try positionals.ensureUnusedCapacity(self.options.positionals.len); + + var must_link_archives = std.StringArrayHashMap(void).init(arena); + try must_link_archives.ensureUnusedCapacity(self.options.positionals.len); + + for (self.options.positionals) |obj| { + if (must_link_archives.contains(obj.path)) continue; + if (obj.must_link) { + _ = must_link_archives.getOrPutAssumeCapacity(obj.path); + } else { + positionals.appendAssumeCapacity(obj.path); + } + } + + // Shared and static libraries passed via `-l` flag. + var lib_dirs = std.ArrayList([]const u8).init(arena); + for (self.options.lib_dirs) |dir| { + if (try resolveSearchDir(arena, dir, syslibroot)) |search_dir| { + try lib_dirs.append(search_dir); + } else { + log.warn("directory not found for '-L{s}'", .{dir}); + } + } + + var libs = std.StringArrayHashMap(Zld.SystemLib).init(arena); + + // Assume ld64 default -search_paths_first if no strategy specified. 
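+ // paths_first: within each search directory prefer a .tbd/.dylib over a .a archive; dylibs_first: scan every directory for a dylib before falling back to archives.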
+ const search_strategy = self.options.search_strategy orelse .paths_first; + outer: for (self.options.libs.keys()) |lib_name| { + switch (search_strategy) { + .paths_first => { + // Look in each directory for a dylib (stub first), and then for archive + for (lib_dirs.items) |dir| { + for (&[_][]const u8{ ".tbd", ".dylib", ".a" }) |ext| { + if (try resolveLib(arena, dir, lib_name, ext)) |full_path| { + try libs.put(full_path, self.options.libs.get(lib_name).?); + continue :outer; + } + } + } else { + log.warn("library not found for '-l{s}'", .{lib_name}); + lib_not_found = true; + } + }, + .dylibs_first => { + // First, look for a dylib in each search dir + for (lib_dirs.items) |dir| { + for (&[_][]const u8{ ".tbd", ".dylib" }) |ext| { + if (try resolveLib(arena, dir, lib_name, ext)) |full_path| { + try libs.put(full_path, self.options.libs.get(lib_name).?); + continue :outer; + } + } + } else for (lib_dirs.items) |dir| { + if (try resolveLib(arena, dir, lib_name, ".a")) |full_path| { + try libs.put(full_path, self.options.libs.get(lib_name).?); + } else { + log.warn("library not found for '-l{s}'", .{lib_name}); + lib_not_found = true; + } + } + }, + } + } + + if (lib_not_found) { + log.warn("Library search paths:", .{}); + for (lib_dirs.items) |dir| { + log.warn(" {s}", .{dir}); + } + } + + // frameworks + var framework_dirs = std.ArrayList([]const u8).init(arena); + for (self.options.framework_dirs) |dir| { + if (try resolveSearchDir(arena, dir, syslibroot)) |search_dir| { + try framework_dirs.append(search_dir); + } else { + log.warn("directory not found for '-F{s}'", .{dir}); + } + } + + outer: for (self.options.frameworks.keys()) |f_name| { + for (framework_dirs.items) |dir| { + for (&[_][]const u8{ ".tbd", ".dylib", "" }) |ext| { + if (try resolveFramework(arena, dir, f_name, ext)) |full_path| { + const info = self.options.frameworks.get(f_name).?; + try libs.put(full_path, .{ + .needed = info.needed, + .weak = info.weak, + }); + continue :outer; + } + } + } else { + log.warn("framework not found for '-framework {s}'", .{f_name}); + framework_not_found = true; + } + } + + if (framework_not_found) { + log.warn("Framework search paths:", .{}); + for (framework_dirs.items) |dir| { + log.warn(" {s}", .{dir}); + } + } + + var dependent_libs = std.fifo.LinearFifo(struct { + id: Dylib.Id, + parent: u16, + }, .Dynamic).init(arena); + + try self.parsePositionals(positionals.items, syslibroot, &dependent_libs); + try self.parseAndForceLoadStaticArchives(must_link_archives.keys()); + try self.parseLibs(libs.keys(), libs.values(), syslibroot, &dependent_libs); + try self.parseDependentLibs(syslibroot, &dependent_libs); + + var resolver = SymbolResolver{ + .arena = arena, + .table = std.StringHashMap(u32).init(arena), + .unresolved = std.AutoArrayHashMap(u32, void).init(arena), + }; + try self.resolveSymbols(&resolver); + + if (resolver.unresolved.count() > 0) { + return error.UndefinedSymbolReference; + } + if (lib_not_found) { + return error.LibraryNotFound; + } + if (framework_not_found) { + return error.FrameworkNotFound; + } + + if (self.options.output_mode == .exe) { + const entry_name = self.options.entry orelse "_main"; + const global_index = resolver.table.get(entry_name) orelse { + log.err("entrypoint '{s}' not found", .{entry_name}); + return error.MissingMainEntrypoint; + }; + self.entry_index = global_index; + } + + try self.splitIntoAtoms(); + + if (self.options.dead_strip) { + try dead_strip.gcAtoms(self); + } + + try self.createDyldPrivateAtom(); + try 
self.createTentativeDefAtoms(); + try self.createStubHelperPreambleAtom(); + + for (self.objects.items) |object| { + for (object.atoms.items) |atom_index| { + const atom = self.getAtom(atom_index); + const sym = self.getSymbol(atom.getSymbolWithLoc()); + const header = self.sections.items(.header)[sym.n_sect - 1]; + if (header.isZerofill()) continue; + + const relocs = Atom.getAtomRelocs(self, atom_index); + try Atom.scanAtomRelocs(self, atom_index, relocs); + } + } + + try eh_frame.scanRelocs(self); + try UnwindInfo.scanRelocs(self); + + try self.createDyldStubBinderGotAtom(); + + try self.calcSectionSizes(); + + var unwind_info = UnwindInfo{ .gpa = self.base.allocator }; + defer unwind_info.deinit(); + try unwind_info.collect(self); + + try eh_frame.calcSectionSize(self, &unwind_info); + try unwind_info.calcSectionSize(self); + + try self.pruneAndSortSections(); + try self.createSegments(); + try self.allocateSegments(); + + try self.allocateSpecialSymbols(); + + if (build_options.enable_logging) { + self.logSymtab(); + self.logSegments(); + self.logSections(); + self.logAtoms(); + } + + try self.writeAtoms(); + try eh_frame.write(self, &unwind_info); + try unwind_info.write(self); + try self.writeLinkeditSegmentData(); + + // If the last section of __DATA segment is zerofill section, we need to ensure + // that the free space between the end of the last non-zerofill section of __DATA + // segment and the beginning of __LINKEDIT segment is zerofilled as the loader will + // copy-paste this space into memory for quicker zerofill operation. + if (self.getSegmentByName("__DATA")) |data_seg_id| blk: { + var physical_zerofill_start: ?u64 = null; + const section_indexes = self.getSectionIndexes(data_seg_id); + for (self.sections.items(.header)[section_indexes.start..section_indexes.end]) |header| { + if (header.isZerofill() and header.size > 0) break; + physical_zerofill_start = header.offset + header.size; + } else break :blk; + const start = physical_zerofill_start orelse break :blk; + const linkedit = self.getLinkeditSegmentPtr(); + const size = linkedit.fileoff - start; + if (size > 0) { + log.debug("zeroing out zerofill area of length {x} at {x}", .{ size, start }); + var padding = try self.base.allocator.alloc(u8, size); + defer self.base.allocator.free(padding); + mem.set(u8, padding, 0); + try self.base.file.pwriteAll(padding, start); + } + } + + const requires_codesig = blk: { + if (self.options.entitlements) |_| break :blk true; + if (cpu_arch == .aarch64 and (os_tag == .macos or abi == .simulator)) break :blk true; + break :blk false; + }; + var codesig: ?CodeSignature = if (requires_codesig) blk: { + // Preallocate space for the code signature. + // We need to do this at this stage so that we have the load commands with proper values + // written out to the file. + // The most important here is to have the correct vm and filesize of the __LINKEDIT segment + // where the code signature goes into. 
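+ // (The signature is computed over everything that precedes it in the file,
+ // including the load commands themselves, so the reserved blob has to be
+ // accounted for in __LINKEDIT before any hashing takes place.)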
+ var codesig = CodeSignature.init(self.page_size); + codesig.code_directory.ident = fs.path.basename(self.options.emit.sub_path); + if (self.options.entitlements) |path| { + try codesig.addEntitlements(gpa, path); + } + try self.writeCodeSignaturePadding(&codesig); + break :blk codesig; + } else null; + defer if (codesig) |*csig| csig.deinit(gpa); + + // Write load commands + var lc_buffer = std.ArrayList(u8).init(arena); + const lc_writer = lc_buffer.writer(); + + try self.writeSegmentHeaders(lc_writer); + const linkedit_cmd_offset = @sizeOf(macho.mach_header_64) + @as(u32, @intCast(lc_buffer.items.len - @sizeOf(macho.segment_command_64))); + + try lc_writer.writeStruct(self.dyld_info_cmd); + try lc_writer.writeStruct(self.function_starts_cmd); + try lc_writer.writeStruct(self.data_in_code_cmd); + + const symtab_cmd_offset = @sizeOf(macho.mach_header_64) + @as(u32, @intCast(lc_buffer.items.len)); + try lc_writer.writeStruct(self.symtab_cmd); + try lc_writer.writeStruct(self.dysymtab_cmd); + + try load_commands.writeDylinkerLC(lc_writer); + + if (self.options.output_mode == .exe) { + const seg_id = self.getSegmentByName("__TEXT").?; + const seg = self.segments.items[seg_id]; + const global = self.getEntryPoint(); + const sym = self.getSymbol(global); + try lc_writer.writeStruct(macho.entry_point_command{ + .entryoff = @as(u32, @intCast(sym.n_value - seg.vmaddr)), + .stacksize = self.options.stack_size orelse 0, + }); + } else { + assert(self.options.output_mode == .lib); + try load_commands.writeDylibIdLC(&self.options, lc_writer); + } + + try load_commands.writeRpathLCs(self.base.allocator, &self.options, lc_writer); + try lc_writer.writeStruct(macho.source_version_command{ + .version = 0, + }); + try load_commands.writeBuildVersionLC(&self.options, lc_writer); + + const uuid_cmd_offset = @sizeOf(macho.mach_header_64) + @as(u32, @intCast(lc_buffer.items.len)); + try lc_writer.writeStruct(self.uuid_cmd); + + try load_commands.writeLoadDylibLCs(self.dylibs.items, self.referenced_dylibs.keys(), lc_writer); + + var codesig_cmd_offset: ?u32 = null; + if (requires_codesig) { + codesig_cmd_offset = @sizeOf(macho.mach_header_64) + @as(u32, @intCast(lc_buffer.items.len)); + try lc_writer.writeStruct(self.codesig_cmd); + } + + const ncmds = load_commands.calcNumOfLCs(lc_buffer.items); + try self.base.file.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64)); + try self.writeHeader(ncmds, @as(u32, @intCast(lc_buffer.items.len))); + + try self.writeUuid(.{ + .linkedit_cmd_offset = linkedit_cmd_offset, + .symtab_cmd_offset = symtab_cmd_offset, + .uuid_cmd_offset = uuid_cmd_offset, + .codesig_cmd_offset = codesig_cmd_offset, + }); + + if (codesig) |*csig| { + try self.writeCodeSignature(csig); // code signing always comes last + + if (comptime builtin.target.isDarwin()) { + const dir = self.options.emit.directory; + const path = self.options.emit.sub_path; + try dir.copyFile(path, dir, path, .{}); + } + } +} + +fn resolveSearchDir( + arena: Allocator, + dir: []const u8, + syslibroot: ?[]const u8, +) !?[]const u8 { + var candidates = std.ArrayList([]const u8).init(arena); + + if (fs.path.isAbsolute(dir)) { + if (syslibroot) |root| { + const common_dir = if (builtin.os.tag == .windows) blk: { + // We need to check for disk designator and strip it out from dir path so + // that we can concat dir with syslibroot. 
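+ // (A disk designator is the "C:"-style drive prefix; joining syslibroot with a
+ // path that still carries it would yield a bogus mixed path.)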
+ // TODO we should backport this mechanism to 'MachO.Dylib.parseDependentLibs()' + const disk_designator = fs.path.diskDesignatorWindows(dir); + + if (mem.indexOf(u8, dir, disk_designator)) |where| { + break :blk dir[where + disk_designator.len ..]; + } + + break :blk dir; + } else dir; + const full_path = try fs.path.join(arena, &[_][]const u8{ root, common_dir }); + try candidates.append(full_path); + } + } + + try candidates.append(dir); + + for (candidates.items) |candidate| { + // Verify that search path actually exists + var tmp = fs.cwd().openDir(candidate, .{}) catch |err| switch (err) { + error.FileNotFound => continue, + else => |e| return e, + }; + defer tmp.close(); + + return candidate; + } + + return null; +} + +fn resolveSearchDirs(arena: Allocator, dirs: []const []const u8, syslibroot: ?[]const u8, out_dirs: anytype) !void { + for (dirs) |dir| { + if (try resolveSearchDir(arena, dir, syslibroot)) |search_dir| { + try out_dirs.append(search_dir); + } else { + log.warn("directory not found for '-L{s}'", .{dir}); + } + } +} + +fn resolveLib( + arena: Allocator, + search_dir: []const u8, + name: []const u8, + ext: []const u8, +) !?[]const u8 { + const search_name = try std.fmt.allocPrint(arena, "lib{s}{s}", .{ name, ext }); + const full_path = try fs.path.join(arena, &[_][]const u8{ search_dir, search_name }); + + // Check if the file exists. + const tmp = fs.cwd().openFile(full_path, .{}) catch |err| switch (err) { + error.FileNotFound => return null, + else => |e| return e, + }; + defer tmp.close(); + + return full_path; +} + +fn resolveFramework( + arena: Allocator, + search_dir: []const u8, + name: []const u8, + ext: []const u8, +) !?[]const u8 { + const search_name = try std.fmt.allocPrint(arena, "{s}{s}", .{ name, ext }); + const prefix_path = try std.fmt.allocPrint(arena, "{s}.framework", .{name}); + const full_path = try fs.path.join(arena, &[_][]const u8{ search_dir, prefix_path, search_name }); + + // Check if the file exists. 
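+ // (openFile doubles as a cheap existence probe here: the handle is closed
+ // again immediately and only the resolved path is handed back.)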
+ const tmp = fs.cwd().openFile(full_path, .{}) catch |err| switch (err) { + error.FileNotFound => return null, + else => |e| return e, + }; + defer tmp.close(); + + return full_path; +} + +fn parseObject(self: *MachO, path: []const u8) !bool { + const gpa = self.base.allocator; + const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { + error.FileNotFound => return false, + else => |e| return e, + }; + defer file.close(); + + const name = try gpa.dupe(u8, path); + const cpu_arch = self.options.target.cpu_arch.?; + const mtime: u64 = mtime: { + const stat = file.stat() catch break :mtime 0; + break :mtime @as(u64, @intCast(@divFloor(stat.mtime, 1_000_000_000))); + }; + const file_stat = try file.stat(); + const file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; + const contents = try file.readToEndAllocOptions(gpa, file_size, file_size, @alignOf(u64), null); + + var object = Object{ + .name = name, + .mtime = mtime, + .contents = contents, + }; + + object.parse(gpa, cpu_arch) catch |err| switch (err) { + error.EndOfStream, error.NotObject => { + object.deinit(gpa); + return false; + }, + else => |e| return e, + }; + + try self.objects.append(gpa, object); + + return true; +} + +fn parseArchive(self: *MachO, path: []const u8, force_load: bool) !bool { + const gpa = self.base.allocator; + const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { + error.FileNotFound => return false, + else => |e| return e, + }; + errdefer file.close(); + + const name = try gpa.dupe(u8, path); + const cpu_arch = self.options.target.cpu_arch.?; + const reader = file.reader(); + const fat_offset = try fat.getLibraryOffset(reader, cpu_arch); + try reader.context.seekTo(fat_offset); + + var archive = Archive{ + .file = file, + .fat_offset = fat_offset, + .name = name, + }; + + archive.parse(gpa, reader) catch |err| switch (err) { + error.EndOfStream, error.NotArchive => { + archive.deinit(gpa); + return false; + }, + else => |e| return e, + }; + + if (force_load) { + // Get all offsets from the ToC + var offsets = std.AutoArrayHashMap(u32, void).init(gpa); + defer offsets.deinit(); + for (archive.toc.values()) |offs| { + for (offs.items) |off| { + _ = try offsets.getOrPut(off); + } + } + for (offsets.keys()) |off| { + const object = try archive.parseObject(gpa, cpu_arch, off); + try self.objects.append(gpa, object); + } + } else { + try self.archives.append(gpa, archive); + } + + return true; +} + +const ParseDylibError = error{ + OutOfMemory, + EmptyStubFile, + MismatchedCpuArchitecture, + UnsupportedCpuArchitecture, + EndOfStream, +} || fs.File.OpenError || std.os.PReadError || Dylib.Id.ParseError; + +const DylibCreateOpts = struct { + syslibroot: ?[]const u8, + id: ?Dylib.Id = null, + dependent: bool = false, + needed: bool = false, + weak: bool = false, +}; + +pub fn parseDylib( + self: *MachO, + path: []const u8, + dependent_libs: anytype, + opts: DylibCreateOpts, +) ParseDylibError!bool { + const gpa = self.base.allocator; + const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { + error.FileNotFound => return false, + else => |e| return e, + }; + defer file.close(); + + const cpu_arch = self.options.target.cpu_arch.?; + const file_stat = try file.stat(); + var file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; + + const reader = file.reader(); + const lib_offset = try fat.getLibraryOffset(reader, cpu_arch); + try file.seekTo(lib_offset); + file_size -= lib_offset; + + const contents = try file.readToEndAllocOptions(gpa, 
file_size, file_size, @alignOf(u64), null); + defer gpa.free(contents); + + const dylib_id = @as(u16, @intCast(self.dylibs.items.len)); + var dylib = Dylib{ .weak = opts.weak }; + + dylib.parseFromBinary( + gpa, + cpu_arch, + dylib_id, + dependent_libs, + path, + contents, + ) catch |err| switch (err) { + error.EndOfStream, error.NotDylib => { + try file.seekTo(0); + + var lib_stub = LibStub.loadFromFile(gpa, file) catch { + dylib.deinit(gpa); + return false; + }; + defer lib_stub.deinit(); + + try dylib.parseFromStub( + gpa, + self.options.target, + lib_stub, + dylib_id, + dependent_libs, + path, + ); + }, + else => |e| return e, + }; + + if (opts.id) |id| { + if (dylib.id.?.current_version < id.compatibility_version) { + log.warn("found dylib is incompatible with the required minimum version", .{}); + log.warn(" dylib: {s}", .{id.name}); + log.warn(" required minimum version: {}", .{id.compatibility_version}); + log.warn(" dylib version: {}", .{dylib.id.?.current_version}); + + // TODO maybe this should be an error and facilitate auto-cleanup? + dylib.deinit(gpa); + return false; + } + } + + const gop = try self.dylibs_map.getOrPut(gpa, dylib.id.?.name); + if (gop.found_existing) { + dylib.deinit(gpa); + return true; + } + gop.value_ptr.* = dylib_id; + try self.dylibs.append(gpa, dylib); + + const should_link_dylib_even_if_unreachable = blk: { + if (self.options.dead_strip_dylibs and !opts.needed) break :blk false; + break :blk !(opts.dependent or self.referenced_dylibs.contains(dylib_id)); + }; + + if (should_link_dylib_even_if_unreachable) { + try self.referenced_dylibs.putNoClobber(gpa, dylib_id, {}); + } + + return true; +} + +fn parsePositionals(self: *MachO, files: []const []const u8, syslibroot: ?[]const u8, dependent_libs: anytype) !void { + const tracy = trace(@src()); + defer tracy.end(); + + for (files) |file_name| { + const full_path = full_path: { + var buffer: [fs.MAX_PATH_BYTES]u8 = undefined; + break :full_path try std.fs.realpath(file_name, &buffer); + }; + log.debug("parsing input file path '{s}'", .{full_path}); + + if (try self.parseObject(full_path)) continue; + if (try self.parseArchive(full_path, false)) continue; + if (try self.parseDylib(full_path, dependent_libs, .{ + .syslibroot = syslibroot, + })) continue; + + log.warn("unknown filetype for positional input file: '{s}'", .{file_name}); + } +} + +fn parseAndForceLoadStaticArchives(self: *MachO, files: []const []const u8) !void { + const tracy = trace(@src()); + defer tracy.end(); + + for (files) |file_name| { + const full_path = full_path: { + var buffer: [fs.MAX_PATH_BYTES]u8 = undefined; + break :full_path try fs.realpath(file_name, &buffer); + }; + log.debug("parsing and force loading static archive '{s}'", .{full_path}); + + if (try self.parseArchive(full_path, true)) continue; + log.debug("unknown filetype: expected static archive: '{s}'", .{file_name}); + } +} + +fn parseLibs( + self: *MachO, + lib_names: []const []const u8, + lib_infos: []const Zld.SystemLib, + syslibroot: ?[]const u8, + dependent_libs: anytype, +) !void { + const tracy = trace(@src()); + defer tracy.end(); + + for (lib_names, 0..) 
|lib, i| { + const lib_info = lib_infos[i]; + log.debug("parsing lib path '{s}'", .{lib}); + if (try self.parseDylib(lib, dependent_libs, .{ + .syslibroot = syslibroot, + .needed = lib_info.needed, + .weak = lib_info.weak, + })) continue; + if (try self.parseArchive(lib, false)) continue; + + log.warn("unknown filetype for a library: '{s}'", .{lib}); + } +} + +fn parseDependentLibs(self: *MachO, syslibroot: ?[]const u8, dependent_libs: anytype) !void { + const tracy = trace(@src()); + defer tracy.end(); + + // At this point, we can now parse dependents of dylibs preserving the inclusion order of: + // 1) anything on the linker line is parsed first + // 2) afterwards, we parse dependents of the included dylibs + // TODO this should not be performed if the user specifies `-flat_namespace` flag. + // See ld64 manpages. + var arena_alloc = std.heap.ArenaAllocator.init(self.base.allocator); + const arena = arena_alloc.allocator(); + defer arena_alloc.deinit(); + + while (dependent_libs.readItem()) |dep_id| { + defer dep_id.id.deinit(self.base.allocator); + + if (self.dylibs_map.contains(dep_id.id.name)) continue; + + const weak = self.dylibs.items[dep_id.parent].weak; + const has_ext = blk: { + const basename = fs.path.basename(dep_id.id.name); + break :blk mem.lastIndexOfScalar(u8, basename, '.') != null; + }; + const extension = if (has_ext) fs.path.extension(dep_id.id.name) else ""; + const without_ext = if (has_ext) blk: { + const index = mem.lastIndexOfScalar(u8, dep_id.id.name, '.') orelse unreachable; + break :blk dep_id.id.name[0..index]; + } else dep_id.id.name; + + for (&[_][]const u8{ extension, ".tbd" }) |ext| { + const with_ext = try std.fmt.allocPrint(arena, "{s}{s}", .{ without_ext, ext }); + const full_path = if (syslibroot) |root| try fs.path.join(arena, &.{ root, with_ext }) else with_ext; + + log.debug("trying dependency at fully resolved path {s}", .{full_path}); + + const did_parse_successfully = try self.parseDylib(full_path, dependent_libs, .{ + .id = dep_id.id, + .syslibroot = syslibroot, + .dependent = true, + .weak = weak, + }); + if (did_parse_successfully) break; + } else { + log.warn("unable to resolve dependency {s}", .{dep_id.id.name}); + } + } +} + +pub fn getOutputSection(self: *MachO, sect: macho.section_64) !?u8 { + const segname = sect.segName(); + const sectname = sect.sectName(); + const res: ?u8 = blk: { + if (mem.eql(u8, "__LLVM", segname)) { + log.debug("TODO LLVM section: type 0x{x}, name '{s},{s}'", .{ + sect.flags, segname, sectname, + }); + break :blk null; + } + // We handle unwind info separately. 
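+ // __TEXT,__eh_frame and __LD,__compact_unwind input sections are consumed by the
+ // eh_frame and UnwindInfo passes during flush and re-emitted there, so they are
+ // deliberately not mapped to an output section at this point.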
+ if (mem.eql(u8, "__TEXT", segname) and mem.eql(u8, "__eh_frame", sectname)) { + break :blk null; + } + if (mem.eql(u8, "__LD", segname) and mem.eql(u8, "__compact_unwind", sectname)) { + break :blk null; + } + + if (sect.isCode()) { + break :blk self.getSectionByName("__TEXT", "__text") orelse try self.initSection( + "__TEXT", + "__text", + .{ + .flags = macho.S_REGULAR | + macho.S_ATTR_PURE_INSTRUCTIONS | + macho.S_ATTR_SOME_INSTRUCTIONS, + }, + ); + } + + if (sect.isDebug()) { + break :blk null; + } + + switch (sect.type()) { + macho.S_4BYTE_LITERALS, + macho.S_8BYTE_LITERALS, + macho.S_16BYTE_LITERALS, + => { + break :blk self.getSectionByName("__TEXT", "__const") orelse try self.initSection( + "__TEXT", + "__const", + .{}, + ); + }, + macho.S_CSTRING_LITERALS => { + if (mem.startsWith(u8, sectname, "__objc")) { + break :blk self.getSectionByName(segname, sectname) orelse try self.initSection( + segname, + sectname, + .{}, + ); + } + break :blk self.getSectionByName("__TEXT", "__cstring") orelse try self.initSection( + "__TEXT", + "__cstring", + .{ .flags = macho.S_CSTRING_LITERALS }, + ); + }, + macho.S_MOD_INIT_FUNC_POINTERS, + macho.S_MOD_TERM_FUNC_POINTERS, + => { + break :blk self.getSectionByName("__DATA_CONST", sectname) orelse try self.initSection( + "__DATA_CONST", + sectname, + .{ .flags = sect.flags }, + ); + }, + macho.S_LITERAL_POINTERS, + macho.S_ZEROFILL, + macho.S_THREAD_LOCAL_VARIABLES, + macho.S_THREAD_LOCAL_VARIABLE_POINTERS, + macho.S_THREAD_LOCAL_REGULAR, + macho.S_THREAD_LOCAL_ZEROFILL, + => { + break :blk self.getSectionByName(segname, sectname) orelse try self.initSection( + segname, + sectname, + .{ .flags = sect.flags }, + ); + }, + macho.S_COALESCED => { + break :blk self.getSectionByName(segname, sectname) orelse try self.initSection( + segname, + sectname, + .{}, + ); + }, + macho.S_REGULAR => { + if (mem.eql(u8, segname, "__TEXT")) { + if (mem.eql(u8, sectname, "__rodata") or + mem.eql(u8, sectname, "__typelink") or + mem.eql(u8, sectname, "__itablink") or + mem.eql(u8, sectname, "__gosymtab") or + mem.eql(u8, sectname, "__gopclntab")) + { + break :blk self.getSectionByName("__DATA_CONST", "__const") orelse try self.initSection( + "__DATA_CONST", + "__const", + .{}, + ); + } + } + if (mem.eql(u8, segname, "__DATA")) { + if (mem.eql(u8, sectname, "__const") or + mem.eql(u8, sectname, "__cfstring") or + mem.eql(u8, sectname, "__objc_classlist") or + mem.eql(u8, sectname, "__objc_imageinfo")) + { + break :blk self.getSectionByName("__DATA_CONST", sectname) orelse + try self.initSection( + "__DATA_CONST", + sectname, + .{}, + ); + } else if (mem.eql(u8, sectname, "__data")) { + break :blk self.getSectionByName("__DATA", "__data") orelse + try self.initSection( + "__DATA", + "__data", + .{}, + ); + } + } + break :blk self.getSectionByName(segname, sectname) orelse try self.initSection( + segname, + sectname, + .{}, + ); + }, + else => break :blk null, + } + }; + return res; +} + +pub fn addAtomToSection(self: *MachO, atom_index: AtomIndex) void { + const atom = self.getAtomPtr(atom_index); + const sym = self.getSymbol(atom.getSymbolWithLoc()); + var section = self.sections.get(sym.n_sect - 1); + if (section.header.size > 0) { + const last_atom = self.getAtomPtr(section.last_atom_index); + last_atom.next_index = atom_index; + atom.prev_index = section.last_atom_index; + } else { + section.first_atom_index = atom_index; + } + section.last_atom_index = atom_index; + section.header.size += atom.size; + self.sections.set(sym.n_sect - 1, section); +} + +pub fn 
createEmptyAtom(self: *MachO, sym_index: u32, size: u64, alignment: u32) !AtomIndex { + const gpa = self.base.allocator; + const index = @as(AtomIndex, @intCast(self.atoms.items.len)); + const atom = try self.atoms.addOne(gpa); + atom.* = Atom.empty; + atom.sym_index = sym_index; + atom.size = size; + atom.alignment = alignment; + + log.debug("creating ATOM(%{d}) at index {d}", .{ sym_index, index }); + + return index; +} + +pub fn createGotAtom(self: *MachO) !AtomIndex { + const sym_index = try self.allocateSymbol(); + const atom_index = try self.createEmptyAtom(sym_index, @sizeOf(u64), 3); + const sym = self.getSymbolPtr(.{ .sym_index = sym_index }); + sym.n_type = macho.N_SECT; + + const sect_id = self.getSectionByName("__DATA_CONST", "__got") orelse + try self.initSection("__DATA_CONST", "__got", .{ + .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, + }); + sym.n_sect = sect_id + 1; + + self.addAtomToSection(atom_index); + + return atom_index; +} + +fn writeGotPointer(self: *MachO, got_index: u32, writer: anytype) !void { + const target_addr = blk: { + const entry = self.got_entries.items[got_index]; + const sym = entry.getTargetSymbol(self); + break :blk sym.n_value; + }; + try writer.writeIntLittle(u64, target_addr); +} + +pub fn createTlvPtrAtom(self: *MachO) !AtomIndex { + const sym_index = try self.allocateSymbol(); + const atom_index = try self.createEmptyAtom(sym_index, @sizeOf(u64), 3); + const sym = self.getSymbolPtr(.{ .sym_index = sym_index }); + sym.n_type = macho.N_SECT; + + const sect_id = (try self.getOutputSection(.{ + .segname = makeStaticString("__DATA"), + .sectname = makeStaticString("__thread_ptrs"), + .flags = macho.S_THREAD_LOCAL_VARIABLE_POINTERS, + })).?; + sym.n_sect = sect_id + 1; + + self.addAtomToSection(atom_index); + + return atom_index; +} + +fn createDyldStubBinderGotAtom(self: *MachO) !void { + const sym_index = self.dyld_stub_binder_index orelse return; + const gpa = self.base.allocator; + const target = SymbolWithLoc{ .sym_index = sym_index }; + const atom_index = try self.createGotAtom(); + const got_index = @as(u32, @intCast(self.got_entries.items.len)); + try self.got_entries.append(gpa, .{ + .target = target, + .atom_index = atom_index, + }); + try self.got_table.putNoClobber(gpa, target, got_index); +} + +fn createDyldPrivateAtom(self: *MachO) !void { + if (self.dyld_stub_binder_index == null) return; + + const sym_index = try self.allocateSymbol(); + const atom_index = try self.createEmptyAtom(sym_index, @sizeOf(u64), 3); + const sym = self.getSymbolPtr(.{ .sym_index = sym_index }); + sym.n_type = macho.N_SECT; + + const sect_id = self.getSectionByName("__DATA", "__data") orelse try self.initSection("__DATA", "__data", .{}); + sym.n_sect = sect_id + 1; + + self.dyld_private_sym_index = sym_index; + + self.addAtomToSection(atom_index); +} + +fn createStubHelperPreambleAtom(self: *MachO) !void { + if (self.dyld_stub_binder_index == null) return; + + const cpu_arch = self.options.target.cpu_arch.?; + const size: u64 = switch (cpu_arch) { + .x86_64 => 15, + .aarch64 => 6 * @sizeOf(u32), + else => unreachable, + }; + const alignment: u32 = switch (cpu_arch) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, + }; + const sym_index = try self.allocateSymbol(); + const atom_index = try self.createEmptyAtom(sym_index, size, alignment); + const sym = self.getSymbolPtr(.{ .sym_index = sym_index }); + sym.n_type = macho.N_SECT; + + const sect_id = self.getSectionByName("__TEXT", "__stub_helper") orelse + try self.initSection("__TEXT", "__stub_helper", .{ 
+ .flags = macho.S_REGULAR | + macho.S_ATTR_PURE_INSTRUCTIONS | + macho.S_ATTR_SOME_INSTRUCTIONS, + }); + sym.n_sect = sect_id + 1; + + self.stub_helper_preamble_sym_index = sym_index; + + self.addAtomToSection(atom_index); +} + +fn writeStubHelperPreambleCode(self: *MachO, writer: anytype) !void { + const cpu_arch = self.options.target.cpu_arch.?; + const source_addr = blk: { + const sym = self.getSymbol(.{ .sym_index = self.stub_helper_preamble_sym_index.? }); + break :blk sym.n_value; + }; + const dyld_private_addr = blk: { + const sym = self.getSymbol(.{ .sym_index = self.dyld_private_sym_index.? }); + break :blk sym.n_value; + }; + const dyld_stub_binder_got_addr = blk: { + const index = self.got_table.get(.{ .sym_index = self.dyld_stub_binder_index.? }).?; + const entry = self.got_entries.items[index]; + break :blk entry.getAtomSymbol(self).n_value; + }; + switch (cpu_arch) { + .x86_64 => { + try writer.writeAll(&.{ 0x4c, 0x8d, 0x1d }); + { + const disp = try Atom.calcPcRelativeDisplacementX86(source_addr + 3, dyld_private_addr, 0); + try writer.writeIntLittle(i32, disp); + } + try writer.writeAll(&.{ 0x41, 0x53, 0xff, 0x25 }); + { + const disp = try Atom.calcPcRelativeDisplacementX86(source_addr + 11, dyld_stub_binder_got_addr, 0); + try writer.writeIntLittle(i32, disp); + } + }, + .aarch64 => { + { + const pages = Atom.calcNumberOfPages(source_addr, dyld_private_addr); + try writer.writeIntLittle(u32, aarch64.Instruction.adrp(.x17, pages).toU32()); + } + { + const off = try Atom.calcPageOffset(dyld_private_addr, .arithmetic); + try writer.writeIntLittle(u32, aarch64.Instruction.add(.x17, .x17, off, false).toU32()); + } + try writer.writeIntLittle(u32, aarch64.Instruction.stp( + .x16, + .x17, + aarch64.Register.sp, + aarch64.Instruction.LoadStorePairOffset.pre_index(-16), + ).toU32()); + { + const pages = Atom.calcNumberOfPages(source_addr + 12, dyld_stub_binder_got_addr); + try writer.writeIntLittle(u32, aarch64.Instruction.adrp(.x16, pages).toU32()); + } + { + const off = try Atom.calcPageOffset(dyld_stub_binder_got_addr, .load_store_64); + try writer.writeIntLittle(u32, aarch64.Instruction.ldr( + .x16, + .x16, + aarch64.Instruction.LoadStoreOffset.imm(off), + ).toU32()); + } + try writer.writeIntLittle(u32, aarch64.Instruction.br(.x16).toU32()); + }, + else => unreachable, + } +} + +pub fn createStubHelperAtom(self: *MachO) !AtomIndex { + const cpu_arch = self.options.target.cpu_arch.?; + const stub_size: u4 = switch (cpu_arch) { + .x86_64 => 10, + .aarch64 => 3 * @sizeOf(u32), + else => unreachable, + }; + const alignment: u2 = switch (cpu_arch) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, + }; + + const sym_index = try self.allocateSymbol(); + const atom_index = try self.createEmptyAtom(sym_index, stub_size, alignment); + const sym = self.getSymbolPtr(.{ .sym_index = sym_index }); + sym.n_sect = macho.N_SECT; + + const sect_id = self.getSectionByName("__TEXT", "__stub_helper").?; + sym.n_sect = sect_id + 1; + + self.addAtomToSection(atom_index); + + return atom_index; +} + +fn writeStubHelperCode(self: *MachO, atom_index: AtomIndex, writer: anytype) !void { + const cpu_arch = self.options.target.cpu_arch.?; + const source_addr = blk: { + const atom = self.getAtom(atom_index); + const sym = self.getSymbol(atom.getSymbolWithLoc()); + break :blk sym.n_value; + }; + const target_addr = blk: { + const sym = self.getSymbol(.{ .sym_index = self.stub_helper_preamble_sym_index.? 
}); + break :blk sym.n_value; + }; + switch (cpu_arch) { + .x86_64 => { + try writer.writeAll(&.{ 0x68, 0x0, 0x0, 0x0, 0x0, 0xe9 }); + { + const disp = try Atom.calcPcRelativeDisplacementX86(source_addr + 6, target_addr, 0); + try writer.writeIntLittle(i32, disp); + } + }, + .aarch64 => { + const stub_size: u4 = 3 * @sizeOf(u32); + const literal = blk: { + const div_res = try math.divExact(u64, stub_size - @sizeOf(u32), 4); + break :blk math.cast(u18, div_res) orelse return error.Overflow; + }; + try writer.writeIntLittle(u32, aarch64.Instruction.ldrLiteral( + .w16, + literal, + ).toU32()); + { + const disp = try Atom.calcPcRelativeDisplacementArm64(source_addr + 4, target_addr); + try writer.writeIntLittle(u32, aarch64.Instruction.b(disp).toU32()); + } + try writer.writeAll(&.{ 0x0, 0x0, 0x0, 0x0 }); + }, + else => unreachable, + } +} + +pub fn createLazyPointerAtom(self: *MachO) !AtomIndex { + const sym_index = try self.allocateSymbol(); + const atom_index = try self.createEmptyAtom(sym_index, @sizeOf(u64), 3); + const sym = self.getSymbolPtr(.{ .sym_index = sym_index }); + sym.n_type = macho.N_SECT; + + const sect_id = self.getSectionByName("__DATA", "__la_symbol_ptr") orelse + try self.initSection("__DATA", "__la_symbol_ptr", .{ + .flags = macho.S_LAZY_SYMBOL_POINTERS, + }); + sym.n_sect = sect_id + 1; + + self.addAtomToSection(atom_index); + + return atom_index; +} + +fn writeLazyPointer(self: *MachO, stub_helper_index: u32, writer: anytype) !void { + const target_addr = blk: { + const sect_id = self.getSectionByName("__TEXT", "__stub_helper").?; + var atom_index = self.sections.items(.first_atom_index)[sect_id]; + var count: u32 = 0; + while (count < stub_helper_index + 1) : (count += 1) { + const atom = self.getAtom(atom_index); + if (atom.next_index) |next_index| { + atom_index = next_index; + } + } + const atom = self.getAtom(atom_index); + const sym = self.getSymbol(atom.getSymbolWithLoc()); + break :blk sym.n_value; + }; + try writer.writeIntLittle(u64, target_addr); +} + +pub fn createStubAtom(self: *MachO) !AtomIndex { + const cpu_arch = self.options.target.cpu_arch.?; + const alignment: u2 = switch (cpu_arch) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, // unhandled architecture type + }; + const stub_size: u4 = switch (cpu_arch) { + .x86_64 => 6, + .aarch64 => 3 * @sizeOf(u32), + else => unreachable, // unhandled architecture type + }; + const sym_index = try self.allocateSymbol(); + const atom_index = try self.createEmptyAtom(sym_index, stub_size, alignment); + const sym = self.getSymbolPtr(.{ .sym_index = sym_index }); + sym.n_type = macho.N_SECT; + + const sect_id = self.getSectionByName("__TEXT", "__stubs") orelse + try self.initSection("__TEXT", "__stubs", .{ + .flags = macho.S_SYMBOL_STUBS | + macho.S_ATTR_PURE_INSTRUCTIONS | + macho.S_ATTR_SOME_INSTRUCTIONS, + .reserved2 = stub_size, + }); + sym.n_sect = sect_id + 1; + + self.addAtomToSection(atom_index); + + return atom_index; +} + +fn writeStubCode(self: *MachO, atom_index: AtomIndex, stub_index: u32, writer: anytype) !void { + const cpu_arch = self.options.target.cpu_arch.?; + const source_addr = blk: { + const atom = self.getAtom(atom_index); + const sym = self.getSymbol(atom.getSymbolWithLoc()); + break :blk sym.n_value; + }; + const target_addr = blk: { + // TODO: cache this at stub atom creation; they always go in pairs anyhow + const la_sect_id = self.getSectionByName("__DATA", "__la_symbol_ptr").?; + var la_atom_index = self.sections.items(.first_atom_index)[la_sect_id]; + var count: u32 = 0; + 
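+ // Stub N and lazy pointer N are created in lockstep, so walking the
+ // __la_symbol_ptr atom list stub_index steps lands on the matching pointer.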
while (count < stub_index) : (count += 1) { + const la_atom = self.getAtom(la_atom_index); + la_atom_index = la_atom.next_index.?; + } + const atom = self.getAtom(la_atom_index); + const sym = self.getSymbol(atom.getSymbolWithLoc()); + break :blk sym.n_value; + }; + switch (cpu_arch) { + .x86_64 => { + try writer.writeAll(&.{ 0xff, 0x25 }); + { + const disp = try Atom.calcPcRelativeDisplacementX86(source_addr + 2, target_addr, 0); + try writer.writeIntLittle(i32, disp); + } + }, + .aarch64 => { + { + const pages = Atom.calcNumberOfPages(source_addr, target_addr); + try writer.writeIntLittle(u32, aarch64.Instruction.adrp(.x16, pages).toU32()); + } + { + const off = try Atom.calcPageOffset(target_addr, .load_store_64); + try writer.writeIntLittle(u32, aarch64.Instruction.ldr( + .x16, + .x16, + aarch64.Instruction.LoadStoreOffset.imm(off), + ).toU32()); + } + try writer.writeIntLittle(u32, aarch64.Instruction.br(.x16).toU32()); + }, + else => unreachable, + } +} + +fn createTentativeDefAtoms(self: *MachO) !void { + const gpa = self.base.allocator; + + for (self.globals.items) |global| { + const sym = self.getSymbolPtr(global); + if (!sym.tentative()) continue; + if (sym.n_desc == N_DEAD) continue; + + log.debug("creating tentative definition for ATOM(%{d}, '{s}') in object({?})", .{ + global.sym_index, self.getSymbolName(global), global.file, + }); + + // Convert any tentative definition into a regular symbol and allocate + // text blocks for each tentative definition. + const size = sym.n_value; + const alignment = (sym.n_desc >> 8) & 0x0f; + const n_sect = (try self.getOutputSection(.{ + .segname = makeStaticString("__DATA"), + .sectname = makeStaticString("__bss"), + .flags = macho.S_ZEROFILL, + })).? + 1; + + sym.* = .{ + .n_strx = sym.n_strx, + .n_type = macho.N_SECT | macho.N_EXT, + .n_sect = n_sect, + .n_desc = 0, + .n_value = 0, + }; + + const atom_index = try self.createEmptyAtom(global.sym_index, size, alignment); + const atom = self.getAtomPtr(atom_index); + atom.file = global.file; + + self.addAtomToSection(atom_index); + + assert(global.getFile() != null); + const object = &self.objects.items[global.getFile().?]; + try object.atoms.append(gpa, atom_index); + object.atom_by_index_table[global.sym_index] = atom_index; + } +} + +fn resolveSymbols(self: *MachO, resolver: *SymbolResolver) !void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.objects.items, 0..) 
|_, object_id| { + try self.resolveSymbolsInObject(@as(u16, @intCast(object_id)), resolver); + } + + try self.resolveSymbolsInArchives(resolver); + try self.resolveDyldStubBinder(resolver); + try self.resolveSymbolsInDylibs(resolver); + try self.createMhExecuteHeaderSymbol(resolver); + try self.createDsoHandleSymbol(resolver); + try self.resolveSymbolsAtLoading(resolver); +} + +fn resolveSymbolsInObject(self: *MachO, object_id: u16, resolver: *SymbolResolver) !void { + const object = &self.objects.items[object_id]; + const in_symtab = object.in_symtab orelse return; + + log.debug("resolving symbols in '{s}'", .{object.name}); + + var sym_index: u32 = 0; + while (sym_index < in_symtab.len) : (sym_index += 1) { + const sym = &object.symtab[sym_index]; + const sym_name = object.getSymbolName(sym_index); + + if (sym.stab()) { + log.err("unhandled symbol type: stab", .{}); + log.err(" symbol '{s}'", .{sym_name}); + log.err(" first definition in '{s}'", .{object.name}); + return error.UnhandledSymbolType; + } + + if (sym.indr()) { + log.err("unhandled symbol type: indirect", .{}); + log.err(" symbol '{s}'", .{sym_name}); + log.err(" first definition in '{s}'", .{object.name}); + return error.UnhandledSymbolType; + } + + if (sym.abs()) { + log.err("unhandled symbol type: absolute", .{}); + log.err(" symbol '{s}'", .{sym_name}); + log.err(" first definition in '{s}'", .{object.name}); + return error.UnhandledSymbolType; + } + + if (sym.sect() and !sym.ext()) { + log.debug("symbol '{s}' local to object {s}; skipping...", .{ + sym_name, + object.name, + }); + continue; + } + + const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = object_id + 1 }; + + const global_index = resolver.table.get(sym_name) orelse { + const gpa = self.base.allocator; + const global_index = @as(u32, @intCast(self.globals.items.len)); + try self.globals.append(gpa, sym_loc); + try resolver.table.putNoClobber(sym_name, global_index); + if (sym.undf() and !sym.tentative()) { + try resolver.unresolved.putNoClobber(global_index, {}); + } + continue; + }; + const global = &self.globals.items[global_index]; + const global_sym = self.getSymbol(global.*); + + // Cases to consider: sym vs global_sym + // 1. strong(sym) and strong(global_sym) => error + // 2. strong(sym) and weak(global_sym) => sym + // 3. strong(sym) and tentative(global_sym) => sym + // 4. strong(sym) and undf(global_sym) => sym + // 5. weak(sym) and strong(global_sym) => global_sym + // 6. weak(sym) and tentative(global_sym) => sym + // 7. weak(sym) and undf(global_sym) => sym + // 8. tentative(sym) and strong(global_sym) => global_sym + // 9. tentative(sym) and weak(global_sym) => global_sym + // 10. tentative(sym) and tentative(global_sym) => pick larger + // 11. tentative(sym) and undf(global_sym) => sym + // 12. undf(sym) and * => global_sym + // + // Reduces to: + // 1. strong(sym) and strong(global_sym) => error + // 2. * and strong(global_sym) => global_sym + // 3. weak(sym) and weak(global_sym) => global_sym + // 4. tentative(sym) and tentative(global_sym) => pick larger + // 5. undf(sym) and * => global_sym + // 6. 
else => sym + + const sym_is_strong = sym.sect() and !(sym.weakDef() or sym.pext()); + const global_is_strong = global_sym.sect() and !(global_sym.weakDef() or global_sym.pext()); + const sym_is_weak = sym.sect() and (sym.weakDef() or sym.pext()); + const global_is_weak = global_sym.sect() and (global_sym.weakDef() or global_sym.pext()); + + if (sym_is_strong and global_is_strong) { + log.err("symbol '{s}' defined multiple times", .{sym_name}); + if (global.getFile()) |file| { + log.err(" first definition in '{s}'", .{self.objects.items[file].name}); + } + log.err(" next definition in '{s}'", .{self.objects.items[object_id].name}); + return error.MultipleSymbolDefinitions; + } + + const update_global = blk: { + if (global_is_strong) break :blk false; + if (sym_is_weak and global_is_weak) break :blk false; + if (sym.tentative() and global_sym.tentative()) { + if (global_sym.n_value >= sym.n_value) break :blk false; + } + if (sym.undf() and !sym.tentative()) break :blk false; + break :blk true; + }; + + if (update_global) { + const global_object = &self.objects.items[global.getFile().?]; + global_object.globals_lookup[global.sym_index] = global_index; + _ = resolver.unresolved.swapRemove(resolver.table.get(sym_name).?); + global.* = sym_loc; + } else { + object.globals_lookup[sym_index] = global_index; + } + } +} + +fn resolveSymbolsInArchives(self: *MachO, resolver: *SymbolResolver) !void { + if (self.archives.items.len == 0) return; + + const gpa = self.base.allocator; + const cpu_arch = self.options.target.cpu_arch.?; + var next_sym: usize = 0; + loop: while (next_sym < resolver.unresolved.count()) { + const global = self.globals.items[resolver.unresolved.keys()[next_sym]]; + const sym_name = self.getSymbolName(global); + + for (self.archives.items) |archive| { + // Check if the entry exists in a static archive. + const offsets = archive.toc.get(sym_name) orelse { + // No hit. + continue; + }; + assert(offsets.items.len > 0); + + const object_id = @as(u16, @intCast(self.objects.items.len)); + const object = try archive.parseObject(gpa, cpu_arch, offsets.items[0]); + try self.objects.append(gpa, object); + try self.resolveSymbolsInObject(object_id, resolver); + + continue :loop; + } + + next_sym += 1; + } +} + +fn resolveSymbolsInDylibs(self: *MachO, resolver: *SymbolResolver) !void { + if (self.dylibs.items.len == 0) return; + + var next_sym: usize = 0; + loop: while (next_sym < resolver.unresolved.count()) { + const global_index = resolver.unresolved.keys()[next_sym]; + const global = self.globals.items[global_index]; + const sym = self.getSymbolPtr(global); + const sym_name = self.getSymbolName(global); + + for (self.dylibs.items, 0..) 
|dylib, id| { + if (!dylib.symbols.contains(sym_name)) continue; + + const dylib_id = @as(u16, @intCast(id)); + if (!self.referenced_dylibs.contains(dylib_id)) { + try self.referenced_dylibs.putNoClobber(self.base.allocator, dylib_id, {}); + } + + const ordinal = self.referenced_dylibs.getIndex(dylib_id) orelse unreachable; + sym.n_type |= macho.N_EXT; + sym.n_desc = @as(u16, @intCast(ordinal + 1)) * macho.N_SYMBOL_RESOLVER; + + if (dylib.weak) { + sym.n_desc |= macho.N_WEAK_REF; + } + + assert(resolver.unresolved.swapRemove(global_index)); + continue :loop; + } + + next_sym += 1; + } +} + +fn resolveSymbolsAtLoading(self: *MachO, resolver: *SymbolResolver) !void { + var next_sym: usize = 0; + while (next_sym < resolver.unresolved.count()) { + const global_index = resolver.unresolved.keys()[next_sym]; + const global = self.globals.items[global_index]; + const sym = self.getSymbolPtr(global); + const sym_name = self.getSymbolName(global); + + if (sym.discarded()) { + sym.* = .{ + .n_strx = 0, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + _ = resolver.unresolved.swapRemove(global_index); + continue; + } else if (self.options.allow_undef) { + const n_desc = @as( + u16, + @bitCast(macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP * @as(i16, @intCast(macho.N_SYMBOL_RESOLVER))), + ); + sym.n_type = macho.N_EXT; + sym.n_desc = n_desc; + _ = resolver.unresolved.swapRemove(global_index); + continue; + } + + log.err("undefined reference to symbol '{s}'", .{sym_name}); + if (global.getFile()) |file| { + log.err(" first referenced in '{s}'", .{self.objects.items[file].name}); + } + + next_sym += 1; + } +} + +fn createMhExecuteHeaderSymbol(self: *MachO, resolver: *SymbolResolver) !void { + if (self.options.output_mode != .exe) return; + if (resolver.table.get("__mh_execute_header")) |global_index| { + const global = self.globals.items[global_index]; + const sym = self.getSymbol(global); + self.mh_execute_header_index = global_index; + if (!sym.undf() and !(sym.pext() or sym.weakDef())) return; + } + + const gpa = self.base.allocator; + const sym_index = try self.allocateSymbol(); + const sym_loc = SymbolWithLoc{ .sym_index = sym_index }; + const sym = self.getSymbolPtr(sym_loc); + sym.n_strx = try self.strtab.insert(gpa, "__mh_execute_header"); + sym.n_type = macho.N_SECT | macho.N_EXT; + sym.n_desc = macho.REFERENCED_DYNAMICALLY; + + if (resolver.table.get("__mh_execute_header")) |global_index| { + const global = &self.globals.items[global_index]; + const global_object = &self.objects.items[global.getFile().?]; + global_object.globals_lookup[global.sym_index] = global_index; + global.* = sym_loc; + self.mh_execute_header_index = global_index; + } else { + const global_index = @as(u32, @intCast(self.globals.items.len)); + try self.globals.append(gpa, sym_loc); + self.mh_execute_header_index = global_index; + } +} + +fn createDsoHandleSymbol(self: *MachO, resolver: *SymbolResolver) !void { + const global_index = resolver.table.get("___dso_handle") orelse return; + const global = &self.globals.items[global_index]; + self.dso_handle_index = global_index; + if (!self.getSymbol(global.*).undf()) return; + + const gpa = self.base.allocator; + const sym_index = try self.allocateSymbol(); + const sym_loc = SymbolWithLoc{ .sym_index = sym_index }; + const sym = self.getSymbolPtr(sym_loc); + sym.n_strx = try self.strtab.insert(gpa, "___dso_handle"); + sym.n_type = macho.N_SECT | macho.N_EXT; + sym.n_desc = macho.N_WEAK_DEF; + + const global_object = &self.objects.items[global.getFile().?]; + 
global_object.globals_lookup[global.sym_index] = global_index; + _ = resolver.unresolved.swapRemove(resolver.table.get("___dso_handle").?); + global.* = sym_loc; +} + +fn resolveDyldStubBinder(self: *MachO, resolver: *SymbolResolver) !void { + if (self.dyld_stub_binder_index != null) return; + if (resolver.unresolved.count() == 0) return; // no need for a stub binder if we don't have any imports + + const gpa = self.base.allocator; + const sym_name = "dyld_stub_binder"; + const sym_index = try self.allocateSymbol(); + const sym_loc = SymbolWithLoc{ .sym_index = sym_index }; + const sym = self.getSymbolPtr(sym_loc); + sym.n_strx = try self.strtab.insert(gpa, sym_name); + sym.n_type = macho.N_UNDF; + + const global = SymbolWithLoc{ .sym_index = sym_index }; + try self.globals.append(gpa, global); + + for (self.dylibs.items, 0..) |dylib, id| { + if (!dylib.symbols.contains(sym_name)) continue; + + const dylib_id = @as(u16, @intCast(id)); + if (!self.referenced_dylibs.contains(dylib_id)) { + try self.referenced_dylibs.putNoClobber(gpa, dylib_id, {}); + } + + const ordinal = self.referenced_dylibs.getIndex(dylib_id) orelse unreachable; + sym.n_type |= macho.N_EXT; + sym.n_desc = @as(u16, @intCast(ordinal + 1)) * macho.N_SYMBOL_RESOLVER; + self.dyld_stub_binder_index = sym_index; + + break; + } + + if (self.dyld_stub_binder_index == null) { + log.err("undefined reference to symbol '{s}'", .{sym_name}); + return error.UndefinedSymbolReference; + } +} + +pub fn deinit(self: *MachO) void { + const gpa = self.base.allocator; + + self.tlv_ptr_entries.deinit(gpa); + self.tlv_ptr_table.deinit(gpa); + self.got_entries.deinit(gpa); + self.got_table.deinit(gpa); + self.stubs.deinit(gpa); + self.stubs_table.deinit(gpa); + self.thunk_table.deinit(gpa); + + for (self.thunks.items) |*thunk| { + thunk.deinit(gpa); + } + self.thunks.deinit(gpa); + + self.strtab.deinit(gpa); + self.locals.deinit(gpa); + self.globals.deinit(gpa); + + for (self.objects.items) |*object| { + object.deinit(gpa); + } + self.objects.deinit(gpa); + for (self.archives.items) |*archive| { + archive.deinit(gpa); + } + self.archives.deinit(gpa); + for (self.dylibs.items) |*dylib| { + dylib.deinit(gpa); + } + self.dylibs.deinit(gpa); + self.dylibs_map.deinit(gpa); + self.referenced_dylibs.deinit(gpa); + + self.segments.deinit(gpa); + self.sections.deinit(gpa); + self.atoms.deinit(gpa); +} + +pub fn closeFiles(self: *const MachO) void { + for (self.archives.items) |archive| { + archive.file.close(); + } +} + +fn createSegments(self: *MachO) !void { + const pagezero_vmsize = self.options.pagezero_size orelse default_pagezero_vmsize; + const aligned_pagezero_vmsize = mem.alignBackwardGeneric(u64, pagezero_vmsize, self.page_size); + if (self.options.output_mode != .lib and aligned_pagezero_vmsize > 0) { + if (aligned_pagezero_vmsize != pagezero_vmsize) { + log.warn("requested __PAGEZERO size (0x{x}) is not page aligned", .{pagezero_vmsize}); + log.warn(" rounding down to 0x{x}", .{aligned_pagezero_vmsize}); + } + try self.segments.append(self.base.allocator, .{ + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = makeStaticString("__PAGEZERO"), + .vmsize = aligned_pagezero_vmsize, + }); + } + + // __TEXT segment is non-optional + { + const protection = getSegmentMemoryProtection("__TEXT"); + try self.segments.append(self.base.allocator, .{ + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = makeStaticString("__TEXT"), + .maxprot = protection, + .initprot = protection, + }); + } + + for (self.sections.items(.header), 0..) 
|header, sect_id| { + if (header.size == 0) continue; // empty section + + const segname = header.segName(); + const segment_id = self.getSegmentByName(segname) orelse blk: { + log.debug("creating segment '{s}'", .{segname}); + const segment_id = @as(u8, @intCast(self.segments.items.len)); + const protection = getSegmentMemoryProtection(segname); + try self.segments.append(self.base.allocator, .{ + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = makeStaticString(segname), + .maxprot = protection, + .initprot = protection, + }); + break :blk segment_id; + }; + const segment = &self.segments.items[segment_id]; + segment.cmdsize += @sizeOf(macho.section_64); + segment.nsects += 1; + self.sections.items(.segment_index)[sect_id] = segment_id; + } + + // __LINKEDIT always comes last + { + const protection = getSegmentMemoryProtection("__LINKEDIT"); + try self.segments.append(self.base.allocator, .{ + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = makeStaticString("__LINKEDIT"), + .maxprot = protection, + .initprot = protection, + }); + } +} + +pub fn allocateSymbol(self: *MachO) !u32 { + try self.locals.ensureUnusedCapacity(self.base.allocator, 1); + log.debug(" (allocating symbol index {d})", .{self.locals.items.len}); + const index = @as(u32, @intCast(self.locals.items.len)); + _ = self.locals.addOneAssumeCapacity(); + self.locals.items[index] = .{ + .n_strx = 0, + .n_type = 0, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + return index; +} + +fn allocateSpecialSymbols(self: *MachO) !void { + for (&[_]?u32{ + self.dso_handle_index, + self.mh_execute_header_index, + }) |maybe_index| { + const global_index = maybe_index orelse continue; + const global = self.globals.items[global_index]; + if (global.getFile() != null) continue; + const name = self.getSymbolName(global); + const sym = self.getSymbolPtr(global); + const segment_index = self.getSegmentByName("__TEXT").?; + const seg = self.segments.items[segment_index]; + sym.n_sect = 1; + sym.n_value = seg.vmaddr; + log.debug("allocating {s} at the start of {s}", .{ + name, + seg.segName(), + }); + } +} + +fn splitIntoAtoms(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.objects.items, 0..) |*object, object_id| { + try object.splitIntoAtoms(self, @as(u31, @intCast(object_id))); + } +} + +fn writeAtoms(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = self.base.allocator; + const slice = self.sections.slice(); + + for (slice.items(.first_atom_index), 0..) 
|first_atom_index, sect_id| { + const header = slice.items(.header)[sect_id]; + var atom_index = first_atom_index; + + if (atom_index == 0) continue; + if (header.isZerofill()) continue; + + var buffer = std.ArrayList(u8).init(gpa); + defer buffer.deinit(); + try buffer.ensureTotalCapacity(math.cast(usize, header.size) orelse return error.Overflow); + + log.debug("writing atoms in {s},{s}", .{ header.segName(), header.sectName() }); + + var count: u32 = 0; + while (true) : (count += 1) { + const atom = self.getAtom(atom_index); + const this_sym = self.getSymbol(atom.getSymbolWithLoc()); + const padding_size: usize = if (atom.next_index) |next_index| blk: { + const next_sym = self.getSymbol(self.getAtom(next_index).getSymbolWithLoc()); + const size = next_sym.n_value - (this_sym.n_value + atom.size); + break :blk math.cast(usize, size) orelse return error.Overflow; + } else 0; + + log.debug(" (adding ATOM(%{d}, '{s}') from object({?}) to buffer)", .{ + atom.sym_index, + self.getSymbolName(atom.getSymbolWithLoc()), + atom.getFile(), + }); + if (padding_size > 0) { + log.debug(" (with padding {x})", .{padding_size}); + } + + const offset = buffer.items.len; + + // TODO: move writing synthetic sections into a separate function + if (atom.getFile() == null) outer: { + if (self.dyld_private_sym_index) |sym_index| { + if (atom.sym_index == sym_index) { + buffer.appendSliceAssumeCapacity(&[_]u8{0} ** @sizeOf(u64)); + break :outer; + } + } + switch (header.type()) { + macho.S_NON_LAZY_SYMBOL_POINTERS => { + try self.writeGotPointer(count, buffer.writer()); + }, + macho.S_LAZY_SYMBOL_POINTERS => { + try self.writeLazyPointer(count, buffer.writer()); + }, + macho.S_THREAD_LOCAL_VARIABLE_POINTERS => { + buffer.appendSliceAssumeCapacity(&[_]u8{0} ** @sizeOf(u64)); + }, + else => { + if (self.stub_helper_preamble_sym_index) |sym_index| { + if (sym_index == atom.sym_index) { + try self.writeStubHelperPreambleCode(buffer.writer()); + break :outer; + } + } + if (header.type() == macho.S_SYMBOL_STUBS) { + try self.writeStubCode(atom_index, count, buffer.writer()); + } else if (mem.eql(u8, header.sectName(), "__stub_helper")) { + try self.writeStubHelperCode(atom_index, buffer.writer()); + } else if (header.isCode()) { + // A thunk + try thunks.writeThunkCode(self, atom_index, buffer.writer()); + } else unreachable; + }, + } + } else { + const code = Atom.getAtomCode(self, atom_index); + const relocs = Atom.getAtomRelocs(self, atom_index); + buffer.appendSliceAssumeCapacity(code); + try Atom.resolveRelocs( + self, + atom_index, + buffer.items[offset..][0..atom.size], + relocs, + ); + } + + var i: usize = 0; + while (i < padding_size) : (i += 1) { + // TODO with NOPs + buffer.appendAssumeCapacity(0); + } + + if (atom.next_index) |next_index| { + atom_index = next_index; + } else { + assert(buffer.items.len == header.size); + log.debug(" (writing at file offset 0x{x})", .{header.offset}); + try self.base.file.pwriteAll(buffer.items, header.offset); + break; + } + } + } +} + +fn pruneAndSortSections(self: *MachO) !void { + const gpa = self.base.allocator; + + const SortSection = struct { + pub fn lessThan(_: void, lhs: Section, rhs: Section) bool { + return getSectionPrecedence(lhs.header) < getSectionPrecedence(rhs.header); + } + }; + + const slice = self.sections.slice(); + var sections = std.ArrayList(Section).init(gpa); + defer sections.deinit(); + try sections.ensureTotalCapacity(slice.len); + + { + var i: u8 = 0; + while (i < slice.len) : (i += 1) { + const section = self.sections.get(i); + 
log.debug("section {s},{s} {d}", .{ + section.header.segName(), + section.header.sectName(), + section.first_atom_index, + }); + if (section.header.size == 0) { + log.debug("pruning section {s},{s}", .{ + section.header.segName(), + section.header.sectName(), + }); + continue; + } + sections.appendAssumeCapacity(section); + } + } + + std.sort.sort(Section, sections.items, {}, SortSection.lessThan); + + self.sections.shrinkRetainingCapacity(0); + for (sections.items) |out| { + self.sections.appendAssumeCapacity(out); + } +} + +fn calcSectionSizes(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const slice = self.sections.slice(); + for (slice.items(.header), 0..) |*header, sect_id| { + if (header.size == 0) continue; + if (self.requiresThunks()) { + if (header.isCode() and !(header.type() == macho.S_SYMBOL_STUBS) and !mem.eql(u8, header.sectName(), "__stub_helper")) continue; + } + + var atom_index = slice.items(.first_atom_index)[sect_id]; + if (atom_index == 0) continue; + + header.size = 0; + header.@"align" = 0; + + while (true) { + const atom = self.getAtom(atom_index); + const atom_alignment = try math.powi(u32, 2, atom.alignment); + const atom_offset = mem.alignForwardGeneric(u64, header.size, atom_alignment); + const padding = atom_offset - header.size; + + const sym = self.getSymbolPtr(atom.getSymbolWithLoc()); + sym.n_value = atom_offset; + + header.size += padding + atom.size; + header.@"align" = @max(header.@"align", atom.alignment); + + if (atom.next_index) |next_index| { + atom_index = next_index; + } else break; + } + } + + if (self.requiresThunks()) { + for (slice.items(.header), 0..) |header, sect_id| { + if (!header.isCode()) continue; + if (header.type() == macho.S_SYMBOL_STUBS) continue; + if (mem.eql(u8, header.sectName(), "__stub_helper")) continue; + + // Create jump/branch range extenders if needed. + try thunks.createThunks(self, @as(u8, @intCast(sect_id))); + } + } +} + +fn allocateSegments(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.segments.items, 0..) |*segment, segment_index| { + const is_text_segment = mem.eql(u8, segment.segName(), "__TEXT"); + const base_size = if (is_text_segment) try load_commands.calcMinHeaderPad(self.base.allocator, &self.options, .{ + .segments = self.segments.items, + .dylibs = self.dylibs.items, + .referenced_dylibs = self.referenced_dylibs.keys(), + }) else 0; + try self.allocateSegment(@as(u8, @intCast(segment_index)), base_size); + + // TODO + // if (is_text_segment) blk: { + // const indexes = self.getSectionIndexes(@intCast(u8, segment_index)); + // if (indexes.start == indexes.end) break :blk; + + // // Shift all sections to the back to minimize jump size between __TEXT and __DATA segments. 
+ // var min_alignment: u32 = 0; + // for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { + // const alignment = try math.powi(u32, 2, header.@"align"); + // min_alignment = math.max(min_alignment, alignment); + // } + + // assert(min_alignment > 0); + // const last_header = self.sections.items(.header)[indexes.end - 1]; + // const shift: u32 = shift: { + // const diff = segment.filesize - last_header.offset - last_header.size; + // const factor = @divTrunc(diff, min_alignment); + // break :shift @intCast(u32, factor * min_alignment); + // }; + + // if (shift > 0) { + // for (self.sections.items(.header)[indexes.start..indexes.end]) |*header| { + // header.offset += shift; + // header.addr += shift; + // } + // } + // } + } +} + +fn getSegmentAllocBase(self: MachO, segment_index: u8) struct { vmaddr: u64, fileoff: u64 } { + if (segment_index > 0) { + const prev_segment = self.segments.items[segment_index - 1]; + return .{ + .vmaddr = prev_segment.vmaddr + prev_segment.vmsize, + .fileoff = prev_segment.fileoff + prev_segment.filesize, + }; + } + return .{ .vmaddr = 0, .fileoff = 0 }; +} + +fn allocateSegment(self: *MachO, segment_index: u8, init_size: u64) !void { + const segment = &self.segments.items[segment_index]; + + if (mem.eql(u8, segment.segName(), "__PAGEZERO")) return; // allocated upon creation + + const base = self.getSegmentAllocBase(segment_index); + segment.vmaddr = base.vmaddr; + segment.fileoff = base.fileoff; + segment.filesize = init_size; + segment.vmsize = init_size; + + // Allocate the sections according to their alignment at the beginning of the segment. + const indexes = self.getSectionIndexes(segment_index); + var start = init_size; + + const slice = self.sections.slice(); + for (slice.items(.header)[indexes.start..indexes.end], 0..) 
|*header, sect_id| { + const alignment = try math.powi(u32, 2, header.@"align"); + const start_aligned = mem.alignForwardGeneric(u64, start, alignment); + const n_sect = @as(u8, @intCast(indexes.start + sect_id + 1)); + + header.offset = if (header.isZerofill()) + 0 + else + @as(u32, @intCast(segment.fileoff + start_aligned)); + header.addr = segment.vmaddr + start_aligned; + + var atom_index = slice.items(.first_atom_index)[indexes.start + sect_id]; + if (atom_index > 0) { + log.debug("allocating local symbols in sect({d}, '{s},{s}')", .{ + n_sect, + header.segName(), + header.sectName(), + }); + + while (true) { + const atom = self.getAtom(atom_index); + const sym = self.getSymbolPtr(atom.getSymbolWithLoc()); + sym.n_value += header.addr; + sym.n_sect = n_sect; + + log.debug(" ATOM(%{d}, '{s}') @{x}", .{ + atom.sym_index, + self.getSymbolName(atom.getSymbolWithLoc()), + sym.n_value, + }); + + if (atom.getFile() != null) { + // Update each symbol contained within the atom + var it = Atom.getInnerSymbolsIterator(self, atom_index); + while (it.next()) |sym_loc| { + const inner_sym = self.getSymbolPtr(sym_loc); + inner_sym.n_value = sym.n_value + Atom.calcInnerSymbolOffset( + self, + atom_index, + sym_loc.sym_index, + ); + inner_sym.n_sect = n_sect; + } + + // If there is a section alias, update it now too + if (Atom.getSectionAlias(self, atom_index)) |sym_loc| { + const alias = self.getSymbolPtr(sym_loc); + alias.n_value = sym.n_value; + alias.n_sect = n_sect; + } + } + + if (atom.next_index) |next_index| { + atom_index = next_index; + } else break; + } + } + + start = start_aligned + header.size; + + if (!header.isZerofill()) { + segment.filesize = start; + } + segment.vmsize = start; + } + + segment.filesize = mem.alignForwardGeneric(u64, segment.filesize, self.page_size); + segment.vmsize = mem.alignForwardGeneric(u64, segment.vmsize, self.page_size); +} + +const InitSectionOpts = struct { + flags: u32 = macho.S_REGULAR, + reserved1: u32 = 0, + reserved2: u32 = 0, +}; + +pub fn initSection( + self: *MachO, + segname: []const u8, + sectname: []const u8, + opts: InitSectionOpts, +) !u8 { + const gpa = self.base.allocator; + log.debug("creating section '{s},{s}'", .{ segname, sectname }); + const index = @as(u8, @intCast(self.sections.slice().len)); + try self.sections.append(gpa, .{ + .segment_index = undefined, // Segments will be created automatically later down the pipeline. 
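+        // NOTE: atom index 0 doubles as the "no atoms yet" sentinel throughout this file
+        // (see the `if (atom_index == 0) continue` checks), so a fresh section starts empty.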
+ .header = .{ + .sectname = makeStaticString(sectname), + .segname = makeStaticString(segname), + .flags = opts.flags, + .reserved1 = opts.reserved1, + .reserved2 = opts.reserved2, + }, + .first_atom_index = 0, + .last_atom_index = 0, + }); + return index; +} + +fn getSegmentPrecedence(segname: []const u8) u4 { + if (mem.eql(u8, segname, "__PAGEZERO")) return 0x0; + if (mem.eql(u8, segname, "__TEXT")) return 0x1; + if (mem.eql(u8, segname, "__DATA_CONST")) return 0x2; + if (mem.eql(u8, segname, "__DATA")) return 0x3; + if (mem.eql(u8, segname, "__LINKEDIT")) return 0x5; + return 0x4; +} + +fn getSegmentMemoryProtection(segname: []const u8) macho.vm_prot_t { + if (mem.eql(u8, segname, "__PAGEZERO")) return macho.PROT.NONE; + if (mem.eql(u8, segname, "__TEXT")) return macho.PROT.READ | macho.PROT.EXEC; + if (mem.eql(u8, segname, "__LINKEDIT")) return macho.PROT.READ; + return macho.PROT.READ | macho.PROT.WRITE; +} + +fn getSectionPrecedence(header: macho.section_64) u8 { + const segment_precedence: u4 = getSegmentPrecedence(header.segName()); + const section_precedence: u4 = blk: { + if (header.isCode()) { + if (mem.eql(u8, "__text", header.sectName())) break :blk 0x0; + if (header.type() == macho.S_SYMBOL_STUBS) break :blk 0x1; + break :blk 0x2; + } + switch (header.type()) { + macho.S_NON_LAZY_SYMBOL_POINTERS, + macho.S_LAZY_SYMBOL_POINTERS, + => break :blk 0x0, + macho.S_MOD_INIT_FUNC_POINTERS => break :blk 0x1, + macho.S_MOD_TERM_FUNC_POINTERS => break :blk 0x2, + macho.S_ZEROFILL => break :blk 0xf, + macho.S_THREAD_LOCAL_REGULAR => break :blk 0xd, + macho.S_THREAD_LOCAL_ZEROFILL => break :blk 0xe, + else => { + if (mem.eql(u8, "__unwind_info", header.sectName())) break :blk 0xe; + if (mem.eql(u8, "__eh_frame", header.sectName())) break :blk 0xf; + break :blk 0x3; + }, + } + }; + return (@as(u8, @intCast(segment_precedence)) << 4) + section_precedence; +} + +fn writeSegmentHeaders(self: *MachO, writer: anytype) !void { + for (self.segments.items, 0..) |seg, i| { + const indexes = self.getSectionIndexes(@as(u8, @intCast(i))); + var out_seg = seg; + out_seg.cmdsize = @sizeOf(macho.segment_command_64); + out_seg.nsects = 0; + + // Update section headers count; any section with size of 0 is excluded + // since it doesn't have any data in the final binary file. 
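+        // A segment that ends up with zero sections is still written out, with the exception of
+        // empty __DATA_CONST and __DATA segments, which are dropped entirely below.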
+        for (self.sections.items(.header)[indexes.start..indexes.end]) |header| {
+            if (header.size == 0) continue;
+            out_seg.cmdsize += @sizeOf(macho.section_64);
+            out_seg.nsects += 1;
+        }
+
+        if (out_seg.nsects == 0 and
+            (mem.eql(u8, out_seg.segName(), "__DATA_CONST") or
+            mem.eql(u8, out_seg.segName(), "__DATA"))) continue;
+
+        try writer.writeStruct(out_seg);
+        for (self.sections.items(.header)[indexes.start..indexes.end]) |header| {
+            if (header.size == 0) continue;
+            try writer.writeStruct(header);
+        }
+    }
+}
+
+fn writeLinkeditSegmentData(self: *MachO) !void {
+    const tracy = trace(@src());
+    defer tracy.end();
+
+    try self.writeDyldInfoData();
+    try self.writeFunctionStarts();
+    try self.writeDataInCode();
+    try self.writeSymtabs();
+
+    const seg = self.getLinkeditSegmentPtr();
+    seg.vmsize = mem.alignForwardGeneric(u64, seg.filesize, self.page_size);
+}
+
+fn collectRebaseDataFromContainer(
+    self: *MachO,
+    sect_id: u8,
+    rebase: *Rebase,
+    container: anytype,
+) !void {
+    const slice = self.sections.slice();
+    const segment_index = slice.items(.segment_index)[sect_id];
+    const seg = self.getSegment(sect_id);
+
+    try rebase.entries.ensureUnusedCapacity(self.base.allocator, container.items.len);
+
+    for (container.items) |entry| {
+        const target_sym = entry.getTargetSymbol(self);
+        if (target_sym.undf()) continue;
+
+        const atom_sym = entry.getAtomSymbol(self);
+        const base_offset = atom_sym.n_value - seg.vmaddr;
+
+        log.debug(" | rebase at {x}", .{atom_sym.n_value});
+
+        rebase.entries.appendAssumeCapacity(.{
+            .offset = base_offset,
+            .segment_id = segment_index,
+        });
+    }
+}
+
+fn collectRebaseData(self: *MachO, rebase: *Rebase) !void {
+    log.debug("collecting rebase data", .{});
+
+    // First, unpack GOT entries
+    if (self.getSectionByName("__DATA_CONST", "__got")) |sect_id| {
+        try self.collectRebaseDataFromContainer(sect_id, rebase, self.got_entries);
+    }
+
+    const gpa = self.base.allocator;
+    const slice = self.sections.slice();
+
+    // Next, unpack lazy pointers
+    // TODO: save la_ptr in a container so that we can re-use the helper
+    if (self.getSectionByName("__DATA", "__la_symbol_ptr")) |sect_id| {
+        const segment_index = slice.items(.segment_index)[sect_id];
+        const seg = self.getSegment(sect_id);
+        var atom_index = slice.items(.first_atom_index)[sect_id];
+
+        try rebase.entries.ensureUnusedCapacity(gpa, self.stubs.items.len);
+
+        while (true) {
+            const atom = self.getAtom(atom_index);
+            const sym = self.getSymbol(atom.getSymbolWithLoc());
+            const base_offset = sym.n_value - seg.vmaddr;
+
+            log.debug(" | rebase at {x}", .{sym.n_value});
+
+            rebase.entries.appendAssumeCapacity(.{
+                .offset = base_offset,
+                .segment_id = segment_index,
+            });
+
+            if (atom.next_index) |next_index| {
+                atom_index = next_index;
+            } else break;
+        }
+    }
+
+    // Finally, unpack the rest.
+    for (slice.items(.header), 0..)
|header, sect_id| { + switch (header.type()) { + macho.S_LITERAL_POINTERS, + macho.S_REGULAR, + macho.S_MOD_INIT_FUNC_POINTERS, + macho.S_MOD_TERM_FUNC_POINTERS, + => {}, + else => continue, + } + + const segment_index = slice.items(.segment_index)[sect_id]; + const segment = self.getSegment(@as(u8, @intCast(sect_id))); + if (segment.maxprot & macho.PROT.WRITE == 0) continue; + + log.debug("{s},{s}", .{ header.segName(), header.sectName() }); + + const cpu_arch = self.options.target.cpu_arch.?; + var atom_index = slice.items(.first_atom_index)[sect_id]; + if (atom_index == 0) continue; + + while (true) { + const atom = self.getAtom(atom_index); + const sym = self.getSymbol(atom.getSymbolWithLoc()); + + const should_rebase = blk: { + if (self.dyld_private_sym_index) |sym_index| { + if (atom.getFile() == null and atom.sym_index == sym_index) break :blk false; + } + break :blk !sym.undf(); + }; + + if (should_rebase) { + log.debug(" ATOM({d}, %{d}, '{s}')", .{ + atom_index, + atom.sym_index, + self.getSymbolName(atom.getSymbolWithLoc()), + }); + + const object = self.objects.items[atom.getFile().?]; + const base_rel_offset: i32 = blk: { + const source_sym = object.getSourceSymbol(atom.sym_index) orelse break :blk 0; + const source_sect = object.getSourceSection(source_sym.n_sect - 1); + break :blk @as(i32, @intCast(source_sym.n_value - source_sect.addr)); + }; + const relocs = Atom.getAtomRelocs(self, atom_index); + + for (relocs) |rel| { + switch (cpu_arch) { + .aarch64 => { + const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); + if (rel_type != .ARM64_RELOC_UNSIGNED) continue; + if (rel.r_length != 3) continue; + }, + .x86_64 => { + const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); + if (rel_type != .X86_64_RELOC_UNSIGNED) continue; + if (rel.r_length != 3) continue; + }, + else => unreachable, + } + const target = Atom.parseRelocTarget(self, atom_index, rel); + const target_sym = self.getSymbol(target); + if (target_sym.undf()) continue; + + const base_offset = @as(i32, @intCast(sym.n_value - segment.vmaddr)); + const rel_offset = rel.r_address - base_rel_offset; + const offset = @as(u64, @intCast(base_offset + rel_offset)); + log.debug(" | rebase at {x}", .{offset + segment.vmaddr}); + + try rebase.entries.append(gpa, .{ + .offset = offset, + .segment_id = segment_index, + }); + } + } + + if (atom.next_index) |next_index| { + atom_index = next_index; + } else break; + } + } + + try rebase.finalize(gpa); +} + +fn collectBindDataFromContainer(self: *MachO, sect_id: u8, bind: *Bind, container: anytype) !void { + const slice = self.sections.slice(); + const segment_index = slice.items(.segment_index)[sect_id]; + const seg = self.getSegment(sect_id); + + const gpa = self.base.allocator; + try bind.entries.ensureUnusedCapacity(gpa, container.items.len); + + for (container.items) |entry| { + const bind_sym_name = entry.getTargetSymbolName(self); + const bind_sym = entry.getTargetSymbol(self); + if (bind_sym.sect()) continue; + + const sym = entry.getAtomSymbol(self); + const base_offset = sym.n_value - seg.vmaddr; + const dylib_ordinal = @divTrunc(@as(i16, @bitCast(bind_sym.n_desc)), macho.N_SYMBOL_RESOLVER); + log.debug("bind at {x}, import('{s}') in dylib({d})", .{ + seg.vmaddr + base_offset, + bind_sym_name, + dylib_ordinal, + }); + if (bind_sym.weakRef()) { + log.debug(" | marking as weak ref ", .{}); + } + bind.entries.appendAssumeCapacity(.{ + .target = entry.target, + .offset = base_offset, + .segment_id = segment_index, + .addend = 0, + }); + } 
+} + +fn collectBindData(self: *MachO, bind: *Bind) !void { + log.debug("collecting bind data", .{}); + + const gpa = self.base.allocator; + + // First, unpack GOT section + if (self.getSectionByName("__DATA_CONST", "__got")) |sect_id| { + try self.collectBindDataFromContainer(sect_id, bind, self.got_entries); + } + + // Next, unpack TLV pointers section + if (self.getSectionByName("__DATA", "__thread_ptrs")) |sect_id| { + try self.collectBindDataFromContainer(sect_id, bind, self.tlv_ptr_entries); + } + + // Finally, unpack the rest. + const slice = self.sections.slice(); + for (slice.items(.header), 0..) |header, sect_id| { + switch (header.type()) { + macho.S_LITERAL_POINTERS, + macho.S_REGULAR, + macho.S_MOD_INIT_FUNC_POINTERS, + macho.S_MOD_TERM_FUNC_POINTERS, + => {}, + else => continue, + } + + const segment_index = slice.items(.segment_index)[sect_id]; + const segment = self.getSegment(@as(u8, @intCast(sect_id))); + if (segment.maxprot & macho.PROT.WRITE == 0) continue; + + log.debug("{s},{s}", .{ header.segName(), header.sectName() }); + + const cpu_arch = self.options.target.cpu_arch.?; + var atom_index = slice.items(.first_atom_index)[sect_id]; + if (atom_index == 0) continue; + + while (true) { + const atom = self.getAtom(atom_index); + const sym = self.getSymbol(atom.getSymbolWithLoc()); + + log.debug(" ATOM({d}, %{d}, '{s}')", .{ atom_index, atom.sym_index, self.getSymbolName(atom.getSymbolWithLoc()) }); + + const should_bind = blk: { + if (self.dyld_private_sym_index) |sym_index| { + if (atom.getFile() == null and atom.sym_index == sym_index) break :blk false; + } + break :blk true; + }; + + if (should_bind) { + const object = self.objects.items[atom.getFile().?]; + const base_rel_offset: i32 = blk: { + const source_sym = object.getSourceSymbol(atom.sym_index) orelse break :blk 0; + const source_sect = object.getSourceSection(source_sym.n_sect - 1); + break :blk @as(i32, @intCast(source_sym.n_value - source_sect.addr)); + }; + const relocs = Atom.getAtomRelocs(self, atom_index); + + for (relocs) |rel| { + switch (cpu_arch) { + .aarch64 => { + const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); + if (rel_type != .ARM64_RELOC_UNSIGNED) continue; + if (rel.r_length != 3) continue; + }, + .x86_64 => { + const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); + if (rel_type != .X86_64_RELOC_UNSIGNED) continue; + if (rel.r_length != 3) continue; + }, + else => unreachable, + } + + const global = Atom.parseRelocTarget(self, atom_index, rel); + const bind_sym_name = self.getSymbolName(global); + const bind_sym = self.getSymbol(global); + if (!bind_sym.undf()) continue; + + const base_offset = sym.n_value - segment.vmaddr; + const rel_offset = @as(u32, @intCast(rel.r_address - base_rel_offset)); + const offset = @as(u64, @intCast(base_offset + rel_offset)); + + const code = Atom.getAtomCode(self, atom_index); + const addend = mem.readIntLittle(i64, code[rel_offset..][0..8]); + + const dylib_ordinal = @divTrunc(@as(i16, @bitCast(bind_sym.n_desc)), macho.N_SYMBOL_RESOLVER); + log.debug("bind at {x}, import('{s}') in dylib({d})", .{ + segment.vmaddr + offset, + bind_sym_name, + dylib_ordinal, + }); + log.debug(" | with addend {x}", .{addend}); + if (bind_sym.weakRef()) { + log.debug(" | marking as weak ref ", .{}); + } + try bind.entries.append(gpa, .{ + .target = global, + .offset = offset, + .segment_id = segment_index, + .addend = addend, + }); + } + } + if (atom.next_index) |next_index| { + atom_index = next_index; + } else break; + } + } + + try 
bind.finalize(gpa, self); +} + +fn collectLazyBindData(self: *MachO, lazy_bind: *LazyBind) !void { + const sect_id = self.getSectionByName("__DATA", "__la_symbol_ptr") orelse return; + + log.debug("collecting lazy bind data", .{}); + + const slice = self.sections.slice(); + const segment_index = slice.items(.segment_index)[sect_id]; + const seg = self.getSegment(sect_id); + var atom_index = slice.items(.first_atom_index)[sect_id]; + + // TODO: we actually don't need to store lazy pointer atoms as they are synthetically generated by the linker + const gpa = self.base.allocator; + try lazy_bind.entries.ensureUnusedCapacity(gpa, self.stubs.items.len); + + var count: u32 = 0; + while (true) : (count += 1) { + const atom = self.getAtom(atom_index); + + log.debug(" ATOM(%{d}, '{s}')", .{ atom.sym_index, self.getSymbolName(atom.getSymbolWithLoc()) }); + + const sym = self.getSymbol(atom.getSymbolWithLoc()); + const base_offset = sym.n_value - seg.vmaddr; + + const stub_entry = self.stubs.items[count]; + const bind_sym = stub_entry.getTargetSymbol(self); + const bind_sym_name = stub_entry.getTargetSymbolName(self); + const dylib_ordinal = @divTrunc(@as(i16, @bitCast(bind_sym.n_desc)), macho.N_SYMBOL_RESOLVER); + log.debug(" | lazy bind at {x}, import('{s}') in dylib({d})", .{ + base_offset, + bind_sym_name, + dylib_ordinal, + }); + if (bind_sym.weakRef()) { + log.debug(" | marking as weak ref ", .{}); + } + lazy_bind.entries.appendAssumeCapacity(.{ + .offset = base_offset, + .segment_id = segment_index, + .target = stub_entry.target, + .addend = 0, + }); + + if (atom.next_index) |next_index| { + atom_index = next_index; + } else break; + } + + try lazy_bind.finalize(gpa, self); +} + +fn collectExportData(self: *MachO, trie: *Trie) !void { + const gpa = self.base.allocator; + + // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. 
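+    // The export trie maps symbol names to offsets from the __TEXT segment's vmaddr:
+    // executables export only the entry point and __mh_execute_header, while dylibs
+    // export every live (non-undefined, non-dead) global.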
+ log.debug("collecting export data", .{}); + + const segment_index = self.getSegmentByName("__TEXT").?; + const exec_segment = self.segments.items[segment_index]; + const base_address = exec_segment.vmaddr; + + if (self.options.output_mode == .exe) { + for (&[_]SymbolWithLoc{ + self.getEntryPoint(), + self.globals.items[self.mh_execute_header_index.?], + }) |global| { + const sym = self.getSymbol(global); + const sym_name = self.getSymbolName(global); + log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); + try trie.put(gpa, .{ + .name = sym_name, + .vmaddr_offset = sym.n_value - base_address, + .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, + }); + } + } else { + assert(self.options.output_mode == .lib); + for (self.globals.items) |global| { + const sym = self.getSymbol(global); + if (sym.undf()) continue; + if (sym.n_desc == N_DEAD) continue; + + const sym_name = self.getSymbolName(global); + log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); + try trie.put(gpa, .{ + .name = sym_name, + .vmaddr_offset = sym.n_value - base_address, + .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, + }); + } + } + + try trie.finalize(gpa); +} + +fn writeDyldInfoData(self: *MachO) !void { + const gpa = self.base.allocator; + + var rebase = Rebase{}; + defer rebase.deinit(gpa); + try self.collectRebaseData(&rebase); + + var bind = Bind{}; + defer bind.deinit(gpa); + try self.collectBindData(&bind); + + var lazy_bind = LazyBind{}; + defer lazy_bind.deinit(gpa); + try self.collectLazyBindData(&lazy_bind); + + var trie = Trie{}; + defer trie.deinit(gpa); + try self.collectExportData(&trie); + + const link_seg = self.getLinkeditSegmentPtr(); + assert(mem.isAlignedGeneric(u64, link_seg.fileoff, @alignOf(u64))); + const rebase_off = link_seg.fileoff; + const rebase_size = rebase.size(); + const rebase_size_aligned = mem.alignForwardGeneric(u64, rebase_size, @alignOf(u64)); + log.debug("writing rebase info from 0x{x} to 0x{x}", .{ rebase_off, rebase_off + rebase_size_aligned }); + + const bind_off = rebase_off + rebase_size_aligned; + const bind_size = bind.size(); + const bind_size_aligned = mem.alignForwardGeneric(u64, bind_size, @alignOf(u64)); + log.debug("writing bind info from 0x{x} to 0x{x}", .{ bind_off, bind_off + bind_size_aligned }); + + const lazy_bind_off = bind_off + bind_size_aligned; + const lazy_bind_size = lazy_bind.size(); + const lazy_bind_size_aligned = mem.alignForwardGeneric(u64, lazy_bind_size, @alignOf(u64)); + log.debug("writing lazy bind info from 0x{x} to 0x{x}", .{ + lazy_bind_off, + lazy_bind_off + lazy_bind_size_aligned, + }); + + const export_off = lazy_bind_off + lazy_bind_size_aligned; + const export_size = trie.size; + const export_size_aligned = mem.alignForwardGeneric(u64, export_size, @alignOf(u64)); + log.debug("writing export trie from 0x{x} to 0x{x}", .{ export_off, export_off + export_size_aligned }); + + const needed_size = export_off + export_size_aligned - rebase_off; + link_seg.filesize = needed_size; + assert(mem.isAlignedGeneric(u64, link_seg.fileoff + link_seg.filesize, @alignOf(u64))); + + var buffer = try gpa.alloc(u8, needed_size); + defer gpa.free(buffer); + mem.set(u8, buffer, 0); + + var stream = std.io.fixedBufferStream(buffer); + const writer = stream.writer(); + + try rebase.write(writer); + try stream.seekTo(bind_off - rebase_off); + + try bind.write(writer); + try stream.seekTo(lazy_bind_off - rebase_off); + + try lazy_bind.write(writer); + try stream.seekTo(export_off - rebase_off); + + _ = 
try trie.write(writer); + + log.debug("writing dyld info from 0x{x} to 0x{x}", .{ + rebase_off, + rebase_off + needed_size, + }); + + try self.base.file.pwriteAll(buffer, rebase_off); + try self.populateLazyBindOffsetsInStubHelper(lazy_bind); + + self.dyld_info_cmd.rebase_off = @as(u32, @intCast(rebase_off)); + self.dyld_info_cmd.rebase_size = @as(u32, @intCast(rebase_size_aligned)); + self.dyld_info_cmd.bind_off = @as(u32, @intCast(bind_off)); + self.dyld_info_cmd.bind_size = @as(u32, @intCast(bind_size_aligned)); + self.dyld_info_cmd.lazy_bind_off = @as(u32, @intCast(lazy_bind_off)); + self.dyld_info_cmd.lazy_bind_size = @as(u32, @intCast(lazy_bind_size_aligned)); + self.dyld_info_cmd.export_off = @as(u32, @intCast(export_off)); + self.dyld_info_cmd.export_size = @as(u32, @intCast(export_size_aligned)); +} + +fn populateLazyBindOffsetsInStubHelper(self: *MachO, lazy_bind: LazyBind) !void { + if (lazy_bind.size() == 0) return; + + const stub_helper_section_index = self.getSectionByName("__TEXT", "__stub_helper").?; + assert(self.stub_helper_preamble_sym_index != null); + + const section = self.sections.get(stub_helper_section_index); + const stub_offset: u4 = switch (self.options.target.cpu_arch.?) { + .x86_64 => 1, + .aarch64 => 2 * @sizeOf(u32), + else => unreachable, + }; + const header = section.header; + var atom_index = section.first_atom_index; + atom_index = self.getAtom(atom_index).next_index.?; // skip preamble + + var index: usize = 0; + while (true) { + const atom = self.getAtom(atom_index); + const atom_sym = self.getSymbol(atom.getSymbolWithLoc()); + const file_offset = header.offset + atom_sym.n_value - header.addr + stub_offset; + const bind_offset = lazy_bind.offsets.items[index]; + + log.debug("writing lazy bind offset 0x{x} in stub helper at 0x{x}", .{ bind_offset, file_offset }); + + try self.base.file.pwriteAll(mem.asBytes(&bind_offset), file_offset); + + if (atom.next_index) |next_index| { + atom_index = next_index; + index += 1; + } else break; + } +} + +const asc_u64 = std.sort.asc(u64); + +fn writeFunctionStarts(self: *MachO) !void { + const text_seg_index = self.getSegmentByName("__TEXT") orelse return; + const text_sect_index = self.getSectionByName("__TEXT", "__text") orelse return; + const text_seg = self.segments.items[text_seg_index]; + + const gpa = self.base.allocator; + + // We need to sort by address first + var addresses = std.ArrayList(u64).init(gpa); + defer addresses.deinit(); + try addresses.ensureTotalCapacityPrecise(self.globals.items.len); + + for (self.globals.items) |global| { + const sym = self.getSymbol(global); + if (sym.undf()) continue; + if (sym.n_desc == N_DEAD) continue; + + const sect_id = sym.n_sect - 1; + if (sect_id != text_sect_index) continue; + + addresses.appendAssumeCapacity(sym.n_value); + } + + std.sort.sort(u64, addresses.items, {}, asc_u64); + + var offsets = std.ArrayList(u32).init(gpa); + defer offsets.deinit(); + try offsets.ensureTotalCapacityPrecise(addresses.items.len); + + var last_off: u32 = 0; + for (addresses.items) |addr| { + const offset = @as(u32, @intCast(addr - text_seg.vmaddr)); + const diff = offset - last_off; + + if (diff == 0) continue; + + offsets.appendAssumeCapacity(diff); + last_off = offset; + } + + var buffer = std.ArrayList(u8).init(gpa); + defer buffer.deinit(); + + const max_size = @as(usize, @intCast(offsets.items.len * @sizeOf(u64))); + try buffer.ensureTotalCapacity(max_size); + + for (offsets.items) |offset| { + try std.leb.writeULEB128(buffer.writer(), offset); + } + + const link_seg = 
self.getLinkeditSegmentPtr(); + const offset = link_seg.fileoff + link_seg.filesize; + assert(mem.isAlignedGeneric(u64, offset, @alignOf(u64))); + const needed_size = buffer.items.len; + const needed_size_aligned = mem.alignForwardGeneric(u64, needed_size, @alignOf(u64)); + const padding = needed_size_aligned - needed_size; + if (padding > 0) { + try buffer.ensureUnusedCapacity(padding); + buffer.appendNTimesAssumeCapacity(0, padding); + } + link_seg.filesize = offset + needed_size_aligned - link_seg.fileoff; + + log.debug("writing function starts info from 0x{x} to 0x{x}", .{ offset, offset + needed_size_aligned }); + + try self.base.file.pwriteAll(buffer.items, offset); + + self.function_starts_cmd.dataoff = @as(u32, @intCast(offset)); + self.function_starts_cmd.datasize = @as(u32, @intCast(needed_size_aligned)); +} + +fn filterDataInCode( + dices: []const macho.data_in_code_entry, + start_addr: u64, + end_addr: u64, +) []const macho.data_in_code_entry { + const Predicate = struct { + addr: u64, + + pub fn predicate(self: @This(), dice: macho.data_in_code_entry) bool { + return dice.offset >= self.addr; + } + }; + + const start = MachO.lsearch(macho.data_in_code_entry, dices, Predicate{ .addr = start_addr }); + const end = MachO.lsearch(macho.data_in_code_entry, dices[start..], Predicate{ .addr = end_addr }) + start; + + return dices[start..end]; +} + +fn writeDataInCode(self: *MachO) !void { + var out_dice = std.ArrayList(macho.data_in_code_entry).init(self.base.allocator); + defer out_dice.deinit(); + + const text_sect_id = self.getSectionByName("__TEXT", "__text") orelse return; + const text_sect_header = self.sections.items(.header)[text_sect_id]; + + for (self.objects.items) |object| { + if (!object.hasDataInCode()) continue; + const dice = object.data_in_code.items; + try out_dice.ensureUnusedCapacity(dice.len); + + for (object.exec_atoms.items) |atom_index| { + const atom = self.getAtom(atom_index); + const sym = self.getSymbol(atom.getSymbolWithLoc()); + if (sym.n_desc == N_DEAD) continue; + + const source_addr = if (object.getSourceSymbol(atom.sym_index)) |source_sym| + source_sym.n_value + else blk: { + const nbase = @as(u32, @intCast(object.in_symtab.?.len)); + const source_sect_id = @as(u8, @intCast(atom.sym_index - nbase)); + break :blk object.getSourceSection(source_sect_id).addr; + }; + const filtered_dice = filterDataInCode(dice, source_addr, source_addr + atom.size); + const base = math.cast(u32, sym.n_value - text_sect_header.addr + text_sect_header.offset) orelse + return error.Overflow; + + for (filtered_dice) |single| { + const offset = math.cast(u32, single.offset - source_addr + base) orelse + return error.Overflow; + out_dice.appendAssumeCapacity(.{ + .offset = offset, + .length = single.length, + .kind = single.kind, + }); + } + } + } + + const seg = self.getLinkeditSegmentPtr(); + const offset = seg.fileoff + seg.filesize; + assert(mem.isAlignedGeneric(u64, offset, @alignOf(u64))); + const needed_size = out_dice.items.len * @sizeOf(macho.data_in_code_entry); + const needed_size_aligned = mem.alignForwardGeneric(u64, needed_size, @alignOf(u64)); + seg.filesize = offset + needed_size_aligned - seg.fileoff; + + const buffer = try self.base.allocator.alloc(u8, needed_size_aligned); + defer self.base.allocator.free(buffer); + mem.set(u8, buffer, 0); + mem.copy(u8, buffer, mem.sliceAsBytes(out_dice.items)); + + log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ offset, offset + needed_size_aligned }); + + try self.base.file.pwriteAll(buffer, offset); + + 
self.data_in_code_cmd.dataoff = @as(u32, @intCast(offset)); + self.data_in_code_cmd.datasize = @as(u32, @intCast(needed_size_aligned)); +} + +fn writeSymtabs(self: *MachO) !void { + var ctx = try self.writeSymtab(); + defer ctx.imports_table.deinit(); + try self.writeDysymtab(ctx); + try self.writeStrtab(); +} + +fn writeSymtab(self: *MachO) !SymtabCtx { + const gpa = self.base.allocator; + + var locals = std.ArrayList(macho.nlist_64).init(gpa); + defer locals.deinit(); + + for (self.objects.items) |object| { + for (object.atoms.items) |atom_index| { + const atom = self.getAtom(atom_index); + const sym_loc = atom.getSymbolWithLoc(); + const sym = self.getSymbol(sym_loc); + if (sym.n_strx == 0) continue; // no name, skip + if (sym.ext()) continue; // an export lands in its own symtab section, skip + if (self.symbolIsTemp(sym_loc)) continue; // local temp symbol, skip + + var out_sym = sym; + out_sym.n_strx = try self.strtab.insert(gpa, self.getSymbolName(sym_loc)); + try locals.append(out_sym); + } + } + + var exports = std.ArrayList(macho.nlist_64).init(gpa); + defer exports.deinit(); + + for (self.globals.items) |global| { + const sym = self.getSymbol(global); + if (sym.undf()) continue; // import, skip + if (sym.n_desc == N_DEAD) continue; + + var out_sym = sym; + out_sym.n_strx = try self.strtab.insert(gpa, self.getSymbolName(global)); + try exports.append(out_sym); + } + + var imports = std.ArrayList(macho.nlist_64).init(gpa); + defer imports.deinit(); + + var imports_table = std.AutoHashMap(SymbolWithLoc, u32).init(gpa); + + for (self.globals.items) |global| { + const sym = self.getSymbol(global); + if (!sym.undf()) continue; // not an import, skip + if (sym.n_desc == N_DEAD) continue; + + const new_index = @as(u32, @intCast(imports.items.len)); + var out_sym = sym; + out_sym.n_strx = try self.strtab.insert(gpa, self.getSymbolName(global)); + try imports.append(out_sym); + try imports_table.putNoClobber(global, new_index); + } + + // We generate stabs last in order to ensure that the strtab always has debug info + // strings trailing + if (!self.options.strip) { + for (self.objects.items) |object| { + try self.generateSymbolStabs(object, &locals); + } + } + + const nlocals = @as(u32, @intCast(locals.items.len)); + const nexports = @as(u32, @intCast(exports.items.len)); + const nimports = @as(u32, @intCast(imports.items.len)); + const nsyms = nlocals + nexports + nimports; + + const seg = self.getLinkeditSegmentPtr(); + const offset = seg.fileoff + seg.filesize; + assert(mem.isAlignedGeneric(u64, offset, @alignOf(u64))); + const needed_size = nsyms * @sizeOf(macho.nlist_64); + seg.filesize = offset + needed_size - seg.fileoff; + assert(mem.isAlignedGeneric(u64, seg.fileoff + seg.filesize, @alignOf(u64))); + + var buffer = std.ArrayList(u8).init(gpa); + defer buffer.deinit(); + try buffer.ensureTotalCapacityPrecise(needed_size); + buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(locals.items)); + buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(exports.items)); + buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(imports.items)); + + log.debug("writing symtab from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); + try self.base.file.pwriteAll(buffer.items, offset); + + self.symtab_cmd.symoff = @as(u32, @intCast(offset)); + self.symtab_cmd.nsyms = nsyms; + + return SymtabCtx{ + .nlocalsym = nlocals, + .nextdefsym = nexports, + .nundefsym = nimports, + .imports_table = imports_table, + }; +} + +fn writeStrtab(self: *MachO) !void { + const seg = self.getLinkeditSegmentPtr(); + const 
offset = seg.fileoff + seg.filesize; + assert(mem.isAlignedGeneric(u64, offset, @alignOf(u64))); + const needed_size = self.strtab.buffer.items.len; + const needed_size_aligned = mem.alignForwardGeneric(u64, needed_size, @alignOf(u64)); + seg.filesize = offset + needed_size_aligned - seg.fileoff; + + log.debug("writing string table from 0x{x} to 0x{x}", .{ offset, offset + needed_size_aligned }); + + const buffer = try self.base.allocator.alloc(u8, needed_size_aligned); + defer self.base.allocator.free(buffer); + mem.set(u8, buffer, 0); + mem.copy(u8, buffer, self.strtab.buffer.items); + + try self.base.file.pwriteAll(buffer, offset); + + self.symtab_cmd.stroff = @as(u32, @intCast(offset)); + self.symtab_cmd.strsize = @as(u32, @intCast(needed_size_aligned)); +} + +const SymtabCtx = struct { + nlocalsym: u32, + nextdefsym: u32, + nundefsym: u32, + imports_table: std.AutoHashMap(SymbolWithLoc, u32), +}; + +fn writeDysymtab(self: *MachO, ctx: SymtabCtx) !void { + const gpa = self.base.allocator; + const nstubs = @as(u32, @intCast(self.stubs.items.len)); + const ngot_entries = @as(u32, @intCast(self.got_entries.items.len)); + const nindirectsyms = nstubs * 2 + ngot_entries; + const iextdefsym = ctx.nlocalsym; + const iundefsym = iextdefsym + ctx.nextdefsym; + + const seg = self.getLinkeditSegmentPtr(); + const offset = seg.fileoff + seg.filesize; + assert(mem.isAlignedGeneric(u64, offset, @alignOf(u64))); + const needed_size = nindirectsyms * @sizeOf(u32); + const needed_size_aligned = mem.alignForwardGeneric(u64, needed_size, @alignOf(u64)); + seg.filesize = offset + needed_size_aligned - seg.fileoff; + + log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ offset, offset + needed_size_aligned }); + + var buf = std.ArrayList(u8).init(gpa); + defer buf.deinit(); + try buf.ensureTotalCapacity(needed_size_aligned); + const writer = buf.writer(); + + if (self.getSectionByName("__TEXT", "__stubs")) |sect_id| { + const stubs = &self.sections.items(.header)[sect_id]; + stubs.reserved1 = 0; + for (self.stubs.items) |entry| { + const target_sym = entry.getTargetSymbol(self); + assert(target_sym.undf()); + try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); + } + } + + if (self.getSectionByName("__DATA_CONST", "__got")) |sect_id| { + const got = &self.sections.items(.header)[sect_id]; + got.reserved1 = nstubs; + for (self.got_entries.items) |entry| { + const target_sym = entry.getTargetSymbol(self); + if (target_sym.undf()) { + try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); + } else { + try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); + } + } + } + + if (self.getSectionByName("__DATA", "__la_symbol_ptr")) |sect_id| { + const la_symbol_ptr = &self.sections.items(.header)[sect_id]; + la_symbol_ptr.reserved1 = nstubs + ngot_entries; + for (self.stubs.items) |entry| { + const target_sym = entry.getTargetSymbol(self); + assert(target_sym.undf()); + try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); + } + } + + const padding = needed_size_aligned - needed_size; + if (padding > 0) { + buf.appendNTimesAssumeCapacity(0, padding); + } + + assert(buf.items.len == needed_size_aligned); + + try self.base.file.pwriteAll(buf.items, offset); + + self.dysymtab_cmd.nlocalsym = ctx.nlocalsym; + self.dysymtab_cmd.iextdefsym = iextdefsym; + self.dysymtab_cmd.nextdefsym = ctx.nextdefsym; + self.dysymtab_cmd.iundefsym = iundefsym; + self.dysymtab_cmd.nundefsym = ctx.nundefsym; + 
self.dysymtab_cmd.indirectsymoff = @as(u32, @intCast(offset)); + self.dysymtab_cmd.nindirectsyms = nindirectsyms; +} + +fn writeUuid(self: *MachO, args: struct { + linkedit_cmd_offset: u32, + symtab_cmd_offset: u32, + uuid_cmd_offset: u32, + codesig_cmd_offset: ?u32, +}) !void { + // We set the max file size to the actual strtab buffer length to exclude any strtab padding. + const max_file_end = @as(u32, @intCast(self.symtab_cmd.stroff + self.strtab.buffer.items.len)); + + const FileSubsection = struct { + start: u32, + end: u32, + }; + + var subsections: [5]FileSubsection = undefined; + var count: usize = 0; + + // Exclude LINKEDIT segment command as it contains file size that includes stabs contribution + // and code signature. + subsections[count] = .{ + .start = 0, + .end = args.linkedit_cmd_offset, + }; + count += 1; + + // Exclude SYMTAB and DYSYMTAB commands for the same reason. + subsections[count] = .{ + .start = subsections[count - 1].end + @sizeOf(macho.segment_command_64), + .end = args.symtab_cmd_offset, + }; + count += 1; + + // Exclude CODE_SIGNATURE command (if present). + if (args.codesig_cmd_offset) |offset| { + subsections[count] = .{ + .start = subsections[count - 1].end + @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command), + .end = offset, + }; + count += 1; + } + + if (!self.options.strip) { + // Exclude region comprising all symbol stabs. + const nlocals = self.dysymtab_cmd.nlocalsym; + + const locals = try self.base.allocator.alloc(macho.nlist_64, nlocals); + defer self.base.allocator.free(locals); + + const locals_buf = @as([*]u8, @ptrCast(locals.ptr))[0 .. @sizeOf(macho.nlist_64) * nlocals]; + const amt = try self.base.file.preadAll(locals_buf, self.symtab_cmd.symoff); + if (amt != locals_buf.len) return error.InputOutput; + + const istab: usize = for (locals, 0..) |local, i| { + if (local.stab()) break i; + } else locals.len; + const nstabs = locals.len - istab; + + if (nstabs == 0) { + subsections[count] = .{ + .start = subsections[count - 1].end + if (args.codesig_cmd_offset == null) + @as(u32, @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command)) + else + @sizeOf(macho.linkedit_data_command), + .end = max_file_end, + }; + count += 1; + } else { + // Exclude a subsection of the strtab with names of the stabs. + // We do not care about anything succeeding strtab as it is the code signature data which is + // not part of the UUID calculation anyway. 
+ const stab_stroff = locals[istab].n_strx; + + subsections[count] = .{ + .start = subsections[count - 1].end + if (args.codesig_cmd_offset == null) + @as(u32, @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command)) + else + @sizeOf(macho.linkedit_data_command), + .end = @as(u32, @intCast(self.symtab_cmd.symoff + istab * @sizeOf(macho.nlist_64))), + }; + count += 1; + + subsections[count] = .{ + .start = subsections[count - 1].end + @as(u32, @intCast(nstabs * @sizeOf(macho.nlist_64))), + .end = self.symtab_cmd.stroff + stab_stroff, + }; + count += 1; + } + } else { + subsections[count] = .{ + .start = subsections[count - 1].end + if (args.codesig_cmd_offset == null) + @as(u32, @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command)) + else + @sizeOf(macho.linkedit_data_command), + .end = max_file_end, + }; + count += 1; + } + + const chunk_size = 0x4000; + + var hasher = Md5.init(.{}); + var buffer: [chunk_size]u8 = undefined; + + for (subsections[0..count]) |cut| { + const size = cut.end - cut.start; + const num_chunks = mem.alignForward(size, chunk_size) / chunk_size; + + var i: usize = 0; + while (i < num_chunks) : (i += 1) { + const fstart = cut.start + i * chunk_size; + const fsize = if (fstart + chunk_size > cut.end) + cut.end - fstart + else + chunk_size; + const amt = try self.base.file.preadAll(buffer[0..fsize], fstart); + if (amt != fsize) return error.InputOutput; + + hasher.update(buffer[0..fsize]); + } + } + + hasher.final(&self.uuid_cmd.uuid); + conformUuid(&self.uuid_cmd.uuid); + + const in_file = args.uuid_cmd_offset + @sizeOf(macho.load_command); + try self.base.file.pwriteAll(&self.uuid_cmd.uuid, in_file); +} + +inline fn conformUuid(out: *[Md5.digest_length]u8) void { + // LC_UUID uuids should conform to RFC 4122 UUID version 4 & UUID version 5 formats + out[6] = (out[6] & 0x0F) | (3 << 4); + out[8] = (out[8] & 0x3F) | 0x80; +} + +fn writeCodeSignaturePadding(self: *MachO, code_sig: *CodeSignature) !void { + const seg = self.getLinkeditSegmentPtr(); + // Code signature data has to be 16-bytes aligned for Apple tools to recognize the file + // https://github.com/opensource-apple/cctools/blob/fdb4825f303fd5c0751be524babd32958181b3ed/libstuff/checkout.c#L271 + const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, 16); + const needed_size = code_sig.estimateSize(offset); + seg.filesize = offset + needed_size - seg.fileoff; + seg.vmsize = mem.alignForwardGeneric(u64, seg.filesize, self.page_size); + log.debug("writing code signature padding from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); + // Pad out the space. We need to do this to calculate valid hashes for everything in the file + // except for code signature data. 
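+    // Writing a single zero byte at the very last padded offset is enough to grow the file
+    // to `offset + needed_size`; the skipped-over range reads back as zeros.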
+    try self.base.file.pwriteAll(&[_]u8{0}, offset + needed_size - 1);
+
+    self.codesig_cmd.dataoff = @as(u32, @intCast(offset));
+    self.codesig_cmd.datasize = @as(u32, @intCast(needed_size));
+}
+
+fn writeCodeSignature(self: *MachO, code_sig: *CodeSignature) !void {
+    const seg_id = self.getSegmentByName("__TEXT").?;
+    const seg = self.segments.items[seg_id];
+
+    var buffer = std.ArrayList(u8).init(self.base.allocator);
+    defer buffer.deinit();
+    try buffer.ensureTotalCapacityPrecise(code_sig.size());
+    try code_sig.writeAdhocSignature(self, .{
+        .file = self.base.file,
+        .exec_seg_base = seg.fileoff,
+        .exec_seg_limit = seg.filesize,
+        .file_size = self.codesig_cmd.dataoff,
+        .output_mode = self.options.output_mode,
+    }, buffer.writer());
+    assert(buffer.items.len == code_sig.size());
+
+    log.debug("writing code signature from 0x{x} to 0x{x}", .{
+        self.codesig_cmd.dataoff,
+        self.codesig_cmd.dataoff + buffer.items.len,
+    });
+
+    try self.base.file.pwriteAll(buffer.items, self.codesig_cmd.dataoff);
+}
+
+/// Writes Mach-O file header.
+fn writeHeader(self: *MachO, ncmds: u32, sizeofcmds: u32) !void {
+    var header: macho.mach_header_64 = .{};
+    header.flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK | macho.MH_PIE | macho.MH_TWOLEVEL;
+
+    switch (self.options.target.cpu_arch.?) {
+        .aarch64 => {
+            header.cputype = macho.CPU_TYPE_ARM64;
+            header.cpusubtype = macho.CPU_SUBTYPE_ARM_ALL;
+        },
+        .x86_64 => {
+            header.cputype = macho.CPU_TYPE_X86_64;
+            header.cpusubtype = macho.CPU_SUBTYPE_X86_64_ALL;
+        },
+        else => return error.UnsupportedCpuArchitecture,
+    }
+
+    switch (self.options.output_mode) {
+        .exe => {
+            header.filetype = macho.MH_EXECUTE;
+        },
+        .lib => {
+            // By this point, it can only be a dylib.
+            header.filetype = macho.MH_DYLIB;
+            header.flags |= macho.MH_NO_REEXPORTED_DYLIBS;
+        },
+    }
+
+    // Only advertise TLV descriptors when the __thread_vars section actually has contents.
+    if (self.getSectionByName("__DATA", "__thread_vars")) |sect_id| {
+        if (self.sections.items(.header)[sect_id].size > 0) {
+            header.flags |= macho.MH_HAS_TLV_DESCRIPTORS;
+        }
+    }
+
+    header.ncmds = ncmds;
+    header.sizeofcmds = sizeofcmds;
+
+    log.debug("writing Mach-O header {}", .{header});
+
+    try self.base.file.pwriteAll(mem.asBytes(&header), 0);
+}
+
+pub fn makeStaticString(bytes: []const u8) [16]u8 {
+    var buf = [_]u8{0} ** 16;
+    assert(bytes.len <= buf.len);
+    mem.copy(u8, &buf, bytes);
+    return buf;
+}
+
+pub fn getAtomPtr(self: *MachO, atom_index: AtomIndex) *Atom {
+    assert(atom_index < self.atoms.items.len);
+    return &self.atoms.items[atom_index];
+}
+
+pub fn getAtom(self: MachO, atom_index: AtomIndex) Atom {
+    assert(atom_index < self.atoms.items.len);
+    return self.atoms.items[atom_index];
+}
+
+fn getSegmentByName(self: MachO, segname: []const u8) ?u8 {
+    for (self.segments.items, 0..)
|seg, i| { + if (mem.eql(u8, segname, seg.segName())) return @as(u8, @intCast(i)); + } else return null; +} + +pub fn getSegment(self: MachO, sect_id: u8) macho.segment_command_64 { + const index = self.sections.items(.segment_index)[sect_id]; + return self.segments.items[index]; +} + +pub fn getSegmentPtr(self: *MachO, sect_id: u8) *macho.segment_command_64 { + const index = self.sections.items(.segment_index)[sect_id]; + return &self.segments.items[index]; +} + +pub fn getLinkeditSegmentPtr(self: *MachO) *macho.segment_command_64 { + assert(self.segments.items.len > 0); + const seg = &self.segments.items[self.segments.items.len - 1]; + assert(mem.eql(u8, seg.segName(), "__LINKEDIT")); + return seg; +} + +pub fn getSectionByName(self: MachO, segname: []const u8, sectname: []const u8) ?u8 { + // TODO investigate caching with a hashmap + for (self.sections.items(.header), 0..) |header, i| { + if (mem.eql(u8, header.segName(), segname) and mem.eql(u8, header.sectName(), sectname)) + return @as(u8, @intCast(i)); + } else return null; +} + +pub fn getSectionIndexes(self: MachO, segment_index: u8) struct { start: u8, end: u8 } { + var start: u8 = 0; + const nsects = for (self.segments.items, 0..) |seg, i| { + if (i == segment_index) break @as(u8, @intCast(seg.nsects)); + start += @as(u8, @intCast(seg.nsects)); + } else 0; + return .{ .start = start, .end = start + nsects }; +} + +pub fn symbolIsTemp(self: *MachO, sym_with_loc: SymbolWithLoc) bool { + const sym = self.getSymbol(sym_with_loc); + if (!sym.sect()) return false; + if (sym.ext()) return false; + const sym_name = self.getSymbolName(sym_with_loc); + return mem.startsWith(u8, sym_name, "l") or mem.startsWith(u8, sym_name, "L"); +} + +/// Returns pointer-to-symbol described by `sym_with_loc` descriptor. +pub fn getSymbolPtr(self: *MachO, sym_with_loc: SymbolWithLoc) *macho.nlist_64 { + if (sym_with_loc.getFile()) |file| { + const object = &self.objects.items[file]; + return &object.symtab[sym_with_loc.sym_index]; + } else { + return &self.locals.items[sym_with_loc.sym_index]; + } +} + +/// Returns symbol described by `sym_with_loc` descriptor. +pub fn getSymbol(self: *const MachO, sym_with_loc: SymbolWithLoc) macho.nlist_64 { + if (sym_with_loc.getFile()) |file| { + const object = &self.objects.items[file]; + return object.symtab[sym_with_loc.sym_index]; + } else { + return self.locals.items[sym_with_loc.sym_index]; + } +} + +/// Returns name of the symbol described by `sym_with_loc` descriptor. +pub fn getSymbolName(self: *const MachO, sym_with_loc: SymbolWithLoc) []const u8 { + if (sym_with_loc.getFile()) |file| { + const object = self.objects.items[file]; + return object.getSymbolName(sym_with_loc.sym_index); + } else { + const sym = self.locals.items[sym_with_loc.sym_index]; + return self.strtab.get(sym.n_strx).?; + } +} + +/// Returns GOT atom that references `sym_with_loc` if one exists. +/// Returns null otherwise. +pub fn getGotAtomIndexForSymbol(self: *MachO, sym_with_loc: SymbolWithLoc) ?AtomIndex { + const index = self.got_table.get(sym_with_loc) orelse return null; + const entry = self.got_entries.items[index]; + return entry.atom_index; +} + +/// Returns stubs atom that references `sym_with_loc` if one exists. +/// Returns null otherwise. 
+pub fn getStubsAtomIndexForSymbol(self: *MachO, sym_with_loc: SymbolWithLoc) ?AtomIndex { + const index = self.stubs_table.get(sym_with_loc) orelse return null; + const entry = self.stubs.items[index]; + return entry.atom_index; +} + +/// Returns TLV pointer atom that references `sym_with_loc` if one exists. +/// Returns null otherwise. +pub fn getTlvPtrAtomIndexForSymbol(self: *MachO, sym_with_loc: SymbolWithLoc) ?AtomIndex { + const index = self.tlv_ptr_table.get(sym_with_loc) orelse return null; + const entry = self.tlv_ptr_entries.items[index]; + return entry.atom_index; +} + +/// Returns symbol location corresponding to the set entrypoint. +/// Asserts output mode is executable. +pub fn getEntryPoint(self: MachO) SymbolWithLoc { + assert(self.options.output_mode == .exe); + const global_index = self.entry_index.?; + return self.globals.items[global_index]; +} + +inline fn requiresThunks(self: MachO) bool { + return self.options.target.cpu_arch.? == .aarch64; +} + +/// Binary search +pub fn bsearch(comptime T: type, haystack: []align(1) const T, predicate: anytype) usize { + if (!@hasDecl(@TypeOf(predicate), "predicate")) + @compileError("Predicate is required to define fn predicate(@This(), T) bool"); + + var min: usize = 0; + var max: usize = haystack.len; + while (min < max) { + const index = (min + max) / 2; + const curr = haystack[index]; + if (predicate.predicate(curr)) { + min = index + 1; + } else { + max = index; + } + } + return min; +} + +/// Linear search +pub fn lsearch(comptime T: type, haystack: []align(1) const T, predicate: anytype) usize { + if (!@hasDecl(@TypeOf(predicate), "predicate")) + @compileError("Predicate is required to define fn predicate(@This(), T) bool"); + + var i: usize = 0; + while (i < haystack.len) : (i += 1) { + if (predicate.predicate(haystack[i])) break; + } + return i; +} + +pub fn generateSymbolStabs(self: *MachO, object: Object, locals: *std.ArrayList(macho.nlist_64)) !void { + assert(!self.options.strip); + + log.debug("generating stabs for '{s}'", .{object.name}); + + const gpa = self.base.allocator; + var debug_info = object.parseDwarfInfo(); + + var lookup = DwarfInfo.AbbrevLookupTable.init(gpa); + defer lookup.deinit(); + try lookup.ensureUnusedCapacity(std.math.maxInt(u8)); + + // We assume there is only one CU. 
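+    // The flow below: walk the DWARF data to recover the TU name and comp dir, emit the
+    // opening N_SO/N_SO/N_OSO stabs, then per-atom stabs (N_BNSYM/N_FUN/N_FUN/N_ENSYM for
+    // functions, N_GSYM/N_STSYM for data), and finally a terminating N_SO to close the scope.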
+ var cu_it = debug_info.getCompileUnitIterator(); + const compile_unit = while (try cu_it.next()) |cu| { + try debug_info.genAbbrevLookupByKind(cu.cuh.debug_abbrev_offset, &lookup); + break cu; + } else { + log.debug("no compile unit found in debug info in {s}; skipping", .{object.name}); + return; + }; + + var abbrev_it = compile_unit.getAbbrevEntryIterator(debug_info); + const cu_entry: DwarfInfo.AbbrevEntry = while (try abbrev_it.next(lookup)) |entry| switch (entry.tag) { + dwarf.TAG.compile_unit => break entry, + else => continue, + } else { + log.debug("missing DWARF_TAG_compile_unit tag in {s}; skipping", .{object.name}); + return; + }; + + var maybe_tu_name: ?[]const u8 = null; + var maybe_tu_comp_dir: ?[]const u8 = null; + var attr_it = cu_entry.getAttributeIterator(debug_info, compile_unit.cuh); + + while (try attr_it.next()) |attr| switch (attr.name) { + dwarf.AT.comp_dir => maybe_tu_comp_dir = attr.getString(debug_info, compile_unit.cuh) orelse continue, + dwarf.AT.name => maybe_tu_name = attr.getString(debug_info, compile_unit.cuh) orelse continue, + else => continue, + }; + + if (maybe_tu_name == null or maybe_tu_comp_dir == null) { + log.debug("missing DWARF_AT_comp_dir and DWARF_AT_name attributes {s}; skipping", .{object.name}); + return; + } + + const tu_name = maybe_tu_name.?; + const tu_comp_dir = maybe_tu_comp_dir.?; + + // Open scope + try locals.ensureUnusedCapacity(3); + locals.appendAssumeCapacity(.{ + .n_strx = try self.strtab.insert(gpa, tu_comp_dir), + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + locals.appendAssumeCapacity(.{ + .n_strx = try self.strtab.insert(gpa, tu_name), + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + locals.appendAssumeCapacity(.{ + .n_strx = try self.strtab.insert(gpa, object.name), + .n_type = macho.N_OSO, + .n_sect = 0, + .n_desc = 1, + .n_value = object.mtime, + }); + + var stabs_buf: [4]macho.nlist_64 = undefined; + + var name_lookup: ?DwarfInfo.SubprogramLookupByName = if (object.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS == 0) blk: { + var name_lookup = DwarfInfo.SubprogramLookupByName.init(gpa); + errdefer name_lookup.deinit(); + try name_lookup.ensureUnusedCapacity(@as(u32, @intCast(object.atoms.items.len))); + try debug_info.genSubprogramLookupByName(compile_unit, lookup, &name_lookup); + break :blk name_lookup; + } else null; + defer if (name_lookup) |*nl| nl.deinit(); + + for (object.atoms.items) |atom_index| { + const atom = self.getAtom(atom_index); + const stabs = try self.generateSymbolStabsForSymbol( + atom_index, + atom.getSymbolWithLoc(), + name_lookup, + &stabs_buf, + ); + try locals.appendSlice(stabs); + + var it = Atom.getInnerSymbolsIterator(self, atom_index); + while (it.next()) |sym_loc| { + const contained_stabs = try self.generateSymbolStabsForSymbol( + atom_index, + sym_loc, + name_lookup, + &stabs_buf, + ); + try locals.appendSlice(contained_stabs); + } + } + + // Close scope + try locals.append(.{ + .n_strx = 0, + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); +} + +fn generateSymbolStabsForSymbol( + self: *MachO, + atom_index: AtomIndex, + sym_loc: SymbolWithLoc, + lookup: ?DwarfInfo.SubprogramLookupByName, + buf: *[4]macho.nlist_64, +) ![]const macho.nlist_64 { + const gpa = self.base.allocator; + const object = self.objects.items[sym_loc.getFile().?]; + const sym = self.getSymbol(sym_loc); + const sym_name = self.getSymbolName(sym_loc); + const header = self.sections.items(.header)[sym.n_sect - 1]; + + if 
(sym.n_strx == 0) return buf[0..0]; + if (self.symbolIsTemp(sym_loc)) return buf[0..0]; + + if (!header.isCode()) { + // Since we are not dealing with machine code, it's either a global or a static depending + // on the linkage scope. + if (sym.sect() and sym.ext()) { + // Global gets an N_GSYM stab type. + buf[0] = .{ + .n_strx = try self.strtab.insert(gpa, sym_name), + .n_type = macho.N_GSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = 0, + }; + } else { + // Local static gets an N_STSYM stab type. + buf[0] = .{ + .n_strx = try self.strtab.insert(gpa, sym_name), + .n_type = macho.N_STSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = sym.n_value, + }; + } + return buf[0..1]; + } + + const size: u64 = size: { + if (object.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0) { + break :size self.getAtom(atom_index).size; + } + + // Since we don't have subsections to work with, we need to infer the size of each function + // the slow way by scanning the debug info for matching symbol names and extracting + // the symbol's DWARF_AT_low_pc and DWARF_AT_high_pc values. + const source_sym = object.getSourceSymbol(sym_loc.sym_index) orelse return buf[0..0]; + const subprogram = lookup.?.get(sym_name[1..]) orelse return buf[0..0]; + + if (subprogram.addr <= source_sym.n_value and source_sym.n_value < subprogram.addr + subprogram.size) { + break :size subprogram.size; + } else { + log.debug("no stab found for {s}", .{sym_name}); + return buf[0..0]; + } + }; + + buf[0] = .{ + .n_strx = 0, + .n_type = macho.N_BNSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = sym.n_value, + }; + buf[1] = .{ + .n_strx = try self.strtab.insert(gpa, sym_name), + .n_type = macho.N_FUN, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = sym.n_value, + }; + buf[2] = .{ + .n_strx = 0, + .n_type = macho.N_FUN, + .n_sect = 0, + .n_desc = 0, + .n_value = size, + }; + buf[3] = .{ + .n_strx = 0, + .n_type = macho.N_ENSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = size, + }; + + return buf; +} + +fn logSegments(self: *MachO) void { + log.debug("segments:", .{}); + for (self.segments.items, 0..) |segment, i| { + log.debug(" segment({d}): {s} @{x} ({x}), sizeof({x})", .{ + i, + segment.segName(), + segment.fileoff, + segment.vmaddr, + segment.vmsize, + }); + } +} + +fn logSections(self: *MachO) void { + log.debug("sections:", .{}); + for (self.sections.items(.header), 0..) |header, i| { + log.debug(" sect({d}): {s},{s} @{x} ({x}), sizeof({x})", .{ + i + 1, + header.segName(), + header.sectName(), + header.offset, + header.addr, + header.size, + }); + } +} + +fn logSymAttributes(sym: macho.nlist_64, buf: []u8) []const u8 { + if (sym.sect()) { + buf[0] = 's'; + } + if (sym.ext()) { + if (sym.weakDef() or sym.pext()) { + buf[1] = 'w'; + } else { + buf[1] = 'e'; + } + } + if (sym.tentative()) { + buf[2] = 't'; + } + if (sym.undf()) { + buf[3] = 'u'; + } + return buf[0..]; +} + +fn logSymtab(self: *MachO) void { + var buf: [4]u8 = undefined; + + const scoped_log = std.log.scoped(.symtab); + + scoped_log.debug("locals:", .{}); + for (self.objects.items, 0..) |object, id| { + scoped_log.debug(" object({d}): {s}", .{ id, object.name }); + for (object.symtab, 0..) 
|sym, sym_id| { + if (object.in_symtab == null) continue; + mem.set(u8, &buf, '_'); + scoped_log.debug(" %{d}: {s} @{x} in sect({d}), {s}", .{ + sym_id, + object.getSymbolName(@as(u32, @intCast(sym_id))), + sym.n_value, + sym.n_sect, + logSymAttributes(sym, &buf), + }); + } + } + + scoped_log.debug(" object(-1)", .{}); + for (self.locals.items, 0..) |sym, sym_id| { + if (sym.undf()) continue; + scoped_log.debug(" %{d}: {s} @{x} in sect({d}), {s}", .{ + sym_id, + self.strtab.get(sym.n_strx).?, + sym.n_value, + sym.n_sect, + logSymAttributes(sym, &buf), + }); + } + + scoped_log.debug("exports:", .{}); + for (self.globals.items, 0..) |global, i| { + const sym = self.getSymbol(global); + if (sym.undf()) continue; + if (sym.n_desc == N_DEAD) continue; + scoped_log.debug(" %{d}: {s} @{x} in sect({d}), {s} (def in object({?}))", .{ + i, + self.getSymbolName(global), + sym.n_value, + sym.n_sect, + logSymAttributes(sym, &buf), + global.getFile(), + }); + } + + scoped_log.debug("imports:", .{}); + for (self.globals.items, 0..) |global, i| { + const sym = self.getSymbol(global); + if (!sym.undf()) continue; + if (sym.n_desc == N_DEAD) continue; + const ord = @divTrunc(sym.n_desc, macho.N_SYMBOL_RESOLVER); + scoped_log.debug(" %{d}: {s} @{x} in ord({d}), {s}", .{ + i, + self.getSymbolName(global), + sym.n_value, + ord, + logSymAttributes(sym, &buf), + }); + } + + scoped_log.debug("GOT entries:", .{}); + for (self.got_entries.items, 0..) |entry, i| { + const atom_sym = entry.getAtomSymbol(self); + const target_sym = entry.getTargetSymbol(self); + const target_sym_name = entry.getTargetSymbolName(self); + if (target_sym.undf()) { + scoped_log.debug(" {d}@{x} => import('{s}')", .{ + i, + atom_sym.n_value, + target_sym_name, + }); + } else { + scoped_log.debug(" {d}@{x} => local(%{d}) in object({?}) {s}", .{ + i, + atom_sym.n_value, + entry.target.sym_index, + entry.target.getFile(), + logSymAttributes(target_sym, buf[0..4]), + }); + } + } + + scoped_log.debug("__thread_ptrs entries:", .{}); + for (self.tlv_ptr_entries.items, 0..) |entry, i| { + const atom_sym = entry.getAtomSymbol(self); + const target_sym = entry.getTargetSymbol(self); + const target_sym_name = entry.getTargetSymbolName(self); + assert(target_sym.undf()); + scoped_log.debug(" {d}@{x} => import('{s}')", .{ + i, + atom_sym.n_value, + target_sym_name, + }); + } + + scoped_log.debug("stubs entries:", .{}); + for (self.stubs.items, 0..) |entry, i| { + const atom_sym = entry.getAtomSymbol(self); + const target_sym = entry.getTargetSymbol(self); + const target_sym_name = entry.getTargetSymbolName(self); + assert(target_sym.undf()); + scoped_log.debug(" {d}@{x} => import('{s}')", .{ + i, + atom_sym.n_value, + target_sym_name, + }); + } + + scoped_log.debug("thunks:", .{}); + for (self.thunks.items, 0..) |thunk, i| { + scoped_log.debug(" thunk({d})", .{i}); + for (thunk.lookup.keys(), 0..) |target, j| { + const target_sym = self.getSymbol(target); + const atom = self.getAtom(thunk.lookup.get(target).?); + const atom_sym = self.getSymbol(atom.getSymbolWithLoc()); + scoped_log.debug(" {d}@{x} => thunk('{s}'@{x})", .{ + j, + atom_sym.n_value, + self.getSymbolName(target), + target_sym.n_value, + }); + } + } +} + +fn logAtoms(self: *MachO) void { + log.debug("atoms:", .{}); + const slice = self.sections.slice(); + for (slice.items(.first_atom_index), 0..) 
|first_atom_index, sect_id| {
+        var atom_index = first_atom_index;
+        if (atom_index == 0) continue;
+
+        const header = slice.items(.header)[sect_id];
+
+        log.debug("{s},{s}", .{ header.segName(), header.sectName() });
+
+        while (true) {
+            const atom = self.getAtom(atom_index);
+            self.logAtom(atom_index, log);
+
+            if (atom.next_index) |next_index| {
+                atom_index = next_index;
+            } else break;
+        }
+    }
+}
+
+pub fn logAtom(self: *MachO, atom_index: AtomIndex, logger: anytype) void {
+    if (!build_options.enable_logging) return;
+
+    const atom = self.getAtom(atom_index);
+    const sym = self.getSymbol(atom.getSymbolWithLoc());
+    const sym_name = self.getSymbolName(atom.getSymbolWithLoc());
+    logger.debug(" ATOM(%{d}, '{s}') @ {x} (sizeof({x}), alignof({x})) in object({?}) in sect({d})", .{
+        atom.sym_index,
+        sym_name,
+        sym.n_value,
+        atom.size,
+        atom.alignment,
+        atom.getFile(),
+        sym.n_sect,
+    });
+
+    if (atom.getFile() != null) {
+        var it = Atom.getInnerSymbolsIterator(self, atom_index);
+        while (it.next()) |sym_loc| {
+            const inner = self.getSymbol(sym_loc);
+            const inner_name = self.getSymbolName(sym_loc);
+            const offset = Atom.calcInnerSymbolOffset(self, atom_index, sym_loc.sym_index);
+
+            logger.debug(" (%{d}, '{s}') @ {x} ({x})", .{
+                sym_loc.sym_index,
+                inner_name,
+                inner.n_value,
+                offset,
+            });
+        }
+
+        if (Atom.getSectionAlias(self, atom_index)) |sym_loc| {
+            const alias = self.getSymbol(sym_loc);
+            const alias_name = self.getSymbolName(sym_loc);
+
+            logger.debug(" (%{d}, '{s}') @ {x} ({x})", .{
+                sym_loc.sym_index,
+                alias_name,
+                alias.n_value,
+                0,
+            });
+        }
+    }
+}
diff --git a/src/archive/archive/zld/MachO/Archive.zig b/src/archive/archive/zld/MachO/Archive.zig
new file mode 100644
index 000000000000..e8f9e9316026
--- /dev/null
+++ b/src/archive/archive/zld/MachO/Archive.zig
@@ -0,0 +1,232 @@
+const Archive = @This();
+
+const std = @import("std");
+const assert = std.debug.assert;
+const fs = std.fs;
+const log = std.log.scoped(.macho);
+const macho = std.macho;
+const mem = std.mem;
+
+const Allocator = mem.Allocator;
+const Object = @import("Object.zig");
+
+file: fs.File,
+fat_offset: u64,
+name: []const u8,
+header: ar_hdr = undefined,
+
+/// Parsed table of contents.
+/// Each symbol name points to a list of all definition
+/// sites within the current static archive.
+toc: std.StringArrayHashMapUnmanaged(std.ArrayListUnmanaged(u32)) = .{},
+
+// Archive files start with the ARMAG identifying string. Then follows a
+// `struct ar_hdr', and as many bytes of member file data as its `ar_size'
+// member indicates, for each member file.
+/// String that begins an archive file.
+const ARMAG: *const [SARMAG:0]u8 = "!<arch>\n";
+/// Size of that string.
+const SARMAG: u4 = 8;
+
+/// String in ar_fmag at the end of each header.
+const ARFMAG: *const [2:0]u8 = "`\n";
+
+const ar_hdr = extern struct {
+    /// Member file name, sometimes / terminated.
+    ar_name: [16]u8,
+
+    /// File date, decimal seconds since Epoch.
+    ar_date: [12]u8,
+
+    /// User ID, in ASCII format.
+    ar_uid: [6]u8,
+
+    /// Group ID, in ASCII format.
+    ar_gid: [6]u8,
+
+    /// File mode, in ASCII octal.
+    ar_mode: [8]u8,
+
+    /// File size, in ASCII decimal.
+    ar_size: [10]u8,
+
+    /// Always contains ARFMAG.
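+    /// (Checked against ARFMAG when each member header is parsed.)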
+ ar_fmag: [2]u8, + + const NameOrLength = union(enum) { + Name: []const u8, + Length: u32, + }; + fn nameOrLength(self: ar_hdr) !NameOrLength { + const value = getValue(&self.ar_name); + const slash_index = mem.indexOf(u8, value, "/") orelse return error.MalformedArchive; + const len = value.len; + if (slash_index == len - 1) { + // Name stored directly + return NameOrLength{ .Name = value }; + } else { + // Name follows the header directly and its length is encoded in + // the name field. + const length = try std.fmt.parseInt(u32, value[slash_index + 1 ..], 10); + return NameOrLength{ .Length = length }; + } + } + + fn date(self: ar_hdr) !u64 { + const value = getValue(&self.ar_date); + return std.fmt.parseInt(u64, value, 10); + } + + fn size(self: ar_hdr) !u32 { + const value = getValue(&self.ar_size); + return std.fmt.parseInt(u32, value, 10); + } + + fn getValue(raw: []const u8) []const u8 { + return mem.trimRight(u8, raw, &[_]u8{@as(u8, 0x20)}); + } +}; + +pub fn deinit(self: *Archive, allocator: Allocator) void { + for (self.toc.keys()) |*key| { + allocator.free(key.*); + } + for (self.toc.values()) |*value| { + value.deinit(allocator); + } + self.toc.deinit(allocator); + allocator.free(self.name); +} + +pub fn parse(self: *Archive, allocator: Allocator, reader: anytype) !void { + const magic = try reader.readBytesNoEof(SARMAG); + if (!mem.eql(u8, &magic, ARMAG)) { + log.debug("invalid magic: expected '{s}', found '{s}'", .{ ARMAG, magic }); + return error.NotArchive; + } + + self.header = try reader.readStruct(ar_hdr); + if (!mem.eql(u8, &self.header.ar_fmag, ARFMAG)) { + log.debug("invalid header delimiter: expected '{s}', found '{s}'", .{ + ARFMAG, + self.header.ar_fmag, + }); + return error.NotArchive; + } + + const name_or_length = try self.header.nameOrLength(); + var embedded_name = try parseName(allocator, name_or_length, reader); + log.debug("parsing archive '{s}' at '{s}'", .{ embedded_name, self.name }); + defer allocator.free(embedded_name); + + try self.parseTableOfContents(allocator, reader); +} + +fn parseName(allocator: Allocator, name_or_length: ar_hdr.NameOrLength, reader: anytype) ![]u8 { + var name: []u8 = undefined; + switch (name_or_length) { + .Name => |n| { + name = try allocator.dupe(u8, n); + }, + .Length => |len| { + var n = try allocator.alloc(u8, len); + defer allocator.free(n); + try reader.readNoEof(n); + const actual_len = mem.indexOfScalar(u8, n, @as(u8, 0)) orelse n.len; + name = try allocator.dupe(u8, n[0..actual_len]); + }, + } + return name; +} + +fn parseTableOfContents(self: *Archive, allocator: Allocator, reader: anytype) !void { + const symtab_size = try reader.readIntLittle(u32); + var symtab = try allocator.alloc(u8, symtab_size); + defer allocator.free(symtab); + + reader.readNoEof(symtab) catch { + log.err("incomplete symbol table: expected symbol table of length 0x{x}", .{symtab_size}); + return error.MalformedArchive; + }; + + const strtab_size = try reader.readIntLittle(u32); + var strtab = try allocator.alloc(u8, strtab_size); + defer allocator.free(strtab); + + reader.readNoEof(strtab) catch { + log.err("incomplete symbol table: expected string table of length 0x{x}", .{strtab_size}); + return error.MalformedArchive; + }; + + var symtab_stream = std.io.fixedBufferStream(symtab); + var symtab_reader = symtab_stream.reader(); + + while (true) { + const n_strx = symtab_reader.readIntLittle(u32) catch |err| switch (err) { + error.EndOfStream => break, + else => |e| return e, + }; + const object_offset = try 
symtab_reader.readIntLittle(u32); + + const sym_name = mem.sliceTo(@as([*:0]const u8, @ptrCast(strtab.ptr + n_strx)), 0); + const owned_name = try allocator.dupe(u8, sym_name); + const res = try self.toc.getOrPut(allocator, owned_name); + defer if (res.found_existing) allocator.free(owned_name); + + if (!res.found_existing) { + res.value_ptr.* = .{}; + } + + try res.value_ptr.append(allocator, object_offset); + } +} + +pub fn parseObject( + self: Archive, + gpa: Allocator, + cpu_arch: std.Target.Cpu.Arch, + offset: u32, +) !Object { + const reader = self.file.reader(); + try reader.context.seekTo(self.fat_offset + offset); + + const object_header = try reader.readStruct(ar_hdr); + + if (!mem.eql(u8, &object_header.ar_fmag, ARFMAG)) { + log.err("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, object_header.ar_fmag }); + return error.MalformedArchive; + } + + const name_or_length = try object_header.nameOrLength(); + const object_name = try parseName(gpa, name_or_length, reader); + defer gpa.free(object_name); + + log.debug("extracting object '{s}' from archive '{s}'", .{ object_name, self.name }); + + const name = name: { + var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; + const path = try std.os.realpath(self.name, &buffer); + break :name try std.fmt.allocPrint(gpa, "{s}({s})", .{ path, object_name }); + }; + + const object_name_len = switch (name_or_length) { + .Name => 0, + .Length => |len| len, + }; + const object_size = (try object_header.size()) - object_name_len; + const contents = try gpa.allocWithOptions(u8, object_size, @alignOf(u64), null); + const amt = try reader.readAll(contents); + if (amt != object_size) { + return error.Io; + } + + var object = Object{ + .name = name, + .mtime = object_header.date() catch 0, + .contents = contents, + }; + + try object.parse(gpa, cpu_arch); + + return object; +} diff --git a/src/archive/archive/zld/MachO/Atom.zig b/src/archive/archive/zld/MachO/Atom.zig new file mode 100644 index 000000000000..dbfca6a17d13 --- /dev/null +++ b/src/archive/archive/zld/MachO/Atom.zig @@ -0,0 +1,1052 @@ +const Atom = @This(); + +const std = @import("std"); +const build_options = @import("build_options"); +const aarch64 = @import("../aarch64.zig"); +const assert = std.debug.assert; +const log = std.log.scoped(.atom); +const macho = std.macho; +const math = std.math; +const mem = std.mem; +const meta = std.meta; +const trace = @import("../tracy.zig").trace; + +const Allocator = mem.Allocator; +const Arch = std.Target.Cpu.Arch; +const AtomIndex = MachO.AtomIndex; +const MachO = @import("../MachO.zig"); +const Object = @import("Object.zig"); +const SymbolWithLoc = MachO.SymbolWithLoc; + +const dis_x86_64 = @import("dis_x86_64"); +const Disassembler = dis_x86_64.Disassembler; +const Instruction = dis_x86_64.Instruction; +const RegisterOrMemory = dis_x86_64.RegisterOrMemory; + +/// Each decl always gets a local symbol with the fully qualified name. +/// The vaddr and size are found here directly. +/// The file offset is found by computing the vaddr offset from the section vaddr +/// the symbol references, and adding that to the file offset of the section. +/// If this field is 0, it means the codegen size = 0 and there is no symbol or +/// offset table entry. +sym_index: u32, + +/// If this Atom references a subsection in an Object file, `nsyms_trailing` +/// tells how many symbols trailing `sym_index` fall within this Atom's address +/// range. +inner_sym_index: u32, +inner_nsyms_trailing: u32, + +/// 0 means symbol defined by the linker. 
+/// Otherwise, it is the index into appropriate object file (indexing from 1). +file: u32, + +/// Size and alignment of this atom +/// Unlike in Elf, we need to store the size of this symbol as part of +/// the atom since macho.nlist_64 lacks this information. +size: u64, + +/// Alignment of this atom as a power of 2. +/// For instance, aligmment of 0 should be read as 2^0 = 1 byte aligned. +alignment: u32, + +/// Points to the previous and next neighbours +next_index: ?AtomIndex, +prev_index: ?AtomIndex, + +pub const empty = Atom{ + .sym_index = 0, + .inner_sym_index = 0, + .inner_nsyms_trailing = 0, + .file = 0, + .size = 0, + .alignment = 0, + .prev_index = null, + .next_index = null, +}; + +pub fn getFile(self: Atom) ?u32 { + if (self.file == 0) return null; + return self.file - 1; +} + +pub inline fn getSymbolWithLoc(self: Atom) SymbolWithLoc { + return .{ + .sym_index = self.sym_index, + .file = self.file, + }; +} + +const InnerSymIterator = struct { + sym_index: u32, + count: u32, + file: u32, + + pub fn next(it: *@This()) ?SymbolWithLoc { + if (it.count == 0) return null; + const res = SymbolWithLoc{ .sym_index = it.sym_index, .file = it.file }; + it.sym_index += 1; + it.count -= 1; + return res; + } +}; + +pub fn getInnerSymbolsIterator(macho_file: *MachO, atom_index: AtomIndex) InnerSymIterator { + const atom = macho_file.getAtom(atom_index); + assert(atom.getFile() != null); + return .{ + .sym_index = atom.inner_sym_index, + .count = atom.inner_nsyms_trailing, + .file = atom.file, + }; +} + +pub fn getSectionAlias(macho_file: *MachO, atom_index: AtomIndex) ?SymbolWithLoc { + const atom = macho_file.getAtom(atom_index); + assert(atom.getFile() != null); + + const object = macho_file.objects.items[atom.getFile().?]; + const nbase = @as(u32, @intCast(object.in_symtab.?.len)); + const ntotal = @as(u32, @intCast(object.symtab.len)); + var sym_index: u32 = nbase; + while (sym_index < ntotal) : (sym_index += 1) { + if (object.getAtomIndexForSymbol(sym_index)) |other_atom_index| { + if (other_atom_index == atom_index) return SymbolWithLoc{ + .sym_index = sym_index, + .file = atom.file, + }; + } + } + return null; +} + +pub fn calcInnerSymbolOffset(macho_file: *MachO, atom_index: AtomIndex, sym_index: u32) u64 { + const atom = macho_file.getAtom(atom_index); + assert(atom.getFile() != null); + + if (atom.sym_index == sym_index) return 0; + + const object = macho_file.objects.items[atom.getFile().?]; + const source_sym = object.getSourceSymbol(sym_index).?; + const base_addr = if (object.getSourceSymbol(atom.sym_index)) |sym| + sym.n_value + else blk: { + const nbase = @as(u32, @intCast(object.in_symtab.?.len)); + const sect_id = @as(u8, @intCast(atom.sym_index - nbase)); + const source_sect = object.getSourceSection(sect_id); + break :blk source_sect.addr; + }; + return source_sym.n_value - base_addr; +} + +pub fn scanAtomRelocs( + macho_file: *MachO, + atom_index: AtomIndex, + relocs: []align(1) const macho.relocation_info, +) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const arch = macho_file.options.target.cpu_arch.?; + const atom = macho_file.getAtom(atom_index); + assert(atom.getFile() != null); // synthetic atoms do not have relocs + + return switch (arch) { + .aarch64 => scanAtomRelocsArm64(macho_file, atom_index, relocs), + .x86_64 => scanAtomRelocsX86(macho_file, atom_index, relocs), + else => unreachable, + }; +} + +const RelocContext = struct { + base_addr: u64 = 0, + base_offset: i32 = 0, +}; + +pub fn getRelocContext(macho_file: *MachO, atom_index: 
AtomIndex) RelocContext { + const atom = macho_file.getAtom(atom_index); + assert(atom.getFile() != null); // synthetic atoms do not have relocs + + const object = macho_file.objects.items[atom.getFile().?]; + if (object.getSourceSymbol(atom.sym_index)) |source_sym| { + const source_sect = object.getSourceSection(source_sym.n_sect - 1); + return .{ + .base_addr = source_sect.addr, + .base_offset = @as(i32, @intCast(source_sym.n_value - source_sect.addr)), + }; + } + const nbase = @as(u32, @intCast(object.in_symtab.?.len)); + const sect_id = @as(u8, @intCast(atom.sym_index - nbase)); + const source_sect = object.getSourceSection(sect_id); + return .{ + .base_addr = source_sect.addr, + .base_offset = 0, + }; +} + +pub fn parseRelocTarget(macho_file: *MachO, atom_index: AtomIndex, rel: macho.relocation_info) MachO.SymbolWithLoc { + const tracy = trace(@src()); + defer tracy.end(); + + const atom = macho_file.getAtom(atom_index); + const object = &macho_file.objects.items[atom.getFile().?]; + + const sym_index = if (rel.r_extern == 0) sym_index: { + const sect_id = @as(u8, @intCast(rel.r_symbolnum - 1)); + const ctx = getRelocContext(macho_file, atom_index); + const atom_code = getAtomCode(macho_file, atom_index); + const rel_offset = @as(u32, @intCast(rel.r_address - ctx.base_offset)); + + const address_in_section = if (rel.r_pcrel == 0) blk: { + break :blk if (rel.r_length == 3) + mem.readIntLittle(u64, atom_code[rel_offset..][0..8]) + else + mem.readIntLittle(u32, atom_code[rel_offset..][0..4]); + } else blk: { + const correction: u3 = switch (@as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type))) { + .X86_64_RELOC_SIGNED => 0, + .X86_64_RELOC_SIGNED_1 => 1, + .X86_64_RELOC_SIGNED_2 => 2, + .X86_64_RELOC_SIGNED_4 => 4, + else => unreachable, + }; + const addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]); + const target_address = @as(i64, @intCast(ctx.base_addr)) + rel.r_address + 4 + correction + addend; + break :blk @as(u64, @intCast(target_address)); + }; + + // Find containing atom + const sym_index = object.getSymbolByAddress(address_in_section, sect_id); + break :sym_index sym_index; + } else object.reverse_symtab_lookup[rel.r_symbolnum]; + + const sym_loc = MachO.SymbolWithLoc{ + .sym_index = sym_index, + .file = atom.file, + }; + const sym = macho_file.getSymbol(sym_loc); + + if (sym.sect() and !sym.ext()) { + return sym_loc; + } else if (object.getGlobal(sym_index)) |global_index| { + return macho_file.globals.items[global_index]; + } else return sym_loc; +} + +pub fn getRelocTargetAtomIndex(macho_file: *MachO, target: SymbolWithLoc, is_via_got: bool) ?AtomIndex { + const tracy = trace(@src()); + defer tracy.end(); + + if (is_via_got) { + return macho_file.getGotAtomIndexForSymbol(target).?; // panic means fatal error + } + if (macho_file.getStubsAtomIndexForSymbol(target)) |stubs_atom| return stubs_atom; + if (macho_file.getTlvPtrAtomIndexForSymbol(target)) |tlv_ptr_atom| return tlv_ptr_atom; + + if (target.getFile() == null) { + const target_sym_name = macho_file.getSymbolName(target); + if (mem.eql(u8, "__mh_execute_header", target_sym_name)) return null; + if (mem.eql(u8, "___dso_handle", target_sym_name)) return null; + + unreachable; // referenced symbol not found + } + + const object = macho_file.objects.items[target.getFile().?]; + return object.getAtomIndexForSymbol(target.sym_index); +} + +fn scanAtomRelocsArm64( + macho_file: *MachO, + atom_index: AtomIndex, + relocs: []align(1) const macho.relocation_info, +) !void { + for (relocs) |rel| { + const rel_type 
= @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); + + switch (rel_type) { + .ARM64_RELOC_ADDEND, .ARM64_RELOC_SUBTRACTOR => continue, + else => {}, + } + + if (rel.r_extern == 0) continue; + + const atom = macho_file.getAtom(atom_index); + const object = &macho_file.objects.items[atom.getFile().?]; + const sym_index = object.reverse_symtab_lookup[rel.r_symbolnum]; + const sym_loc = MachO.SymbolWithLoc{ + .sym_index = sym_index, + .file = atom.file, + }; + const sym = macho_file.getSymbol(sym_loc); + + if (sym.sect() and !sym.ext()) continue; + + const target = if (object.getGlobal(sym_index)) |global_index| + macho_file.globals.items[global_index] + else + sym_loc; + + switch (rel_type) { + .ARM64_RELOC_BRANCH26 => { + // TODO rewrite relocation + try addStub(macho_file, target); + }, + .ARM64_RELOC_GOT_LOAD_PAGE21, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12, + .ARM64_RELOC_POINTER_TO_GOT, + => { + // TODO rewrite relocation + try addGotEntry(macho_file, target); + }, + .ARM64_RELOC_TLVP_LOAD_PAGE21, + .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, + => { + try addTlvPtrEntry(macho_file, target); + }, + else => {}, + } + } +} + +fn scanAtomRelocsX86(macho_file: *MachO, atom_index: AtomIndex, relocs: []align(1) const macho.relocation_info) !void { + for (relocs) |rel| { + const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); + + switch (rel_type) { + .X86_64_RELOC_SUBTRACTOR => continue, + else => {}, + } + + if (rel.r_extern == 0) continue; + + const atom = macho_file.getAtom(atom_index); + const object = &macho_file.objects.items[atom.getFile().?]; + const sym_index = object.reverse_symtab_lookup[rel.r_symbolnum]; + const sym_loc = MachO.SymbolWithLoc{ + .sym_index = sym_index, + .file = atom.file, + }; + const sym = macho_file.getSymbol(sym_loc); + + if (sym.sect() and !sym.ext()) continue; + + const target = if (object.getGlobal(sym_index)) |global_index| + macho_file.globals.items[global_index] + else + sym_loc; + + switch (rel_type) { + .X86_64_RELOC_BRANCH => { + // TODO rewrite relocation + try addStub(macho_file, target); + }, + .X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => { + // TODO rewrite relocation + try addGotEntry(macho_file, target); + }, + .X86_64_RELOC_TLV => { + try addTlvPtrEntry(macho_file, target); + }, + else => {}, + } + } +} + +fn addTlvPtrEntry(macho_file: *MachO, target: MachO.SymbolWithLoc) !void { + const target_sym = macho_file.getSymbol(target); + if (!target_sym.undf()) return; + if (macho_file.tlv_ptr_table.contains(target)) return; + + const gpa = macho_file.base.allocator; + const atom_index = try macho_file.createTlvPtrAtom(); + const tlv_ptr_index = @as(u32, @intCast(macho_file.tlv_ptr_entries.items.len)); + try macho_file.tlv_ptr_entries.append(gpa, .{ + .target = target, + .atom_index = atom_index, + }); + try macho_file.tlv_ptr_table.putNoClobber(gpa, target, tlv_ptr_index); +} + +pub fn addGotEntry(macho_file: *MachO, target: MachO.SymbolWithLoc) !void { + if (macho_file.got_table.contains(target)) return; + const gpa = macho_file.base.allocator; + const atom_index = try macho_file.createGotAtom(); + const got_index = @as(u32, @intCast(macho_file.got_entries.items.len)); + try macho_file.got_entries.append(gpa, .{ + .target = target, + .atom_index = atom_index, + }); + try macho_file.got_table.putNoClobber(gpa, target, got_index); +} + +fn addStub(macho_file: *MachO, target: MachO.SymbolWithLoc) !void { + const target_sym = macho_file.getSymbol(target); + if (!target_sym.undf()) return; + if (macho_file.stubs_table.contains(target)) return; + + 
const gpa = macho_file.base.allocator; + _ = try macho_file.createStubHelperAtom(); + _ = try macho_file.createLazyPointerAtom(); + const atom_index = try macho_file.createStubAtom(); + const stubs_index = @as(u32, @intCast(macho_file.stubs.items.len)); + try macho_file.stubs.append(gpa, .{ + .target = target, + .atom_index = atom_index, + }); + try macho_file.stubs_table.putNoClobber(gpa, target, stubs_index); +} + +pub fn resolveRelocs( + macho_file: *MachO, + atom_index: AtomIndex, + atom_code: []u8, + atom_relocs: []align(1) const macho.relocation_info, +) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const arch = macho_file.options.target.cpu_arch.?; + const atom = macho_file.getAtom(atom_index); + assert(atom.getFile() != null); // synthetic atoms do not have relocs + + log.debug("resolving relocations in ATOM(%{d}, '{s}')", .{ + atom.sym_index, + macho_file.getSymbolName(atom.getSymbolWithLoc()), + }); + + const ctx = getRelocContext(macho_file, atom_index); + + return switch (arch) { + .aarch64 => resolveRelocsArm64(macho_file, atom_index, atom_code, atom_relocs, ctx), + .x86_64 => resolveRelocsX86(macho_file, atom_index, atom_code, atom_relocs, ctx), + else => unreachable, + }; +} + +pub fn getRelocTargetAddress( + macho_file: *MachO, + target: SymbolWithLoc, + is_via_got: bool, + is_tlv: bool, +) !u64 { + const target_atom_index = getRelocTargetAtomIndex(macho_file, target, is_via_got) orelse { + // If there is no atom for target, we still need to check for special, atom-less + // symbols such as `___dso_handle`. + const target_name = macho_file.getSymbolName(target); + const atomless_sym = macho_file.getSymbol(target); + log.debug(" | atomless target '{s}'", .{target_name}); + return atomless_sym.n_value; + }; + const target_atom = macho_file.getAtom(target_atom_index); + log.debug(" | target ATOM(%{d}, '{s}') in object({?})", .{ + target_atom.sym_index, + macho_file.getSymbolName(target_atom.getSymbolWithLoc()), + target_atom.getFile(), + }); + const target_sym = macho_file.getSymbol(target_atom.getSymbolWithLoc()); + assert(target_sym.n_desc != MachO.N_DEAD); + + // If `target` is contained within the target atom, pull its address value. 
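+    // A target with no backing file is linker-synthesized, and a section alias points at the
+    // section start, so both contribute a zero offset; otherwise the offset of `target`
+    // inside the atom is recovered from the object's source symbol table.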
+ const offset = if (target_atom.getFile() != null) blk: { + const object = macho_file.objects.items[target_atom.getFile().?]; + break :blk if (object.getSourceSymbol(target.sym_index)) |_| + Atom.calcInnerSymbolOffset(macho_file, target_atom_index, target.sym_index) + else + 0; // section alias + } else 0; + const base_address: u64 = if (is_tlv) base_address: { + // For TLV relocations, the value specified as a relocation is the displacement from the + // TLV initializer (either value in __thread_data or zero-init in __thread_bss) to the first + // defined TLV template init section in the following order: + // * wrt to __thread_data if defined, then + // * wrt to __thread_bss + const sect_id: u16 = sect_id: { + if (macho_file.getSectionByName("__DATA", "__thread_data")) |i| { + break :sect_id i; + } else if (macho_file.getSectionByName("__DATA", "__thread_bss")) |i| { + break :sect_id i; + } else { + log.err("threadlocal variables present but no initializer sections found", .{}); + log.err(" __thread_data not found", .{}); + log.err(" __thread_bss not found", .{}); + return error.FailedToResolveRelocationTarget; + } + }; + break :base_address macho_file.sections.items(.header)[sect_id].addr; + } else 0; + return target_sym.n_value + offset - base_address; +} + +fn resolveRelocsArm64( + macho_file: *MachO, + atom_index: AtomIndex, + atom_code: []u8, + atom_relocs: []align(1) const macho.relocation_info, + context: RelocContext, +) !void { + const atom = macho_file.getAtom(atom_index); + const object = macho_file.objects.items[atom.getFile().?]; + + var addend: ?i64 = null; + var subtractor: ?SymbolWithLoc = null; + + for (atom_relocs) |rel| { + const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); + + switch (rel_type) { + .ARM64_RELOC_ADDEND => { + assert(addend == null); + + log.debug(" RELA({s}) @ {x} => {x}", .{ @tagName(rel_type), rel.r_address, rel.r_symbolnum }); + + addend = rel.r_symbolnum; + continue; + }, + .ARM64_RELOC_SUBTRACTOR => { + assert(subtractor == null); + + log.debug(" RELA({s}) @ {x} => %{d} in object({?d})", .{ + @tagName(rel_type), + rel.r_address, + rel.r_symbolnum, + atom.getFile(), + }); + + subtractor = parseRelocTarget(macho_file, atom_index, rel); + continue; + }, + else => {}, + } + + const target = parseRelocTarget(macho_file, atom_index, rel); + const rel_offset = @as(u32, @intCast(rel.r_address - context.base_offset)); + + log.debug(" RELA({s}) @ {x} => %{d} ('{s}') in object({?d})", .{ + @tagName(rel_type), + rel.r_address, + target.sym_index, + macho_file.getSymbolName(target), + target.getFile(), + }); + + const source_addr = blk: { + const source_sym = macho_file.getSymbol(atom.getSymbolWithLoc()); + break :blk source_sym.n_value + rel_offset; + }; + const is_via_got = relocRequiresGot(macho_file, rel); + const is_tlv = is_tlv: { + const source_sym = macho_file.getSymbol(atom.getSymbolWithLoc()); + const header = macho_file.sections.items(.header)[source_sym.n_sect - 1]; + break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES; + }; + const target_addr = try getRelocTargetAddress(macho_file, target, is_via_got, is_tlv); + + log.debug(" | source_addr = 0x{x}", .{source_addr}); + + switch (rel_type) { + .ARM64_RELOC_BRANCH26 => { + const actual_target = if (macho_file.getStubsAtomIndexForSymbol(target)) |stub_atom_index| inner: { + const stub_atom = macho_file.getAtom(stub_atom_index); + break :inner stub_atom.getSymbolWithLoc(); + } else target; + log.debug(" source {s} (object({?})), target {s} (object({?}))", .{ + 
macho_file.getSymbolName(atom.getSymbolWithLoc()), + atom.getFile(), + macho_file.getSymbolName(target), + macho_file.getAtom(getRelocTargetAtomIndex(macho_file, target, false).?).getFile(), + }); + + const displacement = if (calcPcRelativeDisplacementArm64( + source_addr, + macho_file.getSymbol(actual_target).n_value, + )) |disp| blk: { + log.debug(" | target_addr = 0x{x}", .{macho_file.getSymbol(actual_target).n_value}); + break :blk disp; + } else |_| blk: { + const thunk_index = macho_file.thunk_table.get(atom_index).?; + const thunk = macho_file.thunks.items[thunk_index]; + const thunk_sym = macho_file.getSymbol(thunk.getTrampolineForSymbol( + macho_file, + actual_target, + ).?); + log.debug(" | target_addr = 0x{x} (thunk)", .{thunk_sym.n_value}); + break :blk try calcPcRelativeDisplacementArm64(source_addr, thunk_sym.n_value); + }; + + const code = atom_code[rel_offset..][0..4]; + var inst = aarch64.Instruction{ + .unconditional_branch_immediate = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.unconditional_branch_immediate, + ), code), + }; + inst.unconditional_branch_immediate.imm26 = @as(u26, @truncate(@as(u28, @bitCast(displacement >> 2)))); + mem.writeIntLittle(u32, code, inst.toU32()); + }, + + .ARM64_RELOC_PAGE21, + .ARM64_RELOC_GOT_LOAD_PAGE21, + .ARM64_RELOC_TLVP_LOAD_PAGE21, + => { + const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + (addend orelse 0))); + + log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); + + const pages = @as(u21, @bitCast(calcNumberOfPages(source_addr, adjusted_target_addr))); + const code = atom_code[rel_offset..][0..4]; + var inst = aarch64.Instruction{ + .pc_relative_address = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.pc_relative_address, + ), code), + }; + inst.pc_relative_address.immhi = @as(u19, @truncate(pages >> 2)); + inst.pc_relative_address.immlo = @as(u2, @truncate(pages)); + mem.writeIntLittle(u32, code, inst.toU32()); + addend = null; + }, + + .ARM64_RELOC_PAGEOFF12 => { + const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + (addend orelse 0))); + + log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); + + const code = atom_code[rel_offset..][0..4]; + if (isArithmeticOp(code)) { + const off = try calcPageOffset(adjusted_target_addr, .arithmetic); + var inst = aarch64.Instruction{ + .add_subtract_immediate = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.add_subtract_immediate, + ), code), + }; + inst.add_subtract_immediate.imm12 = off; + mem.writeIntLittle(u32, code, inst.toU32()); + } else { + var inst = aarch64.Instruction{ + .load_store_register = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), code), + }; + const off = try calcPageOffset(adjusted_target_addr, switch (inst.load_store_register.size) { + 0 => if (inst.load_store_register.v == 1) PageOffsetInstKind.load_store_128 else PageOffsetInstKind.load_store_8, + 1 => .load_store_16, + 2 => .load_store_32, + 3 => .load_store_64, + }); + inst.load_store_register.offset = off; + mem.writeIntLittle(u32, code, inst.toU32()); + } + addend = null; + }, + + .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => { + const code = atom_code[rel_offset..][0..4]; + const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + (addend orelse 0))); + + log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); + + const off = try 
calcPageOffset(adjusted_target_addr, .load_store_64); + var inst: aarch64.Instruction = .{ + .load_store_register = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), code), + }; + inst.load_store_register.offset = off; + mem.writeIntLittle(u32, code, inst.toU32()); + addend = null; + }, + + .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => { + const code = atom_code[rel_offset..][0..4]; + const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + (addend orelse 0))); + + log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); + + const RegInfo = struct { + rd: u5, + rn: u5, + size: u2, + }; + const reg_info: RegInfo = blk: { + if (isArithmeticOp(code)) { + const inst = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.add_subtract_immediate, + ), code); + break :blk .{ + .rd = inst.rd, + .rn = inst.rn, + .size = inst.sf, + }; + } else { + const inst = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), code); + break :blk .{ + .rd = inst.rt, + .rn = inst.rn, + .size = inst.size, + }; + } + }; + + var inst = if (macho_file.tlv_ptr_table.contains(target)) aarch64.Instruction{ + .load_store_register = .{ + .rt = reg_info.rd, + .rn = reg_info.rn, + .offset = try calcPageOffset(adjusted_target_addr, .load_store_64), + .opc = 0b01, + .op1 = 0b01, + .v = 0, + .size = reg_info.size, + }, + } else aarch64.Instruction{ + .add_subtract_immediate = .{ + .rd = reg_info.rd, + .rn = reg_info.rn, + .imm12 = try calcPageOffset(adjusted_target_addr, .arithmetic), + .sh = 0, + .s = 0, + .op = 0, + .sf = @as(u1, @truncate(reg_info.size)), + }, + }; + mem.writeIntLittle(u32, code, inst.toU32()); + addend = null; + }, + + .ARM64_RELOC_POINTER_TO_GOT => { + log.debug(" | target_addr = 0x{x}", .{target_addr}); + const result = math.cast(i32, @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr))) orelse + return error.Overflow; + mem.writeIntLittle(u32, atom_code[rel_offset..][0..4], @as(u32, @bitCast(result))); + }, + + .ARM64_RELOC_UNSIGNED => { + var ptr_addend = if (rel.r_length == 3) + mem.readIntLittle(i64, atom_code[rel_offset..][0..8]) + else + mem.readIntLittle(i32, atom_code[rel_offset..][0..4]); + + if (rel.r_extern == 0) { + const base_addr = if (target.sym_index > object.source_address_lookup.len) + @as(i64, @intCast(object.getSourceSection(@as(u8, @intCast(rel.r_symbolnum - 1))).addr)) + else + object.source_address_lookup[target.sym_index]; + ptr_addend -= base_addr; + } + + const result = blk: { + if (subtractor) |sub| { + const sym = macho_file.getSymbol(sub); + break :blk @as(i64, @intCast(target_addr)) - @as(i64, @intCast(sym.n_value)) + ptr_addend; + } else { + break :blk @as(i64, @intCast(target_addr)) + ptr_addend; + } + }; + log.debug(" | target_addr = 0x{x}", .{result}); + + if (rel.r_length == 3) { + mem.writeIntLittle(u64, atom_code[rel_offset..][0..8], @as(u64, @bitCast(result))); + } else { + mem.writeIntLittle(u32, atom_code[rel_offset..][0..4], @as(u32, @truncate(@as(u64, @bitCast(result))))); + } + + subtractor = null; + }, + + .ARM64_RELOC_ADDEND => unreachable, + .ARM64_RELOC_SUBTRACTOR => unreachable, + } + } +} + +fn resolveRelocsX86( + macho_file: *MachO, + atom_index: AtomIndex, + atom_code: []u8, + atom_relocs: []align(1) const macho.relocation_info, + context: RelocContext, +) !void { + const atom = macho_file.getAtom(atom_index); + const object = macho_file.objects.items[atom.getFile().?]; + + var subtractor: 
?SymbolWithLoc = null; + + for (atom_relocs) |rel| { + const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); + + switch (rel_type) { + .X86_64_RELOC_SUBTRACTOR => { + assert(subtractor == null); + + log.debug(" RELA({s}) @ {x} => %{d} in object({?d})", .{ + @tagName(rel_type), + rel.r_address, + rel.r_symbolnum, + atom.getFile(), + }); + + subtractor = parseRelocTarget(macho_file, atom_index, rel); + continue; + }, + else => {}, + } + + const target = parseRelocTarget(macho_file, atom_index, rel); + const rel_offset = @as(u32, @intCast(rel.r_address - context.base_offset)); + + log.debug(" RELA({s}) @ {x} => %{d} in object({?d})", .{ + @tagName(rel_type), + rel.r_address, + target.sym_index, + target.getFile(), + }); + + const source_addr = blk: { + const source_sym = macho_file.getSymbol(atom.getSymbolWithLoc()); + break :blk source_sym.n_value + rel_offset; + }; + const is_via_got = relocRequiresGot(macho_file, rel); + const is_tlv = is_tlv: { + const source_sym = macho_file.getSymbol(atom.getSymbolWithLoc()); + const header = macho_file.sections.items(.header)[source_sym.n_sect - 1]; + break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES; + }; + + log.debug(" | source_addr = 0x{x}", .{source_addr}); + + const target_addr = try getRelocTargetAddress(macho_file, target, is_via_got, is_tlv); + + switch (rel_type) { + .X86_64_RELOC_BRANCH => { + const addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]); + const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend)); + + log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); + + const disp = try calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0); + mem.writeIntLittle(i32, atom_code[rel_offset..][0..4], disp); + }, + + .X86_64_RELOC_GOT, + .X86_64_RELOC_GOT_LOAD, + => { + const addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]); + const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend)); + log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); + const disp = try calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0); + mem.writeIntLittle(i32, atom_code[rel_offset..][0..4], disp); + }, + + .X86_64_RELOC_TLV => { + const addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]); + const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend)); + log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); + const disp = try calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0); + + if (macho_file.tlv_ptr_table.get(target) == null) { + // We need to rewrite the opcode from movq to leaq. 
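+                    // No __thread_ptrs slot exists for this target, so the TLV descriptor is
+                    // addressed directly: decode the `mov` that loads it and re-encode it as a
+                    // `lea` with the same destination register and memory operand, patched with
+                    // the freshly computed displacement.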
+ var disassembler = Disassembler.init(atom_code[rel_offset - 3 ..]); + const inst = (try disassembler.next()) orelse unreachable; + assert(inst.enc == .rm); + assert(inst.tag == .mov); + const rm = inst.data.rm; + const dst = rm.reg; + const src = rm.reg_or_mem.mem; + + var stream = std.io.fixedBufferStream(atom_code[rel_offset - 3 ..][0..7]); + const writer = stream.writer(); + + const new_inst = Instruction{ + .tag = .lea, + .enc = .rm, + .data = Instruction.Data.rm(dst, RegisterOrMemory.mem(.{ + .ptr_size = src.ptr_size, + .scale_index = src.scale_index, + .base = src.base, + .disp = disp, + })), + }; + try new_inst.encode(writer); + } else { + mem.writeIntLittle(i32, atom_code[rel_offset..][0..4], disp); + } + }, + + .X86_64_RELOC_SIGNED, + .X86_64_RELOC_SIGNED_1, + .X86_64_RELOC_SIGNED_2, + .X86_64_RELOC_SIGNED_4, + => { + const correction: u3 = switch (rel_type) { + .X86_64_RELOC_SIGNED => 0, + .X86_64_RELOC_SIGNED_1 => 1, + .X86_64_RELOC_SIGNED_2 => 2, + .X86_64_RELOC_SIGNED_4 => 4, + else => unreachable, + }; + var addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]) + correction; + + if (rel.r_extern == 0) { + const base_addr = if (target.sym_index > object.source_address_lookup.len) + @as(i64, @intCast(object.getSourceSection(@as(u8, @intCast(rel.r_symbolnum - 1))).addr)) + else + object.source_address_lookup[target.sym_index]; + addend += @as(i32, @intCast(@as(i64, @intCast(context.base_addr)) + rel.r_address + 4 - + @as(i64, @intCast(base_addr)))); + } + + const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend)); + log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); + + const disp = try calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, correction); + mem.writeIntLittle(i32, atom_code[rel_offset..][0..4], disp); + }, + + .X86_64_RELOC_UNSIGNED => { + var addend = if (rel.r_length == 3) + mem.readIntLittle(i64, atom_code[rel_offset..][0..8]) + else + mem.readIntLittle(i32, atom_code[rel_offset..][0..4]); + + if (rel.r_extern == 0) { + const base_addr = if (target.sym_index > object.source_address_lookup.len) + @as(i64, @intCast(object.getSourceSection(@as(u8, @intCast(rel.r_symbolnum - 1))).addr)) + else + object.source_address_lookup[target.sym_index]; + addend -= base_addr; + } + + const result = blk: { + if (subtractor) |sub| { + const sym = macho_file.getSymbol(sub); + break :blk @as(i64, @intCast(target_addr)) - @as(i64, @intCast(sym.n_value)) + addend; + } else { + break :blk @as(i64, @intCast(target_addr)) + addend; + } + }; + log.debug(" | target_addr = 0x{x}", .{result}); + + if (rel.r_length == 3) { + mem.writeIntLittle(u64, atom_code[rel_offset..][0..8], @as(u64, @bitCast(result))); + } else { + mem.writeIntLittle(u32, atom_code[rel_offset..][0..4], @as(u32, @truncate(@as(u64, @bitCast(result))))); + } + + subtractor = null; + }, + + .X86_64_RELOC_SUBTRACTOR => unreachable, + } + } +} + +inline fn isArithmeticOp(inst: *const [4]u8) bool { + const group_decode = @as(u5, @truncate(inst[3])); + return ((group_decode >> 2) == 4); +} + +pub fn getAtomCode(macho_file: *MachO, atom_index: AtomIndex) []const u8 { + const atom = macho_file.getAtom(atom_index); + assert(atom.getFile() != null); // Synthetic atom shouldn't need to inquire for code. 
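+    // The atom's bytes live in the owning object file: when the atom has a source symbol,
+    // slice the section contents at that symbol's offset; otherwise the atom models the
+    // section itself (or its head), so read from the section start.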
+ const object = macho_file.objects.items[atom.getFile().?]; + const source_sym = object.getSourceSymbol(atom.sym_index) orelse { + // If there was no matching symbol present in the source symtab, this means + // we are dealing with either an entire section, or part of it, but also + // starting at the beginning. + const nbase = @as(u32, @intCast(object.in_symtab.?.len)); + const sect_id = @as(u8, @intCast(atom.sym_index - nbase)); + const source_sect = object.getSourceSection(sect_id); + assert(!source_sect.isZerofill()); + const code = object.getSectionContents(source_sect); + return code[0..atom.size]; + }; + const source_sect = object.getSourceSection(source_sym.n_sect - 1); + assert(!source_sect.isZerofill()); + const offset = source_sym.n_value - source_sect.addr; + const code = object.getSectionContents(source_sect); + return code[offset..][0..atom.size]; +} + +pub fn getAtomRelocs(macho_file: *MachO, atom_index: AtomIndex) []const macho.relocation_info { + const atom = macho_file.getAtom(atom_index); + assert(atom.getFile() != null); // Synthetic atom shouldn't need to unique for relocs. + const object = macho_file.objects.items[atom.getFile().?]; + const cache = object.relocs_lookup[atom.sym_index]; + + const source_sect_id = if (object.getSourceSymbol(atom.sym_index)) |source_sym| blk: { + break :blk source_sym.n_sect - 1; + } else blk: { + // If there was no matching symbol present in the source symtab, this means + // we are dealing with either an entire section, or part of it, but also + // starting at the beginning. + const nbase = @as(u32, @intCast(object.in_symtab.?.len)); + const sect_id = @as(u8, @intCast(atom.sym_index - nbase)); + break :blk sect_id; + }; + const source_sect = object.getSourceSection(source_sect_id); + assert(!source_sect.isZerofill()); + const relocs = object.getRelocs(source_sect_id); + return relocs[cache.start..][0..cache.len]; +} + +pub fn calcPcRelativeDisplacementX86(source_addr: u64, target_addr: u64, correction: u3) error{Overflow}!i32 { + const disp = @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr + 4 + correction)); + return math.cast(i32, disp) orelse error.Overflow; +} + +pub fn calcPcRelativeDisplacementArm64(source_addr: u64, target_addr: u64) error{Overflow}!i28 { + const disp = @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr)); + return math.cast(i28, disp) orelse error.Overflow; +} + +pub fn calcNumberOfPages(source_addr: u64, target_addr: u64) i21 { + const source_page = @as(i32, @intCast(source_addr >> 12)); + const target_page = @as(i32, @intCast(target_addr >> 12)); + const pages = @as(i21, @intCast(target_page - source_page)); + return pages; +} + +const PageOffsetInstKind = enum { + arithmetic, + load_store_8, + load_store_16, + load_store_32, + load_store_64, + load_store_128, +}; + +pub fn calcPageOffset(target_addr: u64, kind: PageOffsetInstKind) !u12 { + const narrowed = @as(u12, @truncate(target_addr)); + return switch (kind) { + .arithmetic, .load_store_8 => narrowed, + .load_store_16 => try math.divExact(u12, narrowed, 2), + .load_store_32 => try math.divExact(u12, narrowed, 4), + .load_store_64 => try math.divExact(u12, narrowed, 8), + .load_store_128 => try math.divExact(u12, narrowed, 16), + }; +} + +pub fn relocRequiresGot(macho_file: *MachO, rel: macho.relocation_info) bool { + switch (macho_file.options.target.cpu_arch.?) 
{ + .aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) { + .ARM64_RELOC_GOT_LOAD_PAGE21, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12, + .ARM64_RELOC_POINTER_TO_GOT, + => return true, + else => return false, + }, + .x86_64 => switch (@as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type))) { + .X86_64_RELOC_GOT, + .X86_64_RELOC_GOT_LOAD, + => return true, + else => return false, + }, + else => unreachable, + } +} diff --git a/src/archive/archive/zld/MachO/CodeSignature.zig b/src/archive/archive/zld/MachO/CodeSignature.zig new file mode 100644 index 000000000000..b274f4dc1aba --- /dev/null +++ b/src/archive/archive/zld/MachO/CodeSignature.zig @@ -0,0 +1,445 @@ +const CodeSignature = @This(); + +const std = @import("std"); +const assert = std.debug.assert; +const fs = std.fs; +const log = std.log.scoped(.link); +const macho = std.macho; +const mem = std.mem; +const testing = std.testing; +const Allocator = mem.Allocator; +const MachO = @import("../MachO.zig"); +const Sha256 = std.crypto.hash.sha2.Sha256; +const ThreadPool = @import("../ThreadPool.zig"); +const WaitGroup = @import("../WaitGroup.zig"); +const Zld = @import("../Zld.zig"); + +const hash_size = Sha256.digest_length; + +const Blob = union(enum) { + code_directory: *CodeDirectory, + requirements: *Requirements, + entitlements: *Entitlements, + signature: *Signature, + + fn slotType(self: Blob) u32 { + return switch (self) { + .code_directory => |x| x.slotType(), + .requirements => |x| x.slotType(), + .entitlements => |x| x.slotType(), + .signature => |x| x.slotType(), + }; + } + + fn size(self: Blob) u32 { + return switch (self) { + .code_directory => |x| x.size(), + .requirements => |x| x.size(), + .entitlements => |x| x.size(), + .signature => |x| x.size(), + }; + } + + fn write(self: Blob, writer: anytype) !void { + return switch (self) { + .code_directory => |x| x.write(writer), + .requirements => |x| x.write(writer), + .entitlements => |x| x.write(writer), + .signature => |x| x.write(writer), + }; + } +}; + +const CodeDirectory = struct { + inner: macho.CodeDirectory, + ident: []const u8, + special_slots: [n_special_slots][hash_size]u8, + code_slots: std.ArrayListUnmanaged([hash_size]u8) = .{}, + + const n_special_slots: usize = 7; + + fn init(page_size: u16) CodeDirectory { + var cdir: CodeDirectory = .{ + .inner = .{ + .magic = macho.CSMAGIC_CODEDIRECTORY, + .length = @sizeOf(macho.CodeDirectory), + .version = macho.CS_SUPPORTSEXECSEG, + .flags = macho.CS_ADHOC | macho.CS_LINKER_SIGNED, + .hashOffset = 0, + .identOffset = @sizeOf(macho.CodeDirectory), + .nSpecialSlots = 0, + .nCodeSlots = 0, + .codeLimit = 0, + .hashSize = hash_size, + .hashType = macho.CS_HASHTYPE_SHA256, + .platform = 0, + .pageSize = @as(u8, @truncate(std.math.log2(page_size))), + .spare2 = 0, + .scatterOffset = 0, + .teamOffset = 0, + .spare3 = 0, + .codeLimit64 = 0, + .execSegBase = 0, + .execSegLimit = 0, + .execSegFlags = 0, + }, + .ident = undefined, + .special_slots = undefined, + }; + comptime var i = 0; + inline while (i < n_special_slots) : (i += 1) { + cdir.special_slots[i] = [_]u8{0} ** hash_size; + } + return cdir; + } + + fn deinit(self: *CodeDirectory, allocator: Allocator) void { + self.code_slots.deinit(allocator); + } + + fn addSpecialHash(self: *CodeDirectory, index: u32, hash: [hash_size]u8) void { + assert(index > 0); + self.inner.nSpecialSlots = std.math.max(self.inner.nSpecialSlots, index); + mem.copy(u8, &self.special_slots[index - 1], &hash); + } + + fn slotType(self: CodeDirectory) u32 { + _ = self; + return 
macho.CSSLOT_CODEDIRECTORY; + } + + fn size(self: CodeDirectory) u32 { + const code_slots = self.inner.nCodeSlots * hash_size; + const special_slots = self.inner.nSpecialSlots * hash_size; + return @sizeOf(macho.CodeDirectory) + @as(u32, @intCast(self.ident.len + 1 + special_slots + code_slots)); + } + + fn write(self: CodeDirectory, writer: anytype) !void { + try writer.writeIntBig(u32, self.inner.magic); + try writer.writeIntBig(u32, self.inner.length); + try writer.writeIntBig(u32, self.inner.version); + try writer.writeIntBig(u32, self.inner.flags); + try writer.writeIntBig(u32, self.inner.hashOffset); + try writer.writeIntBig(u32, self.inner.identOffset); + try writer.writeIntBig(u32, self.inner.nSpecialSlots); + try writer.writeIntBig(u32, self.inner.nCodeSlots); + try writer.writeIntBig(u32, self.inner.codeLimit); + try writer.writeByte(self.inner.hashSize); + try writer.writeByte(self.inner.hashType); + try writer.writeByte(self.inner.platform); + try writer.writeByte(self.inner.pageSize); + try writer.writeIntBig(u32, self.inner.spare2); + try writer.writeIntBig(u32, self.inner.scatterOffset); + try writer.writeIntBig(u32, self.inner.teamOffset); + try writer.writeIntBig(u32, self.inner.spare3); + try writer.writeIntBig(u64, self.inner.codeLimit64); + try writer.writeIntBig(u64, self.inner.execSegBase); + try writer.writeIntBig(u64, self.inner.execSegLimit); + try writer.writeIntBig(u64, self.inner.execSegFlags); + + try writer.writeAll(self.ident); + try writer.writeByte(0); + + var i: isize = @as(isize, @intCast(self.inner.nSpecialSlots)); + while (i > 0) : (i -= 1) { + try writer.writeAll(&self.special_slots[@as(usize, @intCast(i - 1))]); + } + + for (self.code_slots.items) |slot| { + try writer.writeAll(&slot); + } + } +}; + +const Requirements = struct { + fn deinit(self: *Requirements, allocator: Allocator) void { + _ = self; + _ = allocator; + } + + fn slotType(self: Requirements) u32 { + _ = self; + return macho.CSSLOT_REQUIREMENTS; + } + + fn size(self: Requirements) u32 { + _ = self; + return 3 * @sizeOf(u32); + } + + fn write(self: Requirements, writer: anytype) !void { + try writer.writeIntBig(u32, macho.CSMAGIC_REQUIREMENTS); + try writer.writeIntBig(u32, self.size()); + try writer.writeIntBig(u32, 0); + } +}; + +const Entitlements = struct { + inner: []const u8, + + fn deinit(self: *Entitlements, allocator: Allocator) void { + allocator.free(self.inner); + } + + fn slotType(self: Entitlements) u32 { + _ = self; + return macho.CSSLOT_ENTITLEMENTS; + } + + fn size(self: Entitlements) u32 { + return @as(u32, @intCast(self.inner.len)) + 2 * @sizeOf(u32); + } + + fn write(self: Entitlements, writer: anytype) !void { + try writer.writeIntBig(u32, macho.CSMAGIC_EMBEDDED_ENTITLEMENTS); + try writer.writeIntBig(u32, self.size()); + try writer.writeAll(self.inner); + } +}; + +const Signature = struct { + fn deinit(self: *Signature, allocator: Allocator) void { + _ = self; + _ = allocator; + } + + fn slotType(self: Signature) u32 { + _ = self; + return macho.CSSLOT_SIGNATURESLOT; + } + + fn size(self: Signature) u32 { + _ = self; + return 2 * @sizeOf(u32); + } + + fn write(self: Signature, writer: anytype) !void { + try writer.writeIntBig(u32, macho.CSMAGIC_BLOBWRAPPER); + try writer.writeIntBig(u32, self.size()); + } +}; + +page_size: u16, +code_directory: CodeDirectory, +requirements: ?Requirements = null, +entitlements: ?Entitlements = null, +signature: ?Signature = null, + +pub fn init(page_size: u16) CodeSignature { + return .{ + .page_size = page_size, + .code_directory = 
CodeDirectory.init(page_size), + }; +} + +pub fn deinit(self: *CodeSignature, allocator: Allocator) void { + self.code_directory.deinit(allocator); + if (self.requirements) |*req| { + req.deinit(allocator); + } + if (self.entitlements) |*ents| { + ents.deinit(allocator); + } + if (self.signature) |*sig| { + sig.deinit(allocator); + } +} + +pub fn addEntitlements(self: *CodeSignature, allocator: Allocator, path: []const u8) !void { + const file = try fs.cwd().openFile(path, .{}); + defer file.close(); + const inner = try file.readToEndAlloc(allocator, std.math.maxInt(u32)); + self.entitlements = .{ .inner = inner }; +} + +pub const WriteOpts = struct { + file: fs.File, + exec_seg_base: u64, + exec_seg_limit: u64, + file_size: u32, + output_mode: Zld.OutputMode, +}; + +pub fn writeAdhocSignature( + self: *CodeSignature, + macho_file: *MachO, + opts: WriteOpts, + writer: anytype, +) !void { + const allocator = macho_file.base.allocator; + + var header: macho.SuperBlob = .{ + .magic = macho.CSMAGIC_EMBEDDED_SIGNATURE, + .length = @sizeOf(macho.SuperBlob), + .count = 0, + }; + + var blobs = std.ArrayList(Blob).init(allocator); + defer blobs.deinit(); + + self.code_directory.inner.execSegBase = opts.exec_seg_base; + self.code_directory.inner.execSegLimit = opts.exec_seg_limit; + self.code_directory.inner.execSegFlags = if (opts.output_mode == .exe) macho.CS_EXECSEG_MAIN_BINARY else 0; + self.code_directory.inner.codeLimit = opts.file_size; + + const total_pages = @as(u32, @intCast(mem.alignForward(opts.file_size, self.page_size) / self.page_size)); + + try self.code_directory.code_slots.ensureTotalCapacityPrecise(allocator, total_pages); + self.code_directory.code_slots.items.len = total_pages; + self.code_directory.inner.nCodeSlots = total_pages; + + // Calculate hash for each page (in file) and write it to the buffer + try self.parallelHash(allocator, macho_file.base.thread_pool, opts.file, opts.file_size); + + try blobs.append(.{ .code_directory = &self.code_directory }); + header.length += @sizeOf(macho.BlobIndex); + header.count += 1; + + var hash: [hash_size]u8 = undefined; + + if (self.requirements) |*req| { + var buf = std.ArrayList(u8).init(allocator); + defer buf.deinit(); + try req.write(buf.writer()); + Sha256.hash(buf.items, &hash, .{}); + self.code_directory.addSpecialHash(req.slotType(), hash); + + try blobs.append(.{ .requirements = req }); + header.count += 1; + header.length += @sizeOf(macho.BlobIndex) + req.size(); + } + + if (self.entitlements) |*ents| { + var buf = std.ArrayList(u8).init(allocator); + defer buf.deinit(); + try ents.write(buf.writer()); + Sha256.hash(buf.items, &hash, .{}); + self.code_directory.addSpecialHash(ents.slotType(), hash); + + try blobs.append(.{ .entitlements = ents }); + header.count += 1; + header.length += @sizeOf(macho.BlobIndex) + ents.size(); + } + + if (self.signature) |*sig| { + try blobs.append(.{ .signature = sig }); + header.count += 1; + header.length += @sizeOf(macho.BlobIndex) + sig.size(); + } + + self.code_directory.inner.hashOffset = + @sizeOf(macho.CodeDirectory) + @as(u32, @intCast(self.code_directory.ident.len + 1 + self.code_directory.inner.nSpecialSlots * hash_size)); + self.code_directory.inner.length = self.code_directory.size(); + header.length += self.code_directory.size(); + + try writer.writeIntBig(u32, header.magic); + try writer.writeIntBig(u32, header.length); + try writer.writeIntBig(u32, header.count); + + var offset: u32 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) * @as(u32, @intCast(blobs.items.len)); + 
for (blobs.items) |blob| { + try writer.writeIntBig(u32, blob.slotType()); + try writer.writeIntBig(u32, offset); + offset += blob.size(); + } + + for (blobs.items) |blob| { + try blob.write(writer); + } +} + +fn parallelHash( + self: *CodeSignature, + gpa: Allocator, + pool: *ThreadPool, + file: fs.File, + file_size: u32, +) !void { + var wg: WaitGroup = .{}; + + const total_num_chunks = mem.alignForward(file_size, self.page_size) / self.page_size; + assert(self.code_directory.code_slots.items.len >= total_num_chunks); + + const buffer = try gpa.alloc(u8, self.page_size * total_num_chunks); + defer gpa.free(buffer); + + const results = try gpa.alloc(fs.File.PReadError!usize, total_num_chunks); + defer gpa.free(results); + + { + wg.reset(); + defer wg.wait(); + + var i: usize = 0; + while (i < total_num_chunks) : (i += 1) { + const fstart = i * self.page_size; + const fsize = if (fstart + self.page_size > file_size) + file_size - fstart + else + self.page_size; + wg.start(); + try pool.spawn(worker, .{ + file, + fstart, + buffer[fstart..][0..fsize], + &self.code_directory.code_slots.items[i], + &results[i], + &wg, + }); + } + } + for (results) |result| _ = try result; +} + +fn worker( + file: fs.File, + fstart: usize, + buffer: []u8, + out: *[hash_size]u8, + err: *fs.File.PReadError!usize, + wg: *WaitGroup, +) void { + defer wg.finish(); + err.* = file.preadAll(buffer, fstart); + Sha256.hash(buffer, out, .{}); +} + +pub fn size(self: CodeSignature) u32 { + var ssize: u32 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) + self.code_directory.size(); + if (self.requirements) |req| { + ssize += @sizeOf(macho.BlobIndex) + req.size(); + } + if (self.entitlements) |ent| { + ssize += @sizeOf(macho.BlobIndex) + ent.size(); + } + if (self.signature) |sig| { + ssize += @sizeOf(macho.BlobIndex) + sig.size(); + } + return ssize; +} + +pub fn estimateSize(self: CodeSignature, file_size: u64) u32 { + var ssize: u64 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) + self.code_directory.size(); + // Approx code slots + const total_pages = mem.alignForwardGeneric(u64, file_size, self.page_size) / self.page_size; + ssize += total_pages * hash_size; + var n_special_slots: u32 = 0; + if (self.requirements) |req| { + ssize += @sizeOf(macho.BlobIndex) + req.size(); + n_special_slots = std.math.max(n_special_slots, req.slotType()); + } + if (self.entitlements) |ent| { + ssize += @sizeOf(macho.BlobIndex) + ent.size() + hash_size; + n_special_slots = std.math.max(n_special_slots, ent.slotType()); + } + if (self.signature) |sig| { + ssize += @sizeOf(macho.BlobIndex) + sig.size(); + } + ssize += n_special_slots * hash_size; + return @as(u32, @intCast(mem.alignForwardGeneric(u64, ssize, @sizeOf(u64)))); +} + +pub fn clear(self: *CodeSignature, allocator: Allocator) void { + self.code_directory.deinit(allocator); + self.code_directory = CodeDirectory.init(self.page_size); +} diff --git a/src/archive/archive/zld/MachO/DwarfInfo.zig b/src/archive/archive/zld/MachO/DwarfInfo.zig new file mode 100644 index 000000000000..da77e3c1c7cc --- /dev/null +++ b/src/archive/archive/zld/MachO/DwarfInfo.zig @@ -0,0 +1,512 @@ +const DwarfInfo = @This(); + +const std = @import("std"); +const assert = std.debug.assert; +const dwarf = std.dwarf; +const leb = std.leb; +const log = std.log.scoped(.macho); +const mem = std.mem; + +const Allocator = mem.Allocator; +pub const AbbrevLookupTable = std.AutoHashMap(u64, struct { pos: usize, len: usize }); +pub const SubprogramLookupByName = std.StringHashMap(struct { addr: u64, size: 
u64 }); + +debug_info: []const u8, +debug_abbrev: []const u8, +debug_str: []const u8, + +pub fn getCompileUnitIterator(self: DwarfInfo) CompileUnitIterator { + return .{ .ctx = self }; +} + +const CompileUnitIterator = struct { + ctx: DwarfInfo, + pos: usize = 0, + + pub fn next(self: *CompileUnitIterator) !?CompileUnit { + if (self.pos >= self.ctx.debug_info.len) return null; + + var stream = std.io.fixedBufferStream(self.ctx.debug_info); + var creader = std.io.countingReader(stream.reader()); + const reader = creader.reader(); + + const cuh = try CompileUnit.Header.read(reader); + const total_length = cuh.length + @as(u64, if (cuh.is_64bit) @sizeOf(u64) else @sizeOf(u32)); + + const cu = CompileUnit{ + .cuh = cuh, + .debug_info_off = creader.bytes_read, + }; + + self.pos += total_length; + + return cu; + } +}; + +pub fn genSubprogramLookupByName( + self: DwarfInfo, + compile_unit: CompileUnit, + abbrev_lookup: AbbrevLookupTable, + lookup: *SubprogramLookupByName, +) !void { + var abbrev_it = compile_unit.getAbbrevEntryIterator(self); + while (try abbrev_it.next(abbrev_lookup)) |entry| switch (entry.tag) { + dwarf.TAG.subprogram => { + var attr_it = entry.getAttributeIterator(self, compile_unit.cuh); + + var name: ?[]const u8 = null; + var low_pc: ?u64 = null; + var high_pc: ?u64 = null; + + while (try attr_it.next()) |attr| switch (attr.name) { + dwarf.AT.name => if (attr.getString(self, compile_unit.cuh)) |str| { + name = str; + }, + dwarf.AT.low_pc => { + if (attr.getAddr(self, compile_unit.cuh)) |addr| { + low_pc = addr; + } + if (try attr.getConstant(self)) |constant| { + low_pc = @as(u64, @intCast(constant)); + } + }, + dwarf.AT.high_pc => { + if (attr.getAddr(self, compile_unit.cuh)) |addr| { + high_pc = addr; + } + if (try attr.getConstant(self)) |constant| { + high_pc = @as(u64, @intCast(constant)); + } + }, + else => {}, + }; + + if (name == null or low_pc == null or high_pc == null) continue; + + try lookup.putNoClobber(name.?, .{ .addr = low_pc.?, .size = high_pc.? 
}); + }, + else => {}, + }; +} + +pub fn genAbbrevLookupByKind(self: DwarfInfo, off: usize, lookup: *AbbrevLookupTable) !void { + const data = self.debug_abbrev[off..]; + var stream = std.io.fixedBufferStream(data); + var creader = std.io.countingReader(stream.reader()); + const reader = creader.reader(); + + while (true) { + const kind = try leb.readULEB128(u64, reader); + + if (kind == 0) break; + + const pos = creader.bytes_read; + _ = try leb.readULEB128(u64, reader); // TAG + _ = try reader.readByte(); // CHILDREN + + while (true) { + const name = try leb.readULEB128(u64, reader); + const form = try leb.readULEB128(u64, reader); + + if (name == 0 and form == 0) break; + } + + try lookup.putNoClobber(kind, .{ + .pos = pos, + .len = creader.bytes_read - pos - 2, + }); + } +} + +pub const CompileUnit = struct { + cuh: Header, + debug_info_off: usize, + + pub const Header = struct { + is_64bit: bool, + length: u64, + version: u16, + debug_abbrev_offset: u64, + address_size: u8, + + fn read(reader: anytype) !Header { + var length: u64 = try reader.readIntLittle(u32); + + const is_64bit = length == 0xffffffff; + if (is_64bit) { + length = try reader.readIntLittle(u64); + } + + const version = try reader.readIntLittle(u16); + const debug_abbrev_offset = if (is_64bit) + try reader.readIntLittle(u64) + else + try reader.readIntLittle(u32); + const address_size = try reader.readIntLittle(u8); + + return Header{ + .is_64bit = is_64bit, + .length = length, + .version = version, + .debug_abbrev_offset = debug_abbrev_offset, + .address_size = address_size, + }; + } + }; + + inline fn getDebugInfo(self: CompileUnit, ctx: DwarfInfo) []const u8 { + return ctx.debug_info[self.debug_info_off..][0..self.cuh.length]; + } + + pub fn getAbbrevEntryIterator(self: CompileUnit, ctx: DwarfInfo) AbbrevEntryIterator { + return .{ .cu = self, .ctx = ctx }; + } +}; + +const AbbrevEntryIterator = struct { + cu: CompileUnit, + ctx: DwarfInfo, + pos: usize = 0, + + pub fn next(self: *AbbrevEntryIterator, lookup: AbbrevLookupTable) !?AbbrevEntry { + if (self.pos + self.cu.debug_info_off >= self.ctx.debug_info.len) return null; + + const debug_info = self.ctx.debug_info[self.pos + self.cu.debug_info_off ..]; + var stream = std.io.fixedBufferStream(debug_info); + var creader = std.io.countingReader(stream.reader()); + const reader = creader.reader(); + + const kind = try leb.readULEB128(u64, reader); + self.pos += creader.bytes_read; + + if (kind == 0) { + return AbbrevEntry.null(); + } + + const abbrev_pos = lookup.get(kind) orelse return error.MalformedDwarf; + const len = try findAbbrevEntrySize( + self.ctx, + abbrev_pos.pos, + abbrev_pos.len, + self.pos + self.cu.debug_info_off, + self.cu.cuh, + ); + const entry = try getAbbrevEntry( + self.ctx, + abbrev_pos.pos, + abbrev_pos.len, + self.pos + self.cu.debug_info_off, + len, + ); + + self.pos += len; + + return entry; + } +}; + +pub const AbbrevEntry = struct { + tag: u64, + children: u8, + debug_abbrev_off: usize, + debug_abbrev_len: usize, + debug_info_off: usize, + debug_info_len: usize, + + fn @"null"() AbbrevEntry { + return .{ + .tag = 0, + .children = dwarf.CHILDREN.no, + .debug_abbrev_off = 0, + .debug_abbrev_len = 0, + .debug_info_off = 0, + .debug_info_len = 0, + }; + } + + pub fn hasChildren(self: AbbrevEntry) bool { + return self.children == dwarf.CHILDREN.yes; + } + + inline fn getDebugInfo(self: AbbrevEntry, ctx: DwarfInfo) []const u8 { + return ctx.debug_info[self.debug_info_off..][0..self.debug_info_len]; + } + + inline fn getDebugAbbrev(self: 
AbbrevEntry, ctx: DwarfInfo) []const u8 { + return ctx.debug_abbrev[self.debug_abbrev_off..][0..self.debug_abbrev_len]; + } + + pub fn getAttributeIterator(self: AbbrevEntry, ctx: DwarfInfo, cuh: CompileUnit.Header) AttributeIterator { + return .{ .entry = self, .ctx = ctx, .cuh = cuh }; + } +}; + +pub const Attribute = struct { + name: u64, + form: u64, + debug_info_off: usize, + debug_info_len: usize, + + inline fn getDebugInfo(self: Attribute, ctx: DwarfInfo) []const u8 { + return ctx.debug_info[self.debug_info_off..][0..self.debug_info_len]; + } + + pub fn getString(self: Attribute, ctx: DwarfInfo, cuh: CompileUnit.Header) ?[]const u8 { + const debug_info = self.getDebugInfo(ctx); + + switch (self.form) { + dwarf.FORM.string => { + return mem.sliceTo(@as([*:0]const u8, @ptrCast(debug_info.ptr)), 0); + }, + dwarf.FORM.strp => { + const off = if (cuh.is_64bit) + mem.readIntLittle(u64, debug_info[0..8]) + else + mem.readIntLittle(u32, debug_info[0..4]); + return ctx.getString(off); + }, + else => return null, + } + } + + pub fn getConstant(self: Attribute, ctx: DwarfInfo) !?i128 { + const debug_info = self.getDebugInfo(ctx); + var stream = std.io.fixedBufferStream(debug_info); + const reader = stream.reader(); + + return switch (self.form) { + dwarf.FORM.data1 => debug_info[0], + dwarf.FORM.data2 => mem.readIntLittle(u16, debug_info[0..2]), + dwarf.FORM.data4 => mem.readIntLittle(u32, debug_info[0..4]), + dwarf.FORM.data8 => mem.readIntLittle(u64, debug_info[0..8]), + dwarf.FORM.udata => try leb.readULEB128(u64, reader), + dwarf.FORM.sdata => try leb.readILEB128(i64, reader), + else => null, + }; + } + + pub fn getReference(self: Attribute, ctx: DwarfInfo) !?u64 { + const debug_info = self.getDebugInfo(ctx); + var stream = std.io.fixedBufferStream(debug_info); + const reader = stream.reader(); + + return switch (self.form) { + dwarf.FORM.ref1 => debug_info[0], + dwarf.FORM.ref2 => mem.readIntLittle(u16, debug_info[0..2]), + dwarf.FORM.ref4 => mem.readIntLittle(u32, debug_info[0..4]), + dwarf.FORM.ref8 => mem.readIntLittle(u64, debug_info[0..8]), + dwarf.FORM.ref_udata => try leb.readULEB128(u64, reader), + else => null, + }; + } + + pub fn getAddr(self: Attribute, ctx: DwarfInfo, cuh: CompileUnit.Header) ?u64 { + if (self.form != dwarf.FORM.addr) return null; + const debug_info = self.getDebugInfo(ctx); + return switch (cuh.address_size) { + 1 => debug_info[0], + 2 => mem.readIntLittle(u16, debug_info[0..2]), + 4 => mem.readIntLittle(u32, debug_info[0..4]), + 8 => mem.readIntLittle(u64, debug_info[0..8]), + else => unreachable, + }; + } +}; + +const AttributeIterator = struct { + entry: AbbrevEntry, + ctx: DwarfInfo, + cuh: CompileUnit.Header, + debug_abbrev_pos: usize = 0, + debug_info_pos: usize = 0, + + pub fn next(self: *AttributeIterator) !?Attribute { + const debug_abbrev = self.entry.getDebugAbbrev(self.ctx); + if (self.debug_abbrev_pos >= debug_abbrev.len) return null; + + var stream = std.io.fixedBufferStream(debug_abbrev[self.debug_abbrev_pos..]); + var creader = std.io.countingReader(stream.reader()); + const reader = creader.reader(); + + const name = try leb.readULEB128(u64, reader); + const form = try leb.readULEB128(u64, reader); + + self.debug_abbrev_pos += creader.bytes_read; + + const len = try findFormSize( + self.ctx, + form, + self.debug_info_pos + self.entry.debug_info_off, + self.cuh, + ); + const attr = Attribute{ + .name = name, + .form = form, + .debug_info_off = self.debug_info_pos + self.entry.debug_info_off, + .debug_info_len = len, + }; + + 
self.debug_info_pos += len; + + return attr; + } +}; + +fn getAbbrevEntry(self: DwarfInfo, da_off: usize, da_len: usize, di_off: usize, di_len: usize) !AbbrevEntry { + const debug_abbrev = self.debug_abbrev[da_off..][0..da_len]; + var stream = std.io.fixedBufferStream(debug_abbrev); + var creader = std.io.countingReader(stream.reader()); + const reader = creader.reader(); + + const tag = try leb.readULEB128(u64, reader); + const children = switch (tag) { + std.dwarf.TAG.const_type, + std.dwarf.TAG.packed_type, + std.dwarf.TAG.pointer_type, + std.dwarf.TAG.reference_type, + std.dwarf.TAG.restrict_type, + std.dwarf.TAG.rvalue_reference_type, + std.dwarf.TAG.shared_type, + std.dwarf.TAG.volatile_type, + => if (creader.bytes_read == da_len) std.dwarf.CHILDREN.no else try reader.readByte(), + else => try reader.readByte(), + }; + + return AbbrevEntry{ + .tag = tag, + .children = children, + .debug_abbrev_off = creader.bytes_read + da_off, + .debug_abbrev_len = da_len - creader.bytes_read, + .debug_info_off = di_off, + .debug_info_len = di_len, + }; +} + +fn findFormSize(self: DwarfInfo, form: u64, di_off: usize, cuh: CompileUnit.Header) !usize { + const debug_info = self.debug_info[di_off..]; + var stream = std.io.fixedBufferStream(debug_info); + var creader = std.io.countingReader(stream.reader()); + const reader = creader.reader(); + + switch (form) { + dwarf.FORM.strp, + dwarf.FORM.sec_offset, + dwarf.FORM.ref_addr, + => return if (cuh.is_64bit) @sizeOf(u64) else @sizeOf(u32), + + dwarf.FORM.addr => return cuh.address_size, + + dwarf.FORM.block1, + dwarf.FORM.block2, + dwarf.FORM.block4, + dwarf.FORM.block, + => { + const len: u64 = switch (form) { + dwarf.FORM.block1 => try reader.readIntLittle(u8), + dwarf.FORM.block2 => try reader.readIntLittle(u16), + dwarf.FORM.block4 => try reader.readIntLittle(u32), + dwarf.FORM.block => try leb.readULEB128(u64, reader), + else => unreachable, + }; + var i: u64 = 0; + while (i < len) : (i += 1) { + _ = try reader.readByte(); + } + return creader.bytes_read; + }, + + dwarf.FORM.exprloc => { + const expr_len = try leb.readULEB128(u64, reader); + var i: u64 = 0; + while (i < expr_len) : (i += 1) { + _ = try reader.readByte(); + } + return creader.bytes_read; + }, + dwarf.FORM.flag_present => return 0, + + dwarf.FORM.data1, + dwarf.FORM.ref1, + dwarf.FORM.flag, + => return @sizeOf(u8), + + dwarf.FORM.data2, + dwarf.FORM.ref2, + => return @sizeOf(u16), + + dwarf.FORM.data4, + dwarf.FORM.ref4, + => return @sizeOf(u32), + + dwarf.FORM.data8, + dwarf.FORM.ref8, + dwarf.FORM.ref_sig8, + => return @sizeOf(u64), + + dwarf.FORM.udata, + dwarf.FORM.ref_udata, + => { + _ = try leb.readULEB128(u64, reader); + return creader.bytes_read; + }, + + dwarf.FORM.sdata => { + _ = try leb.readILEB128(i64, reader); + return creader.bytes_read; + }, + + dwarf.FORM.string => { + var count: usize = 0; + while (true) { + const byte = try reader.readByte(); + count += 1; + if (byte == 0x0) break; + } + return count; + }, + + else => { + log.err("unhandled DW_FORM_* value with identifier {x}", .{form}); + return error.UnhandledDwFormValue; + }, + } +} + +fn findAbbrevEntrySize(self: DwarfInfo, da_off: usize, da_len: usize, di_off: usize, cuh: CompileUnit.Header) !usize { + const debug_abbrev = self.debug_abbrev[da_off..][0..da_len]; + var stream = std.io.fixedBufferStream(debug_abbrev); + var creader = std.io.countingReader(stream.reader()); + const reader = creader.reader(); + + const tag = try leb.readULEB128(u64, reader); + switch (tag) { + std.dwarf.TAG.const_type, + 
std.dwarf.TAG.packed_type, + std.dwarf.TAG.pointer_type, + std.dwarf.TAG.reference_type, + std.dwarf.TAG.restrict_type, + std.dwarf.TAG.rvalue_reference_type, + std.dwarf.TAG.shared_type, + std.dwarf.TAG.volatile_type, + => if (creader.bytes_read != da_len) { + _ = try reader.readByte(); + }, + else => _ = try reader.readByte(), + } + + var len: usize = 0; + while (creader.bytes_read < debug_abbrev.len) { + _ = try leb.readULEB128(u64, reader); + const form = try leb.readULEB128(u64, reader); + const form_len = try self.findFormSize(form, di_off + len, cuh); + len += form_len; + } + + return len; +} + +fn getString(self: DwarfInfo, off: u64) []const u8 { + assert(off < self.debug_str.len); + return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.debug_str.ptr + off)), 0); +} diff --git a/src/archive/archive/zld/MachO/Dylib.zig b/src/archive/archive/zld/MachO/Dylib.zig new file mode 100644 index 000000000000..2454f228c67a --- /dev/null +++ b/src/archive/archive/zld/MachO/Dylib.zig @@ -0,0 +1,534 @@ +const Dylib = @This(); + +const std = @import("std"); +const assert = std.debug.assert; +const fs = std.fs; +const fmt = std.fmt; +const log = std.log.scoped(.macho); +const macho = std.macho; +const math = std.math; +const mem = std.mem; + +const fat = @import("fat.zig"); +const Allocator = mem.Allocator; +const CrossTarget = std.zig.CrossTarget; +const LibStub = @import("../tapi.zig").LibStub; +const LoadCommandIterator = macho.LoadCommandIterator; +const MachO = @import("../MachO.zig"); + +id: ?Id = null, +weak: bool = false, + +/// Parsed symbol table represented as hash map of symbols' +/// names. We can and should defer creating *Symbols until +/// a symbol is referenced by an object file. +/// +/// The value for each parsed symbol represents whether the +/// symbol is defined as a weak symbol or strong. +/// TODO when the referenced symbol is weak, ld64 marks it as +/// N_REF_TO_WEAK but need to investigate if there's more to it +/// such as weak binding entry or simply weak. For now, we generate +/// standard bind or lazy bind. 
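+/// For example (symbol names purely illustrative), a dylib exporting a strong
+/// `_foo` and a weak `_bar` is recorded here as `_foo => false`, `_bar => true`.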
+symbols: std.StringArrayHashMapUnmanaged(bool) = .{}, + +pub const Id = struct { + name: []const u8, + timestamp: u32, + current_version: u32, + compatibility_version: u32, + + pub fn default(allocator: Allocator, name: []const u8) !Id { + return Id{ + .name = try allocator.dupe(u8, name), + .timestamp = 2, + .current_version = 0x10000, + .compatibility_version = 0x10000, + }; + } + + pub fn fromLoadCommand(allocator: Allocator, lc: macho.dylib_command, name: []const u8) !Id { + return Id{ + .name = try allocator.dupe(u8, name), + .timestamp = lc.dylib.timestamp, + .current_version = lc.dylib.current_version, + .compatibility_version = lc.dylib.compatibility_version, + }; + } + + pub fn deinit(id: Id, allocator: Allocator) void { + allocator.free(id.name); + } + + pub const ParseError = fmt.ParseIntError || fmt.BufPrintError; + + pub fn parseCurrentVersion(id: *Id, version: anytype) ParseError!void { + id.current_version = try parseVersion(version); + } + + pub fn parseCompatibilityVersion(id: *Id, version: anytype) ParseError!void { + id.compatibility_version = try parseVersion(version); + } + + fn parseVersion(version: anytype) ParseError!u32 { + const string = blk: { + switch (version) { + .int => |int| { + var out: u32 = 0; + const major = math.cast(u16, int) orelse return error.Overflow; + out += @as(u32, @intCast(major)) << 16; + return out; + }, + .float => |float| { + var buf: [256]u8 = undefined; + break :blk try fmt.bufPrint(&buf, "{d:.2}", .{float}); + }, + .string => |string| { + break :blk string; + }, + } + }; + + var out: u32 = 0; + var values: [3][]const u8 = undefined; + + var split = mem.split(u8, string, "."); + var count: u4 = 0; + while (split.next()) |value| { + if (count > 2) { + log.debug("malformed version field: {s}", .{string}); + return 0x10000; + } + values[count] = value; + count += 1; + } + + if (count > 2) { + out += try fmt.parseInt(u8, values[2], 10); + } + if (count > 1) { + out += @as(u32, @intCast(try fmt.parseInt(u8, values[1], 10))) << 8; + } + out += @as(u32, @intCast(try fmt.parseInt(u16, values[0], 10))) << 16; + + return out; + } +}; + +pub fn deinit(self: *Dylib, allocator: Allocator) void { + for (self.symbols.keys()) |key| { + allocator.free(key); + } + self.symbols.deinit(allocator); + if (self.id) |*id| { + id.deinit(allocator); + } +} + +pub fn parseFromBinary( + self: *Dylib, + allocator: Allocator, + cpu_arch: std.Target.Cpu.Arch, + dylib_id: u16, + dependent_libs: anytype, + name: []const u8, + data: []align(@alignOf(u64)) const u8, +) !void { + var stream = std.io.fixedBufferStream(data); + const reader = stream.reader(); + + log.debug("parsing shared library '{s}'", .{name}); + + const header = try reader.readStruct(macho.mach_header_64); + + if (header.filetype != macho.MH_DYLIB) { + log.debug("invalid filetype: expected 0x{x}, found 0x{x}", .{ + macho.MH_DYLIB, + header.filetype, + }); + return error.NotDylib; + } + + const this_arch: std.Target.Cpu.Arch = try fat.decodeArch(header.cputype, true); + + if (this_arch != cpu_arch) { + log.err("mismatched cpu architecture: expected {s}, found {s}", .{ + @tagName(cpu_arch), + @tagName(this_arch), + }); + return error.MismatchedCpuArchitecture; + } + + const should_lookup_reexports = header.flags & macho.MH_NO_REEXPORTED_DYLIBS == 0; + var it = LoadCommandIterator{ + .ncmds = header.ncmds, + .buffer = data[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], + }; + while (it.next()) |cmd| { + switch (cmd.cmd()) { + .SYMTAB => { + const symtab_cmd = cmd.cast(macho.symtab_command).?; + 
const symtab = @as( + [*]const macho.nlist_64, + // Alignment is guaranteed as a dylib is a final linked image and has to have sections + // properly aligned in order to be correctly loaded by the loader. + @ptrCast(@alignCast(&data[symtab_cmd.symoff])), + )[0..symtab_cmd.nsyms]; + const strtab = data[symtab_cmd.stroff..][0..symtab_cmd.strsize]; + + for (symtab) |sym| { + const add_to_symtab = sym.ext() and (sym.sect() or sym.indr()); + if (!add_to_symtab) continue; + + const sym_name = mem.sliceTo(@as([*:0]const u8, @ptrCast(strtab.ptr + sym.n_strx)), 0); + try self.symbols.putNoClobber(allocator, try allocator.dupe(u8, sym_name), false); + } + }, + .ID_DYLIB => { + self.id = try Id.fromLoadCommand( + allocator, + cmd.cast(macho.dylib_command).?, + cmd.getDylibPathName(), + ); + }, + .REEXPORT_DYLIB => { + if (should_lookup_reexports) { + // Parse install_name to dependent dylib. + var id = try Id.fromLoadCommand( + allocator, + cmd.cast(macho.dylib_command).?, + cmd.getDylibPathName(), + ); + try dependent_libs.writeItem(.{ .id = id, .parent = dylib_id }); + } + }, + else => {}, + } + } + + if (self.id == null) { + log.debug("no LC_ID_DYLIB load command found; using hard-coded defaults...", .{}); + self.id = try Id.default(allocator, name); + } +} + +fn addObjCClassSymbol(self: *Dylib, allocator: Allocator, sym_name: []const u8) !void { + const expanded = &[_][]const u8{ + try std.fmt.allocPrint(allocator, "_OBJC_CLASS_$_{s}", .{sym_name}), + try std.fmt.allocPrint(allocator, "_OBJC_METACLASS_$_{s}", .{sym_name}), + }; + + for (expanded) |sym| { + if (self.symbols.contains(sym)) continue; + try self.symbols.putNoClobber(allocator, sym, false); + } +} + +fn addObjCIVarSymbol(self: *Dylib, allocator: Allocator, sym_name: []const u8) !void { + const expanded = try std.fmt.allocPrint(allocator, "_OBJC_IVAR_$_{s}", .{sym_name}); + if (self.symbols.contains(expanded)) return; + try self.symbols.putNoClobber(allocator, expanded, false); +} + +fn addObjCEhTypeSymbol(self: *Dylib, allocator: Allocator, sym_name: []const u8) !void { + const expanded = try std.fmt.allocPrint(allocator, "_OBJC_EHTYPE_$_{s}", .{sym_name}); + if (self.symbols.contains(expanded)) return; + try self.symbols.putNoClobber(allocator, expanded, false); +} + +fn addSymbol(self: *Dylib, allocator: Allocator, sym_name: []const u8) !void { + if (self.symbols.contains(sym_name)) return; + try self.symbols.putNoClobber(allocator, try allocator.dupe(u8, sym_name), false); +} + +fn addWeakSymbol(self: *Dylib, allocator: Allocator, sym_name: []const u8) !void { + if (self.symbols.contains(sym_name)) return; + try self.symbols.putNoClobber(allocator, try allocator.dupe(u8, sym_name), true); +} + +const TargetMatcher = struct { + allocator: Allocator, + target: CrossTarget, + target_strings: std.ArrayListUnmanaged([]const u8) = .{}, + + fn init(allocator: Allocator, target: CrossTarget) !TargetMatcher { + var self = TargetMatcher{ + .allocator = allocator, + .target = target, + }; + try self.target_strings.append(allocator, try targetToAppleString(allocator, target)); + + const abi = target.abi orelse .none; + if (abi == .simulator) { + // For Apple simulator targets, linking gets tricky as we need to link against the simulator + // hosts dylibs too. 
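+            // E.g. an `arm64-ios-simulator` target also accepts `arm64-macos` entries,
+            // since targetToAppleString renders the host variant as `arm64-macos`.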
+ const host_target = try targetToAppleString(allocator, .{ + .cpu_arch = target.cpu_arch.?, + .os_tag = .macos, + }); + try self.target_strings.append(allocator, host_target); + } + + return self; + } + + fn deinit(self: *TargetMatcher) void { + for (self.target_strings.items) |t| { + self.allocator.free(t); + } + self.target_strings.deinit(self.allocator); + } + + fn targetToAppleString(allocator: Allocator, target: CrossTarget) ![]const u8 { + const cpu_arch = switch (target.cpu_arch.?) { + .aarch64 => "arm64", + .x86_64 => "x86_64", + else => unreachable, + }; + const os_tag = @tagName(target.os_tag.?); + const target_abi = target.abi orelse .none; + const abi: ?[]const u8 = switch (target_abi) { + .none => null, + .simulator => "simulator", + .macabi => "maccatalyst", + else => unreachable, + }; + if (abi) |x| { + return std.fmt.allocPrint(allocator, "{s}-{s}-{s}", .{ cpu_arch, os_tag, x }); + } + return std.fmt.allocPrint(allocator, "{s}-{s}", .{ cpu_arch, os_tag }); + } + + fn hasValue(stack: []const []const u8, needle: []const u8) bool { + for (stack) |v| { + if (mem.eql(u8, v, needle)) return true; + } + return false; + } + + fn matchesTarget(self: TargetMatcher, targets: []const []const u8) bool { + for (self.target_strings.items) |t| { + if (hasValue(targets, t)) return true; + } + return false; + } + + fn matchesArch(self: TargetMatcher, archs: []const []const u8) bool { + return hasValue(archs, @tagName(self.target.cpu_arch.?)); + } +}; + +pub fn parseFromStub( + self: *Dylib, + allocator: Allocator, + target: CrossTarget, + lib_stub: LibStub, + dylib_id: u16, + dependent_libs: anytype, + name: []const u8, +) !void { + if (lib_stub.inner.len == 0) return error.EmptyStubFile; + + log.debug("parsing shared library from stub '{s}'", .{name}); + + const umbrella_lib = lib_stub.inner[0]; + + { + var id = try Id.default(allocator, umbrella_lib.installName()); + if (umbrella_lib.currentVersion()) |version| { + try id.parseCurrentVersion(version); + } + if (umbrella_lib.compatibilityVersion()) |version| { + try id.parseCompatibilityVersion(version); + } + self.id = id; + } + + var umbrella_libs = std.StringHashMap(void).init(allocator); + defer umbrella_libs.deinit(); + + log.debug(" (install_name '{s}')", .{umbrella_lib.installName()}); + + var matcher = try TargetMatcher.init(allocator, target); + defer matcher.deinit(); + + for (lib_stub.inner, 0..) |elem, stub_index| { + const is_match = switch (elem) { + .v3 => |stub| matcher.matchesArch(stub.archs), + .v4 => |stub| matcher.matchesTarget(stub.targets), + }; + if (!is_match) continue; + + if (stub_index > 0) { + // TODO I thought that we could switch on presence of `parent-umbrella` map; + // however, turns out `libsystem_notify.dylib` is fully reexported by `libSystem.dylib` + // BUT does not feature a `parent-umbrella` map as the only sublib. Apple's bug perhaps? 
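+            // Remember this sublibrary's install_name so that re-exports pointing back
+            // into the umbrella are not queued again as dependent dylibs below.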
+ try umbrella_libs.put(elem.installName(), {}); + } + + switch (elem) { + .v3 => |stub| { + if (stub.exports) |exports| { + for (exports) |exp| { + if (!matcher.matchesArch(exp.archs)) continue; + + if (exp.symbols) |symbols| { + for (symbols) |sym_name| { + try self.addSymbol(allocator, sym_name); + } + } + + if (exp.weak_symbols) |symbols| { + for (symbols) |sym_name| { + try self.addWeakSymbol(allocator, sym_name); + } + } + + if (exp.objc_classes) |objc_classes| { + for (objc_classes) |class_name| { + try self.addObjCClassSymbol(allocator, class_name); + } + } + + if (exp.objc_ivars) |objc_ivars| { + for (objc_ivars) |ivar| { + try self.addObjCIVarSymbol(allocator, ivar); + } + } + + if (exp.objc_eh_types) |objc_eh_types| { + for (objc_eh_types) |eht| { + try self.addObjCEhTypeSymbol(allocator, eht); + } + } + + // TODO track which libs were already parsed in different steps + if (exp.re_exports) |re_exports| { + for (re_exports) |lib| { + if (umbrella_libs.contains(lib)) continue; + + log.debug(" (found re-export '{s}')", .{lib}); + + var dep_id = try Id.default(allocator, lib); + try dependent_libs.writeItem(.{ .id = dep_id, .parent = dylib_id }); + } + } + } + } + }, + .v4 => |stub| { + if (stub.exports) |exports| { + for (exports) |exp| { + if (!matcher.matchesTarget(exp.targets)) continue; + + if (exp.symbols) |symbols| { + for (symbols) |sym_name| { + try self.addSymbol(allocator, sym_name); + } + } + + if (exp.weak_symbols) |symbols| { + for (symbols) |sym_name| { + try self.addWeakSymbol(allocator, sym_name); + } + } + + if (exp.objc_classes) |classes| { + for (classes) |sym_name| { + try self.addObjCClassSymbol(allocator, sym_name); + } + } + + if (exp.objc_ivars) |objc_ivars| { + for (objc_ivars) |ivar| { + try self.addObjCIVarSymbol(allocator, ivar); + } + } + + if (exp.objc_eh_types) |objc_eh_types| { + for (objc_eh_types) |eht| { + try self.addObjCEhTypeSymbol(allocator, eht); + } + } + } + } + + if (stub.reexports) |reexports| { + for (reexports) |reexp| { + if (!matcher.matchesTarget(reexp.targets)) continue; + + if (reexp.symbols) |symbols| { + for (symbols) |sym_name| { + try self.addSymbol(allocator, sym_name); + } + } + + if (reexp.weak_symbols) |symbols| { + for (symbols) |sym_name| { + try self.addWeakSymbol(allocator, sym_name); + } + } + + if (reexp.objc_classes) |classes| { + for (classes) |sym_name| { + try self.addObjCClassSymbol(allocator, sym_name); + } + } + + if (reexp.objc_ivars) |objc_ivars| { + for (objc_ivars) |ivar| { + try self.addObjCIVarSymbol(allocator, ivar); + } + } + + if (reexp.objc_eh_types) |objc_eh_types| { + for (objc_eh_types) |eht| { + try self.addObjCEhTypeSymbol(allocator, eht); + } + } + } + } + + if (stub.objc_classes) |classes| { + for (classes) |sym_name| { + try self.addObjCClassSymbol(allocator, sym_name); + } + } + + if (stub.objc_ivars) |objc_ivars| { + for (objc_ivars) |ivar| { + try self.addObjCIVarSymbol(allocator, ivar); + } + } + + if (stub.objc_eh_types) |objc_eh_types| { + for (objc_eh_types) |eht| { + try self.addObjCEhTypeSymbol(allocator, eht); + } + } + }, + } + } + + // For V4, we add dependent libs in a separate pass since some stubs such as libSystem include + // re-exports directly in the stub file. 
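+    // Note that the loop below breaks on the first .v3 document, so this extra pass
+    // effectively only runs for stubs written entirely in the TBD v4 format.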
+ for (lib_stub.inner) |elem| { + if (elem == .v3) break; + const stub = elem.v4; + + // TODO track which libs were already parsed in different steps + if (stub.reexported_libraries) |reexports| { + for (reexports) |reexp| { + if (!matcher.matchesTarget(reexp.targets)) continue; + + for (reexp.libraries) |lib| { + if (umbrella_libs.contains(lib)) continue; + + log.debug(" (found re-export '{s}')", .{lib}); + + var dep_id = try Id.default(allocator, lib); + try dependent_libs.writeItem(.{ .id = dep_id, .parent = dylib_id }); + } + } + } + } +} diff --git a/src/archive/archive/zld/MachO/Object.zig b/src/archive/archive/zld/MachO/Object.zig new file mode 100644 index 000000000000..c2d1946bfd29 --- /dev/null +++ b/src/archive/archive/zld/MachO/Object.zig @@ -0,0 +1,1054 @@ +const Object = @This(); + +const std = @import("std"); +const build_options = @import("build_options"); +const assert = std.debug.assert; +const dwarf = std.dwarf; +const eh_frame = @import("eh_frame.zig"); +const fs = std.fs; +const io = std.io; +const log = std.log.scoped(.macho); +const macho = std.macho; +const math = std.math; +const mem = std.mem; +const sort = std.sort; +const trace = @import("../tracy.zig").trace; + +const Allocator = mem.Allocator; +const Atom = @import("Atom.zig"); +const AtomIndex = MachO.AtomIndex; +const DwarfInfo = @import("DwarfInfo.zig"); +const LoadCommandIterator = macho.LoadCommandIterator; +const MachO = @import("../MachO.zig"); +const SymbolWithLoc = MachO.SymbolWithLoc; +const UnwindInfo = @import("UnwindInfo.zig"); + +name: []const u8, +mtime: u64, +contents: []align(@alignOf(u64)) const u8, + +header: macho.mach_header_64 = undefined, + +/// Symtab and strtab might not exist for empty object files so we use an optional +/// to signal this. +in_symtab: ?[]align(1) const macho.nlist_64 = null, +in_strtab: ?[]const u8 = null, + +/// Output symtab is sorted so that we can easily reference symbols following each +/// other in address space. +/// The length of the symtab is at least of the input symtab length however there +/// can be trailing section symbols. +symtab: []macho.nlist_64 = undefined, +/// Can be undefined as set together with in_symtab. +source_symtab_lookup: []u32 = undefined, +/// Can be undefined as set together with in_symtab. +reverse_symtab_lookup: []u32 = undefined, +/// Can be undefined as set together with in_symtab. +source_address_lookup: []i64 = undefined, +/// Can be undefined as set together with in_symtab. +source_section_index_lookup: []i64 = undefined, +/// Can be undefined as set together with in_symtab. +strtab_lookup: []u32 = undefined, +/// Can be undefined as set together with in_symtab. +atom_by_index_table: []AtomIndex = undefined, +/// Can be undefined as set together with in_symtab. +globals_lookup: []i64 = undefined, +/// Can be undefined as set together with in_symtab. +relocs_lookup: []RelocEntry = undefined, + +/// All relocations sorted and flatened, sorted by address descending +/// per section. +relocations: std.ArrayListUnmanaged(macho.relocation_info) = .{}, +/// Beginning index to the relocations array for each input section +/// defined within this Object file. +section_relocs_lookup: std.ArrayListUnmanaged(u32) = .{}, + +/// Data-in-code records sorted by address. 
+data_in_code: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, + +atoms: std.ArrayListUnmanaged(AtomIndex) = .{}, +exec_atoms: std.ArrayListUnmanaged(AtomIndex) = .{}, + +eh_frame_sect_id: ?u8 = null, +eh_frame_relocs_lookup: std.AutoArrayHashMapUnmanaged(u32, Record) = .{}, +eh_frame_records_lookup: std.AutoArrayHashMapUnmanaged(AtomIndex, u32) = .{}, + +unwind_info_sect_id: ?u8 = null, +unwind_relocs_lookup: []Record = undefined, +unwind_records_lookup: std.AutoHashMapUnmanaged(AtomIndex, u32) = .{}, + +const RelocEntry = struct { start: u32, len: u32 }; + +const Record = struct { + dead: bool, + reloc: RelocEntry, +}; + +pub fn deinit(self: *Object, gpa: Allocator) void { + self.atoms.deinit(gpa); + self.exec_atoms.deinit(gpa); + // ZAR MODIFICATION: + // We manage memory of this ourselves in zar - so + // freeing this here for that does not make much sense. + //gpa.free(self.name); + //gpa.free(self.contents); + if (self.in_symtab) |_| { + gpa.free(self.source_symtab_lookup); + gpa.free(self.reverse_symtab_lookup); + gpa.free(self.source_address_lookup); + gpa.free(self.source_section_index_lookup); + gpa.free(self.strtab_lookup); + gpa.free(self.symtab); + gpa.free(self.atom_by_index_table); + gpa.free(self.globals_lookup); + gpa.free(self.relocs_lookup); + } + self.eh_frame_relocs_lookup.deinit(gpa); + self.eh_frame_records_lookup.deinit(gpa); + if (self.hasUnwindRecords()) { + gpa.free(self.unwind_relocs_lookup); + } + self.unwind_records_lookup.deinit(gpa); + self.relocations.deinit(gpa); + self.section_relocs_lookup.deinit(gpa); + self.data_in_code.deinit(gpa); +} + +pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) !void { + const tracy = trace(@src()); + defer tracy.end(); + + var stream = std.io.fixedBufferStream(self.contents); + const reader = stream.reader(); + + self.header = try reader.readStruct(macho.mach_header_64); + + if (self.header.filetype != macho.MH_OBJECT) { + log.debug("invalid filetype: expected 0x{x}, found 0x{x}", .{ + macho.MH_OBJECT, + self.header.filetype, + }); + return error.NotObject; + } + + const this_arch: std.Target.Cpu.Arch = switch (self.header.cputype) { + macho.CPU_TYPE_ARM64 => .aarch64, + macho.CPU_TYPE_X86_64 => .x86_64, + else => |value| { + log.err("unsupported cpu architecture 0x{x}", .{value}); + return error.UnsupportedCpuArchitecture; + }, + }; + + // ZAR MODIFICATION: This check doesn't serve any purpose for the needs of + // zar. + _ = this_arch; + _ = cpu_arch; + // if (this_arch != cpu_arch) { + // log.err("mismatched cpu architecture: expected {s}, found {s}", .{ + // @tagName(cpu_arch), + // @tagName(this_arch), + // }); + // return error.MismatchedCpuArchitecture; + // } + + var it = LoadCommandIterator{ + .ncmds = self.header.ncmds, + .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], + }; + const nsects = self.getSourceSections().len; + + // Prepopulate relocations per section lookup table. 
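+    // One slot per source section; each slot later receives the start index of that
+    // section's relocations within `relocations` (filled in by parseRelocs).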
+ try self.section_relocs_lookup.resize(allocator, nsects); + @memset(self.section_relocs_lookup.items, 0); + + const symtab = while (it.next()) |cmd| switch (cmd.cmd()) { + .SYMTAB => break cmd.cast(macho.symtab_command).?, + else => {}, + } else return; + + self.in_symtab = @as([*]align(1) const macho.nlist_64, @ptrCast(self.contents.ptr + symtab.symoff))[0..symtab.nsyms]; + self.in_strtab = self.contents[symtab.stroff..][0..symtab.strsize]; + + self.symtab = try allocator.alloc(macho.nlist_64, self.in_symtab.?.len + nsects); + self.source_symtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len); + self.reverse_symtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len); + self.strtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len); + self.globals_lookup = try allocator.alloc(i64, self.in_symtab.?.len); + self.atom_by_index_table = try allocator.alloc(AtomIndex, self.in_symtab.?.len + nsects); + self.relocs_lookup = try allocator.alloc(RelocEntry, self.in_symtab.?.len + nsects); + // This is wasteful but we need to be able to lookup source symbol address after stripping and + // allocating of sections. + self.source_address_lookup = try allocator.alloc(i64, self.in_symtab.?.len); + self.source_section_index_lookup = try allocator.alloc(i64, nsects); + + for (self.symtab) |*sym| { + sym.* = .{ + .n_value = 0, + .n_sect = 0, + .n_desc = 0, + .n_strx = 0, + .n_type = 0, + }; + } + + @memset(self.globals_lookup, -1); + @memset(self.atom_by_index_table, 0); + @memset(self.source_section_index_lookup, -1); + @memset(self.relocs_lookup, .{ + .start = 0, + .len = 0, + }); + + // You would expect that the symbol table is at least pre-sorted based on symbol's type: + // local < extern defined < undefined. Unfortunately, this is not guaranteed! For instance, + // the GO compiler does not necessarily respect that therefore we sort immediately by type + // and address within. + var sorted_all_syms = try std.ArrayList(SymbolAtIndex).initCapacity(allocator, self.in_symtab.?.len); + defer sorted_all_syms.deinit(); + + for (self.in_symtab.?, 0..) |_, index| { + sorted_all_syms.appendAssumeCapacity(.{ .index = @as(u32, @intCast(index)) }); + } + + // We sort by type: defined < undefined, and + // afterwards by address in each group. Normally, dysymtab should + // be enough to guarantee the sort, but turns out not every compiler + // is kind enough to specify the symbols in the correct order. + sort.heap(SymbolAtIndex, sorted_all_syms.items, self, SymbolAtIndex.lessThan); + + for (sorted_all_syms.items, 0..) |sym_id, i| { + const sym = sym_id.getSymbol(self); + + if (sym.sect() and self.source_section_index_lookup[sym.n_sect - 1] == -1) { + self.source_section_index_lookup[sym.n_sect - 1] = @as(i64, @intCast(i)); + } + + self.symtab[i] = sym; + self.source_address_lookup[i] = if (sym.undf()) -1 else @as(i64, @intCast(sym.n_value)); + self.source_symtab_lookup[i] = sym_id.index; + self.reverse_symtab_lookup[sym_id.index] = @as(u32, @intCast(i)); + + const sym_name_len = mem.sliceTo(@as([*:0]const u8, @ptrCast(self.in_strtab.?.ptr + sym.n_strx)), 0).len + 1; + self.strtab_lookup[i] = @as(u32, @intCast(sym_name_len)); + } + + // Parse __TEXT,__eh_frame if one exists. + self.eh_frame_sect_id = self.getSourceSectionIndexByName("__TEXT", "__eh_frame"); + + // Parse __LD,__compact_unwind if one exists. 
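+    // Every record starts out marked dead; parseUnwindInfo later revives those whose
+    // target function is defined in this object.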
+ self.unwind_info_sect_id = self.getSourceSectionIndexByName("__LD", "__compact_unwind"); + if (self.hasUnwindRecords()) { + self.unwind_relocs_lookup = try allocator.alloc(Record, self.getUnwindRecords().len); + @memset(self.unwind_relocs_lookup, .{ + .dead = true, + .reloc = .{ + .start = 0, + .len = 0, + }, + }); + } +} + +const SymbolAtIndex = struct { + index: u32, + + const Context = *const Object; + + fn getSymbol(self: SymbolAtIndex, ctx: Context) macho.nlist_64 { + return ctx.in_symtab.?[self.index]; + } + + fn getSymbolName(self: SymbolAtIndex, ctx: Context) []const u8 { + const off = self.getSymbol(ctx).n_strx; + return mem.sliceTo(@as([*:0]const u8, @ptrCast(ctx.in_strtab.?.ptr + off)), 0); + } + + fn getSymbolSeniority(self: SymbolAtIndex, ctx: Context) u2 { + const sym = self.getSymbol(ctx); + if (!sym.ext()) { + const sym_name = self.getSymbolName(ctx); + if (mem.startsWith(u8, sym_name, "l") or mem.startsWith(u8, sym_name, "L")) return 0; + return 1; + } + if (sym.weakDef() or sym.pext()) return 2; + return 3; + } + + /// Performs lexicographic-like check. + /// * lhs and rhs defined + /// * if lhs == rhs + /// * if lhs.n_sect == rhs.n_sect + /// * ext < weak < local < temp + /// * lhs.n_sect < rhs.n_sect + /// * lhs < rhs + /// * !rhs is undefined + fn lessThan(ctx: Context, lhs_index: SymbolAtIndex, rhs_index: SymbolAtIndex) bool { + const lhs = lhs_index.getSymbol(ctx); + const rhs = rhs_index.getSymbol(ctx); + if (lhs.sect() and rhs.sect()) { + if (lhs.n_value == rhs.n_value) { + if (lhs.n_sect == rhs.n_sect) { + const lhs_senior = lhs_index.getSymbolSeniority(ctx); + const rhs_senior = rhs_index.getSymbolSeniority(ctx); + if (lhs_senior == rhs_senior) { + return lessThanByNStrx(ctx, lhs_index, rhs_index); + } else return lhs_senior < rhs_senior; + } else return lhs.n_sect < rhs.n_sect; + } else return lhs.n_value < rhs.n_value; + } else if (lhs.undf() and rhs.undf()) { + return lessThanByNStrx(ctx, lhs_index, rhs_index); + } else return rhs.undf(); + } + + fn lessThanByNStrx(ctx: Context, lhs: SymbolAtIndex, rhs: SymbolAtIndex) bool { + return lhs.getSymbol(ctx).n_strx < rhs.getSymbol(ctx).n_strx; + } +}; + +fn filterSymbolsBySection(symbols: []macho.nlist_64, n_sect: u8) struct { + index: u32, + len: u32, +} { + const FirstMatch = struct { + n_sect: u8, + + pub fn predicate(pred: @This(), symbol: macho.nlist_64) bool { + return symbol.n_sect == pred.n_sect; + } + }; + const FirstNonMatch = struct { + n_sect: u8, + + pub fn predicate(pred: @This(), symbol: macho.nlist_64) bool { + return symbol.n_sect != pred.n_sect; + } + }; + + const index = MachO.lsearch(macho.nlist_64, symbols, FirstMatch{ + .n_sect = n_sect, + }); + const len = MachO.lsearch(macho.nlist_64, symbols[index..], FirstNonMatch{ + .n_sect = n_sect, + }); + + return .{ .index = @as(u32, @intCast(index)), .len = @as(u32, @intCast(len)) }; +} + +fn filterSymbolsByAddress(symbols: []macho.nlist_64, start_addr: u64, end_addr: u64) struct { + index: u32, + len: u32, +} { + const Predicate = struct { + addr: u64, + + pub fn predicate(pred: @This(), symbol: macho.nlist_64) bool { + return symbol.n_value >= pred.addr; + } + }; + + const index = MachO.lsearch(macho.nlist_64, symbols, Predicate{ + .addr = start_addr, + }); + const len = MachO.lsearch(macho.nlist_64, symbols[index..], Predicate{ + .addr = end_addr, + }); + + return .{ .index = @as(u32, @intCast(index)), .len = @as(u32, @intCast(len)) }; +} + +const SortedSection = struct { + header: macho.section_64, + id: u8, +}; + +fn 
sectionLessThanByAddress(ctx: void, lhs: SortedSection, rhs: SortedSection) bool { + _ = ctx; + if (lhs.header.addr == rhs.header.addr) { + return lhs.id < rhs.id; + } + return lhs.header.addr < rhs.header.addr; +} + +pub fn splitIntoAtoms(self: *Object, macho_file: *MachO, object_id: u32) !void { + log.debug("splitting object({d}, {s}) into atoms", .{ object_id, self.name }); + + try self.splitRegularSections(macho_file, object_id); + try self.parseEhFrameSection(macho_file, object_id); + try self.parseUnwindInfo(macho_file, object_id); + try self.parseDataInCode(macho_file.base.allocator); +} + +pub fn splitRegularSections(self: *Object, macho_file: *MachO, object_id: u32) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = macho_file.base.allocator; + + const sections = self.getSourceSections(); + for (sections, 0..) |sect, id| { + if (sect.isDebug()) continue; + const out_sect_id = (try macho_file.getOutputSection(sect)) orelse { + log.debug(" unhandled section '{s},{s}'", .{ sect.segName(), sect.sectName() }); + continue; + }; + if (sect.size == 0) continue; + + const sect_id = @as(u8, @intCast(id)); + const sym = self.getSectionAliasSymbolPtr(sect_id); + sym.* = .{ + .n_strx = 0, + .n_type = macho.N_SECT, + .n_sect = out_sect_id + 1, + .n_desc = 0, + .n_value = sect.addr, + }; + } + + if (self.in_symtab == null) { + for (sections, 0..) |sect, id| { + if (sect.isDebug()) continue; + const out_sect_id = (try macho_file.getOutputSection(sect)) orelse continue; + if (sect.size == 0) continue; + + const sect_id = @as(u8, @intCast(id)); + const sym_index = self.getSectionAliasSymbolIndex(sect_id); + const atom_index = try self.createAtomFromSubsection( + macho_file, + object_id, + sym_index, + 0, + 0, + sect.size, + sect.@"align", + out_sect_id, + ); + macho_file.addAtomToSection(atom_index); + } + return; + } + + // Well, shit, sometimes compilers skip the dysymtab load command altogether, meaning we + // have to infer the start of undef section in the symtab ourselves. + const iundefsym = blk: { + const dysymtab = self.parseDysymtab() orelse { + var iundefsym: usize = self.in_symtab.?.len; + while (iundefsym > 0) : (iundefsym -= 1) { + const sym = self.symtab[iundefsym - 1]; + if (sym.sect()) break; + } + break :blk iundefsym; + }; + break :blk dysymtab.iundefsym; + }; + + // We only care about defined symbols, so filter every other out. + const symtab = try gpa.dupe(macho.nlist_64, self.symtab[0..iundefsym]); + defer gpa.free(symtab); + + const subsections_via_symbols = self.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; + + // Sort section headers by address. + var sorted_sections = try gpa.alloc(SortedSection, sections.len); + defer gpa.free(sorted_sections); + + for (sections, 0..) |sect, id| { + sorted_sections[id] = .{ .header = sect, .id = @as(u8, @intCast(id)) }; + } + + std.sort.sort(SortedSection, sorted_sections, {}, sectionLessThanByAddress); + + var sect_sym_index: u32 = 0; + for (sorted_sections) |section| { + const sect = section.header; + if (sect.isDebug()) continue; + + const sect_id = section.id; + log.debug("splitting section '{s},{s}' into atoms", .{ sect.segName(), sect.sectName() }); + + // Get output segment/section in the final artifact. 
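+        // A null output section means the linker drops this input section, so no atoms
+        // are created from it.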
+ const out_sect_id = (try macho_file.getOutputSection(sect)) orelse continue; + + log.debug(" output sect({d}, '{s},{s}')", .{ + out_sect_id + 1, + macho_file.sections.items(.header)[out_sect_id].segName(), + macho_file.sections.items(.header)[out_sect_id].sectName(), + }); + + try self.parseRelocs(gpa, section.id); + + const cpu_arch = macho_file.options.target.cpu_arch.?; + const sect_loc = filterSymbolsBySection(symtab[sect_sym_index..], sect_id + 1); + const sect_start_index = sect_sym_index + sect_loc.index; + + sect_sym_index += sect_loc.len; + + if (sect.size == 0) continue; + if (subsections_via_symbols and sect_loc.len > 0) { + // If the first nlist does not match the start of the section, + // then we need to encapsulate the memory range [section start, first symbol) + // as a temporary symbol and insert the matching Atom. + const first_sym = symtab[sect_start_index]; + if (first_sym.n_value > sect.addr) { + const sym_index = self.getSectionAliasSymbolIndex(sect_id); + const atom_size = first_sym.n_value - sect.addr; + const atom_index = try self.createAtomFromSubsection( + macho_file, + object_id, + sym_index, + 0, + 0, + atom_size, + sect.@"align", + out_sect_id, + ); + if (!sect.isZerofill()) { + try self.cacheRelocs(macho_file, atom_index); + } + macho_file.addAtomToSection(atom_index); + } + + var next_sym_index = sect_start_index; + while (next_sym_index < sect_start_index + sect_loc.len) { + const next_sym = symtab[next_sym_index]; + const addr = next_sym.n_value; + const atom_loc = filterSymbolsByAddress(symtab[next_sym_index..], addr, addr + 1); + assert(atom_loc.len > 0); + const atom_sym_index = atom_loc.index + next_sym_index; + const nsyms_trailing = atom_loc.len - 1; + next_sym_index += atom_loc.len; + + const atom_size = if (next_sym_index < sect_start_index + sect_loc.len) + symtab[next_sym_index].n_value - addr + else + sect.addr + sect.size - addr; + + const atom_align = if (addr > 0) + math.min(@ctz(addr), sect.@"align") + else + sect.@"align"; + + const atom_index = try self.createAtomFromSubsection( + macho_file, + object_id, + atom_sym_index, + atom_sym_index + 1, + nsyms_trailing, + atom_size, + atom_align, + out_sect_id, + ); + + // TODO rework this at the relocation level + if (cpu_arch == .x86_64 and addr == sect.addr) { + // In x86_64 relocs, it can so happen that the compiler refers to the same + // atom by both the actual assigned symbol and the start of the section. In this + // case, we need to link the two together so add an alias. 
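+                    // Map the section-alias symbol to the same atom so relocations written
+                    // against the section start resolve to this atom as well.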
+ const alias_index = self.getSectionAliasSymbolIndex(sect_id); + self.atom_by_index_table[alias_index] = atom_index; + } + if (!sect.isZerofill()) { + try self.cacheRelocs(macho_file, atom_index); + } + macho_file.addAtomToSection(atom_index); + } + } else { + const alias_index = self.getSectionAliasSymbolIndex(sect_id); + const atom_index = try self.createAtomFromSubsection( + macho_file, + object_id, + alias_index, + sect_start_index, + sect_loc.len, + sect.size, + sect.@"align", + out_sect_id, + ); + if (!sect.isZerofill()) { + try self.cacheRelocs(macho_file, atom_index); + } + macho_file.addAtomToSection(atom_index); + } + } +} + +fn createAtomFromSubsection( + self: *Object, + macho_file: *MachO, + object_id: u32, + sym_index: u32, + inner_sym_index: u32, + inner_nsyms_trailing: u32, + size: u64, + alignment: u32, + out_sect_id: u8, +) !AtomIndex { + const gpa = macho_file.base.allocator; + const atom_index = try macho_file.createEmptyAtom(sym_index, size, alignment); + const atom = macho_file.getAtomPtr(atom_index); + atom.inner_sym_index = inner_sym_index; + atom.inner_nsyms_trailing = inner_nsyms_trailing; + atom.file = object_id + 1; + self.symtab[sym_index].n_sect = out_sect_id + 1; + + log.debug("creating ATOM(%{d}, '{s}') in sect({d}, '{s},{s}') in object({d})", .{ + sym_index, + self.getSymbolName(sym_index), + out_sect_id + 1, + macho_file.sections.items(.header)[out_sect_id].segName(), + macho_file.sections.items(.header)[out_sect_id].sectName(), + object_id, + }); + + try self.atoms.append(gpa, atom_index); + self.atom_by_index_table[sym_index] = atom_index; + + var it = Atom.getInnerSymbolsIterator(macho_file, atom_index); + while (it.next()) |sym_loc| { + const inner = macho_file.getSymbolPtr(sym_loc); + inner.n_sect = out_sect_id + 1; + self.atom_by_index_table[sym_loc.sym_index] = atom_index; + } + + const out_sect = macho_file.sections.items(.header)[out_sect_id]; + if (out_sect.isCode() and + mem.eql(u8, "__TEXT", out_sect.segName()) and + mem.eql(u8, "__text", out_sect.sectName())) + { + // TODO currently assuming a single section for executable machine code + try self.exec_atoms.append(gpa, atom_index); + } + + return atom_index; +} + +fn filterRelocs( + relocs: []align(1) const macho.relocation_info, + start_addr: u64, + end_addr: u64, +) RelocEntry { + const Predicate = struct { + addr: u64, + + pub fn predicate(self: @This(), rel: macho.relocation_info) bool { + return rel.r_address >= self.addr; + } + }; + const LPredicate = struct { + addr: u64, + + pub fn predicate(self: @This(), rel: macho.relocation_info) bool { + return rel.r_address < self.addr; + } + }; + + const start = MachO.bsearch(macho.relocation_info, relocs, Predicate{ .addr = end_addr }); + const len = MachO.lsearch(macho.relocation_info, relocs[start..], LPredicate{ .addr = start_addr }); + + return .{ .start = @as(u32, @intCast(start)), .len = @as(u32, @intCast(len)) }; +} + +/// Parse all relocs for the input section, and sort in descending order. +/// Previously, I have wrongly assumed the compilers output relocations for each +/// section in a sorted manner which is simply not true. 
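+/// For example, relocations at r_address 0x10, 0x0 and 0x8 end up stored as 0x10, 0x8,
+/// 0x0; filterRelocs' bsearch/lsearch pair relies on that descending order.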
+fn parseRelocs(self: *Object, gpa: Allocator, sect_id: u8) !void { + const section = self.getSourceSection(sect_id); + const start = @as(u32, @intCast(self.relocations.items.len)); + if (self.getSourceRelocs(section)) |relocs| { + try self.relocations.ensureUnusedCapacity(gpa, relocs.len); + self.relocations.appendUnalignedSliceAssumeCapacity(relocs); + std.sort.sort(macho.relocation_info, self.relocations.items[start..], {}, relocGreaterThan); + } + self.section_relocs_lookup.items[sect_id] = start; +} + +fn cacheRelocs(self: *Object, macho_file: *MachO, atom_index: AtomIndex) !void { + const atom = macho_file.getAtom(atom_index); + + const source_sect_id = if (self.getSourceSymbol(atom.sym_index)) |source_sym| blk: { + break :blk source_sym.n_sect - 1; + } else blk: { + // If there was no matching symbol present in the source symtab, this means + // we are dealing with either an entire section, or part of it, but also + // starting at the beginning. + const nbase = @as(u32, @intCast(self.in_symtab.?.len)); + const sect_id = @as(u8, @intCast(atom.sym_index - nbase)); + break :blk sect_id; + }; + const source_sect = self.getSourceSection(source_sect_id); + assert(!source_sect.isZerofill()); + const relocs = self.getRelocs(source_sect_id); + + self.relocs_lookup[atom.sym_index] = if (self.getSourceSymbol(atom.sym_index)) |source_sym| blk: { + const offset = source_sym.n_value - source_sect.addr; + break :blk filterRelocs(relocs, offset, offset + atom.size); + } else filterRelocs(relocs, 0, atom.size); +} + +fn relocGreaterThan(ctx: void, lhs: macho.relocation_info, rhs: macho.relocation_info) bool { + _ = ctx; + return lhs.r_address > rhs.r_address; +} + +fn parseEhFrameSection(self: *Object, macho_file: *MachO, object_id: u32) !void { + const sect_id = self.eh_frame_sect_id orelse return; + const sect = self.getSourceSection(sect_id); + + log.debug("parsing __TEXT,__eh_frame section", .{}); + + if (macho_file.getSectionByName("__TEXT", "__eh_frame") == null) { + _ = try macho_file.initSection("__TEXT", "__eh_frame", .{}); + } + + const gpa = macho_file.base.allocator; + const cpu_arch = macho_file.options.target.cpu_arch.?; + try self.parseRelocs(gpa, sect_id); + const relocs = self.getRelocs(sect_id); + + var it = self.getEhFrameRecordsIterator(); + var record_count: u32 = 0; + while (try it.next()) |_| { + record_count += 1; + } + + try self.eh_frame_relocs_lookup.ensureTotalCapacity(gpa, record_count); + try self.eh_frame_records_lookup.ensureTotalCapacity(gpa, record_count); + + it.reset(); + + while (try it.next()) |record| { + const offset = it.pos - record.getSize(); + const rel_pos = switch (cpu_arch) { + .aarch64 => filterRelocs(relocs, offset, offset + record.getSize()), + .x86_64 => RelocEntry{ .start = 0, .len = 0 }, + else => unreachable, + }; + self.eh_frame_relocs_lookup.putAssumeCapacityNoClobber(offset, .{ + .dead = false, + .reloc = rel_pos, + }); + + if (record.tag == .fde) { + const target = blk: { + switch (cpu_arch) { + .aarch64 => { + assert(rel_pos.len > 0); // TODO convert to an error as the FDE eh frame is malformed + // Find function symbol that this record describes + const rel = relocs[rel_pos.start..][rel_pos.len - 1]; + const target = UnwindInfo.parseRelocTarget( + macho_file, + object_id, + rel, + it.data[offset..], + @as(i32, @intCast(offset)), + ); + break :blk target; + }, + .x86_64 => { + const target_address = record.getTargetSymbolAddress(.{ + .base_addr = sect.addr, + .base_offset = offset, + }); + const target_sym_index = 
self.getSymbolByAddress(target_address, null); + const target = if (self.getGlobal(target_sym_index)) |global_index| + macho_file.globals.items[global_index] + else + MachO.SymbolWithLoc{ .sym_index = target_sym_index, .file = object_id + 1 }; + break :blk target; + }, + else => unreachable, + } + }; + log.debug("FDE at offset {x} tracks {s}", .{ offset, macho_file.getSymbolName(target) }); + if (target.getFile() != object_id) { + self.eh_frame_relocs_lookup.getPtr(offset).?.dead = true; + } else { + const atom_index = self.getAtomIndexForSymbol(target.sym_index).?; + self.eh_frame_records_lookup.putAssumeCapacityNoClobber(atom_index, offset); + } + } + } +} + +fn parseUnwindInfo(self: *Object, macho_file: *MachO, object_id: u32) !void { + const sect_id = self.unwind_info_sect_id orelse { + // If it so happens that the object had `__eh_frame` section defined but no `__compact_unwind`, + // we will try fully synthesising unwind info records to somewhat match Apple ld's + // approach. However, we will only synthesise DWARF records and nothing more. For this reason, + // we still create the output `__TEXT,__unwind_info` section. + if (self.hasEhFrameRecords()) { + if (macho_file.getSectionByName("__TEXT", "__unwind_info") == null) { + _ = try macho_file.initSection("__TEXT", "__unwind_info", .{}); + } + } + return; + }; + + log.debug("parsing unwind info in {s}", .{self.name}); + + const gpa = macho_file.base.allocator; + const cpu_arch = macho_file.options.target.cpu_arch.?; + + if (macho_file.getSectionByName("__TEXT", "__unwind_info") == null) { + _ = try macho_file.initSection("__TEXT", "__unwind_info", .{}); + } + + try self.unwind_records_lookup.ensureTotalCapacity(gpa, @as(u32, @intCast(self.exec_atoms.items.len))); + + const unwind_records = self.getUnwindRecords(); + + const needs_eh_frame = for (unwind_records) |record| { + if (UnwindInfo.UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) break true; + } else false; + + if (needs_eh_frame and !self.hasEhFrameRecords()) { + log.err("missing __TEXT,__eh_frame section", .{}); + log.err(" in object {s}", .{self.name}); + return error.MissingEhFrameSection; + } + + try self.parseRelocs(gpa, sect_id); + const relocs = self.getRelocs(sect_id); + + for (unwind_records, 0..) 
|record, record_id| { + const offset = record_id * @sizeOf(macho.compact_unwind_entry); + const rel_pos = filterRelocs( + relocs, + offset, + offset + @sizeOf(macho.compact_unwind_entry), + ); + assert(rel_pos.len > 0); // TODO convert to an error as the unwind info is malformed + self.unwind_relocs_lookup[record_id] = .{ + .dead = false, + .reloc = rel_pos, + }; + + // Find function symbol that this record describes + const rel = relocs[rel_pos.start..][rel_pos.len - 1]; + const target = UnwindInfo.parseRelocTarget( + macho_file, + object_id, + rel, + mem.asBytes(&record), + @as(i32, @intCast(offset)), + ); + log.debug("unwind record {d} tracks {s}", .{ record_id, macho_file.getSymbolName(target) }); + if (target.getFile() != object_id) { + self.unwind_relocs_lookup[record_id].dead = true; + } else { + const atom_index = self.getAtomIndexForSymbol(target.sym_index).?; + self.unwind_records_lookup.putAssumeCapacityNoClobber(atom_index, @as(u32, @intCast(record_id))); + } + } +} + +pub fn getSourceSymbol(self: Object, index: u32) ?macho.nlist_64 { + const symtab = self.in_symtab.?; + if (index >= symtab.len) return null; + const mapped_index = self.source_symtab_lookup[index]; + return symtab[mapped_index]; +} + +pub fn getSourceSection(self: Object, index: u8) macho.section_64 { + const sections = self.getSourceSections(); + assert(index < sections.len); + return sections[index]; +} + +pub fn getSourceSectionByName(self: Object, segname: []const u8, sectname: []const u8) ?macho.section_64 { + const index = self.getSourceSectionIndexByName(segname, sectname) orelse return null; + const sections = self.getSourceSections(); + return sections[index]; +} + +pub fn getSourceSectionIndexByName(self: Object, segname: []const u8, sectname: []const u8) ?u8 { + const sections = self.getSourceSections(); + for (sections, 0..) 
|sect, i| { + if (mem.eql(u8, segname, sect.segName()) and mem.eql(u8, sectname, sect.sectName())) + return @as(u8, @intCast(i)); + } else return null; +} + +pub fn getSourceSections(self: Object) []const macho.section_64 { + var it = LoadCommandIterator{ + .ncmds = self.header.ncmds, + .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], + }; + while (it.next()) |cmd| switch (cmd.cmd()) { + .SEGMENT_64 => { + return cmd.getSections(); + }, + else => {}, + } else unreachable; +} + +pub fn parseDataInCode(self: *Object, gpa: Allocator) !void { + var it = LoadCommandIterator{ + .ncmds = self.header.ncmds, + .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], + }; + const cmd = while (it.next()) |cmd| { + switch (cmd.cmd()) { + .DATA_IN_CODE => break cmd.cast(macho.linkedit_data_command).?, + else => {}, + } + } else return; + const ndice = @divExact(cmd.datasize, @sizeOf(macho.data_in_code_entry)); + const dice = @as([*]align(1) const macho.data_in_code_entry, @ptrCast(self.contents.ptr + cmd.dataoff))[0..ndice]; + try self.data_in_code.ensureTotalCapacityPrecise(gpa, dice.len); + self.data_in_code.appendUnalignedSliceAssumeCapacity(dice); + std.sort.sort(macho.data_in_code_entry, self.data_in_code.items, {}, diceLessThan); +} + +fn diceLessThan(ctx: void, lhs: macho.data_in_code_entry, rhs: macho.data_in_code_entry) bool { + _ = ctx; + return lhs.offset < rhs.offset; +} + +fn parseDysymtab(self: Object) ?macho.dysymtab_command { + var it = LoadCommandIterator{ + .ncmds = self.header.ncmds, + .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], + }; + while (it.next()) |cmd| { + switch (cmd.cmd()) { + .DYSYMTAB => { + return cmd.cast(macho.dysymtab_command).?; + }, + else => {}, + } + } else return null; +} + +pub fn parseDwarfInfo(self: Object) DwarfInfo { + var di = DwarfInfo{ + .debug_info = &[0]u8{}, + .debug_abbrev = &[0]u8{}, + .debug_str = &[0]u8{}, + }; + for (self.getSourceSections()) |sect| { + if (!sect.isDebug()) continue; + const sectname = sect.sectName(); + if (mem.eql(u8, sectname, "__debug_info")) { + di.debug_info = self.getSectionContents(sect); + } else if (mem.eql(u8, sectname, "__debug_abbrev")) { + di.debug_abbrev = self.getSectionContents(sect); + } else if (mem.eql(u8, sectname, "__debug_str")) { + di.debug_str = self.getSectionContents(sect); + } + } + return di; +} + +pub fn getSectionContents(self: Object, sect: macho.section_64) []const u8 { + const size = @as(usize, @intCast(sect.size)); + return self.contents[sect.offset..][0..size]; +} + +pub fn getSectionAliasSymbolIndex(self: Object, sect_id: u8) u32 { + const start = @as(u32, @intCast(self.in_symtab.?.len)); + return start + sect_id; +} + +pub fn getSectionAliasSymbol(self: *Object, sect_id: u8) macho.nlist_64 { + return self.symtab[self.getSectionAliasSymbolIndex(sect_id)]; +} + +pub fn getSectionAliasSymbolPtr(self: *Object, sect_id: u8) *macho.nlist_64 { + return &self.symtab[self.getSectionAliasSymbolIndex(sect_id)]; +} + +fn getSourceRelocs(self: Object, sect: macho.section_64) ?[]align(1) const macho.relocation_info { + if (sect.nreloc == 0) return null; + return @as([*]align(1) const macho.relocation_info, @ptrCast(self.contents.ptr + sect.reloff))[0..sect.nreloc]; +} + +pub fn getRelocs(self: Object, sect_id: u8) []const macho.relocation_info { + const sect = self.getSourceSection(sect_id); + const start = self.section_relocs_lookup.items[sect_id]; + const len = sect.nreloc; + return 
self.relocations.items[start..][0..len]; +} + +pub fn getSymbolName(self: Object, index: u32) []const u8 { + const strtab = self.in_strtab.?; + const sym = self.symtab[index]; + + if (self.getSourceSymbol(index) == null) { + assert(sym.n_strx == 0); + return ""; + } + + const start = sym.n_strx; + const len = self.strtab_lookup[index]; + + return strtab[start..][0 .. len - 1 :0]; +} + +pub fn getSymbolByAddress(self: Object, addr: u64, sect_hint: ?u8) u32 { + // Find containing atom + const Predicate = struct { + addr: i64, + + pub fn predicate(pred: @This(), other: i64) bool { + return if (other == -1) true else other > pred.addr; + } + }; + + if (sect_hint) |sect_id| { + if (self.source_section_index_lookup[sect_id] > -1) { + const first_sym_index = @as(usize, @intCast(self.source_section_index_lookup[sect_id])); + const target_sym_index = MachO.lsearch(i64, self.source_address_lookup[first_sym_index..], Predicate{ + .addr = @as(i64, @intCast(addr)), + }); + if (target_sym_index > 0) { + return @as(u32, @intCast(first_sym_index + target_sym_index - 1)); + } + } + return self.getSectionAliasSymbolIndex(sect_id); + } + + const target_sym_index = MachO.lsearch(i64, self.source_address_lookup, Predicate{ + .addr = @as(i64, @intCast(addr)), + }); + assert(target_sym_index > 0); + return @as(u32, @intCast(target_sym_index - 1)); +} + +pub fn getGlobal(self: Object, sym_index: u32) ?u32 { + if (self.globals_lookup[sym_index] == -1) return null; + return @as(u32, @intCast(self.globals_lookup[sym_index])); +} + +pub fn getAtomIndexForSymbol(self: Object, sym_index: u32) ?AtomIndex { + const atom_index = self.atom_by_index_table[sym_index]; + if (atom_index == 0) return null; + return atom_index; +} + +pub fn hasUnwindRecords(self: Object) bool { + return self.unwind_info_sect_id != null; +} + +pub fn getUnwindRecords(self: Object) []align(1) const macho.compact_unwind_entry { + const sect_id = self.unwind_info_sect_id orelse return &[0]macho.compact_unwind_entry{}; + const sect = self.getSourceSection(sect_id); + const data = self.getSectionContents(sect); + const num_entries = @divExact(data.len, @sizeOf(macho.compact_unwind_entry)); + return @as([*]align(1) const macho.compact_unwind_entry, @ptrCast(data))[0..num_entries]; +} + +pub fn hasEhFrameRecords(self: Object) bool { + return self.eh_frame_sect_id != null; +} + +pub fn getEhFrameRecordsIterator(self: Object) eh_frame.Iterator { + const sect_id = self.eh_frame_sect_id orelse return .{ .data = &[0]u8{} }; + const sect = self.getSourceSection(sect_id); + const data = self.getSectionContents(sect); + return .{ .data = data }; +} + +pub fn hasDataInCode(self: Object) bool { + return self.data_in_code.items.len > 0; +} diff --git a/src/archive/archive/zld/MachO/Options.zig b/src/archive/archive/zld/MachO/Options.zig new file mode 100644 index 000000000000..2de77b710b33 --- /dev/null +++ b/src/archive/archive/zld/MachO/Options.zig @@ -0,0 +1,495 @@ +const Options = @This(); + +const std = @import("std"); +const builtin = @import("builtin"); +const io = std.io; +const macho = std.macho; +const mem = std.mem; +const process = std.process; + +const Allocator = mem.Allocator; +const CrossTarget = std.zig.CrossTarget; +const MachO = @import("../MachO.zig"); +const Zld = @import("../Zld.zig"); + +pub const SearchStrategy = enum { + paths_first, + dylibs_first, +}; + +const usage = + \\Usage: {s} [files...] 
+    \\
+    \\General Options:
+    \\
+    \\-arch [name]
+    \\    Specifies which architecture the output file should be
+    \\
+    \\-current_version [value]
+    \\    Specifies the current version number of the library
+    \\
+    \\-compatibility_version [value]
+    \\    Specifies the compatibility version number of the library
+    \\
+    \\-dead_strip
+    \\    Remove functions and data that are unreachable by the entry point or exported symbols
+    \\
+    \\-dead_strip_dylibs
+    \\    Remove dylibs that are unreachable by the entry point or exported symbols
+    \\
+    \\-dylib
+    \\    Create dynamic library
+    \\
+    \\-dynamic
+    \\    Perform dynamic linking
+    \\
+    \\-e [name]
+    \\    Specifies the entry point of the main executable
+    \\
+    \\-force_load [path]
+    \\    Loads all members of the specified static archive library
+    \\
+    \\-framework [name]
+    \\    Link against framework
+    \\
+    \\-F[path]
+    \\    Add search path for frameworks
+    \\
+    \\-headerpad [value]
+    \\    Set minimum space for future expansion of the load commands in hexadecimal notation
+    \\
+    \\-headerpad_max_install_names
+    \\    Set enough space as if all paths were MAXPATHLEN
+    \\
+    \\-install_name
+    \\    Add dylib's install name
+    \\
+    \\-l[name]
+    \\    Link against library
+    \\
+    \\-L[path]
+    \\    Add search path for libraries
+    \\
+    \\-needed_framework [name]
+    \\    Link against framework (even if unused)
+    \\
+    \\-needed-l[name]
+    \\    Alias of -needed_library
+    \\
+    \\-needed_library [name]
+    \\    Link against library (even if unused)
+    \\
+    \\-rpath [path]
+    \\    Specify runtime path
+    \\
+    \\-pagezero_size [value]
+    \\    Size of the __PAGEZERO segment in hexadecimal notation
+    \\
+    \\-platform_version [platform] [min_version] [sdk_version]
+    \\    Sets the platform, oldest supported version of that platform and the SDK it was built against
+    \\
+    \\-S
+    \\    Do not put debug information (STABS or DWARF) in the output file
+    \\
+    \\-search_paths_first
+    \\    Search each dir in library search paths for `libx.dylib` then `libx.a`
+    \\
+    \\-search_dylibs_first
+    \\    Search `libx.dylib` in each dir in library search paths, then `libx.a`
+    \\
+    \\-stack_size [value]
+    \\    Size of the default stack in hexadecimal notation
+    \\
+    \\-syslibroot [path]
+    \\    Specify the syslibroot
+    \\
+    \\-undefined [value]
+    \\    Specifies how undefined symbols are to be treated: error (default), warning, suppress, or dynamic_lookup.
+ \\ + \\-weak_framework [name] + \\ Link against framework and mark it and all referenced symbols as weak + \\ + \\-weak-l[name] + \\ Alias of -weak_library + \\ + \\-weak_library [name] + \\ Link against library and mark it and all referenced symbols as weak + \\ + \\--entitlements + \\ (Linker extension) add path to entitlements file for embedding in code signature + \\ + \\-o [path] + \\ Specify output path for the final artifact + \\ + \\-h, --help + \\ Print this help and exit + \\ + \\--debug-log [scope] + \\ Turn on debugging logs for [scope] (requires zld compiled with -Dlog) +; + +emit: Zld.Emit, +output_mode: Zld.OutputMode, +target: CrossTarget, +platform_version: std.builtin.Version, +sdk_version: std.builtin.Version, +positionals: []const Zld.LinkObject, +libs: std.StringArrayHashMap(Zld.SystemLib), +frameworks: std.StringArrayHashMap(Zld.SystemLib), +lib_dirs: []const []const u8, +framework_dirs: []const []const u8, +rpath_list: []const []const u8, +dynamic: bool = false, +syslibroot: ?[]const u8 = null, +stack_size: ?u64 = null, +strip: bool = false, +entry: ?[]const u8 = null, +current_version: ?std.builtin.Version = null, +compatibility_version: ?std.builtin.Version = null, +install_name: ?[]const u8 = null, +entitlements: ?[]const u8 = null, +pagezero_size: ?u64 = null, +search_strategy: ?SearchStrategy = null, +headerpad: ?u32 = null, +headerpad_max_install_names: bool = false, +dead_strip: bool = false, +dead_strip_dylibs: bool = false, +allow_undef: bool = false, + +pub fn parseArgs(arena: Allocator, ctx: Zld.MainCtx) !Options { + if (ctx.args.len == 0) { + ctx.printSuccess(usage, .{ctx.cmd}); + } + + var positionals = std.ArrayList(Zld.LinkObject).init(arena); + var libs = std.StringArrayHashMap(Zld.SystemLib).init(arena); + var lib_dirs = std.ArrayList([]const u8).init(arena); + var frameworks = std.StringArrayHashMap(Zld.SystemLib).init(arena); + var framework_dirs = std.ArrayList([]const u8).init(arena); + var rpath_list = std.ArrayList([]const u8).init(arena); + var out_path: ?[]const u8 = null; + var syslibroot: ?[]const u8 = null; + var stack_size: ?u64 = null; + var dynamic: bool = false; + var dylib: bool = false; + var install_name: ?[]const u8 = null; + var current_version: ?std.builtin.Version = null; + var compatibility_version: ?std.builtin.Version = null; + var headerpad: ?u32 = null; + var headerpad_max_install_names: bool = false; + var pagezero_size: ?u64 = null; + var dead_strip: bool = false; + var dead_strip_dylibs: bool = false; + var entry: ?[]const u8 = null; + var strip: bool = false; + var allow_undef: bool = false; + var search_strategy: ?SearchStrategy = null; + + var target: ?CrossTarget = if (comptime builtin.target.isDarwin()) + CrossTarget.fromTarget(builtin.target) + else + null; + var platform_version: ?std.builtin.Version = if (comptime builtin.target.isDarwin()) + builtin.target.os.version_range.semver.min + else + null; + var sdk_version: ?std.builtin.Version = if (comptime builtin.target.isDarwin()) + builtin.target.os.version_range.semver.min + else + null; + + const Iterator = struct { + args: []const []const u8, + i: usize = 0, + fn next(it: *@This()) ?[]const u8 { + if (it.i >= it.args.len) { + return null; + } + defer it.i += 1; + return it.args[it.i]; + } + }; + var args_iter = Iterator{ .args = ctx.args }; + + while (args_iter.next()) |arg| { + if (mem.eql(u8, arg, "--help") or mem.eql(u8, arg, "-h")) { + ctx.printSuccess(usage, .{ctx.cmd}); + } else if (mem.eql(u8, arg, "--debug-log")) { + const scope = args_iter.next() 
                orelse ctx.printFailure("Expected log scope after {s}", .{arg});
+            try ctx.log_scopes.append(scope);
+        } else if (mem.eql(u8, arg, "-syslibroot")) {
+            syslibroot = args_iter.next() orelse ctx.printFailure("Expected path after {s}", .{arg});
+        } else if (mem.eql(u8, arg, "-search_paths_first")) {
+            search_strategy = .paths_first;
+        } else if (mem.eql(u8, arg, "-search_dylibs_first")) {
+            search_strategy = .dylibs_first;
+        } else if (mem.eql(u8, arg, "-framework")) {
+            const name = args_iter.next() orelse ctx.printFailure("Expected framework name after {s}", .{arg});
+            try frameworks.put(name, .{});
+        } else if (mem.startsWith(u8, arg, "-F")) {
+            try framework_dirs.append(arg[2..]);
+        } else if (mem.startsWith(u8, arg, "-needed-l")) {
+            try libs.put(arg["-needed-l".len..], .{ .needed = true });
+        } else if (mem.eql(u8, arg, "-needed_library")) {
+            const name = args_iter.next() orelse ctx.printFailure("Expected library name after {s}", .{arg});
+            try libs.put(name, .{ .needed = true });
+        } else if (mem.eql(u8, arg, "-needed_framework")) {
+            const name = args_iter.next() orelse ctx.printFailure("Expected framework name after {s}", .{arg});
+            try frameworks.put(name, .{ .needed = true });
+        } else if (mem.startsWith(u8, arg, "-weak-l")) {
+            try libs.put(arg["-weak-l".len..], .{ .weak = true });
+        } else if (mem.eql(u8, arg, "-weak_library")) {
+            const name = args_iter.next() orelse ctx.printFailure("Expected library name after {s}", .{arg});
+            try libs.put(name, .{ .weak = true });
+        } else if (mem.eql(u8, arg, "-weak_framework")) {
+            const name = args_iter.next() orelse ctx.printFailure("Expected framework name after {s}", .{arg});
+            try frameworks.put(name, .{ .weak = true });
+        } else if (mem.eql(u8, arg, "-o")) {
+            out_path = args_iter.next() orelse ctx.printFailure("Expected output path after {s}", .{arg});
+        } else if (mem.eql(u8, arg, "-stack_size")) {
+            const stack_s = args_iter.next() orelse
+                ctx.printFailure("Expected stack size value after {s}", .{arg});
+            stack_size = std.fmt.parseUnsigned(u64, eatIntPrefix(stack_s, 16), 16) catch |err| {
+                ctx.printFailure("Unable to parse '{s}': {s}", .{ arg, @errorName(err) });
+            };
+        } else if (mem.eql(u8, arg, "-dylib")) {
+            dylib = true;
+        } else if (mem.eql(u8, arg, "-dynamic")) {
+            dynamic = true;
+        } else if (mem.eql(u8, arg, "-static")) {
+            dynamic = false;
+        } else if (mem.eql(u8, arg, "-rpath")) {
+            const rpath = args_iter.next() orelse ctx.printFailure("Expected path after {s}", .{arg});
+            try rpath_list.append(rpath);
+        } else if (mem.eql(u8, arg, "-compatibility_version")) {
+            const raw = args_iter.next() orelse ctx.printFailure("Expected version after {s}", .{arg});
+            compatibility_version = std.builtin.Version.parse(raw) catch |err| {
+                ctx.printFailure("Unable to parse {s} {s}: {s}", .{ arg, raw, @errorName(err) });
+            };
+        } else if (mem.eql(u8, arg, "-current_version")) {
+            const raw = args_iter.next() orelse ctx.printFailure("Expected version after {s}", .{arg});
+            current_version = std.builtin.Version.parse(raw) catch |err| {
+                ctx.printFailure("Unable to parse {s} {s}: {s}", .{ arg, raw, @errorName(err) });
+            };
+        } else if (mem.eql(u8, arg, "-install_name")) {
+            install_name = args_iter.next() orelse ctx.printFailure("Expected argument after {s}", .{arg});
+        } else if (mem.eql(u8, arg, "-headerpad")) {
+            const headerpad_s = args_iter.next() orelse
+                ctx.printFailure("Expected headerpad size value after {s}", .{arg});
+            headerpad = std.fmt.parseUnsigned(u32, eatIntPrefix(headerpad_s, 16),
16) catch |err| { + ctx.printFailure("Unable to parse '{s}': {s}", .{ arg, @errorName(err) }); + }; + } else if (mem.eql(u8, arg, "-headerpad_max_install_names")) { + headerpad_max_install_names = true; + } else if (mem.eql(u8, arg, "-pagezero_size")) { + const pagezero_s = args_iter.next() orelse + ctx.printFailure("Expected pagezero size value after {s}", .{arg}); + pagezero_size = std.fmt.parseUnsigned(u64, eatIntPrefix(pagezero_s, 16), 16) catch |err| { + ctx.printFailure("Unable to parse '{s}': {s}", .{ arg, @errorName(err) }); + }; + } else if (mem.eql(u8, arg, "-dead_strip")) { + dead_strip = true; + } else if (mem.eql(u8, arg, "-dead_strip_dylibs")) { + dead_strip_dylibs = true; + } else if (mem.eql(u8, arg, "-e")) { + entry = args_iter.next() orelse ctx.printFailure("Expected symbol name after {s}", .{arg}); + } else if (mem.eql(u8, arg, "-S")) { + strip = true; + } else if (mem.eql(u8, arg, "-force_load")) { + const path = args_iter.next() orelse ctx.printFailure("Expected path after {s}", .{arg}); + try positionals.append(.{ + .path = path, + .must_link = true, + }); + } else if (mem.eql(u8, arg, "-arch")) { + const arch_s = args_iter.next() orelse + ctx.printFailure("Expected architecture name after {s}", .{arg}); + if (mem.eql(u8, arch_s, "arm64")) { + target.?.cpu_arch = .aarch64; + } else if (mem.eql(u8, arch_s, "x86_64")) { + target.?.cpu_arch = .x86_64; + } else { + ctx.printFailure("Failed to parse CPU architecture from '{s}'", .{arch_s}); + } + } else if (mem.eql(u8, arg, "-platform_version")) { + const platform = args_iter.next() orelse + ctx.printFailure("Expected platform name after {s}", .{arg}); + const min_v = args_iter.next() orelse + ctx.printFailure("Expected minimum platform version after {s} {s}", .{ arg, platform }); + const sdk_v = args_iter.next() orelse + ctx.printFailure("Expected SDK version after {s} {s} {s}", .{ arg, platform, min_v }); + + var tmp_target = CrossTarget{}; + + // First, try parsing platform as a numeric value. 
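+            // A numeric value is interpreted as a macho.PLATFORM ordinal; if it does
+            // not parse as an integer, we fall back to matching a textual platform
+            // name such as `macos` or `ios-simulator` below.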
+ if (std.fmt.parseUnsigned(u32, platform, 10)) |ord| { + switch (@as(macho.PLATFORM, @enumFromInt(ord))) { + .MACOS => tmp_target = .{ + .os_tag = .macos, + .abi = .none, + }, + .IOS => tmp_target = .{ + .os_tag = .ios, + .abi = .none, + }, + .TVOS => tmp_target = .{ + .os_tag = .tvos, + .abi = .none, + }, + .WATCHOS => tmp_target = .{ + .os_tag = .watchos, + .abi = .none, + }, + .IOSSIMULATOR => tmp_target = .{ + .os_tag = .ios, + .abi = .simulator, + }, + .TVOSSIMULATOR => tmp_target = .{ + .os_tag = .tvos, + .abi = .simulator, + }, + .WATCHOSSIMULATOR => tmp_target = .{ + .os_tag = .watchos, + .abi = .simulator, + }, + else => |x| ctx.printFailure("Unsupported Apple OS: {s}", .{@tagName(x)}), + } + } else |_| { + if (mem.eql(u8, platform, "macos")) { + tmp_target = .{ + .os_tag = .macos, + .abi = .none, + }; + } else if (mem.eql(u8, platform, "ios")) { + tmp_target = .{ + .os_tag = .ios, + .abi = .none, + }; + } else if (mem.eql(u8, platform, "tvos")) { + tmp_target = .{ + .os_tag = .tvos, + .abi = .none, + }; + } else if (mem.eql(u8, platform, "watchos")) { + tmp_target = .{ + .os_tag = .watchos, + .abi = .none, + }; + } else if (mem.eql(u8, platform, "ios-simulator")) { + tmp_target = .{ + .os_tag = .ios, + .abi = .simulator, + }; + } else if (mem.eql(u8, platform, "tvos-simulator")) { + tmp_target = .{ + .os_tag = .tvos, + .abi = .simulator, + }; + } else if (mem.eql(u8, platform, "watchos-simulator")) { + tmp_target = .{ + .os_tag = .watchos, + .abi = .simulator, + }; + } else { + ctx.printFailure("Unsupported Apple OS: {s}", .{platform}); + } + } + + if (target) |*tt| { + tt.os_tag = tmp_target.os_tag; + tt.abi = tmp_target.abi; + } + + platform_version = std.builtin.Version.parse(min_v) catch |err| { + ctx.printFailure("Failed to parse min_version '{s}': {s}", .{ min_v, @errorName(err) }); + }; + sdk_version = std.builtin.Version.parse(sdk_v) catch |err| { + ctx.printFailure("Failed to parse sdk_version '{s}': {s}", .{ sdk_v, @errorName(err) }); + }; + } else if (mem.eql(u8, arg, "-undefined")) { + const treatment = args_iter.next() orelse ctx.printFailure("Expected value after {s}", .{arg}); + if (mem.eql(u8, treatment, "error")) { + allow_undef = false; + } else if (mem.eql(u8, treatment, "warning") or mem.eql(u8, treatment, "suppress")) { + ctx.printFailure("TODO unimplemented -undefined {s} option", .{treatment}); + } else if (mem.eql(u8, treatment, "dynamic_lookup")) { + allow_undef = true; + } else { + ctx.printFailure("Unknown option -undefined {s}", .{treatment}); + } + } else if (mem.eql(u8, arg, "-lto_library")) { + const lto_lib = args_iter.next() orelse + ctx.printFailure("Expected path after {s}", .{arg}); + ctx.printFailure("TODO unimplemented -lto_library {s} option", .{lto_lib}); + } else if (mem.eql(u8, arg, "-demangle")) { + ctx.printFailure("TODO unimplemented -demangle option", .{}); + } else if (mem.startsWith(u8, arg, "-l")) { + try libs.put(arg[2..], .{}); + } else if (mem.startsWith(u8, arg, "-L")) { + try lib_dirs.append(arg[2..]); + } else { + try positionals.append(.{ + .path = arg, + .must_link = false, + }); + } + } + + if (positionals.items.len == 0) { + ctx.printFailure("Expected at least one input .o file", .{}); + } + if (target == null or target.?.cpu_arch == null) { + ctx.printFailure("Missing -arch when cross-linking", .{}); + } + if (target.?.os_tag == null) { + ctx.printFailure("Missing -platform_version when cross-linking", .{}); + } + + // Add some defaults + try lib_dirs.append("/usr/lib"); + try 
framework_dirs.append("/System/Library/Frameworks"); + + return Options{ + .emit = .{ + .directory = std.fs.cwd(), + .sub_path = out_path orelse "a.out", + }, + .dynamic = dynamic, + .target = target.?, + .platform_version = platform_version.?, + .sdk_version = sdk_version.?, + .output_mode = if (dylib) .lib else .exe, + .syslibroot = syslibroot, + .positionals = positionals.items, + .libs = libs, + .frameworks = frameworks, + .lib_dirs = lib_dirs.items, + .framework_dirs = framework_dirs.items, + .rpath_list = rpath_list.items, + .stack_size = stack_size, + .install_name = install_name, + .current_version = current_version, + .compatibility_version = compatibility_version, + .dead_strip = dead_strip, + .dead_strip_dylibs = dead_strip_dylibs, + .headerpad = headerpad, + .headerpad_max_install_names = headerpad_max_install_names, + .pagezero_size = pagezero_size, + .entry = entry, + .strip = strip, + .allow_undef = allow_undef, + .search_strategy = search_strategy, + }; +} + +fn eatIntPrefix(arg: []const u8, radix: u8) []const u8 { + if (arg.len > 2 and arg[0] == '0') { + switch (std.ascii.toLower(arg[1])) { + 'b' => if (radix == 2) return arg[2..], + 'o' => if (radix == 8) return arg[2..], + 'x' => if (radix == 16) return arg[2..], + else => {}, + } + } + return arg; +} diff --git a/src/archive/archive/zld/MachO/Trie.zig b/src/archive/archive/zld/MachO/Trie.zig new file mode 100644 index 000000000000..dcbb0306ce5c --- /dev/null +++ b/src/archive/archive/zld/MachO/Trie.zig @@ -0,0 +1,612 @@ +//! Represents export trie used in MachO executables and dynamic libraries. +//! The purpose of an export trie is to encode as compactly as possible all +//! export symbols for the loader `dyld`. +//! The export trie encodes offset and other information using ULEB128 +//! encoding, and is part of the __LINKEDIT segment. +//! +//! Description from loader.h: +//! +//! The symbols exported by a dylib are encoded in a trie. This is a compact +//! representation that factors out common prefixes. It also reduces LINKEDIT pages +//! in RAM because it encodes all information (name, address, flags) in one small, +//! contiguous range. The export area is a stream of nodes. The first node sequentially +//! is the start node for the trie. +//! +//! Nodes for a symbol start with a uleb128 that is the length of the exported symbol +//! information for the string so far. If there is no exported symbol, the node starts +//! with a zero byte. If there is exported info, it follows the length. +//! +//! First is a uleb128 containing flags. Normally, it is followed by a uleb128 encoded +//! offset which is location of the content named by the symbol from the mach_header +//! for the image. If the flags is EXPORT_SYMBOL_FLAGS_REEXPORT, then following the flags +//! is a uleb128 encoded library ordinal, then a zero terminated UTF8 string. If the string +//! is zero length, then the symbol is re-export from the specified dylib with the same name. +//! If the flags is EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then following the flags is two +//! uleb128s: the stub offset and the resolver offset. The stub is used by non-lazy pointers. +//! The resolver is used by lazy pointers and must be called to get the actual address to use. +//! +//! After the optional exported symbol information is a byte of how many edges (0-255) that +//! this node has leaving it, followed by each edge. Each edge is a zero terminated UTF8 of +//! the addition chars in the symbol, followed by a uleb128 offset for the node that edge points to. 
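+// A hypothetical example of the prefix factoring described above (symbol names
+// chosen purely for illustration): exporting `_main` and `_malloc` yields a root
+// with a single edge labelled `_ma`, whose target node has two edges, `in` and
+// `lloc`; only the two nodes reached through those edges carry terminal
+// (flags + offset) information.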
+const Trie = @This(); + +const std = @import("std"); +const mem = std.mem; +const leb = std.leb; +const log = std.log.scoped(.macho); +const macho = std.macho; +const testing = std.testing; +const assert = std.debug.assert; +const Allocator = mem.Allocator; + +pub const Node = struct { + base: *Trie, + + /// Terminal info associated with this node. + /// If this node is not a terminal node, info is null. + terminal_info: ?struct { + /// Export flags associated with this exported symbol. + export_flags: u64, + /// VM address offset wrt to the section this symbol is defined against. + vmaddr_offset: u64, + } = null, + + /// Offset of this node in the trie output byte stream. + trie_offset: ?u64 = null, + + /// List of all edges originating from this node. + edges: std.ArrayListUnmanaged(Edge) = .{}, + + node_dirty: bool = true, + + /// Edge connecting to nodes in the trie. + pub const Edge = struct { + from: *Node, + to: *Node, + label: []u8, + + fn deinit(self: *Edge, allocator: Allocator) void { + self.to.deinit(allocator); + allocator.destroy(self.to); + allocator.free(self.label); + self.from = undefined; + self.to = undefined; + self.label = undefined; + } + }; + + fn deinit(self: *Node, allocator: Allocator) void { + for (self.edges.items) |*edge| { + edge.deinit(allocator); + } + self.edges.deinit(allocator); + } + + /// Inserts a new node starting from `self`. + fn put(self: *Node, allocator: Allocator, label: []const u8) !*Node { + // Check for match with edges from this node. + for (self.edges.items) |*edge| { + const match = mem.indexOfDiff(u8, edge.label, label) orelse return edge.to; + if (match == 0) continue; + if (match == edge.label.len) return edge.to.put(allocator, label[match..]); + + // Found a match, need to splice up nodes. + // From: A -> B + // To: A -> C -> B + const mid = try allocator.create(Node); + mid.* = .{ .base = self.base }; + var to_label = try allocator.dupe(u8, edge.label[match..]); + allocator.free(edge.label); + const to_node = edge.to; + edge.to = mid; + edge.label = try allocator.dupe(u8, label[0..match]); + self.base.node_count += 1; + + try mid.edges.append(allocator, .{ + .from = mid, + .to = to_node, + .label = to_label, + }); + + return if (match == label.len) mid else mid.put(allocator, label[match..]); + } + + // Add a new node. + const node = try allocator.create(Node); + node.* = .{ .base = self.base }; + self.base.node_count += 1; + + try self.edges.append(allocator, .{ + .from = self, + .to = node, + .label = try allocator.dupe(u8, label), + }); + + return node; + } + + /// Recursively parses the node from the input byte stream. + fn read(self: *Node, allocator: Allocator, reader: anytype) Trie.ReadError!usize { + self.node_dirty = true; + const trie_offset = try reader.context.getPos(); + self.trie_offset = trie_offset; + + var nread: usize = 0; + + const node_size = try leb.readULEB128(u64, reader); + if (node_size > 0) { + const export_flags = try leb.readULEB128(u64, reader); + // TODO Parse special flags. 
+ assert(export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and + export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0); + + const vmaddr_offset = try leb.readULEB128(u64, reader); + + self.terminal_info = .{ + .export_flags = export_flags, + .vmaddr_offset = vmaddr_offset, + }; + } + + const nedges = try reader.readByte(); + self.base.node_count += nedges; + + nread += (try reader.context.getPos()) - trie_offset; + + var i: usize = 0; + while (i < nedges) : (i += 1) { + const edge_start_pos = try reader.context.getPos(); + + const label = blk: { + var label_buf = std.ArrayList(u8).init(allocator); + while (true) { + const next = try reader.readByte(); + if (next == @as(u8, 0)) + break; + try label_buf.append(next); + } + break :blk try label_buf.toOwnedSlice(); + }; + + const seek_to = try leb.readULEB128(u64, reader); + const return_pos = try reader.context.getPos(); + + nread += return_pos - edge_start_pos; + try reader.context.seekTo(seek_to); + + const node = try allocator.create(Node); + node.* = .{ .base = self.base }; + + nread += try node.read(allocator, reader); + try self.edges.append(allocator, .{ + .from = self, + .to = node, + .label = label, + }); + try reader.context.seekTo(return_pos); + } + + return nread; + } + + /// Writes this node to a byte stream. + /// The children of this node *are* not written to the byte stream + /// recursively. To write all nodes to a byte stream in sequence, + /// iterate over `Trie.ordered_nodes` and call this method on each node. + /// This is one of the requirements of the MachO. + /// Panics if `finalize` was not called before calling this method. + fn write(self: Node, writer: anytype) !void { + assert(!self.node_dirty); + if (self.terminal_info) |info| { + // Terminal node info: encode export flags and vmaddr offset of this symbol. + var info_buf: [@sizeOf(u64) * 2]u8 = undefined; + var info_stream = std.io.fixedBufferStream(&info_buf); + // TODO Implement for special flags. + assert(info.export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and + info.export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0); + try leb.writeULEB128(info_stream.writer(), info.export_flags); + try leb.writeULEB128(info_stream.writer(), info.vmaddr_offset); + + // Encode the size of the terminal node info. + var size_buf: [@sizeOf(u64)]u8 = undefined; + var size_stream = std.io.fixedBufferStream(&size_buf); + try leb.writeULEB128(size_stream.writer(), info_stream.pos); + + // Now, write them to the output stream. + try writer.writeAll(size_buf[0..size_stream.pos]); + try writer.writeAll(info_buf[0..info_stream.pos]); + } else { + // Non-terminal node is delimited by 0 byte. + try writer.writeByte(0); + } + // Write number of edges (max legal number of edges is 256). + try writer.writeByte(@as(u8, @intCast(self.edges.items.len))); + + for (self.edges.items) |edge| { + // Write edge label and offset to next node in trie. + try writer.writeAll(edge.label); + try writer.writeByte(0); + try leb.writeULEB128(writer, edge.to.trie_offset.?); + } + } + + const FinalizeResult = struct { + /// Current size of this node in bytes. + node_size: u64, + + /// True if the trie offset of this node in the output byte stream + /// would need updating; false otherwise. + updated: bool, + }; + + /// Updates offset of this node in the output byte stream. 
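+    /// The encoded size of a node depends on the ULEB128 offsets of the nodes its
+    /// edges point to, which in turn depend on their own sizes, so `Trie.finalize`
+    /// keeps calling this in passes until no node reports an updated offset.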
+ fn finalize(self: *Node, offset_in_trie: u64) !FinalizeResult { + var stream = std.io.countingWriter(std.io.null_writer); + var writer = stream.writer(); + + var node_size: u64 = 0; + if (self.terminal_info) |info| { + try leb.writeULEB128(writer, info.export_flags); + try leb.writeULEB128(writer, info.vmaddr_offset); + try leb.writeULEB128(writer, stream.bytes_written); + } else { + node_size += 1; // 0x0 for non-terminal nodes + } + node_size += 1; // 1 byte for edge count + + for (self.edges.items) |edge| { + const next_node_offset = edge.to.trie_offset orelse 0; + node_size += edge.label.len + 1; + try leb.writeULEB128(writer, next_node_offset); + } + + const trie_offset = self.trie_offset orelse 0; + const updated = offset_in_trie != trie_offset; + self.trie_offset = offset_in_trie; + self.node_dirty = false; + node_size += stream.bytes_written; + + return FinalizeResult{ .node_size = node_size, .updated = updated }; + } +}; + +/// The root node of the trie. +root: ?*Node = null, + +/// If you want to access nodes ordered in DFS fashion, +/// you should call `finalize` first since the nodes +/// in this container are not guaranteed to not be stale +/// if more insertions took place after the last `finalize` +/// call. +ordered_nodes: std.ArrayListUnmanaged(*Node) = .{}, + +/// The size of the trie in bytes. +/// This value may be outdated if there were additional +/// insertions performed after `finalize` was called. +/// Call `finalize` before accessing this value to ensure +/// it is up-to-date. +size: u64 = 0, + +/// Number of nodes currently in the trie. +node_count: usize = 0, + +trie_dirty: bool = true, + +/// Export symbol that is to be placed in the trie. +pub const ExportSymbol = struct { + /// Name of the symbol. + name: []const u8, + + /// Offset of this symbol's virtual memory address from the beginning + /// of the __TEXT segment. + vmaddr_offset: u64, + + /// Export flags of this exported symbol. + export_flags: u64, +}; + +/// Insert a symbol into the trie, updating the prefixes in the process. +/// This operation may change the layout of the trie by splicing edges in +/// certain circumstances. +pub fn put(self: *Trie, allocator: Allocator, symbol: ExportSymbol) !void { + try self.createRoot(allocator); + const node = try self.root.?.put(allocator, symbol.name); + node.terminal_info = .{ + .vmaddr_offset = symbol.vmaddr_offset, + .export_flags = symbol.export_flags, + }; + self.trie_dirty = true; +} + +/// Finalizes this trie for writing to a byte stream. +/// This step performs multiple passes through the trie ensuring +/// there are no gaps after every `Node` is ULEB128 encoded. +/// Call this method before trying to `write` the trie to a byte stream. 
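+/// A minimal usage sketch (the symbol name and offset below are made up for
+/// illustration; `gpa` and `writer` are assumed to be supplied by the caller):
+///
+///     var trie: Trie = .{};
+///     defer trie.deinit(gpa);
+///     try trie.put(gpa, .{ .name = "_main", .vmaddr_offset = 0x1000, .export_flags = 0 });
+///     try trie.finalize(gpa);
+///     _ = try trie.write(writer);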
+pub fn finalize(self: *Trie, allocator: Allocator) !void { + if (!self.trie_dirty) return; + + self.ordered_nodes.shrinkRetainingCapacity(0); + try self.ordered_nodes.ensureTotalCapacity(allocator, self.node_count); + + var fifo = std.fifo.LinearFifo(*Node, .Dynamic).init(allocator); + defer fifo.deinit(); + + try fifo.writeItem(self.root.?); + + while (fifo.readItem()) |next| { + for (next.edges.items) |*edge| { + try fifo.writeItem(edge.to); + } + self.ordered_nodes.appendAssumeCapacity(next); + } + + var more: bool = true; + while (more) { + self.size = 0; + more = false; + for (self.ordered_nodes.items) |node| { + const res = try node.finalize(self.size); + self.size += res.node_size; + if (res.updated) more = true; + } + } + + self.trie_dirty = false; +} + +const ReadError = error{ + OutOfMemory, + EndOfStream, + Overflow, +}; + +/// Parse the trie from a byte stream. +pub fn read(self: *Trie, allocator: Allocator, reader: anytype) ReadError!usize { + try self.createRoot(allocator); + return self.root.?.read(allocator, reader); +} + +/// Write the trie to a byte stream. +/// Panics if the trie was not finalized using `finalize` before calling this method. +pub fn write(self: Trie, writer: anytype) !u64 { + assert(!self.trie_dirty); + var counting_writer = std.io.countingWriter(writer); + for (self.ordered_nodes.items) |node| { + try node.write(counting_writer.writer()); + } + return counting_writer.bytes_written; +} + +pub fn deinit(self: *Trie, allocator: Allocator) void { + if (self.root) |root| { + root.deinit(allocator); + allocator.destroy(root); + } + self.ordered_nodes.deinit(allocator); +} + +fn createRoot(self: *Trie, allocator: Allocator) !void { + if (self.root == null) { + const root = try allocator.create(Node); + root.* = .{ .base = self }; + self.root = root; + self.node_count += 1; + } +} + +test "Trie node count" { + var gpa = testing.allocator; + var trie: Trie = .{}; + defer trie.deinit(gpa); + + try testing.expectEqual(trie.node_count, 0); + try testing.expect(trie.root == null); + + try trie.put(gpa, .{ + .name = "_main", + .vmaddr_offset = 0, + .export_flags = 0, + }); + try testing.expectEqual(trie.node_count, 2); + + // Inserting the same node shouldn't update the trie. + try trie.put(gpa, .{ + .name = "_main", + .vmaddr_offset = 0, + .export_flags = 0, + }); + try testing.expectEqual(trie.node_count, 2); + + try trie.put(gpa, .{ + .name = "__mh_execute_header", + .vmaddr_offset = 0x1000, + .export_flags = 0, + }); + try testing.expectEqual(trie.node_count, 4); + + // Inserting the same node shouldn't update the trie. 
+ try trie.put(gpa, .{ + .name = "__mh_execute_header", + .vmaddr_offset = 0x1000, + .export_flags = 0, + }); + try testing.expectEqual(trie.node_count, 4); + try trie.put(gpa, .{ + .name = "_main", + .vmaddr_offset = 0, + .export_flags = 0, + }); + try testing.expectEqual(trie.node_count, 4); +} + +test "Trie basic" { + var gpa = testing.allocator; + var trie: Trie = .{}; + defer trie.deinit(gpa); + + // root --- _st ---> node + try trie.put(gpa, .{ + .name = "_st", + .vmaddr_offset = 0, + .export_flags = 0, + }); + try testing.expect(trie.root.?.edges.items.len == 1); + try testing.expect(mem.eql(u8, trie.root.?.edges.items[0].label, "_st")); + + { + // root --- _st ---> node --- art ---> node + try trie.put(gpa, .{ + .name = "_start", + .vmaddr_offset = 0, + .export_flags = 0, + }); + try testing.expect(trie.root.?.edges.items.len == 1); + + const nextEdge = &trie.root.?.edges.items[0]; + try testing.expect(mem.eql(u8, nextEdge.label, "_st")); + try testing.expect(nextEdge.to.edges.items.len == 1); + try testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "art")); + } + { + // root --- _ ---> node --- st ---> node --- art ---> node + // | + // | --- main ---> node + try trie.put(gpa, .{ + .name = "_main", + .vmaddr_offset = 0, + .export_flags = 0, + }); + try testing.expect(trie.root.?.edges.items.len == 1); + + const nextEdge = &trie.root.?.edges.items[0]; + try testing.expect(mem.eql(u8, nextEdge.label, "_")); + try testing.expect(nextEdge.to.edges.items.len == 2); + try testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "st")); + try testing.expect(mem.eql(u8, nextEdge.to.edges.items[1].label, "main")); + + const nextNextEdge = &nextEdge.to.edges.items[0]; + try testing.expect(mem.eql(u8, nextNextEdge.to.edges.items[0].label, "art")); + } +} + +fn expectEqualHexStrings(expected: []const u8, given: []const u8) !void { + assert(expected.len > 0); + if (mem.eql(u8, expected, given)) return; + const expected_fmt = try std.fmt.allocPrint(testing.allocator, "{x}", .{std.fmt.fmtSliceHexLower(expected)}); + defer testing.allocator.free(expected_fmt); + const given_fmt = try std.fmt.allocPrint(testing.allocator, "{x}", .{std.fmt.fmtSliceHexLower(given)}); + defer testing.allocator.free(given_fmt); + const idx = mem.indexOfDiff(u8, expected_fmt, given_fmt).?; + var padding = try testing.allocator.alloc(u8, idx + 5); + defer testing.allocator.free(padding); + mem.set(u8, padding, ' '); + std.debug.print("\nEXP: {s}\nGIV: {s}\n{s}^ -- first differing byte\n", .{ expected_fmt, given_fmt, padding }); + return error.TestFailed; +} + +test "write Trie to a byte stream" { + var gpa = testing.allocator; + var trie: Trie = .{}; + defer trie.deinit(gpa); + + try trie.put(gpa, .{ + .name = "__mh_execute_header", + .vmaddr_offset = 0, + .export_flags = 0, + }); + try trie.put(gpa, .{ + .name = "_main", + .vmaddr_offset = 0x1000, + .export_flags = 0, + }); + + try trie.finalize(gpa); + try trie.finalize(gpa); // Finalizing mulitple times is a nop subsequently unless we add new nodes. 
+ + const exp_buffer = [_]u8{ + 0x0, 0x1, // node root + 0x5f, 0x0, 0x5, // edge '_' + 0x0, 0x2, // non-terminal node + 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, // edge '_mh_execute_header' + 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x21, // edge '_mh_execute_header' + 0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, // edge 'main' + 0x2, 0x0, 0x0, 0x0, // terminal node + 0x3, 0x0, 0x80, 0x20, 0x0, // terminal node + }; + + var buffer = try gpa.alloc(u8, trie.size); + defer gpa.free(buffer); + var stream = std.io.fixedBufferStream(buffer); + { + _ = try trie.write(stream.writer()); + try expectEqualHexStrings(&exp_buffer, buffer); + } + { + // Writing finalized trie again should yield the same result. + try stream.seekTo(0); + _ = try trie.write(stream.writer()); + try expectEqualHexStrings(&exp_buffer, buffer); + } +} + +test "parse Trie from byte stream" { + var gpa = testing.allocator; + + const in_buffer = [_]u8{ + 0x0, 0x1, // node root + 0x5f, 0x0, 0x5, // edge '_' + 0x0, 0x2, // non-terminal node + 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, // edge '_mh_execute_header' + 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x21, // edge '_mh_execute_header' + 0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, // edge 'main' + 0x2, 0x0, 0x0, 0x0, // terminal node + 0x3, 0x0, 0x80, 0x20, 0x0, // terminal node + }; + + var in_stream = std.io.fixedBufferStream(&in_buffer); + var trie: Trie = .{}; + defer trie.deinit(gpa); + const nread = try trie.read(gpa, in_stream.reader()); + + try testing.expect(nread == in_buffer.len); + + try trie.finalize(gpa); + + var out_buffer = try gpa.alloc(u8, trie.size); + defer gpa.free(out_buffer); + var out_stream = std.io.fixedBufferStream(out_buffer); + _ = try trie.write(out_stream.writer()); + try expectEqualHexStrings(&in_buffer, out_buffer); +} + +test "ordering bug" { + var gpa = testing.allocator; + var trie: Trie = .{}; + defer trie.deinit(gpa); + + try trie.put(gpa, .{ + .name = "_asStr", + .vmaddr_offset = 0x558, + .export_flags = 0, + }); + try trie.put(gpa, .{ + .name = "_a", + .vmaddr_offset = 0x8008, + .export_flags = 0, + }); + + try trie.finalize(gpa); + + const exp_buffer = [_]u8{ + 0x00, 0x01, 0x5F, 0x61, 0x00, 0x06, 0x04, 0x00, + 0x88, 0x80, 0x02, 0x01, 0x73, 0x53, 0x74, 0x72, + 0x00, 0x12, 0x03, 0x00, 0xD8, 0x0A, 0x00, + }; + + var buffer = try gpa.alloc(u8, trie.size); + defer gpa.free(buffer); + var stream = std.io.fixedBufferStream(buffer); + // Writing finalized trie again should yield the same result. 
+ _ = try trie.write(stream.writer()); + try expectEqualHexStrings(&exp_buffer, buffer); +} diff --git a/src/archive/archive/zld/MachO/UnwindInfo.zig b/src/archive/archive/zld/MachO/UnwindInfo.zig new file mode 100644 index 000000000000..ffbf16d5611a --- /dev/null +++ b/src/archive/archive/zld/MachO/UnwindInfo.zig @@ -0,0 +1,835 @@ +const UnwindInfo = @This(); + +const std = @import("std"); +const assert = std.debug.assert; +const eh_frame = @import("eh_frame.zig"); +const fs = std.fs; +const leb = std.leb; +const log = std.log.scoped(.unwind_info); +const macho = std.macho; +const math = std.math; +const mem = std.mem; +const trace = @import("../tracy.zig").trace; + +const Allocator = mem.Allocator; +const Atom = @import("Atom.zig"); +const AtomIndex = MachO.AtomIndex; +const EhFrameRecord = eh_frame.EhFrameRecord; +const MachO = @import("../MachO.zig"); +const Object = @import("Object.zig"); + +gpa: Allocator, + +/// List of all unwind records gathered from all objects and sorted +/// by source function address. +records: std.ArrayListUnmanaged(macho.compact_unwind_entry) = .{}, +records_lookup: std.AutoHashMapUnmanaged(AtomIndex, RecordIndex) = .{}, + +/// List of all personalities referenced by either unwind info entries +/// or __eh_frame entries. +personalities: [max_personalities]MachO.SymbolWithLoc = undefined, +personalities_count: u2 = 0, + +/// List of common encodings sorted in descending order with the most common first. +common_encodings: [max_common_encodings]macho.compact_unwind_encoding_t = undefined, +common_encodings_count: u7 = 0, + +/// List of record indexes containing an LSDA pointer. +lsdas: std.ArrayListUnmanaged(RecordIndex) = .{}, +lsdas_lookup: std.AutoHashMapUnmanaged(RecordIndex, u32) = .{}, + +/// List of second level pages. 
+pages: std.ArrayListUnmanaged(Page) = .{}, + +const RecordIndex = u32; + +const max_personalities = 3; +const max_common_encodings = 127; +const max_compact_encodings = 256; + +const second_level_page_bytes = 0x1000; +const second_level_page_words = second_level_page_bytes / @sizeOf(u32); + +const max_regular_second_level_entries = + (second_level_page_bytes - @sizeOf(macho.unwind_info_regular_second_level_page_header)) / + @sizeOf(macho.unwind_info_regular_second_level_entry); + +const max_compressed_second_level_entries = + (second_level_page_bytes - @sizeOf(macho.unwind_info_compressed_second_level_page_header)) / + @sizeOf(u32); + +const compressed_entry_func_offset_mask = ~@as(u24, 0); + +const Page = struct { + kind: enum { regular, compressed }, + start: RecordIndex, + count: u16, + page_encodings: [max_compact_encodings]RecordIndex = undefined, + page_encodings_count: u9 = 0, + + fn appendPageEncoding(page: *Page, record_id: RecordIndex) void { + assert(page.page_encodings_count <= max_compact_encodings); + page.page_encodings[page.page_encodings_count] = record_id; + page.page_encodings_count += 1; + } + + fn getPageEncoding( + page: *const Page, + info: *const UnwindInfo, + enc: macho.compact_unwind_encoding_t, + ) ?u8 { + comptime var index: u9 = 0; + inline while (index < max_compact_encodings) : (index += 1) { + if (index >= page.page_encodings_count) return null; + const record_id = page.page_encodings[index]; + const record = info.records.items[record_id]; + if (record.compactUnwindEncoding == enc) { + return @as(u8, @intCast(index)); + } + } + return null; + } + + fn format( + page: *const Page, + comptime unused_format_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = page; + _ = unused_format_string; + _ = options; + _ = writer; + @compileError("do not format Page directly; use page.fmtDebug()"); + } + + const DumpCtx = struct { + page: *const Page, + info: *const UnwindInfo, + }; + + fn dump( + ctx: DumpCtx, + comptime unused_format_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) @TypeOf(writer).Error!void { + _ = options; + comptime assert(unused_format_string.len == 0); + try writer.writeAll("Page:\n"); + try writer.print(" kind: {s}\n", .{@tagName(ctx.page.kind)}); + try writer.print(" entries: {d} - {d}\n", .{ + ctx.page.start, + ctx.page.start + ctx.page.count, + }); + try writer.print(" encodings (count = {d})\n", .{ctx.page.page_encodings_count}); + for (ctx.page.page_encodings[0..ctx.page.page_encodings_count], 0..) 
|record_id, i| { + const record = ctx.info.records.items[record_id]; + const enc = record.compactUnwindEncoding; + try writer.print(" {d}: 0x{x:0>8}\n", .{ ctx.info.common_encodings_count + i, enc }); + } + } + + fn fmtDebug(page: *const Page, info: *const UnwindInfo) std.fmt.Formatter(dump) { + return .{ .data = .{ + .page = page, + .info = info, + } }; + } + + fn write(page: *const Page, info: *const UnwindInfo, writer: anytype) !void { + switch (page.kind) { + .regular => { + try writer.writeStruct(macho.unwind_info_regular_second_level_page_header{ + .entryPageOffset = @sizeOf(macho.unwind_info_regular_second_level_page_header), + .entryCount = page.count, + }); + + for (info.records.items[page.start..][0..page.count]) |record| { + try writer.writeStruct(macho.unwind_info_regular_second_level_entry{ + .functionOffset = @as(u32, @intCast(record.rangeStart)), + .encoding = record.compactUnwindEncoding, + }); + } + }, + .compressed => { + const entry_offset = @sizeOf(macho.unwind_info_compressed_second_level_page_header) + + @as(u16, @intCast(page.page_encodings_count)) * @sizeOf(u32); + try writer.writeStruct(macho.unwind_info_compressed_second_level_page_header{ + .entryPageOffset = entry_offset, + .entryCount = page.count, + .encodingsPageOffset = @sizeOf( + macho.unwind_info_compressed_second_level_page_header, + ), + .encodingsCount = page.page_encodings_count, + }); + + for (page.page_encodings[0..page.page_encodings_count]) |record_id| { + const enc = info.records.items[record_id].compactUnwindEncoding; + try writer.writeIntLittle(u32, enc); + } + + assert(page.count > 0); + const first_entry = info.records.items[page.start]; + for (info.records.items[page.start..][0..page.count]) |record| { + const enc_index = blk: { + if (info.getCommonEncoding(record.compactUnwindEncoding)) |id| { + break :blk id; + } + const ncommon = info.common_encodings_count; + break :blk ncommon + page.getPageEncoding(info, record.compactUnwindEncoding).?; + }; + const compressed = macho.UnwindInfoCompressedEntry{ + .funcOffset = @as(u24, @intCast(record.rangeStart - first_entry.rangeStart)), + .encodingIndex = @as(u8, @intCast(enc_index)), + }; + try writer.writeStruct(compressed); + } + }, + } + } +}; + +pub fn deinit(info: *UnwindInfo) void { + info.records.deinit(info.gpa); + info.records_lookup.deinit(info.gpa); + info.pages.deinit(info.gpa); + info.lsdas.deinit(info.gpa); + info.lsdas_lookup.deinit(info.gpa); +} + +pub fn scanRelocs(macho_file: *MachO) !void { + if (macho_file.getSectionByName("__TEXT", "__unwind_info") == null) return; + + const cpu_arch = macho_file.options.target.cpu_arch.?; + for (macho_file.objects.items, 0..) |*object, object_id| { + const unwind_records = object.getUnwindRecords(); + for (object.exec_atoms.items) |atom_index| { + const record_id = object.unwind_records_lookup.get(atom_index) orelse continue; + if (object.unwind_relocs_lookup[record_id].dead) continue; + const record = unwind_records[record_id]; + if (!UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) { + if (getPersonalityFunctionReloc( + macho_file, + @as(u32, @intCast(object_id)), + record_id, + )) |rel| { + // Personality function; add GOT pointer. 
+ const target = parseRelocTarget( + macho_file, + @as(u32, @intCast(object_id)), + rel, + mem.asBytes(&record), + @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))), + ); + try Atom.addGotEntry(macho_file, target); + } + } + } + } +} + +pub fn collect(info: *UnwindInfo, macho_file: *MachO) !void { + if (macho_file.getSectionByName("__TEXT", "__unwind_info") == null) return; + + const cpu_arch = macho_file.options.target.cpu_arch.?; + + var records = std.ArrayList(macho.compact_unwind_entry).init(info.gpa); + defer records.deinit(); + + var atom_indexes = std.ArrayList(AtomIndex).init(info.gpa); + defer atom_indexes.deinit(); + + // TODO handle dead stripping + for (macho_file.objects.items, 0..) |*object, object_id| { + log.debug("collecting unwind records in {s} ({d})", .{ object.name, object_id }); + const unwind_records = object.getUnwindRecords(); + + // Contents of unwind records does not have to cover all symbol in executable section + // so we need insert them ourselves. + try records.ensureUnusedCapacity(object.exec_atoms.items.len); + try atom_indexes.ensureUnusedCapacity(object.exec_atoms.items.len); + + for (object.exec_atoms.items) |atom_index| { + var record = if (object.unwind_records_lookup.get(atom_index)) |record_id| blk: { + if (object.unwind_relocs_lookup[record_id].dead) continue; + var record = unwind_records[record_id]; + + if (UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) { + try info.collectPersonalityFromDwarf(macho_file, @as(u32, @intCast(object_id)), atom_index, &record); + } else { + if (getPersonalityFunctionReloc( + macho_file, + @as(u32, @intCast(object_id)), + record_id, + )) |rel| { + const target = parseRelocTarget( + macho_file, + @as(u32, @intCast(object_id)), + rel, + mem.asBytes(&record), + @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))), + ); + const personality_index = info.getPersonalityFunction(target) orelse inner: { + const personality_index = info.personalities_count; + info.personalities[personality_index] = target; + info.personalities_count += 1; + break :inner personality_index; + }; + + record.personalityFunction = personality_index + 1; + UnwindEncoding.setPersonalityIndex(&record.compactUnwindEncoding, personality_index + 1); + } + + if (getLsdaReloc(macho_file, @as(u32, @intCast(object_id)), record_id)) |rel| { + const target = parseRelocTarget( + macho_file, + @as(u32, @intCast(object_id)), + rel, + mem.asBytes(&record), + @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))), + ); + record.lsda = @as(u64, @bitCast(target)); + } + } + break :blk record; + } else blk: { + const atom = macho_file.getAtom(atom_index); + const sym = macho_file.getSymbol(atom.getSymbolWithLoc()); + if (sym.n_desc == MachO.N_DEAD) continue; + + if (!object.hasUnwindRecords()) { + if (object.eh_frame_records_lookup.get(atom_index)) |fde_offset| { + if (object.eh_frame_relocs_lookup.get(fde_offset).?.dead) continue; + var record = nullRecord(); + try info.collectPersonalityFromDwarf(macho_file, @as(u32, @intCast(object_id)), atom_index, &record); + switch (cpu_arch) { + .aarch64 => UnwindEncoding.setMode(&record.compactUnwindEncoding, macho.UNWIND_ARM64_MODE.DWARF), + .x86_64 => UnwindEncoding.setMode(&record.compactUnwindEncoding, macho.UNWIND_X86_64_MODE.DWARF), + else => unreachable, + } + break :blk record; + } + } + + break :blk nullRecord(); + }; + + const atom = macho_file.getAtom(atom_index); + const sym_loc = atom.getSymbolWithLoc(); + const sym = macho_file.getSymbol(sym_loc); + 
assert(sym.n_desc != MachO.N_DEAD); + record.rangeStart = sym.n_value; + record.rangeLength = @as(u32, @intCast(atom.size)); + + records.appendAssumeCapacity(record); + atom_indexes.appendAssumeCapacity(atom_index); + } + } + + // Fold records + try info.records.ensureTotalCapacity(info.gpa, records.items.len); + try info.records_lookup.ensureTotalCapacity(info.gpa, @as(u32, @intCast(atom_indexes.items.len))); + + var maybe_prev: ?macho.compact_unwind_entry = null; + for (records.items, 0..) |record, i| { + const record_id = blk: { + if (maybe_prev) |prev| { + const is_dwarf = UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch); + if (is_dwarf or + (prev.compactUnwindEncoding != record.compactUnwindEncoding) or + (prev.personalityFunction != record.personalityFunction) or + record.lsda > 0) + { + const record_id = @as(RecordIndex, @intCast(info.records.items.len)); + info.records.appendAssumeCapacity(record); + maybe_prev = record; + break :blk record_id; + } else { + break :blk @as(RecordIndex, @intCast(info.records.items.len - 1)); + } + } else { + const record_id = @as(RecordIndex, @intCast(info.records.items.len)); + info.records.appendAssumeCapacity(record); + maybe_prev = record; + break :blk record_id; + } + }; + info.records_lookup.putAssumeCapacityNoClobber(atom_indexes.items[i], record_id); + } + + // Calculate common encodings + { + const CommonEncWithCount = struct { + enc: macho.compact_unwind_encoding_t, + count: u32, + + fn greaterThan(ctx: void, lhs: @This(), rhs: @This()) bool { + _ = ctx; + return lhs.count > rhs.count; + } + }; + + const Context = struct { + pub fn hash(ctx: @This(), key: macho.compact_unwind_encoding_t) u32 { + _ = ctx; + return key; + } + + pub fn eql( + ctx: @This(), + key1: macho.compact_unwind_encoding_t, + key2: macho.compact_unwind_encoding_t, + b_index: usize, + ) bool { + _ = ctx; + _ = b_index; + return key1 == key2; + } + }; + + var common_encodings_counts = std.ArrayHashMap( + macho.compact_unwind_encoding_t, + CommonEncWithCount, + Context, + false, + ).init(info.gpa); + defer common_encodings_counts.deinit(); + + for (info.records.items) |record| { + assert(!isNull(record)); + if (UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) continue; + const enc = record.compactUnwindEncoding; + const gop = try common_encodings_counts.getOrPut(enc); + if (!gop.found_existing) { + gop.value_ptr.* = .{ + .enc = enc, + .count = 0, + }; + } + gop.value_ptr.count += 1; + } + + var slice = common_encodings_counts.values(); + std.sort.sort(CommonEncWithCount, slice, {}, CommonEncWithCount.greaterThan); + + var i: u7 = 0; + while (i < slice.len) : (i += 1) { + if (i >= max_common_encodings) break; + if (slice[i].count < 2) continue; + info.appendCommonEncoding(slice[i].enc); + log.debug("adding common encoding: {d} => 0x{x:0>8}", .{ i, slice[i].enc }); + } + } + + // Compute page allocations + { + var i: u32 = 0; + while (i < info.records.items.len) { + const range_start_max: u64 = + info.records.items[i].rangeStart + compressed_entry_func_offset_mask; + var encoding_count: u9 = info.common_encodings_count; + var space_left: u32 = second_level_page_words - + @sizeOf(macho.unwind_info_compressed_second_level_page_header) / @sizeOf(u32); + var page = Page{ + .kind = undefined, + .start = i, + .count = 0, + }; + + while (space_left >= 1 and i < info.records.items.len) { + const record = info.records.items[i]; + const enc = record.compactUnwindEncoding; + const is_dwarf = UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch); + + 
if (record.rangeStart >= range_start_max) { + break; + } else if (info.getCommonEncoding(enc) != null or + page.getPageEncoding(info, enc) != null and !is_dwarf) + { + i += 1; + space_left -= 1; + } else if (space_left >= 2 and encoding_count < max_compact_encodings) { + page.appendPageEncoding(i); + i += 1; + space_left -= 2; + encoding_count += 1; + } else { + break; + } + } + + page.count = @as(u16, @intCast(i - page.start)); + + if (i < info.records.items.len and page.count < max_regular_second_level_entries) { + page.kind = .regular; + page.count = @as(u16, @intCast(@min( + max_regular_second_level_entries, + info.records.items.len - page.start, + ))); + i = page.start + page.count; + } else { + page.kind = .compressed; + } + + log.debug("{}", .{page.fmtDebug(info)}); + + try info.pages.append(info.gpa, page); + } + } + + // Save indices of records requiring LSDA relocation + try info.lsdas_lookup.ensureTotalCapacity(info.gpa, @as(u32, @intCast(info.records.items.len))); + for (info.records.items, 0..) |rec, i| { + info.lsdas_lookup.putAssumeCapacityNoClobber(@as(RecordIndex, @intCast(i)), @as(u32, @intCast(info.lsdas.items.len))); + if (rec.lsda == 0) continue; + try info.lsdas.append(info.gpa, @as(RecordIndex, @intCast(i))); + } +} + +fn collectPersonalityFromDwarf( + info: *UnwindInfo, + macho_file: *MachO, + object_id: u32, + atom_index: u32, + record: *macho.compact_unwind_entry, +) !void { + const object = &macho_file.objects.items[object_id]; + var it = object.getEhFrameRecordsIterator(); + const fde_offset = object.eh_frame_records_lookup.get(atom_index).?; + it.seekTo(fde_offset); + const fde = (try it.next()).?; + const cie_ptr = fde.getCiePointer(); + const cie_offset = fde_offset + 4 - cie_ptr; + it.seekTo(cie_offset); + const cie = (try it.next()).?; + + if (cie.getPersonalityPointerReloc( + macho_file, + @as(u32, @intCast(object_id)), + cie_offset, + )) |target| { + const personality_index = info.getPersonalityFunction(target) orelse inner: { + const personality_index = info.personalities_count; + info.personalities[personality_index] = target; + info.personalities_count += 1; + break :inner personality_index; + }; + + record.personalityFunction = personality_index + 1; + UnwindEncoding.setPersonalityIndex(&record.compactUnwindEncoding, personality_index + 1); + } +} + +pub fn calcSectionSize(info: UnwindInfo, macho_file: *MachO) !void { + const sect_id = macho_file.getSectionByName("__TEXT", "__unwind_info") orelse return; + const sect = &macho_file.sections.items(.header)[sect_id]; + sect.@"align" = 2; + sect.size = info.calcRequiredSize(); +} + +fn calcRequiredSize(info: UnwindInfo) usize { + var total_size: usize = 0; + total_size += @sizeOf(macho.unwind_info_section_header); + total_size += + @as(usize, @intCast(info.common_encodings_count)) * @sizeOf(macho.compact_unwind_encoding_t); + total_size += @as(usize, @intCast(info.personalities_count)) * @sizeOf(u32); + total_size += (info.pages.items.len + 1) * @sizeOf(macho.unwind_info_section_header_index_entry); + total_size += info.lsdas.items.len * @sizeOf(macho.unwind_info_section_header_lsda_index_entry); + total_size += info.pages.items.len * second_level_page_bytes; + return total_size; +} + +pub fn write(info: *UnwindInfo, macho_file: *MachO) !void { + const sect_id = macho_file.getSectionByName("__TEXT", "__unwind_info") orelse return; + const sect = &macho_file.sections.items(.header)[sect_id]; + const seg_id = macho_file.sections.items(.segment_index)[sect_id]; + const seg = 
macho_file.segments.items[seg_id]; + + const text_sect_id = macho_file.getSectionByName("__TEXT", "__text").?; + const text_sect = macho_file.sections.items(.header)[text_sect_id]; + + var personalities: [max_personalities]u32 = undefined; + const cpu_arch = macho_file.options.target.cpu_arch.?; + + log.debug("Personalities:", .{}); + for (info.personalities[0..info.personalities_count], 0..) |target, i| { + const atom_index = macho_file.getGotAtomIndexForSymbol(target).?; + const atom = macho_file.getAtom(atom_index); + const sym = macho_file.getSymbol(atom.getSymbolWithLoc()); + personalities[i] = @as(u32, @intCast(sym.n_value - seg.vmaddr)); + log.debug(" {d}: 0x{x} ({s})", .{ i, personalities[i], macho_file.getSymbolName(target) }); + } + + for (info.records.items) |*rec| { + // Finalize missing address values + rec.rangeStart += text_sect.addr - seg.vmaddr; + if (rec.personalityFunction > 0) { + rec.personalityFunction = personalities[rec.personalityFunction - 1]; + } + + if (rec.compactUnwindEncoding > 0 and !UnwindEncoding.isDwarf(rec.compactUnwindEncoding, cpu_arch)) { + const lsda_target = @as(MachO.SymbolWithLoc, @bitCast(rec.lsda)); + if (lsda_target.getFile()) |_| { + const sym = macho_file.getSymbol(lsda_target); + rec.lsda = sym.n_value - seg.vmaddr; + } + } + } + + for (info.records.items, 0..) |record, i| { + log.debug("Unwind record at offset 0x{x}", .{i * @sizeOf(macho.compact_unwind_entry)}); + log.debug(" start: 0x{x}", .{record.rangeStart}); + log.debug(" length: 0x{x}", .{record.rangeLength}); + log.debug(" compact encoding: 0x{x:0>8}", .{record.compactUnwindEncoding}); + log.debug(" personality: 0x{x}", .{record.personalityFunction}); + log.debug(" LSDA: 0x{x}", .{record.lsda}); + } + + var buffer = std.ArrayList(u8).init(info.gpa); + defer buffer.deinit(); + + const size = info.calcRequiredSize(); + try buffer.ensureTotalCapacityPrecise(size); + + var cwriter = std.io.countingWriter(buffer.writer()); + const writer = cwriter.writer(); + + const common_encodings_offset: u32 = @sizeOf(macho.unwind_info_section_header); + const common_encodings_count: u32 = info.common_encodings_count; + const personalities_offset: u32 = common_encodings_offset + common_encodings_count * @sizeOf(u32); + const personalities_count: u32 = info.personalities_count; + const indexes_offset: u32 = personalities_offset + personalities_count * @sizeOf(u32); + const indexes_count: u32 = @as(u32, @intCast(info.pages.items.len + 1)); + + try writer.writeStruct(macho.unwind_info_section_header{ + .commonEncodingsArraySectionOffset = common_encodings_offset, + .commonEncodingsArrayCount = common_encodings_count, + .personalityArraySectionOffset = personalities_offset, + .personalityArrayCount = personalities_count, + .indexSectionOffset = indexes_offset, + .indexCount = indexes_count, + }); + + try writer.writeAll(mem.sliceAsBytes(info.common_encodings[0..info.common_encodings_count])); + try writer.writeAll(mem.sliceAsBytes(personalities[0..info.personalities_count])); + + const pages_base_offset = @as(u32, @intCast(size - (info.pages.items.len * second_level_page_bytes))); + const lsda_base_offset = @as(u32, @intCast(pages_base_offset - + (info.lsdas.items.len * @sizeOf(macho.unwind_info_section_header_lsda_index_entry)))); + for (info.pages.items, 0..) 
|page, i| { + assert(page.count > 0); + const first_entry = info.records.items[page.start]; + try writer.writeStruct(macho.unwind_info_section_header_index_entry{ + .functionOffset = @as(u32, @intCast(first_entry.rangeStart)), + .secondLevelPagesSectionOffset = @as(u32, @intCast(pages_base_offset + i * second_level_page_bytes)), + .lsdaIndexArraySectionOffset = lsda_base_offset + + info.lsdas_lookup.get(page.start).? * @sizeOf(macho.unwind_info_section_header_lsda_index_entry), + }); + } + + const last_entry = info.records.items[info.records.items.len - 1]; + const sentinel_address = @as(u32, @intCast(last_entry.rangeStart + last_entry.rangeLength)); + try writer.writeStruct(macho.unwind_info_section_header_index_entry{ + .functionOffset = sentinel_address, + .secondLevelPagesSectionOffset = 0, + .lsdaIndexArraySectionOffset = lsda_base_offset + + @as(u32, @intCast(info.lsdas.items.len)) * @sizeOf(macho.unwind_info_section_header_lsda_index_entry), + }); + + for (info.lsdas.items) |record_id| { + const record = info.records.items[record_id]; + try writer.writeStruct(macho.unwind_info_section_header_lsda_index_entry{ + .functionOffset = @as(u32, @intCast(record.rangeStart)), + .lsdaOffset = @as(u32, @intCast(record.lsda)), + }); + } + + for (info.pages.items) |page| { + const start = cwriter.bytes_written; + try page.write(info, writer); + const nwritten = cwriter.bytes_written - start; + if (nwritten < second_level_page_bytes) { + try writer.writeByteNTimes(0, second_level_page_bytes - nwritten); + } + } + + const padding = buffer.items.len - cwriter.bytes_written; + if (padding > 0) { + mem.set(u8, buffer.items[cwriter.bytes_written..], 0); + } + + try macho_file.base.file.pwriteAll(buffer.items, sect.offset); +} + +pub fn parseRelocTarget( + macho_file: *MachO, + object_id: u32, + rel: macho.relocation_info, + code: []const u8, + base_offset: i32, +) MachO.SymbolWithLoc { + const tracy = trace(@src()); + defer tracy.end(); + + const object = &macho_file.objects.items[object_id]; + + const sym_index = if (rel.r_extern == 0) blk: { + const sect_id = @as(u8, @intCast(rel.r_symbolnum - 1)); + const rel_offset = @as(u32, @intCast(rel.r_address - base_offset)); + assert(rel.r_pcrel == 0 and rel.r_length == 3); + const address_in_section = mem.readIntLittle(u64, code[rel_offset..][0..8]); + const sym_index = object.getSymbolByAddress(address_in_section, sect_id); + break :blk sym_index; + } else object.reverse_symtab_lookup[rel.r_symbolnum]; + + const sym_loc = MachO.SymbolWithLoc{ .sym_index = sym_index, .file = object_id + 1 }; + const sym = macho_file.getSymbol(sym_loc); + + if (sym.sect() and !sym.ext()) { + // Make sure we are not dealing with a local alias. 
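+        // A defined, non-external symbol may only alias an address inside another
+        // atom, so redirect the lookup to the symbol of the atom that owns it.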
+ const atom_index = object.getAtomIndexForSymbol(sym_index) orelse + return sym_loc; + const atom = macho_file.getAtom(atom_index); + return atom.getSymbolWithLoc(); + } else if (object.getGlobal(sym_index)) |global_index| { + return macho_file.globals.items[global_index]; + } else return sym_loc; +} + +fn getRelocs(macho_file: *MachO, object_id: u32, record_id: usize) []const macho.relocation_info { + const object = &macho_file.objects.items[object_id]; + assert(object.hasUnwindRecords()); + const rel_pos = object.unwind_relocs_lookup[record_id].reloc; + const relocs = object.getRelocs(object.unwind_info_sect_id.?); + return relocs[rel_pos.start..][0..rel_pos.len]; +} + +fn isPersonalityFunction(record_id: usize, rel: macho.relocation_info) bool { + const base_offset = @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))); + const rel_offset = rel.r_address - base_offset; + return rel_offset == 16; +} + +pub fn getPersonalityFunctionReloc( + macho_file: *MachO, + object_id: u32, + record_id: usize, +) ?macho.relocation_info { + const relocs = getRelocs(macho_file, object_id, record_id); + for (relocs) |rel| { + if (isPersonalityFunction(record_id, rel)) return rel; + } + return null; +} + +fn getPersonalityFunction(info: UnwindInfo, global_index: MachO.SymbolWithLoc) ?u2 { + comptime var index: u2 = 0; + inline while (index < max_personalities) : (index += 1) { + if (index >= info.personalities_count) return null; + if (info.personalities[index].eql(global_index)) { + return index; + } + } + return null; +} + +fn isLsda(record_id: usize, rel: macho.relocation_info) bool { + const base_offset = @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))); + const rel_offset = rel.r_address - base_offset; + return rel_offset == 24; +} + +pub fn getLsdaReloc(macho_file: *MachO, object_id: u32, record_id: usize) ?macho.relocation_info { + const relocs = getRelocs(macho_file, object_id, record_id); + for (relocs) |rel| { + if (isLsda(record_id, rel)) return rel; + } + return null; +} + +pub fn isNull(rec: macho.compact_unwind_entry) bool { + return rec.rangeStart == 0 and + rec.rangeLength == 0 and + rec.compactUnwindEncoding == 0 and + rec.lsda == 0 and + rec.personalityFunction == 0; +} + +inline fn nullRecord() macho.compact_unwind_entry { + return .{ + .rangeStart = 0, + .rangeLength = 0, + .compactUnwindEncoding = 0, + .personalityFunction = 0, + .lsda = 0, + }; +} + +fn appendCommonEncoding(info: *UnwindInfo, enc: macho.compact_unwind_encoding_t) void { + assert(info.common_encodings_count <= max_common_encodings); + info.common_encodings[info.common_encodings_count] = enc; + info.common_encodings_count += 1; +} + +fn getCommonEncoding(info: UnwindInfo, enc: macho.compact_unwind_encoding_t) ?u7 { + comptime var index: u7 = 0; + inline while (index < max_common_encodings) : (index += 1) { + if (index >= info.common_encodings_count) return null; + if (info.common_encodings[index] == enc) { + return index; + } + } + return null; +} + +pub const UnwindEncoding = struct { + pub fn getMode(enc: macho.compact_unwind_encoding_t) u4 { + comptime assert(macho.UNWIND_ARM64_MODE_MASK == macho.UNWIND_X86_64_MODE_MASK); + return @as(u4, @truncate((enc & macho.UNWIND_ARM64_MODE_MASK) >> 24)); + } + + pub fn isDwarf(enc: macho.compact_unwind_encoding_t, cpu_arch: std.Target.Cpu.Arch) bool { + const mode = getMode(enc); + return switch (cpu_arch) { + .aarch64 => @as(macho.UNWIND_ARM64_MODE, @enumFromInt(mode)) == .DWARF, + .x86_64 => @as(macho.UNWIND_X86_64_MODE, @enumFromInt(mode)) 
== .DWARF, + else => unreachable, + }; + } + + pub fn setMode(enc: *macho.compact_unwind_encoding_t, mode: anytype) void { + enc.* |= @as(u32, @intCast(@intFromEnum(mode))) << 24; + } + + pub fn hasLsda(enc: macho.compact_unwind_encoding_t) bool { + const has_lsda = @as(u1, @truncate((enc & macho.UNWIND_HAS_LSDA) >> 31)); + return has_lsda == 1; + } + + pub fn setHasLsda(enc: *macho.compact_unwind_encoding_t, has_lsda: bool) void { + const mask = @as(u32, @intCast(@intFromBool(has_lsda))) << 31; + enc.* |= mask; + } + + pub fn getPersonalityIndex(enc: macho.compact_unwind_encoding_t) u2 { + const index = @as(u2, @truncate((enc & macho.UNWIND_PERSONALITY_MASK) >> 28)); + return index; + } + + pub fn setPersonalityIndex(enc: *macho.compact_unwind_encoding_t, index: u2) void { + const mask = @as(u32, @intCast(index)) << 28; + enc.* |= mask; + } + + pub fn getDwarfSectionOffset(enc: macho.compact_unwind_encoding_t, cpu_arch: std.Target.Cpu.Arch) u24 { + assert(isDwarf(enc, cpu_arch)); + const offset = @as(u24, @truncate(enc)); + return offset; + } + + pub fn setDwarfSectionOffset(enc: *macho.compact_unwind_encoding_t, cpu_arch: std.Target.Cpu.Arch, offset: u24) void { + assert(isDwarf(enc.*, cpu_arch)); + enc.* |= offset; + } +}; diff --git a/src/archive/archive/zld/MachO/dead_strip.zig b/src/archive/archive/zld/MachO/dead_strip.zig new file mode 100644 index 000000000000..e36015e3c110 --- /dev/null +++ b/src/archive/archive/zld/MachO/dead_strip.zig @@ -0,0 +1,472 @@ +const std = @import("std"); +const assert = std.debug.assert; +const eh_frame = @import("eh_frame.zig"); +const log = std.log.scoped(.dead_strip); +const macho = std.macho; +const math = std.math; +const mem = std.mem; +const trace = @import("../tracy.zig").trace; + +const Allocator = mem.Allocator; +const AtomIndex = MachO.AtomIndex; +const Atom = @import("Atom.zig"); +const MachO = @import("../MachO.zig"); +const SymbolWithLoc = MachO.SymbolWithLoc; +const UnwindInfo = @import("UnwindInfo.zig"); + +const AtomTable = std.AutoHashMap(AtomIndex, void); + +pub fn gcAtoms(macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = macho_file.base.allocator; + + var arena = std.heap.ArenaAllocator.init(gpa); + defer arena.deinit(); + + var roots = AtomTable.init(arena.allocator()); + try roots.ensureUnusedCapacity(@as(u32, @intCast(macho_file.globals.items.len))); + + var alive = AtomTable.init(arena.allocator()); + try alive.ensureTotalCapacity(@as(u32, @intCast(macho_file.atoms.items.len))); + + try collectRoots(macho_file, &roots); + try mark(macho_file, roots, &alive); + prune(macho_file, alive); +} + +fn collectRoots(macho_file: *MachO, roots: *AtomTable) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const output_mode = macho_file.options.output_mode; + + log.debug("collecting roots", .{}); + + switch (output_mode) { + .exe => { + // Add entrypoint as GC root + const global: SymbolWithLoc = macho_file.getEntryPoint(); + const object = macho_file.objects.items[global.getFile().?]; + const atom_index = object.getAtomIndexForSymbol(global.sym_index).?; // panic here means fatal error + _ = try roots.getOrPut(atom_index); + + log.debug("root(ATOM({d}, %{d}, {?d}))", .{ + atom_index, + macho_file.getAtom(atom_index).sym_index, + macho_file.getAtom(atom_index).getFile(), + }); + }, + else => |other| { + assert(other == .lib); + // Add exports as GC roots + for (macho_file.globals.items) |global| { + const sym = macho_file.getSymbol(global); + if (sym.undf()) continue; + + const 
file = global.getFile() orelse continue; // synthetic globals are atomless + const object = macho_file.objects.items[file]; + const atom_index = object.getAtomIndexForSymbol(global.sym_index).?; // panic here means fatal error + _ = try roots.getOrPut(atom_index); + + log.debug("root(ATOM({d}, %{d}, {?d}))", .{ + atom_index, + macho_file.getAtom(atom_index).sym_index, + macho_file.getAtom(atom_index).getFile(), + }); + } + }, + } + + for (macho_file.objects.items) |object| { + const has_subsections = object.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; + + for (object.atoms.items) |atom_index| { + const is_gc_root = blk: { + // Modelled after ld64 which treats each object file compiled without MH_SUBSECTIONS_VIA_SYMBOLS + // as a root. + if (!has_subsections) break :blk true; + + const atom = macho_file.getAtom(atom_index); + const sect_id = if (object.getSourceSymbol(atom.sym_index)) |source_sym| + source_sym.n_sect - 1 + else sect_id: { + const nbase = @as(u32, @intCast(object.in_symtab.?.len)); + const sect_id = @as(u8, @intCast(atom.sym_index - nbase)); + break :sect_id sect_id; + }; + const source_sect = object.getSourceSection(sect_id); + if (source_sect.isDontDeadStrip()) break :blk true; + switch (source_sect.type()) { + macho.S_MOD_INIT_FUNC_POINTERS, + macho.S_MOD_TERM_FUNC_POINTERS, + => break :blk true, + else => break :blk false, + } + }; + if (is_gc_root) { + try roots.putNoClobber(atom_index, {}); + + log.debug("root(ATOM({d}, %{d}, {?d}))", .{ + atom_index, + macho_file.getAtom(atom_index).sym_index, + macho_file.getAtom(atom_index).getFile(), + }); + } + } + } +} + +fn markLive( + macho_file: *MachO, + atom_index: AtomIndex, + alive: *AtomTable, +) void { + const tracy = trace(@src()); + defer tracy.end(); + + if (alive.contains(atom_index)) return; + + const atom = macho_file.getAtom(atom_index); + const sym_loc = atom.getSymbolWithLoc(); + + log.debug("mark(ATOM({d}, %{d}, {?d}))", .{ atom_index, sym_loc.sym_index, sym_loc.getFile() }); + + alive.putAssumeCapacityNoClobber(atom_index, {}); + + macho_file.logAtom(atom_index, log); + + const cpu_arch = macho_file.options.target.cpu_arch.?; + + const sym = macho_file.getSymbol(sym_loc); + const header = macho_file.sections.items(.header)[sym.n_sect - 1]; + if (header.isZerofill()) return; + + const relocs = Atom.getAtomRelocs(macho_file, atom_index); + for (relocs) |rel| { + const target = switch (cpu_arch) { + .aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) { + .ARM64_RELOC_ADDEND => continue, + else => Atom.parseRelocTarget(macho_file, atom_index, rel), + }, + .x86_64 => Atom.parseRelocTarget(macho_file, atom_index, rel), + else => unreachable, + }; + const target_sym = macho_file.getSymbol(target); + + if (target_sym.undf()) continue; + if (target.getFile() == null) { + const target_sym_name = macho_file.getSymbolName(target); + if (mem.eql(u8, "__mh_execute_header", target_sym_name)) continue; + if (mem.eql(u8, "___dso_handle", target_sym_name)) continue; + + unreachable; // referenced symbol not found + } + + const object = macho_file.objects.items[target.getFile().?]; + const target_atom_index = object.getAtomIndexForSymbol(target.sym_index).?; + log.debug(" following ATOM({d}, %{d}, {?d})", .{ + target_atom_index, + macho_file.getAtom(target_atom_index).sym_index, + macho_file.getAtom(target_atom_index).getFile(), + }); + + markLive(macho_file, target_atom_index, alive); + } +} + +fn refersLive(macho_file: *MachO, atom_index: AtomIndex, alive: AtomTable) bool { + const tracy = 
trace(@src());
+    defer tracy.end();
+
+    const atom = macho_file.getAtom(atom_index);
+    const sym_loc = atom.getSymbolWithLoc();
+
+    log.debug("refersLive(ATOM({d}, %{d}, {?d}))", .{
+        atom_index,
+        sym_loc.sym_index,
+        sym_loc.getFile(),
+    });
+
+    const cpu_arch = macho_file.options.target.cpu_arch.?;
+
+    const sym = macho_file.getSymbol(sym_loc);
+    const header = macho_file.sections.items(.header)[sym.n_sect - 1];
+    assert(!header.isZerofill());
+
+    const relocs = Atom.getAtomRelocs(macho_file, atom_index);
+    for (relocs) |rel| {
+        const target = switch (cpu_arch) {
+            .aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) {
+                .ARM64_RELOC_ADDEND => continue,
+                else => Atom.parseRelocTarget(macho_file, atom_index, rel),
+            },
+            .x86_64 => Atom.parseRelocTarget(macho_file, atom_index, rel),
+            else => unreachable,
+        };
+
+        const object = macho_file.objects.items[target.getFile().?];
+        const target_atom_index = object.getAtomIndexForSymbol(target.sym_index) orelse {
+            log.debug("atom for symbol '{s}' not found; skipping...", .{macho_file.getSymbolName(target)});
+            continue;
+        };
+        if (alive.contains(target_atom_index)) {
+            log.debug(" refers live ATOM({d}, %{d}, {?d})", .{
+                target_atom_index,
+                macho_file.getAtom(target_atom_index).sym_index,
+                macho_file.getAtom(target_atom_index).getFile(),
+            });
+            return true;
+        }
+    }
+
+    return false;
+}
+
+fn mark(macho_file: *MachO, roots: AtomTable, alive: *AtomTable) !void {
+    const tracy = trace(@src());
+    defer tracy.end();
+
+    var it = roots.keyIterator();
+    while (it.next()) |root| {
+        markLive(macho_file, root.*, alive);
+    }
+
+    var loop: bool = true;
+    while (loop) {
+        loop = false;
+
+        for (macho_file.objects.items) |object| {
+            for (object.atoms.items) |atom_index| {
+                if (alive.contains(atom_index)) continue;
+
+                const atom = macho_file.getAtom(atom_index);
+                const sect_id = if (object.getSourceSymbol(atom.sym_index)) |source_sym|
+                    source_sym.n_sect - 1
+                else blk: {
+                    const nbase = @as(u32, @intCast(object.in_symtab.?.len));
+                    const sect_id = @as(u8, @intCast(atom.sym_index - nbase));
+                    break :blk sect_id;
+                };
+                const source_sect = object.getSourceSection(sect_id);
+
+                if (source_sect.isDontDeadStripIfReferencesLive()) {
+                    if (refersLive(macho_file, atom_index, alive.*)) {
+                        markLive(macho_file, atom_index, alive);
+                        loop = true;
+                    }
+                }
+            }
+        }
+    }
+
+    for (macho_file.objects.items, 0..) |_, object_id| {
+        // Traverse unwind and eh_frame records noting if the source symbol has been marked, and if so,
+        // marking all references as live.
+        try markUnwindRecords(macho_file, @as(u32, @intCast(object_id)), alive);
+    }
+}
+
+fn markUnwindRecords(macho_file: *MachO, object_id: u32, alive: *AtomTable) !void {
+    const object = &macho_file.objects.items[object_id];
+    const cpu_arch = macho_file.options.target.cpu_arch.?;
+
+    const unwind_records = object.getUnwindRecords();
+
+    for (object.exec_atoms.items) |atom_index| {
+        if (!object.hasUnwindRecords()) {
+            if (object.eh_frame_records_lookup.get(atom_index)) |fde_offset| {
+                const ptr = object.eh_frame_relocs_lookup.getPtr(fde_offset).?;
+                if (ptr.dead) continue; // already marked
+                if (!alive.contains(atom_index)) {
+                    // Mark dead and continue.
+                    ptr.dead = true;
+                } else {
+                    // Mark references live and continue.
+ try markEhFrameRecord(macho_file, object_id, atom_index, alive); + } + continue; + } + } + + const record_id = object.unwind_records_lookup.get(atom_index) orelse continue; + if (object.unwind_relocs_lookup[record_id].dead) continue; // already marked, nothing to do + if (!alive.contains(atom_index)) { + // Mark the record dead and continue. + object.unwind_relocs_lookup[record_id].dead = true; + if (object.eh_frame_records_lookup.get(atom_index)) |fde_offset| { + object.eh_frame_relocs_lookup.getPtr(fde_offset).?.dead = true; + } + continue; + } + + const record = unwind_records[record_id]; + if (UnwindInfo.UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) { + try markEhFrameRecord(macho_file, object_id, atom_index, alive); + } else { + if (UnwindInfo.getPersonalityFunctionReloc(macho_file, object_id, record_id)) |rel| { + const target = UnwindInfo.parseRelocTarget( + macho_file, + object_id, + rel, + mem.asBytes(&record), + @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))), + ); + const target_sym = macho_file.getSymbol(target); + if (!target_sym.undf()) { + const target_object = macho_file.objects.items[target.getFile().?]; + const target_atom_index = target_object.getAtomIndexForSymbol(target.sym_index).?; + markLive(macho_file, target_atom_index, alive); + } + } + + if (UnwindInfo.getLsdaReloc(macho_file, object_id, record_id)) |rel| { + const target = UnwindInfo.parseRelocTarget( + macho_file, + object_id, + rel, + mem.asBytes(&record), + @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))), + ); + const target_object = macho_file.objects.items[target.getFile().?]; + const target_atom_index = target_object.getAtomIndexForSymbol(target.sym_index).?; + markLive(macho_file, target_atom_index, alive); + } + } + } +} + +fn markEhFrameRecord(macho_file: *MachO, object_id: u32, atom_index: AtomIndex, alive: *AtomTable) !void { + const cpu_arch = macho_file.options.target.cpu_arch.?; + const object = &macho_file.objects.items[object_id]; + var it = object.getEhFrameRecordsIterator(); + + const fde_offset = object.eh_frame_records_lookup.get(atom_index).?; + it.seekTo(fde_offset); + const fde = (try it.next()).?; + + const cie_ptr = fde.getCiePointer(); + const cie_offset = fde_offset + 4 - cie_ptr; + it.seekTo(cie_offset); + const cie = (try it.next()).?; + + switch (cpu_arch) { + .aarch64 => { + // Mark FDE references which should include any referenced LSDA record + const relocs = eh_frame.getRelocs(macho_file, object_id, fde_offset); + for (relocs) |rel| { + const target = UnwindInfo.parseRelocTarget( + macho_file, + object_id, + rel, + fde.data, + @as(i32, @intCast(fde_offset)) + 4, + ); + const target_sym = macho_file.getSymbol(target); + if (!target_sym.undf()) blk: { + const target_object = macho_file.objects.items[target.getFile().?]; + const target_atom_index = target_object.getAtomIndexForSymbol(target.sym_index) orelse + break :blk; + markLive(macho_file, target_atom_index, alive); + } + } + }, + .x86_64 => { + const sect = object.getSourceSection(object.eh_frame_sect_id.?); + const lsda_ptr = try fde.getLsdaPointer(cie, .{ + .base_addr = sect.addr, + .base_offset = fde_offset, + }); + if (lsda_ptr) |lsda_address| { + // Mark LSDA record as live + const sym_index = object.getSymbolByAddress(lsda_address, null); + const target_atom_index = object.getAtomIndexForSymbol(sym_index).?; + markLive(macho_file, target_atom_index, alive); + } + }, + else => unreachable, + } + + // Mark CIE references which should include any referenced 
personalities + // that are defined locally. + if (cie.getPersonalityPointerReloc(macho_file, object_id, cie_offset)) |target| { + const target_sym = macho_file.getSymbol(target); + if (!target_sym.undf()) { + const target_object = macho_file.objects.items[target.getFile().?]; + const target_atom_index = target_object.getAtomIndexForSymbol(target.sym_index).?; + markLive(macho_file, target_atom_index, alive); + } + } +} + +fn prune(macho_file: *MachO, alive: AtomTable) void { + const tracy = trace(@src()); + defer tracy.end(); + + log.debug("pruning dead atoms", .{}); + for (macho_file.objects.items) |*object| { + var i: usize = 0; + while (i < object.atoms.items.len) { + const atom_index = object.atoms.items[i]; + if (alive.contains(atom_index)) { + i += 1; + continue; + } + + const atom = macho_file.getAtom(atom_index); + const sym_loc = atom.getSymbolWithLoc(); + + log.debug("prune(ATOM({d}, %{d}, {?d}))", .{ + atom_index, + sym_loc.sym_index, + sym_loc.getFile(), + }); + log.debug(" {s} in {s}", .{ macho_file.getSymbolName(sym_loc), object.name }); + + const sym = macho_file.getSymbolPtr(sym_loc); + const sect_id = sym.n_sect - 1; + var section = macho_file.sections.get(sect_id); + section.header.size -= atom.size; + + if (atom.prev_index) |prev_index| { + const prev = macho_file.getAtomPtr(prev_index); + prev.next_index = atom.next_index; + } else { + if (atom.next_index) |next_index| { + section.first_atom_index = next_index; + } + } + if (atom.next_index) |next_index| { + const next = macho_file.getAtomPtr(next_index); + next.prev_index = atom.prev_index; + } else { + if (atom.prev_index) |prev_index| { + section.last_atom_index = prev_index; + } else { + assert(section.header.size == 0); + section.first_atom_index = undefined; + section.last_atom_index = undefined; + } + } + + macho_file.sections.set(sect_id, section); + _ = object.atoms.swapRemove(i); + + sym.n_desc = MachO.N_DEAD; + + var inner_sym_it = Atom.getInnerSymbolsIterator(macho_file, atom_index); + while (inner_sym_it.next()) |inner| { + const inner_sym = macho_file.getSymbolPtr(inner); + inner_sym.n_desc = MachO.N_DEAD; + } + + if (Atom.getSectionAlias(macho_file, atom_index)) |alias| { + const alias_sym = macho_file.getSymbolPtr(alias); + alias_sym.n_desc = MachO.N_DEAD; + } + } + } +} diff --git a/src/archive/archive/zld/MachO/dyld_info/Rebase.zig b/src/archive/archive/zld/MachO/dyld_info/Rebase.zig new file mode 100644 index 000000000000..976ba0164097 --- /dev/null +++ b/src/archive/archive/zld/MachO/dyld_info/Rebase.zig @@ -0,0 +1,574 @@ +const Rebase = @This(); + +const std = @import("std"); +const assert = std.debug.assert; +const leb = std.leb; +const log = std.log.scoped(.dyld_info); +const macho = std.macho; +const testing = std.testing; + +const Allocator = std.mem.Allocator; + +entries: std.ArrayListUnmanaged(Entry) = .{}, +buffer: std.ArrayListUnmanaged(u8) = .{}, + +const Entry = struct { + offset: u64, + segment_id: u8, + + pub fn lessThan(ctx: void, entry: Entry, other: Entry) bool { + _ = ctx; + if (entry.segment_id == other.segment_id) { + return entry.offset < other.offset; + } + return entry.segment_id < other.segment_id; + } +}; + +pub fn deinit(rebase: *Rebase, gpa: Allocator) void { + rebase.entries.deinit(gpa); + rebase.buffer.deinit(gpa); +} + +pub fn size(rebase: Rebase) u64 { + return @as(u64, @intCast(rebase.buffer.items.len)); +} + +pub fn finalize(rebase: *Rebase, gpa: Allocator) !void { + if (rebase.entries.items.len == 0) return; + + const writer = rebase.buffer.writer(gpa); + + 
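+    // Entries are sorted by segment id and then by offset so that opcodes can be
+    // emitted one segment at a time with monotonically increasing addresses.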
std.sort.sort(Entry, rebase.entries.items, {}, Entry.lessThan); + + try setTypePointer(writer); + + var start: usize = 0; + var seg_id: ?u8 = null; + for (rebase.entries.items, 0..) |entry, i| { + if (seg_id != null and seg_id.? == entry.segment_id) continue; + try finalizeSegment(rebase.entries.items[start..i], writer); + seg_id = entry.segment_id; + start = i; + } + + try finalizeSegment(rebase.entries.items[start..], writer); + try done(writer); +} + +fn finalizeSegment(entries: []const Entry, writer: anytype) !void { + if (entries.len == 0) return; + + const segment_id = entries[0].segment_id; + var offset = entries[0].offset; + try setSegmentOffset(segment_id, offset, writer); + + var count: usize = 0; + var skip: u64 = 0; + var state: enum { + start, + times, + times_skip, + } = .times; + + var i: usize = 0; + while (i < entries.len) : (i += 1) { + log.debug("{x}, {d}, {x}, {s}", .{ offset, count, skip, @tagName(state) }); + const current_offset = entries[i].offset; + log.debug(" => {x}", .{current_offset}); + switch (state) { + .start => { + if (offset < current_offset) { + const delta = current_offset - offset; + try addAddr(delta, writer); + offset += delta; + } + state = .times; + offset += @sizeOf(u64); + count = 1; + }, + .times => { + const delta = current_offset - offset; + if (delta == 0) { + count += 1; + offset += @sizeOf(u64); + continue; + } + if (count == 1) { + state = .times_skip; + skip = delta; + offset += skip; + i -= 1; + } else { + try rebaseTimes(count, writer); + state = .start; + i -= 1; + } + }, + .times_skip => { + if (current_offset < offset) { + count -= 1; + if (count == 1) { + try rebaseAddAddr(skip, writer); + } else { + try rebaseTimesSkip(count, skip, writer); + } + state = .start; + offset = offset - (@sizeOf(u64) + skip); + i -= 2; + continue; + } + + const delta = current_offset - offset; + if (delta == 0) { + count += 1; + offset += @sizeOf(u64) + skip; + } else { + try rebaseTimesSkip(count, skip, writer); + state = .start; + i -= 1; + } + }, + } + } + + switch (state) { + .start => unreachable, + .times => { + try rebaseTimes(count, writer); + }, + .times_skip => { + try rebaseTimesSkip(count, skip, writer); + }, + } +} + +fn setTypePointer(writer: anytype) !void { + log.debug(">>> set type: {d}", .{macho.REBASE_TYPE_POINTER}); + try writer.writeByte(macho.REBASE_OPCODE_SET_TYPE_IMM | @as(u4, @truncate(macho.REBASE_TYPE_POINTER))); +} + +fn setSegmentOffset(segment_id: u8, offset: u64, writer: anytype) !void { + log.debug(">>> set segment: {d} and offset: {x}", .{ segment_id, offset }); + try writer.writeByte(macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @as(u4, @truncate(segment_id))); + try std.leb.writeULEB128(writer, offset); +} + +fn rebaseAddAddr(addr: u64, writer: anytype) !void { + log.debug(">>> rebase with add: {x}", .{addr}); + try writer.writeByte(macho.REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB); + try std.leb.writeULEB128(writer, addr); +} + +fn rebaseTimes(count: usize, writer: anytype) !void { + log.debug(">>> rebase with count: {d}", .{count}); + if (count <= 0xf) { + try writer.writeByte(macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | @as(u4, @truncate(count))); + } else { + try writer.writeByte(macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES); + try std.leb.writeULEB128(writer, count); + } +} + +fn rebaseTimesSkip(count: usize, skip: u64, writer: anytype) !void { + log.debug(">>> rebase with count: {d} and skip: {x}", .{ count, skip }); + try writer.writeByte(macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB); + try 
std.leb.writeULEB128(writer, count); + try std.leb.writeULEB128(writer, skip); +} + +fn addAddr(addr: u64, writer: anytype) !void { + log.debug(">>> add: {x}", .{addr}); + if (std.mem.isAligned(addr, @sizeOf(u64))) { + const imm = @divExact(addr, @sizeOf(u64)); + if (imm <= 0xf) { + try writer.writeByte(macho.REBASE_OPCODE_ADD_ADDR_IMM_SCALED | @as(u4, @truncate(imm))); + return; + } + } + try writer.writeByte(macho.REBASE_OPCODE_ADD_ADDR_ULEB); + try std.leb.writeULEB128(writer, addr); +} + +fn done(writer: anytype) !void { + log.debug(">>> done", .{}); + try writer.writeByte(macho.REBASE_OPCODE_DONE); +} + +pub fn write(rebase: Rebase, writer: anytype) !void { + if (rebase.size() == 0) return; + try writer.writeAll(rebase.buffer.items); +} + +test "rebase - no entries" { + const gpa = testing.allocator; + + var rebase = Rebase{}; + defer rebase.deinit(gpa); + + try rebase.finalize(gpa); + try testing.expectEqual(@as(u64, 0), rebase.size()); +} + +test "rebase - single entry" { + const gpa = testing.allocator; + + var rebase = Rebase{}; + defer rebase.deinit(gpa); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0x10, + }); + try rebase.finalize(gpa); + try testing.expectEqualSlices(u8, &[_]u8{ + macho.REBASE_OPCODE_SET_TYPE_IMM | macho.REBASE_TYPE_POINTER, + macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, + 0x10, + macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | 1, + macho.REBASE_OPCODE_DONE, + }, rebase.buffer.items); +} + +test "rebase - emitTimes - IMM" { + const gpa = testing.allocator; + + var rebase = Rebase{}; + defer rebase.deinit(gpa); + + var i: u64 = 0; + while (i < 10) : (i += 1) { + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = i * @sizeOf(u64), + }); + } + + try rebase.finalize(gpa); + + try testing.expectEqualSlices(u8, &[_]u8{ + macho.REBASE_OPCODE_SET_TYPE_IMM | macho.REBASE_TYPE_POINTER, + macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, + 0x0, + macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | 10, + macho.REBASE_OPCODE_DONE, + }, rebase.buffer.items); +} + +test "rebase - emitTimes - ULEB" { + const gpa = testing.allocator; + + var rebase = Rebase{}; + defer rebase.deinit(gpa); + + var i: u64 = 0; + while (i < 100) : (i += 1) { + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = i * @sizeOf(u64), + }); + } + + try rebase.finalize(gpa); + + try testing.expectEqualSlices(u8, &[_]u8{ + macho.REBASE_OPCODE_SET_TYPE_IMM | macho.REBASE_TYPE_POINTER, + macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, + 0x0, + macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES, + 0x64, + macho.REBASE_OPCODE_DONE, + }, rebase.buffer.items); +} + +test "rebase - emitTimes followed by addAddr followed by emitTimes" { + const gpa = testing.allocator; + + var rebase = Rebase{}; + defer rebase.deinit(gpa); + + var offset: u64 = 0; + var i: u64 = 0; + while (i < 15) : (i += 1) { + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = offset, + }); + offset += @sizeOf(u64); + } + + offset += @sizeOf(u64); + + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = offset, + }); + + try rebase.finalize(gpa); + + try testing.expectEqualSlices(u8, &[_]u8{ + macho.REBASE_OPCODE_SET_TYPE_IMM | macho.REBASE_TYPE_POINTER, + macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, + 0x0, + macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | 15, + macho.REBASE_OPCODE_ADD_ADDR_IMM_SCALED | 1, + macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | 1, + macho.REBASE_OPCODE_DONE, + }, rebase.buffer.items); +} + +test "rebase - emitTimesSkip" { + const gpa = testing.allocator; + 
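+    // 15 pointers spaced 16 bytes apart should collapse into a single
+    // DO_REBASE_ULEB_TIMES_SKIPPING_ULEB opcode with count 15 and skip 8.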
+ var rebase = Rebase{}; + defer rebase.deinit(gpa); + + var offset: u64 = 0; + var i: u64 = 0; + while (i < 15) : (i += 1) { + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = offset, + }); + offset += 2 * @sizeOf(u64); + } + + try rebase.finalize(gpa); + + try testing.expectEqualSlices(u8, &[_]u8{ + macho.REBASE_OPCODE_SET_TYPE_IMM | macho.REBASE_TYPE_POINTER, + macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, + 0x0, + macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB, + 0xf, + 0x8, + macho.REBASE_OPCODE_DONE, + }, rebase.buffer.items); +} + +test "rebase - complex" { + const gpa = testing.allocator; + + var rebase = Rebase{}; + defer rebase.deinit(gpa); + + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0x10, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0x40, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0x48, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0x50, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0x58, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0x70, + }); + try rebase.finalize(gpa); + + try testing.expectEqualSlices(u8, &[_]u8{ + macho.REBASE_OPCODE_SET_TYPE_IMM | macho.REBASE_TYPE_POINTER, + macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, + 0x0, + macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB, + 0x2, + 0x8, + macho.REBASE_OPCODE_ADD_ADDR_IMM_SCALED | 4, + macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | 4, + macho.REBASE_OPCODE_ADD_ADDR_IMM_SCALED | 2, + macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | 1, + macho.REBASE_OPCODE_DONE, + }, rebase.buffer.items); +} + +test "rebase - complex 2" { + const gpa = testing.allocator; + + var rebase = Rebase{}; + defer rebase.deinit(gpa); + + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0x10, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0x28, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0x48, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0x78, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0xb8, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 2, + .offset = 0x0, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 2, + .offset = 0x8, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 2, + .offset = 0x10, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 2, + .offset = 0x18, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 3, + .offset = 0x0, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 3, + .offset = 0x20, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 3, + .offset = 0x40, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 3, + .offset = 0x60, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 3, + .offset = 0x68, + }); + try rebase.finalize(gpa); + + try testing.expectEqualSlices(u8, &[_]u8{ + macho.REBASE_OPCODE_SET_TYPE_IMM | macho.REBASE_TYPE_POINTER, + macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, + 0x0, + macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB, + 0x2, + 0x8, + macho.REBASE_OPCODE_ADD_ADDR_IMM_SCALED | 1, + macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB, + 0x2, + 0x18, + macho.REBASE_OPCODE_ADD_ADDR_IMM_SCALED | 2, + 
macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB, + 0x2, + 0x38, + macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 2, + 0x0, + macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | 4, + macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 3, + 0x0, + macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB, + 0x3, + 0x18, + macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | 2, + macho.REBASE_OPCODE_DONE, + }, rebase.buffer.items); +} + +test "rebase - composite" { + const gpa = testing.allocator; + + var rebase = Rebase{}; + defer rebase.deinit(gpa); + + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0x8, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0x38, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0xa0, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0xa8, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0xb0, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0xc0, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0xc8, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0xd0, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0xd8, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0xe0, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0xe8, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0xf0, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0xf8, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0x108, + }); + try rebase.finalize(gpa); + + try testing.expectEqualSlices(u8, &[_]u8{ + macho.REBASE_OPCODE_SET_TYPE_IMM | macho.REBASE_TYPE_POINTER, + macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, + 0x8, + macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB, + 0x2, + 0x28, + macho.REBASE_OPCODE_ADD_ADDR_IMM_SCALED | 7, + macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | 3, + macho.REBASE_OPCODE_ADD_ADDR_IMM_SCALED | 1, + macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | 8, + macho.REBASE_OPCODE_ADD_ADDR_IMM_SCALED | 1, + macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | 1, + macho.REBASE_OPCODE_DONE, + }, rebase.buffer.items); +} diff --git a/src/archive/archive/zld/MachO/dyld_info/bind.zig b/src/archive/archive/zld/MachO/dyld_info/bind.zig new file mode 100644 index 000000000000..4847e64280a7 --- /dev/null +++ b/src/archive/archive/zld/MachO/dyld_info/bind.zig @@ -0,0 +1,740 @@ +const std = @import("std"); +const assert = std.debug.assert; +const leb = std.leb; +const log = std.log.scoped(.dyld_info); +const macho = std.macho; +const testing = std.testing; + +const Allocator = std.mem.Allocator; + +pub fn Bind(comptime Ctx: type, comptime Target: type) type { + return struct { + entries: std.ArrayListUnmanaged(Entry) = .{}, + buffer: std.ArrayListUnmanaged(u8) = .{}, + + const Self = @This(); + + const Entry = struct { + target: Target, + offset: u64, + segment_id: u8, + addend: i64, + + pub fn lessThan(ctx: Ctx, entry: Entry, other: Entry) bool { + if (entry.segment_id == other.segment_id) { + if (entry.target.eql(other.target)) { + return entry.offset < other.offset; + } + const entry_name = ctx.getSymbolName(entry.target); + const other_name = ctx.getSymbolName(other.target); + return std.mem.lessThan(u8, entry_name, other_name); + } + return entry.segment_id < other.segment_id; + } + }; + + pub fn deinit(self: *Self, gpa: Allocator) void { + self.entries.deinit(gpa); + self.buffer.deinit(gpa); 
+ } + + pub fn size(self: Self) u64 { + return @as(u64, @intCast(self.buffer.items.len)); + } + + pub fn finalize(self: *Self, gpa: Allocator, ctx: Ctx) !void { + if (self.entries.items.len == 0) return; + + const writer = self.buffer.writer(gpa); + + std.sort.sort(Entry, self.entries.items, ctx, Entry.lessThan); + + var start: usize = 0; + var seg_id: ?u8 = null; + for (self.entries.items, 0..) |entry, i| { + if (seg_id != null and seg_id.? == entry.segment_id) continue; + try finalizeSegment(self.entries.items[start..i], ctx, writer); + seg_id = entry.segment_id; + start = i; + } + + try finalizeSegment(self.entries.items[start..], ctx, writer); + try done(writer); + } + + fn finalizeSegment(entries: []const Entry, ctx: Ctx, writer: anytype) !void { + if (entries.len == 0) return; + + const seg_id = entries[0].segment_id; + try setSegmentOffset(seg_id, 0, writer); + + var offset: u64 = 0; + var addend: i64 = 0; + var count: usize = 0; + var skip: u64 = 0; + var target: ?Target = null; + + var state: enum { + start, + bind_single, + bind_times_skip, + } = .start; + + var i: usize = 0; + while (i < entries.len) : (i += 1) { + const current = entries[i]; + if (target == null or !target.?.eql(current.target)) { + switch (state) { + .start => {}, + .bind_single => try doBind(writer), + .bind_times_skip => try doBindTimesSkip(count, skip, writer), + } + state = .start; + target = current.target; + + const sym = ctx.getSymbol(current.target); + const name = ctx.getSymbolName(current.target); + const flags: u8 = if (sym.weakRef()) macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT else 0; + const ordinal = @divTrunc(@as(i16, @bitCast(sym.n_desc)), macho.N_SYMBOL_RESOLVER); + + try setSymbol(name, flags, writer); + try setTypePointer(writer); + try setDylibOrdinal(ordinal, writer); + + if (current.addend != addend) { + addend = current.addend; + try setAddend(addend, writer); + } + } + + log.debug("{x}, {d}, {x}, {?x}, {s}", .{ offset, count, skip, addend, @tagName(state) }); + log.debug(" => {x}", .{current.offset}); + switch (state) { + .start => { + if (current.offset < offset) { + try addAddr(@as(u64, @bitCast(@as(i64, @intCast(current.offset)) - @as(i64, @intCast(offset)))), writer); + offset = offset - (offset - current.offset); + } else if (current.offset > offset) { + const delta = current.offset - offset; + try addAddr(delta, writer); + offset += delta; + } + state = .bind_single; + offset += @sizeOf(u64); + count = 1; + }, + .bind_single => { + if (current.offset == offset) { + try doBind(writer); + state = .start; + } else if (current.offset > offset) { + const delta = current.offset - offset; + state = .bind_times_skip; + skip = @as(u64, @intCast(delta)); + offset += skip; + } else unreachable; + i -= 1; + }, + .bind_times_skip => { + if (current.offset < offset) { + count -= 1; + if (count == 1) { + try doBindAddAddr(skip, writer); + } else { + try doBindTimesSkip(count, skip, writer); + } + state = .start; + offset = offset - (@sizeOf(u64) + skip); + i -= 2; + } else if (current.offset == offset) { + count += 1; + offset += @sizeOf(u64) + skip; + } else { + try doBindTimesSkip(count, skip, writer); + state = .start; + i -= 1; + } + }, + } + } + + switch (state) { + .start => unreachable, + .bind_single => try doBind(writer), + .bind_times_skip => try doBindTimesSkip(count, skip, writer), + } + } + + pub fn write(self: Self, writer: anytype) !void { + if (self.size() == 0) return; + try writer.writeAll(self.buffer.items); + } + }; +} + +pub fn LazyBind(comptime Ctx: type, comptime Target: type) 
type { + return struct { + entries: std.ArrayListUnmanaged(Entry) = .{}, + buffer: std.ArrayListUnmanaged(u8) = .{}, + offsets: std.ArrayListUnmanaged(u32) = .{}, + + const Self = @This(); + + const Entry = struct { + target: Target, + offset: u64, + segment_id: u8, + addend: i64, + }; + + pub fn deinit(self: *Self, gpa: Allocator) void { + self.entries.deinit(gpa); + self.buffer.deinit(gpa); + self.offsets.deinit(gpa); + } + + pub fn size(self: Self) u64 { + return @as(u64, @intCast(self.buffer.items.len)); + } + + pub fn finalize(self: *Self, gpa: Allocator, ctx: Ctx) !void { + if (self.entries.items.len == 0) return; + + try self.offsets.ensureTotalCapacityPrecise(gpa, self.entries.items.len); + + var cwriter = std.io.countingWriter(self.buffer.writer(gpa)); + const writer = cwriter.writer(); + + var addend: i64 = 0; + + for (self.entries.items) |entry| { + self.offsets.appendAssumeCapacity(@as(u32, @intCast(cwriter.bytes_written))); + + const sym = ctx.getSymbol(entry.target); + const name = ctx.getSymbolName(entry.target); + const flags: u8 = if (sym.weakRef()) macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT else 0; + const ordinal = @divTrunc(@as(i16, @bitCast(sym.n_desc)), macho.N_SYMBOL_RESOLVER); + + try setSegmentOffset(entry.segment_id, entry.offset, writer); + try setSymbol(name, flags, writer); + try setDylibOrdinal(ordinal, writer); + + if (entry.addend != addend) { + try setAddend(entry.addend, writer); + addend = entry.addend; + } + + try doBind(writer); + try done(writer); + } + } + + pub fn write(self: Self, writer: anytype) !void { + if (self.size() == 0) return; + try writer.writeAll(self.buffer.items); + } + }; +} + +fn setSegmentOffset(segment_id: u8, offset: u64, writer: anytype) !void { + log.debug(">>> set segment: {d} and offset: {x}", .{ segment_id, offset }); + try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @as(u4, @truncate(segment_id))); + try std.leb.writeULEB128(writer, offset); +} + +fn setSymbol(name: []const u8, flags: u8, writer: anytype) !void { + log.debug(">>> set symbol: {s} with flags: {x}", .{ name, flags }); + try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | @as(u4, @truncate(flags))); + try writer.writeAll(name); + try writer.writeByte(0); +} + +fn setTypePointer(writer: anytype) !void { + log.debug(">>> set type: {d}", .{macho.BIND_TYPE_POINTER}); + try writer.writeByte(macho.BIND_OPCODE_SET_TYPE_IMM | @as(u4, @truncate(macho.BIND_TYPE_POINTER))); +} + +fn setDylibOrdinal(ordinal: i16, writer: anytype) !void { + if (ordinal <= 0) { + switch (ordinal) { + macho.BIND_SPECIAL_DYLIB_SELF, + macho.BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE, + macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP, + => {}, + else => unreachable, // Invalid dylib special binding + } + log.debug(">>> set dylib special: {d}", .{ordinal}); + const cast = @as(u16, @bitCast(ordinal)); + try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @as(u4, @truncate(cast))); + } else { + const cast = @as(u16, @bitCast(ordinal)); + log.debug(">>> set dylib ordinal: {d}", .{ordinal}); + if (cast <= 0xf) { + try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @as(u4, @truncate(cast))); + } else { + try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); + try std.leb.writeULEB128(writer, cast); + } + } +} + +fn setAddend(addend: i64, writer: anytype) !void { + log.debug(">>> set addend: {x}", .{addend}); + try writer.writeByte(macho.BIND_OPCODE_SET_ADDEND_SLEB); + try std.leb.writeILEB128(writer, addend); +} + +fn doBind(writer: anytype) !void { + 
log.debug(">>> bind", .{}); + try writer.writeByte(macho.BIND_OPCODE_DO_BIND); +} + +fn doBindAddAddr(addr: u64, writer: anytype) !void { + log.debug(">>> bind with add: {x}", .{addr}); + if (std.mem.isAligned(addr, @sizeOf(u64))) { + const imm = @divExact(addr, @sizeOf(u64)); + if (imm <= 0xf) { + try writer.writeByte( + macho.BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED | @as(u4, @truncate(imm)), + ); + return; + } + } + try writer.writeByte(macho.BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB); + try std.leb.writeULEB128(writer, addr); +} + +fn doBindTimesSkip(count: usize, skip: u64, writer: anytype) !void { + log.debug(">>> bind with count: {d} and skip: {x}", .{ count, skip }); + try writer.writeByte(macho.BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB); + try std.leb.writeULEB128(writer, count); + try std.leb.writeULEB128(writer, skip); +} + +fn addAddr(addr: u64, writer: anytype) !void { + log.debug(">>> add: {x}", .{addr}); + try writer.writeByte(macho.BIND_OPCODE_ADD_ADDR_ULEB); + try std.leb.writeULEB128(writer, addr); +} + +fn done(writer: anytype) !void { + log.debug(">>> done", .{}); + try writer.writeByte(macho.BIND_OPCODE_DONE); +} + +const TestContext = struct { + symbols: std.ArrayListUnmanaged(macho.nlist_64) = .{}, + strtab: std.ArrayListUnmanaged(u8) = .{}, + + const Target = struct { + index: u32, + + fn eql(this: Target, other: Target) bool { + return this.index == other.index; + } + }; + + fn deinit(ctx: *TestContext, gpa: Allocator) void { + ctx.symbols.deinit(gpa); + ctx.strtab.deinit(gpa); + } + + fn addSymbol(ctx: *TestContext, gpa: Allocator, name: []const u8, ordinal: i16, flags: u16) !void { + const n_strx = try ctx.addString(gpa, name); + var n_desc = @as(u16, @bitCast(ordinal * macho.N_SYMBOL_RESOLVER)); + n_desc |= flags; + try ctx.symbols.append(gpa, .{ + .n_value = 0, + .n_strx = n_strx, + .n_desc = n_desc, + .n_type = macho.N_EXT, + .n_sect = 0, + }); + } + + fn addString(ctx: *TestContext, gpa: Allocator, name: []const u8) !u32 { + const n_strx = @as(u32, @intCast(ctx.strtab.items.len)); + try ctx.strtab.appendSlice(gpa, name); + try ctx.strtab.append(gpa, 0); + return n_strx; + } + + fn getSymbol(ctx: TestContext, target: Target) macho.nlist_64 { + return ctx.symbols.items[target.index]; + } + + fn getSymbolName(ctx: TestContext, target: Target) []const u8 { + const sym = ctx.getSymbol(target); + assert(sym.n_strx < ctx.strtab.items.len); + return std.mem.sliceTo(@as([*:0]const u8, @ptrCast(ctx.strtab.items.ptr + sym.n_strx)), 0); + } +}; + +fn generateTestContext() !TestContext { + const gpa = testing.allocator; + var ctx = TestContext{}; + try ctx.addSymbol(gpa, "_import_1", 1, 0); + try ctx.addSymbol(gpa, "_import_2", 1, 0); + try ctx.addSymbol(gpa, "_import_3", 1, 0); + try ctx.addSymbol(gpa, "_import_4", 2, 0); + try ctx.addSymbol(gpa, "_import_5_weak", 2, macho.N_WEAK_REF); + try ctx.addSymbol(gpa, "_import_6", 2, 0); + return ctx; +} + +test "bind - no entries" { + const gpa = testing.allocator; + + var test_context = try generateTestContext(); + defer test_context.deinit(gpa); + + var bind = Bind(TestContext, TestContext.Target){}; + defer bind.deinit(gpa); + + try bind.finalize(gpa, test_context); + try testing.expectEqual(@as(u64, 0), bind.size()); +} + +test "bind - single entry" { + const gpa = testing.allocator; + + var test_context = try generateTestContext(); + defer test_context.deinit(gpa); + + var bind = Bind(TestContext, TestContext.Target){}; + defer bind.deinit(gpa); + + try bind.entries.append(gpa, .{ + .offset = 0x10, + .segment_id = 1, + .target = 
TestContext.Target{ .index = 0 }, + .addend = 0, + }); + try bind.finalize(gpa, test_context); + try testing.expectEqualSlices(u8, &[_]u8{ + macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, + 0x0, + macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0, + 0x5f, + 0x69, + 0x6d, + 0x70, + 0x6f, + 0x72, + 0x74, + 0x5f, + 0x31, + 0x0, + macho.BIND_OPCODE_SET_TYPE_IMM | 1, + macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1, + macho.BIND_OPCODE_ADD_ADDR_ULEB, + 0x10, + macho.BIND_OPCODE_DO_BIND, + macho.BIND_OPCODE_DONE, + }, bind.buffer.items); +} + +test "bind - multiple occurrences within the same segment" { + const gpa = testing.allocator; + + var test_context = try generateTestContext(); + defer test_context.deinit(gpa); + + var bind = Bind(TestContext, TestContext.Target){}; + defer bind.deinit(gpa); + + try bind.entries.append(gpa, .{ + .offset = 0x10, + .segment_id = 1, + .target = TestContext.Target{ .index = 0 }, + .addend = 0, + }); + try bind.entries.append(gpa, .{ + .offset = 0x18, + .segment_id = 1, + .target = TestContext.Target{ .index = 0 }, + .addend = 0, + }); + try bind.entries.append(gpa, .{ + .offset = 0x20, + .segment_id = 1, + .target = TestContext.Target{ .index = 0 }, + .addend = 0, + }); + try bind.entries.append(gpa, .{ + .offset = 0x28, + .segment_id = 1, + .target = TestContext.Target{ .index = 0 }, + .addend = 0, + }); + + try bind.finalize(gpa, test_context); + try testing.expectEqualSlices(u8, &[_]u8{ + macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, + 0x0, + macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0, + 0x5f, + 0x69, + 0x6d, + 0x70, + 0x6f, + 0x72, + 0x74, + 0x5f, + 0x31, + 0x0, + macho.BIND_OPCODE_SET_TYPE_IMM | 1, + macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1, + macho.BIND_OPCODE_ADD_ADDR_ULEB, + 0x10, + macho.BIND_OPCODE_DO_BIND, + macho.BIND_OPCODE_DO_BIND, + macho.BIND_OPCODE_DO_BIND, + macho.BIND_OPCODE_DO_BIND, + macho.BIND_OPCODE_DONE, + }, bind.buffer.items); +} + +test "bind - multiple occurrences with skip and addend" { + const gpa = testing.allocator; + + var test_context = try generateTestContext(); + defer test_context.deinit(gpa); + + var bind = Bind(TestContext, TestContext.Target){}; + defer bind.deinit(gpa); + + try bind.entries.append(gpa, .{ + .offset = 0x0, + .segment_id = 1, + .target = TestContext.Target{ .index = 0 }, + .addend = 0x10, + }); + try bind.entries.append(gpa, .{ + .offset = 0x10, + .segment_id = 1, + .target = TestContext.Target{ .index = 0 }, + .addend = 0x10, + }); + try bind.entries.append(gpa, .{ + .offset = 0x20, + .segment_id = 1, + .target = TestContext.Target{ .index = 0 }, + .addend = 0x10, + }); + try bind.entries.append(gpa, .{ + .offset = 0x30, + .segment_id = 1, + .target = TestContext.Target{ .index = 0 }, + .addend = 0x10, + }); + + try bind.finalize(gpa, test_context); + try testing.expectEqualSlices(u8, &[_]u8{ + macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, + 0x0, + macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0, + 0x5f, + 0x69, + 0x6d, + 0x70, + 0x6f, + 0x72, + 0x74, + 0x5f, + 0x31, + 0x0, + macho.BIND_OPCODE_SET_TYPE_IMM | 1, + macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1, + macho.BIND_OPCODE_SET_ADDEND_SLEB, + 0x10, + macho.BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB, + 0x4, + 0x8, + macho.BIND_OPCODE_DONE, + }, bind.buffer.items); +} + +test "bind - complex" { + const gpa = testing.allocator; + + var test_context = try generateTestContext(); + defer test_context.deinit(gpa); + + var bind = Bind(TestContext, TestContext.Target){}; + defer bind.deinit(gpa); + + try bind.entries.append(gpa, 
.{ + .offset = 0x58, + .segment_id = 1, + .target = TestContext.Target{ .index = 0 }, + .addend = 0, + }); + try bind.entries.append(gpa, .{ + .offset = 0x100, + .segment_id = 1, + .target = TestContext.Target{ .index = 1 }, + .addend = 0x10, + }); + try bind.entries.append(gpa, .{ + .offset = 0x110, + .segment_id = 1, + .target = TestContext.Target{ .index = 1 }, + .addend = 0x10, + }); + try bind.entries.append(gpa, .{ + .offset = 0x130, + .segment_id = 1, + .target = TestContext.Target{ .index = 1 }, + .addend = 0x10, + }); + try bind.entries.append(gpa, .{ + .offset = 0x140, + .segment_id = 1, + .target = TestContext.Target{ .index = 1 }, + .addend = 0x10, + }); + try bind.entries.append(gpa, .{ + .offset = 0x148, + .segment_id = 1, + .target = TestContext.Target{ .index = 2 }, + .addend = 0, + }); + + try bind.finalize(gpa, test_context); + try testing.expectEqualSlices(u8, &[_]u8{ + macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, + 0x0, + macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0, + 0x5f, + 0x69, + 0x6d, + 0x70, + 0x6f, + 0x72, + 0x74, + 0x5f, + 0x31, + 0x0, + macho.BIND_OPCODE_SET_TYPE_IMM | 1, + macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1, + macho.BIND_OPCODE_ADD_ADDR_ULEB, + 0x58, + macho.BIND_OPCODE_DO_BIND, + macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0, + 0x5f, + 0x69, + 0x6d, + 0x70, + 0x6f, + 0x72, + 0x74, + 0x5f, + 0x32, + 0x0, + macho.BIND_OPCODE_SET_TYPE_IMM | 1, + macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1, + macho.BIND_OPCODE_SET_ADDEND_SLEB, + 0x10, + macho.BIND_OPCODE_ADD_ADDR_ULEB, + 0xa0, + 0x1, + macho.BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB, + 0x2, + 0x8, + macho.BIND_OPCODE_ADD_ADDR_ULEB, + 0x10, + macho.BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB, + 0x2, + 0x8, + macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0, + 0x5f, + 0x69, + 0x6d, + 0x70, + 0x6f, + 0x72, + 0x74, + 0x5f, + 0x33, + 0x0, + macho.BIND_OPCODE_SET_TYPE_IMM | 1, + macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1, + macho.BIND_OPCODE_SET_ADDEND_SLEB, + 0x0, + macho.BIND_OPCODE_ADD_ADDR_ULEB, + 0xf8, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0x1, + macho.BIND_OPCODE_DO_BIND, + macho.BIND_OPCODE_DONE, + }, bind.buffer.items); +} + +test "lazy bind" { + const gpa = testing.allocator; + + var test_context = try generateTestContext(); + defer test_context.deinit(gpa); + + var bind = LazyBind(TestContext, TestContext.Target){}; + defer bind.deinit(gpa); + + try bind.entries.append(gpa, .{ + .offset = 0x10, + .segment_id = 1, + .target = TestContext.Target{ .index = 0 }, + .addend = 0, + }); + try bind.entries.append(gpa, .{ + .offset = 0x20, + .segment_id = 2, + .target = TestContext.Target{ .index = 1 }, + .addend = 0x10, + }); + + try bind.finalize(gpa, test_context); + try testing.expectEqualSlices(u8, &[_]u8{ + macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, + 0x10, + macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0, + 0x5f, + 0x69, + 0x6d, + 0x70, + 0x6f, + 0x72, + 0x74, + 0x5f, + 0x31, + 0x0, + macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1, + macho.BIND_OPCODE_DO_BIND, + macho.BIND_OPCODE_DONE, + macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 2, + 0x20, + macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0, + 0x5f, + 0x69, + 0x6d, + 0x70, + 0x6f, + 0x72, + 0x74, + 0x5f, + 0x32, + 0x0, + macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1, + macho.BIND_OPCODE_SET_ADDEND_SLEB, + 0x10, + macho.BIND_OPCODE_DO_BIND, + macho.BIND_OPCODE_DONE, + }, bind.buffer.items); +} diff --git a/src/archive/archive/zld/MachO/eh_frame.zig 
b/src/archive/archive/zld/MachO/eh_frame.zig new file mode 100644 index 000000000000..59b797caaa0e --- /dev/null +++ b/src/archive/archive/zld/MachO/eh_frame.zig @@ -0,0 +1,621 @@ +const std = @import("std"); +const assert = std.debug.assert; +const macho = std.macho; +const math = std.math; +const mem = std.mem; +const leb = std.leb; +const log = std.log.scoped(.eh_frame); + +const Allocator = mem.Allocator; +const AtomIndex = MachO.AtomIndex; +const Atom = @import("Atom.zig"); +const MachO = @import("../MachO.zig"); +const UnwindInfo = @import("UnwindInfo.zig"); + +pub fn scanRelocs(macho_file: *MachO) !void { + const gpa = macho_file.base.allocator; + + for (macho_file.objects.items, 0..) |*object, object_id| { + var cies = std.AutoHashMap(u32, void).init(gpa); + defer cies.deinit(); + + var it = object.getEhFrameRecordsIterator(); + + for (object.exec_atoms.items) |atom_index| { + const fde_offset = object.eh_frame_records_lookup.get(atom_index) orelse continue; + if (object.eh_frame_relocs_lookup.get(fde_offset).?.dead) continue; + it.seekTo(fde_offset); + const fde = (try it.next()).?; + + const cie_ptr = fde.getCiePointer(); + const cie_offset = fde_offset + 4 - cie_ptr; + + if (!cies.contains(cie_offset)) { + try cies.putNoClobber(cie_offset, {}); + it.seekTo(cie_offset); + const cie = (try it.next()).?; + try cie.scanRelocs(macho_file, @as(u32, @intCast(object_id)), cie_offset); + } + } + } +} + +pub fn calcSectionSize(macho_file: *MachO, unwind_info: *const UnwindInfo) !void { + const sect_id = macho_file.getSectionByName("__TEXT", "__eh_frame") orelse return; + const sect = &macho_file.sections.items(.header)[sect_id]; + sect.@"align" = 3; + sect.size = 0; + + const cpu_arch = macho_file.options.target.cpu_arch.?; + const gpa = macho_file.base.allocator; + var size: u32 = 0; + + for (macho_file.objects.items) |*object| { + var cies = std.AutoHashMap(u32, u32).init(gpa); + defer cies.deinit(); + + var eh_it = object.getEhFrameRecordsIterator(); + + for (object.exec_atoms.items) |atom_index| { + const fde_record_offset = object.eh_frame_records_lookup.get(atom_index) orelse continue; + if (object.eh_frame_relocs_lookup.get(fde_record_offset).?.dead) continue; + + const record_id = unwind_info.records_lookup.get(atom_index) orelse continue; + const record = unwind_info.records.items[record_id]; + + // TODO skip this check if no __compact_unwind is present + const is_dwarf = UnwindInfo.UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch); + if (!is_dwarf) continue; + + eh_it.seekTo(fde_record_offset); + const source_fde_record = (try eh_it.next()).?; + + const cie_ptr = source_fde_record.getCiePointer(); + const cie_offset = fde_record_offset + 4 - cie_ptr; + + const gop = try cies.getOrPut(cie_offset); + if (!gop.found_existing) { + eh_it.seekTo(cie_offset); + const source_cie_record = (try eh_it.next()).?; + gop.value_ptr.* = size; + size += source_cie_record.getSize(); + } + + size += source_fde_record.getSize(); + } + } + + sect.size = size; +} + +pub fn write(macho_file: *MachO, unwind_info: *UnwindInfo) !void { + const sect_id = macho_file.getSectionByName("__TEXT", "__eh_frame") orelse return; + const sect = macho_file.sections.items(.header)[sect_id]; + const seg_id = macho_file.sections.items(.segment_index)[sect_id]; + const seg = macho_file.segments.items[seg_id]; + + const cpu_arch = macho_file.options.target.cpu_arch.?; + + const gpa = macho_file.base.allocator; + var eh_records = std.AutoArrayHashMap(u32, EhFrameRecord(true)).init(gpa); + defer { + for 
(eh_records.values()) |*rec| { + rec.deinit(gpa); + } + eh_records.deinit(); + } + + var eh_frame_offset: u32 = 0; + + for (macho_file.objects.items, 0..) |*object, object_id| { + try eh_records.ensureUnusedCapacity(2 * @as(u32, @intCast(object.exec_atoms.items.len))); + + var cies = std.AutoHashMap(u32, u32).init(gpa); + defer cies.deinit(); + + var eh_it = object.getEhFrameRecordsIterator(); + + for (object.exec_atoms.items) |atom_index| { + const fde_record_offset = object.eh_frame_records_lookup.get(atom_index) orelse continue; + if (object.eh_frame_relocs_lookup.get(fde_record_offset).?.dead) continue; + + const record_id = unwind_info.records_lookup.get(atom_index) orelse continue; + const record = &unwind_info.records.items[record_id]; + + // TODO skip this check if no __compact_unwind is present + const is_dwarf = UnwindInfo.UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch); + if (!is_dwarf) continue; + + eh_it.seekTo(fde_record_offset); + const source_fde_record = (try eh_it.next()).?; + + const cie_ptr = source_fde_record.getCiePointer(); + const cie_offset = fde_record_offset + 4 - cie_ptr; + + const gop = try cies.getOrPut(cie_offset); + if (!gop.found_existing) { + eh_it.seekTo(cie_offset); + const source_cie_record = (try eh_it.next()).?; + var cie_record = try source_cie_record.toOwned(gpa); + try cie_record.relocate(macho_file, @as(u32, @intCast(object_id)), .{ + .source_offset = cie_offset, + .out_offset = eh_frame_offset, + .sect_addr = sect.addr, + }); + eh_records.putAssumeCapacityNoClobber(eh_frame_offset, cie_record); + gop.value_ptr.* = eh_frame_offset; + eh_frame_offset += cie_record.getSize(); + } + + var fde_record = try source_fde_record.toOwned(gpa); + fde_record.setCiePointer(eh_frame_offset + 4 - gop.value_ptr.*); + try fde_record.relocate(macho_file, @as(u32, @intCast(object_id)), .{ + .source_offset = fde_record_offset, + .out_offset = eh_frame_offset, + .sect_addr = sect.addr, + }); + + switch (cpu_arch) { + .aarch64 => {}, // relocs take care of LSDA pointers + .x86_64 => { + // We need to relocate target symbol address ourselves. + const atom = macho_file.getAtom(atom_index); + const atom_sym = macho_file.getSymbol(atom.getSymbolWithLoc()); + try fde_record.setTargetSymbolAddress(atom_sym.n_value, .{ + .base_addr = sect.addr, + .base_offset = eh_frame_offset, + }); + + // We need to parse LSDA pointer and relocate ourselves. 
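+                    // (The LSDA, or Language-Specific Data Area, is the per-function
+                    // exception-handling table that the personality routine consults
+                    // during unwinding; the pointer to it stored in the FDE has to be
+                    // rewritten to the symbol's final address in the output file.)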
+ const cie_record = eh_records.get( + eh_frame_offset + 4 - fde_record.getCiePointer(), + ).?; + const eh_frame_sect = object.getSourceSection(object.eh_frame_sect_id.?); + const source_lsda_ptr = try fde_record.getLsdaPointer(cie_record, .{ + .base_addr = eh_frame_sect.addr, + .base_offset = fde_record_offset, + }); + if (source_lsda_ptr) |ptr| { + const sym_index = object.getSymbolByAddress(ptr, null); + const sym = object.symtab[sym_index]; + try fde_record.setLsdaPointer(cie_record, sym.n_value, .{ + .base_addr = sect.addr, + .base_offset = eh_frame_offset, + }); + } + }, + else => unreachable, + } + + eh_records.putAssumeCapacityNoClobber(eh_frame_offset, fde_record); + + UnwindInfo.UnwindEncoding.setDwarfSectionOffset( + &record.compactUnwindEncoding, + cpu_arch, + @as(u24, @intCast(eh_frame_offset)), + ); + + const cie_record = eh_records.get( + eh_frame_offset + 4 - fde_record.getCiePointer(), + ).?; + const lsda_ptr = try fde_record.getLsdaPointer(cie_record, .{ + .base_addr = sect.addr, + .base_offset = eh_frame_offset, + }); + if (lsda_ptr) |ptr| { + record.lsda = ptr - seg.vmaddr; + } + + eh_frame_offset += fde_record.getSize(); + } + } + + var buffer = std.ArrayList(u8).init(gpa); + defer buffer.deinit(); + const writer = buffer.writer(); + + for (eh_records.values()) |record| { + try writer.writeIntLittle(u32, record.size); + try buffer.appendSlice(record.data); + } + + try macho_file.base.file.pwriteAll(buffer.items, sect.offset); +} +const EhFrameRecordTag = enum { cie, fde }; + +pub fn EhFrameRecord(comptime is_mutable: bool) type { + return struct { + tag: EhFrameRecordTag, + size: u32, + data: if (is_mutable) []u8 else []const u8, + + const Record = @This(); + + pub fn deinit(rec: *Record, gpa: Allocator) void { + comptime assert(is_mutable); + gpa.free(rec.data); + } + + pub fn toOwned(rec: Record, gpa: Allocator) Allocator.Error!EhFrameRecord(true) { + const data = try gpa.dupe(u8, rec.data); + return EhFrameRecord(true){ + .tag = rec.tag, + .size = rec.size, + .data = data, + }; + } + + pub inline fn getSize(rec: Record) u32 { + return 4 + rec.size; + } + + pub fn scanRelocs( + rec: Record, + macho_file: *MachO, + object_id: u32, + source_offset: u32, + ) !void { + if (rec.getPersonalityPointerReloc(macho_file, object_id, source_offset)) |target| { + try Atom.addGotEntry(macho_file, target); + } + } + + pub fn getTargetSymbolAddress(rec: Record, ctx: struct { + base_addr: u64, + base_offset: u64, + }) u64 { + assert(rec.tag == .fde); + const addend = mem.readIntLittle(i64, rec.data[4..][0..8]); + return @as(u64, @intCast(@as(i64, @intCast(ctx.base_addr + ctx.base_offset + 8)) + addend)); + } + + pub fn setTargetSymbolAddress(rec: *Record, value: u64, ctx: struct { + base_addr: u64, + base_offset: u64, + }) !void { + assert(rec.tag == .fde); + const addend = @as(i64, @intCast(value)) - @as(i64, @intCast(ctx.base_addr + ctx.base_offset + 8)); + mem.writeIntLittle(i64, rec.data[4..][0..8], addend); + } + + pub fn getPersonalityPointerReloc( + rec: Record, + macho_file: *MachO, + object_id: u32, + source_offset: u32, + ) ?MachO.SymbolWithLoc { + const cpu_arch = macho_file.options.target.cpu_arch.?; + const relocs = getRelocs(macho_file, object_id, source_offset); + for (relocs) |rel| { + switch (cpu_arch) { + .aarch64 => { + const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); + switch (rel_type) { + .ARM64_RELOC_SUBTRACTOR, + .ARM64_RELOC_UNSIGNED, + => continue, + .ARM64_RELOC_POINTER_TO_GOT => {}, + else => unreachable, + } + }, + .x86_64 => { + 
const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); + switch (rel_type) { + .X86_64_RELOC_GOT => {}, + else => unreachable, + } + }, + else => unreachable, + } + const target = UnwindInfo.parseRelocTarget( + macho_file, + object_id, + rel, + rec.data, + @as(i32, @intCast(source_offset)) + 4, + ); + return target; + } + return null; + } + + pub fn relocate(rec: *Record, macho_file: *MachO, object_id: u32, ctx: struct { + source_offset: u32, + out_offset: u32, + sect_addr: u64, + }) !void { + comptime assert(is_mutable); + + const cpu_arch = macho_file.options.target.cpu_arch.?; + const relocs = getRelocs(macho_file, object_id, ctx.source_offset); + + for (relocs) |rel| { + const target = UnwindInfo.parseRelocTarget( + macho_file, + object_id, + rel, + rec.data, + @as(i32, @intCast(ctx.source_offset)) + 4, + ); + const rel_offset = @as(u32, @intCast(rel.r_address - @as(i32, @intCast(ctx.source_offset)) - 4)); + const source_addr = ctx.sect_addr + rel_offset + ctx.out_offset + 4; + + switch (cpu_arch) { + .aarch64 => { + const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); + switch (rel_type) { + .ARM64_RELOC_SUBTRACTOR => { + // Address of the __eh_frame in the source object file + }, + .ARM64_RELOC_POINTER_TO_GOT => { + const target_addr = try Atom.getRelocTargetAddress(macho_file, target, true, false); + const result = math.cast(i32, @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr))) orelse + return error.Overflow; + mem.writeIntLittle(i32, rec.data[rel_offset..][0..4], result); + }, + .ARM64_RELOC_UNSIGNED => { + assert(rel.r_extern == 1); + const target_addr = try Atom.getRelocTargetAddress(macho_file, target, false, false); + const result = @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr)); + mem.writeIntLittle(i64, rec.data[rel_offset..][0..8], @as(i64, @intCast(result))); + }, + else => unreachable, + } + }, + .x86_64 => { + const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); + switch (rel_type) { + .X86_64_RELOC_GOT => { + const target_addr = try Atom.getRelocTargetAddress(macho_file, target, true, false); + const addend = mem.readIntLittle(i32, rec.data[rel_offset..][0..4]); + const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend)); + const disp = try Atom.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0); + mem.writeIntLittle(i32, rec.data[rel_offset..][0..4], disp); + }, + else => unreachable, + } + }, + else => unreachable, + } + } + } + + pub fn getCiePointer(rec: Record) u32 { + assert(rec.tag == .fde); + return mem.readIntLittle(u32, rec.data[0..4]); + } + + pub fn setCiePointer(rec: *Record, ptr: u32) void { + assert(rec.tag == .fde); + mem.writeIntLittle(u32, rec.data[0..4], ptr); + } + + pub fn getAugmentationString(rec: Record) []const u8 { + assert(rec.tag == .cie); + return mem.sliceTo(@as([*:0]const u8, @ptrCast(rec.data.ptr + 5)), 0); + } + + pub fn getPersonalityPointer(rec: Record, ctx: struct { + base_addr: u64, + base_offset: u64, + }) !?u64 { + assert(rec.tag == .cie); + const aug_str = rec.getAugmentationString(); + + var stream = std.io.fixedBufferStream(rec.data[9 + aug_str.len ..]); + var creader = std.io.countingReader(stream.reader()); + const reader = creader.reader(); + + for (aug_str, 0..) 
|ch, i| switch (ch) { + 'z' => if (i > 0) { + return error.MalformedAugmentationString; + } else { + _ = try leb.readULEB128(u64, reader); + }, + 'R' => { + _ = try reader.readByte(); + }, + 'P' => { + const enc = try reader.readByte(); + const offset = ctx.base_offset + 13 + aug_str.len + creader.bytes_read; + const ptr = try getEncodedPointer(enc, @as(i64, @intCast(ctx.base_addr + offset)), reader); + return ptr; + }, + 'L' => { + _ = try reader.readByte(); + }, + 'S', 'B', 'G' => {}, + else => return error.UnknownAugmentationStringValue, + }; + + return null; + } + + pub fn getLsdaPointer(rec: Record, cie: Record, ctx: struct { + base_addr: u64, + base_offset: u64, + }) !?u64 { + assert(rec.tag == .fde); + const enc = (try cie.getLsdaEncoding()) orelse return null; + var stream = std.io.fixedBufferStream(rec.data[20..]); + const reader = stream.reader(); + _ = try reader.readByte(); + const offset = ctx.base_offset + 25; + const ptr = try getEncodedPointer(enc, @as(i64, @intCast(ctx.base_addr + offset)), reader); + return ptr; + } + + pub fn setLsdaPointer(rec: *Record, cie: Record, value: u64, ctx: struct { + base_addr: u64, + base_offset: u64, + }) !void { + assert(rec.tag == .fde); + const enc = (try cie.getLsdaEncoding()) orelse unreachable; + var stream = std.io.fixedBufferStream(rec.data[21..]); + const writer = stream.writer(); + const offset = ctx.base_offset + 25; + try setEncodedPointer(enc, @as(i64, @intCast(ctx.base_addr + offset)), value, writer); + } + + fn getLsdaEncoding(rec: Record) !?u8 { + assert(rec.tag == .cie); + const aug_str = rec.getAugmentationString(); + + const base_offset = 9 + aug_str.len; + var stream = std.io.fixedBufferStream(rec.data[base_offset..]); + var creader = std.io.countingReader(stream.reader()); + const reader = creader.reader(); + + for (aug_str, 0..) 
|ch, i| switch (ch) { + 'z' => if (i > 0) { + return error.MalformedAugmentationString; + } else { + _ = try leb.readULEB128(u64, reader); + }, + 'R' => { + _ = try reader.readByte(); + }, + 'P' => { + const enc = try reader.readByte(); + _ = try getEncodedPointer(enc, 0, reader); + }, + 'L' => { + const enc = try reader.readByte(); + return enc; + }, + 'S', 'B', 'G' => {}, + else => return error.UnknownAugmentationStringValue, + }; + + return null; + } + + fn getEncodedPointer(enc: u8, pcrel_offset: i64, reader: anytype) !?u64 { + if (enc == EH_PE.omit) return null; + + var ptr: i64 = switch (enc & 0x0F) { + EH_PE.absptr => @as(i64, @bitCast(try reader.readIntLittle(u64))), + EH_PE.udata2 => @as(i16, @bitCast(try reader.readIntLittle(u16))), + EH_PE.udata4 => @as(i32, @bitCast(try reader.readIntLittle(u32))), + EH_PE.udata8 => @as(i64, @bitCast(try reader.readIntLittle(u64))), + EH_PE.uleb128 => @as(i64, @bitCast(try leb.readULEB128(u64, reader))), + EH_PE.sdata2 => try reader.readIntLittle(i16), + EH_PE.sdata4 => try reader.readIntLittle(i32), + EH_PE.sdata8 => try reader.readIntLittle(i64), + EH_PE.sleb128 => try leb.readILEB128(i64, reader), + else => return null, + }; + + switch (enc & 0x70) { + EH_PE.absptr => {}, + EH_PE.pcrel => ptr += pcrel_offset, + EH_PE.datarel, + EH_PE.textrel, + EH_PE.funcrel, + EH_PE.aligned, + => return null, + else => return null, + } + + return @as(u64, @bitCast(ptr)); + } + + fn setEncodedPointer(enc: u8, pcrel_offset: i64, value: u64, writer: anytype) !void { + if (enc == EH_PE.omit) return; + + var actual = @as(i64, @intCast(value)); + + switch (enc & 0x70) { + EH_PE.absptr => {}, + EH_PE.pcrel => actual -= pcrel_offset, + EH_PE.datarel, + EH_PE.textrel, + EH_PE.funcrel, + EH_PE.aligned, + => unreachable, + else => unreachable, + } + + switch (enc & 0x0F) { + EH_PE.absptr => try writer.writeIntLittle(u64, @as(u64, @bitCast(actual))), + EH_PE.udata2 => try writer.writeIntLittle(u16, @as(u16, @bitCast(@as(i16, @intCast(actual))))), + EH_PE.udata4 => try writer.writeIntLittle(u32, @as(u32, @bitCast(@as(i32, @intCast(actual))))), + EH_PE.udata8 => try writer.writeIntLittle(u64, @as(u64, @bitCast(actual))), + EH_PE.uleb128 => try leb.writeULEB128(writer, @as(u64, @bitCast(actual))), + EH_PE.sdata2 => try writer.writeIntLittle(i16, @as(i16, @intCast(actual))), + EH_PE.sdata4 => try writer.writeIntLittle(i32, @as(i32, @intCast(actual))), + EH_PE.sdata8 => try writer.writeIntLittle(i64, actual), + EH_PE.sleb128 => try leb.writeILEB128(writer, actual), + else => unreachable, + } + } + }; +} + +pub fn getRelocs(macho_file: *MachO, object_id: u32, source_offset: u32) []const macho.relocation_info { + const object = &macho_file.objects.items[object_id]; + assert(object.hasEhFrameRecords()); + const urel = object.eh_frame_relocs_lookup.get(source_offset) orelse + return &[0]macho.relocation_info{}; + const all_relocs = object.getRelocs(object.eh_frame_sect_id.?); + return all_relocs[urel.reloc.start..][0..urel.reloc.len]; +} + +pub const Iterator = struct { + data: []const u8, + pos: u32 = 0, + + pub fn next(it: *Iterator) !?EhFrameRecord(false) { + if (it.pos >= it.data.len) return null; + + var stream = std.io.fixedBufferStream(it.data[it.pos..]); + const reader = stream.reader(); + + var size = try reader.readIntLittle(u32); + if (size == 0xFFFFFFFF) { + log.err("MachO doesn't support 64bit DWARF CFI __eh_frame records", .{}); + return error.UnsupportedDwarfCfiFormat; + } + + const id = try reader.readIntLittle(u32); + const tag: EhFrameRecordTag = if (id == 0) 
.cie else .fde; + const offset: u32 = 4; + const record = EhFrameRecord(false){ + .tag = tag, + .size = size, + .data = it.data[it.pos + offset ..][0..size], + }; + + it.pos += size + offset; + + return record; + } + + pub fn reset(it: *Iterator) void { + it.pos = 0; + } + + pub fn seekTo(it: *Iterator, pos: u32) void { + assert(pos >= 0 and pos < it.data.len); + it.pos = pos; + } +}; + +pub const EH_PE = struct { + pub const absptr = 0x00; + pub const uleb128 = 0x01; + pub const udata2 = 0x02; + pub const udata4 = 0x03; + pub const udata8 = 0x04; + pub const sleb128 = 0x09; + pub const sdata2 = 0x0A; + pub const sdata4 = 0x0B; + pub const sdata8 = 0x0C; + pub const pcrel = 0x10; + pub const textrel = 0x20; + pub const datarel = 0x30; + pub const funcrel = 0x40; + pub const aligned = 0x50; + pub const indirect = 0x80; + pub const omit = 0xFF; +}; diff --git a/src/archive/archive/zld/MachO/fat.zig b/src/archive/archive/zld/MachO/fat.zig new file mode 100644 index 000000000000..4a5a30d1a1d4 --- /dev/null +++ b/src/archive/archive/zld/MachO/fat.zig @@ -0,0 +1,55 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const log = std.log.scoped(.macho); +const macho = std.macho; +const mem = std.mem; +const native_endian = builtin.target.cpu.arch.endian(); + +pub fn decodeArch(cputype: macho.cpu_type_t, comptime logError: bool) !std.Target.Cpu.Arch { + const cpu_arch: std.Target.Cpu.Arch = switch (cputype) { + macho.CPU_TYPE_ARM64 => .aarch64, + macho.CPU_TYPE_X86_64 => .x86_64, + else => { + if (logError) { + log.err("unsupported cpu architecture 0x{x}", .{cputype}); + } + return error.UnsupportedCpuArchitecture; + }, + }; + return cpu_arch; +} + +fn readFatStruct(reader: anytype, comptime T: type) !T { + // Fat structures (fat_header & fat_arch) are always written and read to/from + // disk in big endian order. + var res = try reader.readStruct(T); + if (native_endian != std.builtin.Endian.Big) { + mem.byteSwapAllFields(T, &res); + } + return res; +} + +pub fn getLibraryOffset(reader: anytype, cpu_arch: std.Target.Cpu.Arch) !u64 { + const fat_header = try readFatStruct(reader, macho.fat_header); + if (fat_header.magic != macho.FAT_MAGIC) return 0; + + var fat_arch_index: u32 = 0; + while (fat_arch_index < fat_header.nfat_arch) : (fat_arch_index += 1) { + const fat_arch = try readFatStruct(reader, macho.fat_arch); + // If we come across an architecture that we do not know how to handle, that's + // fine because we can keep looking for one that might match. + const lib_arch = decodeArch(fat_arch.cputype, false) catch |err| switch (err) { + error.UnsupportedCpuArchitecture => continue, + else => |e| return e, + }; + if (lib_arch == cpu_arch) { + // We have found a matching architecture! 
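+            // `fat_arch.offset` is the byte offset of the nested Mach-O image for
+            // this architecture within the universal (fat) file, which is what the
+            // caller seeks to before parsing.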
+ return fat_arch.offset; + } + } else { + log.err("Could not find matching cpu architecture in fat library: expected {s}", .{ + @tagName(cpu_arch), + }); + return error.MismatchedCpuArchitecture; + } +} diff --git a/src/archive/archive/zld/MachO/load_commands.zig b/src/archive/archive/zld/MachO/load_commands.zig new file mode 100644 index 000000000000..3e1e6515acdd --- /dev/null +++ b/src/archive/archive/zld/MachO/load_commands.zig @@ -0,0 +1,312 @@ +const std = @import("std"); +const assert = std.debug.assert; +const log = std.log.scoped(.link); +const macho = std.macho; +const mem = std.mem; + +const Allocator = mem.Allocator; +const Dylib = @import("Dylib.zig"); +const Options = @import("../MachO.zig").Options; + +pub const default_dyld_path: [*:0]const u8 = "/usr/lib/dyld"; + +fn calcInstallNameLen(cmd_size: u64, name: []const u8, assume_max_path_len: bool) u64 { + const darwin_path_max = 1024; + const name_len = if (assume_max_path_len) darwin_path_max else name.len + 1; + return mem.alignForwardGeneric(u64, cmd_size + name_len, @alignOf(u64)); +} + +const CalcLCsSizeCtx = struct { + segments: []const macho.segment_command_64, + dylibs: []const Dylib, + referenced_dylibs: []u16, + wants_function_starts: bool = true, +}; + +fn calcLCsSize(gpa: Allocator, options: *const Options, ctx: CalcLCsSizeCtx, assume_max_path_len: bool) !u32 { + var has_text_segment: bool = false; + var sizeofcmds: u64 = 0; + for (ctx.segments) |seg| { + sizeofcmds += seg.nsects * @sizeOf(macho.section_64) + @sizeOf(macho.segment_command_64); + if (mem.eql(u8, seg.segName(), "__TEXT")) { + has_text_segment = true; + } + } + + // LC_DYLD_INFO_ONLY + sizeofcmds += @sizeOf(macho.dyld_info_command); + // LC_FUNCTION_STARTS + if (has_text_segment and ctx.wants_function_starts) { + sizeofcmds += @sizeOf(macho.linkedit_data_command); + } + // LC_DATA_IN_CODE + sizeofcmds += @sizeOf(macho.linkedit_data_command); + // LC_SYMTAB + sizeofcmds += @sizeOf(macho.symtab_command); + // LC_DYSYMTAB + sizeofcmds += @sizeOf(macho.dysymtab_command); + // LC_LOAD_DYLINKER + sizeofcmds += calcInstallNameLen( + @sizeOf(macho.dylinker_command), + mem.sliceTo(default_dyld_path, 0), + false, + ); + // LC_MAIN + if (options.output_mode == .exe) { + sizeofcmds += @sizeOf(macho.entry_point_command); + } + // LC_ID_DYLIB + if (options.output_mode == .lib) { + sizeofcmds += blk: { + const emit = options.emit; + const install_name = options.install_name orelse emit.sub_path; + break :blk calcInstallNameLen( + @sizeOf(macho.dylib_command), + install_name, + assume_max_path_len, + ); + }; + } + // LC_RPATH + { + var it = RpathIterator.init(gpa, options.rpath_list); + defer it.deinit(); + while (try it.next()) |rpath| { + sizeofcmds += calcInstallNameLen( + @sizeOf(macho.rpath_command), + rpath, + assume_max_path_len, + ); + } + } + // LC_SOURCE_VERSION + sizeofcmds += @sizeOf(macho.source_version_command); + // LC_BUILD_VERSION + sizeofcmds += @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); + // LC_UUID + sizeofcmds += @sizeOf(macho.uuid_command); + // LC_LOAD_DYLIB + for (ctx.referenced_dylibs) |id| { + const dylib = ctx.dylibs[id]; + const dylib_id = dylib.id orelse unreachable; + sizeofcmds += calcInstallNameLen( + @sizeOf(macho.dylib_command), + dylib_id.name, + assume_max_path_len, + ); + } + // LC_CODE_SIGNATURE + { + const target = options.target; + const requires_codesig = blk: { + if (options.entitlements) |_| break :blk true; + if (target.cpu_arch.? == .aarch64 and (target.os_tag.? == .macos or target.abi.? 
== .simulator)) + break :blk true; + break :blk false; + }; + if (requires_codesig) { + sizeofcmds += @sizeOf(macho.linkedit_data_command); + } + } + + return @as(u32, @intCast(sizeofcmds)); +} + +pub fn calcMinHeaderPad(gpa: Allocator, options: *const Options, ctx: CalcLCsSizeCtx) !u64 { + var padding: u32 = (try calcLCsSize(gpa, options, ctx, false)) + (options.headerpad orelse 0); + log.debug("minimum requested headerpad size 0x{x}", .{padding + @sizeOf(macho.mach_header_64)}); + + if (options.headerpad_max_install_names) { + var min_headerpad_size: u32 = try calcLCsSize(gpa, options, ctx, true); + log.debug("headerpad_max_install_names minimum headerpad size 0x{x}", .{ + min_headerpad_size + @sizeOf(macho.mach_header_64), + }); + padding = @max(padding, min_headerpad_size); + } + + const offset = @sizeOf(macho.mach_header_64) + padding; + log.debug("actual headerpad size 0x{x}", .{offset}); + + return offset; +} + +pub fn calcNumOfLCs(lc_buffer: []const u8) u32 { + var ncmds: u32 = 0; + var pos: usize = 0; + while (true) { + if (pos >= lc_buffer.len) break; + const cmd = @as(*align(1) const macho.load_command, @ptrCast(lc_buffer.ptr + pos)).*; + ncmds += 1; + pos += cmd.cmdsize; + } + return ncmds; +} + +pub fn writeDylinkerLC(lc_writer: anytype) !void { + const name_len = mem.sliceTo(default_dyld_path, 0).len; + const cmdsize = @as(u32, @intCast(mem.alignForwardGeneric( + u64, + @sizeOf(macho.dylinker_command) + name_len, + @sizeOf(u64), + ))); + try lc_writer.writeStruct(macho.dylinker_command{ + .cmd = .LOAD_DYLINKER, + .cmdsize = cmdsize, + .name = @sizeOf(macho.dylinker_command), + }); + try lc_writer.writeAll(mem.sliceTo(default_dyld_path, 0)); + const padding = cmdsize - @sizeOf(macho.dylinker_command) - name_len; + if (padding > 0) { + try lc_writer.writeByteNTimes(0, padding); + } +} + +const WriteDylibLCCtx = struct { + cmd: macho.LC, + name: []const u8, + timestamp: u32 = 2, + current_version: u32 = 0x10000, + compatibility_version: u32 = 0x10000, +}; + +fn writeDylibLC(ctx: WriteDylibLCCtx, lc_writer: anytype) !void { + const name_len = ctx.name.len + 1; + const cmdsize = @as(u32, @intCast(mem.alignForwardGeneric( + u64, + @sizeOf(macho.dylib_command) + name_len, + @sizeOf(u64), + ))); + try lc_writer.writeStruct(macho.dylib_command{ + .cmd = ctx.cmd, + .cmdsize = cmdsize, + .dylib = .{ + .name = @sizeOf(macho.dylib_command), + .timestamp = ctx.timestamp, + .current_version = ctx.current_version, + .compatibility_version = ctx.compatibility_version, + }, + }); + try lc_writer.writeAll(ctx.name); + try lc_writer.writeByte(0); + const padding = cmdsize - @sizeOf(macho.dylib_command) - name_len; + if (padding > 0) { + try lc_writer.writeByteNTimes(0, padding); + } +} + +pub fn writeDylibIdLC(options: *const Options, lc_writer: anytype) !void { + assert(options.output_mode == .lib); + const emit = options.emit; + const install_name = options.install_name orelse emit.sub_path; + const curr = options.current_version orelse std.builtin.Version{ + .major = 1, + .minor = 0, + .patch = 0, + }; + const compat = options.compatibility_version orelse std.builtin.Version{ + .major = 1, + .minor = 0, + .patch = 0, + }; + try writeDylibLC(.{ + .cmd = .ID_DYLIB, + .name = install_name, + .current_version = curr.major << 16 | curr.minor << 8 | curr.patch, + .compatibility_version = compat.major << 16 | compat.minor << 8 | compat.patch, + }, lc_writer); +} + +const RpathIterator = struct { + buffer: []const []const u8, + table: std.StringHashMap(void), + count: usize = 0, + + fn init(gpa: 
Allocator, rpaths: []const []const u8) RpathIterator { + return .{ .buffer = rpaths, .table = std.StringHashMap(void).init(gpa) }; + } + + fn deinit(it: *RpathIterator) void { + it.table.deinit(); + } + + fn next(it: *RpathIterator) !?[]const u8 { + while (true) { + if (it.count >= it.buffer.len) return null; + const rpath = it.buffer[it.count]; + it.count += 1; + const gop = try it.table.getOrPut(rpath); + if (gop.found_existing) continue; + return rpath; + } + } +}; + +pub fn writeRpathLCs(gpa: Allocator, options: *const Options, lc_writer: anytype) !void { + var it = RpathIterator.init(gpa, options.rpath_list); + defer it.deinit(); + + while (try it.next()) |rpath| { + const rpath_len = rpath.len + 1; + const cmdsize = @as(u32, @intCast(mem.alignForwardGeneric( + u64, + @sizeOf(macho.rpath_command) + rpath_len, + @sizeOf(u64), + ))); + try lc_writer.writeStruct(macho.rpath_command{ + .cmdsize = cmdsize, + .path = @sizeOf(macho.rpath_command), + }); + try lc_writer.writeAll(rpath); + try lc_writer.writeByte(0); + const padding = cmdsize - @sizeOf(macho.rpath_command) - rpath_len; + if (padding > 0) { + try lc_writer.writeByteNTimes(0, padding); + } + } +} + +pub fn writeBuildVersionLC(options: *const Options, lc_writer: anytype) !void { + const cmdsize = @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); + const platform_version = blk: { + const ver = options.platform_version; + const platform_version = ver.major << 16 | ver.minor << 8; + break :blk platform_version; + }; + const sdk_version = blk: { + const ver = options.sdk_version; + const sdk_version = ver.major << 16 | ver.minor << 8; + break :blk sdk_version; + }; + const is_simulator_abi = options.target.abi.? == .simulator; + try lc_writer.writeStruct(macho.build_version_command{ + .cmdsize = cmdsize, + .platform = switch (options.target.os_tag.?) 
{ + .macos => .MACOS, + .ios => if (is_simulator_abi) macho.PLATFORM.IOSSIMULATOR else macho.PLATFORM.IOS, + .watchos => if (is_simulator_abi) macho.PLATFORM.WATCHOSSIMULATOR else macho.PLATFORM.WATCHOS, + .tvos => if (is_simulator_abi) macho.PLATFORM.TVOSSIMULATOR else macho.PLATFORM.TVOS, + else => unreachable, + }, + .minos = platform_version, + .sdk = sdk_version, + .ntools = 1, + }); + try lc_writer.writeAll(mem.asBytes(&macho.build_tool_version{ + .tool = .LD, + .version = 0x0, + })); +} + +pub fn writeLoadDylibLCs(dylibs: []const Dylib, referenced: []u16, lc_writer: anytype) !void { + for (referenced) |index| { + const dylib = dylibs[index]; + const dylib_id = dylib.id orelse unreachable; + try writeDylibLC(.{ + .cmd = if (dylib.weak) .LOAD_WEAK_DYLIB else .LOAD_DYLIB, + .name = dylib_id.name, + .timestamp = dylib_id.timestamp, + .current_version = dylib_id.current_version, + .compatibility_version = dylib_id.compatibility_version, + }, lc_writer); + } +} diff --git a/src/archive/archive/zld/MachO/thunks.zig b/src/archive/archive/zld/MachO/thunks.zig new file mode 100644 index 000000000000..62a66c8bf1f5 --- /dev/null +++ b/src/archive/archive/zld/MachO/thunks.zig @@ -0,0 +1,355 @@ +const std = @import("std"); +const assert = std.debug.assert; +const log = std.log.scoped(.thunks); +const macho = std.macho; +const math = std.math; +const mem = std.mem; + +const aarch64 = @import("../aarch64.zig"); + +const Allocator = mem.Allocator; +const Atom = @import("Atom.zig"); +const AtomIndex = MachO.AtomIndex; +const MachO = @import("../MachO.zig"); +const SymbolWithLoc = MachO.SymbolWithLoc; + +pub const ThunkIndex = u32; + +/// Branch instruction has 26 bits immediate but 4 byte aligned. +const jump_bits = @bitSizeOf(i28); + +const max_distance = (1 << (jump_bits - 1)); + +/// A branch will need an extender if its target is larger than +/// `2^(jump_bits - 1) - margin` where margin is some arbitrary number. +/// mold uses 5MiB margin, while ld64 uses 4MiB margin. We will follow mold +/// and assume margin to be 5MiB. 
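+/// In concrete terms: `jump_bits` is 28 (a signed, 4-byte-aligned 26-bit
+/// immediate reaches +/- 2^27 bytes), so `max_distance` is 1 << 27 =
+/// 0x800_0000 (128 MiB), and subtracting the 5 MiB margin leaves
+/// 0x800_0000 - 0x50_0000 = 0x7B0_0000 (123 MiB) as `max_allowed_distance`.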
+const max_allowed_distance = max_distance - 0x500_000; + +pub const Thunk = struct { + start_index: AtomIndex, + len: u32, + + lookup: std.AutoArrayHashMapUnmanaged(SymbolWithLoc, AtomIndex) = .{}, + + pub fn deinit(self: *Thunk, gpa: Allocator) void { + self.lookup.deinit(gpa); + } + + pub fn getStartAtomIndex(self: Thunk) AtomIndex { + assert(self.len != 0); + return self.start_index; + } + + pub fn getEndAtomIndex(self: Thunk) AtomIndex { + assert(self.len != 0); + return self.start_index + self.len - 1; + } + + pub fn getSize(self: Thunk) u64 { + return 12 * self.len; + } + + pub fn getAlignment() u32 { + return @alignOf(u32); + } + + pub fn getTrampolineForSymbol(self: Thunk, macho_file: *MachO, target: SymbolWithLoc) ?SymbolWithLoc { + const atom_index = self.lookup.get(target) orelse return null; + const atom = macho_file.getAtom(atom_index); + return atom.getSymbolWithLoc(); + } +}; + +pub fn createThunks(macho_file: *MachO, sect_id: u8) !void { + const header = &macho_file.sections.items(.header)[sect_id]; + if (header.size == 0) return; + + const gpa = macho_file.base.allocator; + const first_atom_index = macho_file.sections.items(.first_atom_index)[sect_id]; + assert(first_atom_index != 0); + + header.size = 0; + header.@"align" = 0; + + var atom_count: u32 = 0; + + { + var atom_index = first_atom_index; + while (true) { + const atom = macho_file.getAtom(atom_index); + const sym = macho_file.getSymbolPtr(atom.getSymbolWithLoc()); + sym.n_value = 0; + atom_count += 1; + + if (atom.next_index) |next_index| { + atom_index = next_index; + } else break; + } + } + + var allocated = std.AutoHashMap(AtomIndex, void).init(gpa); + defer allocated.deinit(); + try allocated.ensureTotalCapacity(atom_count); + + var group_start = first_atom_index; + var group_end = first_atom_index; + var offset: u64 = 0; + + while (true) { + const group_start_atom = macho_file.getAtom(group_start); + + while (true) { + const atom = macho_file.getAtom(group_end); + offset = mem.alignForwardGeneric(u64, offset, try math.powi(u32, 2, atom.alignment)); + + const sym = macho_file.getSymbolPtr(atom.getSymbolWithLoc()); + sym.n_value = offset; + offset += atom.size; + + macho_file.logAtom(group_end, log); + + header.@"align" = @max(header.@"align", atom.alignment); + + allocated.putAssumeCapacityNoClobber(group_end, {}); + + const group_start_sym = macho_file.getSymbol(group_start_atom.getSymbolWithLoc()); + if (offset - group_start_sym.n_value >= max_allowed_distance) break; + + if (atom.next_index) |next_index| { + group_end = next_index; + } else break; + } + + // Insert thunk at group_end + const thunk_index = @as(u32, @intCast(macho_file.thunks.items.len)); + try macho_file.thunks.append(gpa, .{ .start_index = undefined, .len = 0 }); + + // Scan relocs in the group and create trampolines for any unreachable callsite. 
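+        // (A callsite counts as unreachable when its ARM64_RELOC_BRANCH26 target
+        // is a stub, lives in a different section, has not been allocated yet, or
+        // is too far away for a direct branch; see `isReachable` below.)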
+ var atom_index = group_start; + while (true) { + const atom = macho_file.getAtom(atom_index); + try scanRelocs( + macho_file, + atom_index, + allocated, + thunk_index, + group_end, + ); + + if (atom_index == group_end) break; + + if (atom.next_index) |next_index| { + atom_index = next_index; + } else break; + } + + offset = mem.alignForwardGeneric(u64, offset, Thunk.getAlignment()); + allocateThunk(macho_file, thunk_index, offset, header); + offset += macho_file.thunks.items[thunk_index].getSize(); + + const thunk = macho_file.thunks.items[thunk_index]; + if (thunk.len == 0) { + const group_end_atom = macho_file.getAtom(group_end); + if (group_end_atom.next_index) |next_index| { + group_start = next_index; + group_end = next_index; + } else break; + } else { + const thunk_end_atom_index = thunk.getEndAtomIndex(); + const thunk_end_atom = macho_file.getAtom(thunk_end_atom_index); + if (thunk_end_atom.next_index) |next_index| { + group_start = next_index; + group_end = next_index; + } else break; + } + } + + header.size = @as(u32, @intCast(offset)); +} + +fn allocateThunk( + macho_file: *MachO, + thunk_index: ThunkIndex, + base_offset: u64, + header: *macho.section_64, +) void { + const thunk = macho_file.thunks.items[thunk_index]; + if (thunk.len == 0) return; + + const first_atom_index = thunk.getStartAtomIndex(); + const end_atom_index = thunk.getEndAtomIndex(); + + var atom_index = first_atom_index; + var offset = base_offset; + while (true) { + const atom = macho_file.getAtom(atom_index); + offset = mem.alignForwardGeneric(u64, offset, Thunk.getAlignment()); + + const sym = macho_file.getSymbolPtr(atom.getSymbolWithLoc()); + sym.n_value = offset; + offset += atom.size; + + macho_file.logAtom(atom_index, log); + + header.@"align" = @max(header.@"align", atom.alignment); + + if (end_atom_index == atom_index) break; + + if (atom.next_index) |next_index| { + atom_index = next_index; + } else break; + } +} + +fn scanRelocs( + macho_file: *MachO, + atom_index: AtomIndex, + allocated: std.AutoHashMap(AtomIndex, void), + thunk_index: ThunkIndex, + group_end: AtomIndex, +) !void { + const atom = macho_file.getAtom(atom_index); + const object = macho_file.objects.items[atom.getFile().?]; + + const base_offset = if (object.getSourceSymbol(atom.sym_index)) |source_sym| blk: { + const source_sect = object.getSourceSection(source_sym.n_sect - 1); + break :blk @as(i32, @intCast(source_sym.n_value - source_sect.addr)); + } else 0; + + const relocs = Atom.getAtomRelocs(macho_file, atom_index); + for (relocs) |rel| { + if (!relocNeedsThunk(rel)) continue; + + const target = Atom.parseRelocTarget(macho_file, atom_index, rel); + if (isReachable(macho_file, atom_index, rel, base_offset, target, allocated)) continue; + + log.debug("{x}: source = {s}@{x}, target = {s}@{x} unreachable", .{ + rel.r_address - base_offset, + macho_file.getSymbolName(atom.getSymbolWithLoc()), + macho_file.getSymbol(atom.getSymbolWithLoc()).n_value, + macho_file.getSymbolName(target), + macho_file.getSymbol(target).n_value, + }); + + const gpa = macho_file.base.allocator; + const target_sym = macho_file.getSymbol(target); + + const actual_target: SymbolWithLoc = if (target_sym.undf()) blk: { + const stub_atom_index = macho_file.getStubsAtomIndexForSymbol(target).?; + break :blk .{ .sym_index = macho_file.getAtom(stub_atom_index).sym_index }; + } else target; + + const thunk = &macho_file.thunks.items[thunk_index]; + const gop = try thunk.lookup.getOrPut(gpa, actual_target); + if (!gop.found_existing) { + const thunk_atom_index = 
try createThunkAtom(macho_file); + gop.value_ptr.* = thunk_atom_index; + + const thunk_atom = macho_file.getAtomPtr(thunk_atom_index); + const end_atom_index = if (thunk.len == 0) group_end else thunk.getEndAtomIndex(); + const end_atom = macho_file.getAtomPtr(end_atom_index); + + if (end_atom.next_index) |first_after_index| { + const first_after_atom = macho_file.getAtomPtr(first_after_index); + first_after_atom.prev_index = thunk_atom_index; + thunk_atom.next_index = first_after_index; + } + + end_atom.next_index = thunk_atom_index; + thunk_atom.prev_index = end_atom_index; + + if (thunk.len == 0) { + thunk.start_index = thunk_atom_index; + } + + thunk.len += 1; + } + + try macho_file.thunk_table.put(gpa, atom_index, thunk_index); + } +} + +inline fn relocNeedsThunk(rel: macho.relocation_info) bool { + const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); + return rel_type == .ARM64_RELOC_BRANCH26; +} + +fn isReachable( + macho_file: *MachO, + atom_index: AtomIndex, + rel: macho.relocation_info, + base_offset: i32, + target: SymbolWithLoc, + allocated: std.AutoHashMap(AtomIndex, void), +) bool { + if (macho_file.getStubsAtomIndexForSymbol(target)) |_| return false; + + const source_atom = macho_file.getAtom(atom_index); + const source_sym = macho_file.getSymbol(source_atom.getSymbolWithLoc()); + + const target_object = macho_file.objects.items[target.getFile().?]; + const target_atom_index = target_object.getAtomIndexForSymbol(target.sym_index).?; + const target_atom = macho_file.getAtom(target_atom_index); + const target_sym = macho_file.getSymbol(target_atom.getSymbolWithLoc()); + + if (source_sym.n_sect != target_sym.n_sect) return false; + + if (!allocated.contains(target_atom_index)) return false; + + const source_addr = source_sym.n_value + @as(u32, @intCast(rel.r_address - base_offset)); + const is_via_got = Atom.relocRequiresGot(macho_file, rel); + const target_addr = Atom.getRelocTargetAddress(macho_file, target, is_via_got, false) catch unreachable; + _ = Atom.calcPcRelativeDisplacementArm64(source_addr, target_addr) catch + return false; + + return true; +} + +fn createThunkAtom(macho_file: *MachO) !AtomIndex { + const sym_index = try macho_file.allocateSymbol(); + const atom_index = try macho_file.createEmptyAtom(sym_index, @sizeOf(u32) * 3, 2); + const sym = macho_file.getSymbolPtr(.{ .sym_index = sym_index }); + sym.n_type = macho.N_SECT; + + const sect_id = macho_file.getSectionByName("__TEXT", "__text") orelse unreachable; + sym.n_sect = sect_id + 1; + + return atom_index; +} + +fn getThunkIndex(macho_file: *MachO, atom_index: AtomIndex) ?ThunkIndex { + const atom = macho_file.getAtom(atom_index); + const sym = macho_file.getSymbol(atom.getSymbolWithLoc()); + for (macho_file.thunks.items, 0..) 
|thunk, i| { + if (thunk.len == 0) continue; + + const thunk_atom_index = thunk.getStartAtomIndex(); + const thunk_atom = macho_file.getAtom(thunk_atom_index); + const thunk_sym = macho_file.getSymbol(thunk_atom.getSymbolWithLoc()); + const start_addr = thunk_sym.n_value; + const end_addr = start_addr + thunk.getSize(); + + if (start_addr <= sym.n_value and sym.n_value < end_addr) { + return @as(u32, @intCast(i)); + } + } + return null; +} + +pub fn writeThunkCode(macho_file: *MachO, atom_index: AtomIndex, writer: anytype) !void { + const atom = macho_file.getAtom(atom_index); + const sym = macho_file.getSymbol(atom.getSymbolWithLoc()); + const source_addr = sym.n_value; + const thunk = macho_file.thunks.items[getThunkIndex(macho_file, atom_index).?]; + const target_addr = for (thunk.lookup.keys()) |target| { + const target_atom_index = thunk.lookup.get(target).?; + if (atom_index == target_atom_index) break macho_file.getSymbol(target).n_value; + } else unreachable; + + const pages = Atom.calcNumberOfPages(source_addr, target_addr); + try writer.writeIntLittle(u32, aarch64.Instruction.adrp(.x16, pages).toU32()); + const off = try Atom.calcPageOffset(target_addr, .arithmetic); + try writer.writeIntLittle(u32, aarch64.Instruction.add(.x16, .x16, off, false).toU32()); + try writer.writeIntLittle(u32, aarch64.Instruction.br(.x16).toU32()); +} diff --git a/src/archive/archive/zld/ThreadPool.zig b/src/archive/archive/zld/ThreadPool.zig new file mode 100644 index 000000000000..758536317dc0 --- /dev/null +++ b/src/archive/archive/zld/ThreadPool.zig @@ -0,0 +1,155 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const ThreadPool = @This(); +const WaitGroup = @import("WaitGroup.zig"); + +mutex: std.Thread.Mutex = .{}, +cond: std.Thread.Condition = .{}, +run_queue: RunQueue = .{}, +is_running: bool = true, +allocator: std.mem.Allocator, +threads: []std.Thread, + +const RunQueue = std.SinglyLinkedList(Runnable); +const Runnable = struct { + runFn: RunProto, +}; + +const RunProto = switch (builtin.zig_backend) { + .stage1 => fn (*Runnable) void, + else => *const fn (*Runnable) void, +}; + +pub fn init(pool: *ThreadPool, allocator: std.mem.Allocator) !void { + pool.* = .{ + .allocator = allocator, + .threads = &[_]std.Thread{}, + }; + + if (builtin.single_threaded) { + return; + } + + const thread_count = std.math.max(1, std.Thread.getCpuCount() catch 1); + pool.threads = try allocator.alloc(std.Thread, thread_count); + errdefer allocator.free(pool.threads); + + // kill and join any threads we spawned previously on error. + var spawned: usize = 0; + errdefer pool.join(spawned); + + for (pool.threads) |*thread| { + thread.* = try std.Thread.spawn(.{}, worker, .{pool}); + spawned += 1; + } +} + +pub fn deinit(pool: *ThreadPool) void { + pool.join(pool.threads.len); // kill and join all threads. + pool.* = undefined; +} + +fn join(pool: *ThreadPool, spawned: usize) void { + if (builtin.single_threaded) { + return; + } + + { + pool.mutex.lock(); + defer pool.mutex.unlock(); + + // ensure future worker threads exit the dequeue loop + pool.is_running = false; + } + + // wake up any sleeping threads (this can be done outside the mutex) + // then wait for all the threads we know are spawned to complete. 
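+    // Workers re-check `is_running` while holding the mutex before they wait on
+    // the condition variable, so flipping it under the lock above and then
+    // broadcasting here cannot miss a sleeping thread.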
+ pool.cond.broadcast(); + for (pool.threads[0..spawned]) |thread| { + thread.join(); + } + + pool.allocator.free(pool.threads); +} + +pub fn spawn(pool: *ThreadPool, comptime func: anytype, args: anytype) !void { + if (builtin.single_threaded) { + @call(.{}, func, args); + return; + } + + const Args = @TypeOf(args); + const Closure = struct { + arguments: Args, + pool: *ThreadPool, + run_node: RunQueue.Node = .{ .data = .{ .runFn = runFn } }, + + fn runFn(runnable: *Runnable) void { + const run_node = @fieldParentPtr(RunQueue.Node, "data", runnable); + const closure = @fieldParentPtr(@This(), "run_node", run_node); + @call(.auto, func, closure.arguments); + + // The thread pool's allocator is protected by the mutex. + const mutex = &closure.pool.mutex; + mutex.lock(); + defer mutex.unlock(); + + closure.pool.allocator.destroy(closure); + } + }; + + { + pool.mutex.lock(); + defer pool.mutex.unlock(); + + const closure = try pool.allocator.create(Closure); + closure.* = .{ + .arguments = args, + .pool = pool, + }; + + pool.run_queue.prepend(&closure.run_node); + } + + // Notify waiting threads outside the lock to try and keep the critical section small. + pool.cond.signal(); +} + +fn worker(pool: *ThreadPool) void { + pool.mutex.lock(); + defer pool.mutex.unlock(); + + while (true) { + while (pool.run_queue.popFirst()) |run_node| { + // Temporarily unlock the mutex in order to execute the run_node + pool.mutex.unlock(); + defer pool.mutex.lock(); + + const runFn = run_node.data.runFn; + runFn(&run_node.data); + } + + // Stop executing instead of waiting if the thread pool is no longer running. + if (pool.is_running) { + pool.cond.wait(&pool.mutex); + } else { + break; + } + } +} + +pub fn waitAndWork(pool: *ThreadPool, wait_group: *WaitGroup) void { + while (!wait_group.isDone()) { + if (blk: { + pool.mutex.lock(); + defer pool.mutex.unlock(); + break :blk pool.run_queue.popFirst(); + }) |run_node| { + run_node.data.runFn(&run_node.data); + continue; + } + + wait_group.wait(); + return; + } +} diff --git a/src/archive/archive/zld/WaitGroup.zig b/src/archive/archive/zld/WaitGroup.zig new file mode 100644 index 000000000000..c8be6658db76 --- /dev/null +++ b/src/archive/archive/zld/WaitGroup.zig @@ -0,0 +1,46 @@ +const std = @import("std"); +const Atomic = std.atomic.Atomic; +const assert = std.debug.assert; +const WaitGroup = @This(); + +const is_waiting: usize = 1 << 0; +const one_pending: usize = 1 << 1; + +state: Atomic(usize) = Atomic(usize).init(0), +event: std.Thread.ResetEvent = .{}, + +pub fn start(self: *WaitGroup) void { + const state = self.state.fetchAdd(one_pending, .Monotonic); + assert((state / one_pending) < (std.math.maxInt(usize) / one_pending)); +} + +pub fn finish(self: *WaitGroup) void { + const state = self.state.fetchSub(one_pending, .Release); + assert((state / one_pending) > 0); + + if (state == (one_pending | is_waiting)) { + self.state.fence(.Acquire); + self.event.set(); + } +} + +pub fn wait(self: *WaitGroup) void { + var state = self.state.fetchAdd(is_waiting, .Acquire); + assert(state & is_waiting == 0); + + if ((state / one_pending) > 0) { + self.event.wait(); + } +} + +pub fn reset(self: *WaitGroup) void { + self.state.store(0, .Monotonic); + self.event.reset(); +} + +pub fn isDone(wg: *WaitGroup) bool { + const state = wg.state.load(.Acquire); + assert(state & is_waiting == 0); + + return (state / one_pending) == 0; +} diff --git a/src/archive/archive/zld/Wasm.zig b/src/archive/archive/zld/Wasm.zig new file mode 100644 index 000000000000..26204f104606 --- 
/dev/null +++ b/src/archive/archive/zld/Wasm.zig @@ -0,0 +1,1957 @@ +//! Wasm represents the final binary +const Wasm = @This(); + +const std = @import("std"); +const Zld = @import("Zld.zig"); +const Atom = @import("Wasm/Atom.zig"); +const Object = @import("Wasm/Object.zig"); +const Archive = @import("Wasm/Archive.zig"); +const Symbol = @import("Wasm/Symbol.zig"); +const sections = @import("Wasm/sections.zig"); +const types = @import("Wasm/types.zig"); +pub const Options = @import("Wasm/Options.zig"); +const ThreadPool = @import("ThreadPool.zig"); + +const leb = std.leb; +const fs = std.fs; +const Allocator = std.mem.Allocator; +const assert = std.debug.assert; +const mem = std.mem; + +const log = std.log.scoped(.wasm); + +base: Zld, +/// Configuration of the linker provided by the user +options: Options, +/// A list with references to objects we link to during `flush()` +objects: std.ArrayListUnmanaged(Object) = .{}, +/// A list of archive files which are lazily linked with the final binary. +/// Referencing a Symbol from any of its object files will cause the object +/// file to be linked into the final binary. +archives: std.ArrayListUnmanaged(Archive) = .{}, +/// A map of global names to their symbol location in an object file +global_symbols: std.AutoHashMapUnmanaged(u32, SymbolWithLoc) = .{}, +/// Contains all atoms that have been created, used to clean up +managed_atoms: std.ArrayListUnmanaged(*Atom) = .{}, +/// Maps atoms to their segment index +atoms: std.AutoHashMapUnmanaged(u32, *Atom) = .{}, +/// Maps a symbol's location to an atom. This can be used to find meta +/// data of a symbol, such as its size, or its offset to perform a relocation. +/// Undefined (and synthetic) symbols do not have an Atom and therefore cannot be mapped. +symbol_atom: std.AutoHashMapUnmanaged(SymbolWithLoc, *Atom) = .{}, +/// All symbols created by the linker, rather than through +/// object files will be inserted in this list to manage them. +synthetic_symbols: std.StringArrayHashMapUnmanaged(Symbol) = .{}, +/// List of all symbol locations which have been resolved by the linker +/// and will be emit into the final binary. +resolved_symbols: std.AutoArrayHashMapUnmanaged(SymbolWithLoc, void) = .{}, +/// Maps discarded symbols and their positions to the location of the symbol +/// it was resolved to. +discarded: std.AutoHashMapUnmanaged(SymbolWithLoc, SymbolWithLoc) = .{}, +/// Symbols that remain undefined after symbol resolution. +undefs: std.StringArrayHashMapUnmanaged(SymbolWithLoc) = .{}, + +/// String table, used to deduplicate all symbol names +string_table: StringTable = .{}, + +// OUTPUT SECTIONS // +/// Output function signature types +func_types: sections.Types = .{}, +/// Output import section +imports: sections.Imports = .{}, +/// Output function section +functions: sections.Functions = .{}, +/// Output table section +tables: sections.Tables = .{}, +/// Output memory section, this will only be used when `options.import_memory` +/// is set to false. The limits will be set, based on the total data section size +/// and other configuration options. 
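+/// (WebAssembly memory limits are expressed in units of 64 KiB pages.)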
+memories: types.Memory = .{ .limits = .{ + .flags = 0, + .min = 0, + .max = null, +} }, +/// Output global section +globals: sections.Globals = .{}, +/// Output export section +exports: sections.Exports = .{}, +/// Output element section +elements: sections.Elements = .{}, +/// Features which are used by the resulting binary +used_features: FeatureSet = .{}, +/// Index to a function defining the entry of the wasm file +entry: ?u32 = null, +/// Output data section, keyed by the segment name +/// Represents non-synthetic section entries +/// Used for code, data and custom sections. +segments: std.ArrayListUnmanaged(Segment) = .{}, +/// Maps a data segment key (such as .rodata) to the index into `segments` +data_segments: std.StringArrayHashMapUnmanaged(u32) = .{}, + +/// Index into `atoms` that represents the code section +code_section_index: ?u32 = null, +/// The index of the segment representing the custom '.debug_info' section. +debug_info_index: ?u32 = null, +/// The index of the segment representing the custom '.debug_line' section. +debug_line_index: ?u32 = null, +/// The index of the segment representing the custom '.debug_loc' section. +debug_loc_index: ?u32 = null, +/// The index of the segment representing the custom '.debug_ranges' section. +debug_ranges_index: ?u32 = null, +/// The index of the segment representing the custom '.debug_pubnames' section. +debug_pubnames_index: ?u32 = null, +/// The index of the segment representing the custom '.debug_pubtypes' section. +debug_pubtypes_index: ?u32 = null, +/// The index of the segment representing the custom '.debug_pubtypes' section. +debug_str_index: ?u32 = null, +/// The index of the segment representing the custom '.debug_pubtypes' section. +debug_abbrev_index: ?u32 = null, + +/// List of initialization functions, these must be called in order of priority +/// by the synthetic __wasm_call_ctors function. +init_funcs: std.ArrayListUnmanaged(InitFuncLoc) = .{}, + +pub const Segment = struct { + alignment: u32, + size: u32, + offset: u32, + flags: u32, + + pub const Flag = enum(u32) { + WASM_DATA_SEGMENT_IS_PASSIVE = 0x01, + WASM_DATA_SEGMENT_HAS_MEMINDEX = 0x02, + }; + + /// For a given segment, determines if it needs passive initialization + fn needsPassiveInitialization(segment: Segment, import_mem: bool, name: []const u8) bool { + if (import_mem and !std.mem.eql(u8, name, ".bss")) { + return true; + } + return segment.isPassive(); + } + + pub inline fn isPassive(segment: Segment) bool { + return segment.flags & @intFromEnum(Flag.WASM_DATA_SEGMENT_IS_PASSIVE) != 0; + } +}; + +/// Contains the location of the function symbol, as well as +/// the priority itself of the initialization function. +pub const InitFuncLoc = struct { + file: u16, + index: u32, + priority: u32, + + /// From a given `InitFuncLoc` returns the corresponding function symbol + pub fn getSymbol(loc: InitFuncLoc, wasm: *const Wasm) *Symbol { + return getSymbolLoc(loc).getSymbol(wasm); + } + + /// Turns the given `InitFuncLoc` into a `SymbolWithLoc` + pub fn getSymbolLoc(loc: InitFuncLoc) SymbolWithLoc { + return .{ .file = loc.file, .sym_index = loc.index }; + } +}; + +/// Describes the location of a symbol +pub const SymbolWithLoc = struct { + /// Symbol entry index within the object/binary file + sym_index: u32, + /// When file is `null`, this symbol refers to a synthetic symbol. + file: ?u16, + + /// From a given location, find the corresponding symbol in the wasm binary. 
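+    /// Follows the `discarded` map transitively, so querying a symbol that was
+    /// replaced during resolution returns the surviving definition.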
+ pub fn getSymbol(loc: SymbolWithLoc, wasm: *const Wasm) *Symbol { + if (wasm.discarded.get(loc)) |new_loc| return new_loc.getSymbol(wasm); + + if (loc.file) |file_index| { + const object = wasm.objects.items[file_index]; + return &object.symtable[loc.sym_index]; + } + return &wasm.synthetic_symbols.values()[loc.sym_index]; + } + + /// From a given location, returns the name of the symbol. + pub fn getName(loc: SymbolWithLoc, wasm_bin: *const Wasm) []const u8 { + if (wasm_bin.discarded.get(loc)) |new_loc| { + return new_loc.getName(wasm_bin); + } + if (loc.file) |object_index| { + const object: Object = wasm_bin.objects.items[object_index]; + return object.string_table.get(object.symtable[loc.sym_index].name); + } + return wasm_bin.string_table.get(wasm_bin.synthetic_symbols.values()[loc.sym_index].name); + } + + /// From a given symbol location, returns the final location. + /// e.g. when a symbol was resolved and replaced by the symbol + /// in a different file, this will return said location. + /// If the symbol wasn't replaced by another, this will return + /// the given location itwasm. + pub fn finalLoc(loc: SymbolWithLoc, wasm_bin: *const Wasm) SymbolWithLoc { + if (wasm_bin.discarded.get(loc)) |new_loc| { + return new_loc.finalLoc(wasm_bin); + } + return loc; + } +}; + +const FeatureSet = struct { + set: SetType = .{ .mask = 0 }, // everything disabled by default + + const SetType = std.bit_set.IntegerBitSet(types.known_features.kvs.len); + + const Iterator = struct { + /// The iterator that will return the index of the next feature + /// This should never be used directly, unless the index of a feature + /// is required directly. + inner: SetType.Iterator(.{}), + + /// Returns the next feature in the set + pub fn next(it: *Iterator) ?types.Feature.Tag { + const index = it.inner.next() orelse return null; + return @as(types.Feature.Tag, @enumFromInt(index)); + } + }; + + /// Returns true when a given `feature` is enabled + pub fn isEnabled(set: FeatureSet, feature: types.Feature.Tag) bool { + return set.set.isSet(@intFromEnum(feature)); + } + + /// Enables the given `feature` + pub fn enable(set: *FeatureSet, feature: types.Feature.Tag) void { + set.set.set(@intFromEnum(feature)); + } + + /// The amount of features that have been set + pub fn count(set: FeatureSet) u32 { + return @as(u32, @intCast(set.set.count())); + } + + /// Returns an iterator through the features in the set by its index + pub fn iterator(set: *const FeatureSet) Iterator { + return .{ .inner = set.set.iterator(.{}) }; + } +}; + +/// Initializes a new wasm binary file at the given path. +/// Will overwrite any existing file at said path. +pub fn openPath(gpa: Allocator, options: Options, thread_pool: *ThreadPool) !*Wasm { + const file = try options.emit.directory.createFile(options.emit.sub_path, .{ + .truncate = true, + .read = true, + }); + errdefer file.close(); + + const wasm = try createEmpty(gpa, options, thread_pool); + errdefer gpa.destroy(wasm); + wasm.base.file = file; + return wasm; +} + +fn createEmpty(gpa: Allocator, options: Options, thread_pool: *ThreadPool) !*Wasm { + const wasm = try gpa.create(Wasm); + wasm.* = .{ + .base = .{ + .tag = .wasm, + .allocator = gpa, + .file = undefined, + .thread_pool = thread_pool, + }, + .options = options, + }; + return wasm; +} + +/// Releases any resources that is owned by `Wasm`, +/// usage after calling deinit is illegal behaviour. 
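+/// A minimal usage sketch (hypothetical call site; `gpa`, `options` and the
+/// `thread_pool` pointer are assumed to be set up by the caller):
+///
+///     const wasm = try Wasm.openPath(gpa, options, thread_pool);
+///     defer {
+///         wasm.deinit();
+///         gpa.destroy(wasm); // `openPath` allocates the `Wasm` itself with `gpa`
+///     }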
+pub fn deinit(wasm: *Wasm) void { + const gpa = wasm.base.allocator; + for (wasm.objects.items) |*object| { + object.deinit(gpa); + } + for (wasm.archives.items) |*archive| { + archive.deinit(gpa); + } + for (wasm.managed_atoms.items) |atom| { + atom.deinit(gpa); + } + wasm.synthetic_symbols.deinit(gpa); + wasm.symbol_atom.deinit(gpa); + wasm.discarded.deinit(gpa); + wasm.resolved_symbols.deinit(gpa); + wasm.managed_atoms.deinit(gpa); + wasm.atoms.deinit(gpa); + wasm.data_segments.deinit(gpa); + wasm.segments.deinit(gpa); + wasm.global_symbols.deinit(gpa); + wasm.objects.deinit(gpa); + wasm.archives.deinit(gpa); + wasm.functions.deinit(gpa); + wasm.func_types.deinit(gpa); + wasm.imports.deinit(gpa); + wasm.globals.deinit(gpa); + wasm.exports.deinit(gpa); + wasm.elements.deinit(gpa); + wasm.tables.deinit(gpa); + wasm.string_table.deinit(gpa); + wasm.undefs.deinit(gpa); +} + +fn hasPassiveInitializationSegments(wasm: *const Wasm) bool { + var it = wasm.data_segments.iterator(); + while (it.next()) |entry| { + const segment: Segment = wasm.segments.items[entry.value_ptr.*]; + if (segment.needsPassiveInitialization(wasm.options.import_memory, entry.key_ptr.*)) { + return true; + } + } + return false; +} + +pub fn closeFiles(wasm: *const Wasm) void { + _ = wasm; +} + +fn parsePositionals(wasm: *Wasm, files: []const []const u8) !void { + for (files) |path| { + if (try wasm.parseObjectFile(wasm.base.allocator, path)) continue; + if (try wasm.parseArchive(wasm.base.allocator, path, false)) continue; // load archives lazily + log.warn("Unexpected file format at path: '{s}'", .{path}); + } +} + +/// Attempts to parse an object file. Returns `false` when given path +/// does not represent an object file. +fn parseObjectFile(wasm: *Wasm, gpa: Allocator, path: []const u8) !bool { + const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { + error.FileNotFound => { + log.err("File not found: '{s}'", .{path}); + return err; + }, + else => |e| return e, + }; + + var object = Object.create(gpa, file, path, null) catch |err| switch (err) { + error.InvalidMagicByte, error.NotObjectFile => { + return false; + }, + else => |e| return e, + }; + errdefer object.deinit(gpa); + try wasm.objects.append(gpa, object); + return true; +} + +/// Parses an archive file and will then parse each object file +/// that was found in the archive file. +/// Returns false when the file is not an archive file. +/// May return an error instead when parsing failed. +/// +/// When `force_load` is `true`, it will for link all object files in the archive. +/// When false, it will only link with object files that contain symbols that +/// are referenced by other object files or Zig code. 
+fn parseArchive(wasm: *Wasm, gpa: Allocator, path: []const u8, force_load: bool) !bool { + const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { + error.FileNotFound => { + log.err("File not found: '{s}'", .{path}); + return err; + }, + else => |e| return e, + }; + errdefer file.close(); + + var archive: Archive = .{ + .file = file, + .name = path, + }; + archive.parse(gpa) catch |err| switch (err) { + error.EndOfStream, error.NotArchive => { + archive.deinit(gpa); + return false; + }, + else => |e| return e, + }; + + if (!force_load) { + errdefer archive.deinit(gpa); + try wasm.archives.append(gpa, archive); + return true; + } + defer archive.deinit(gpa); + + // In this case we must force link all embedded object files within the archive + // We loop over all symbols, and then group them by offset as the offset + // notates where the object file starts. + var offsets = std.AutoArrayHashMap(u32, void).init(gpa); + defer offsets.deinit(); + for (archive.toc.values()) |symbol_offsets| { + for (symbol_offsets.items) |sym_offset| { + try offsets.put(sym_offset, {}); + } + } + + for (offsets.keys()) |file_offset| { + const object = try wasm.objects.addOne(gpa); + object.* = try archive.parseObject(gpa, file_offset); + } + + return true; +} + +/// Returns the data section entry count, skipping the .bss section +pub fn dataCount(wasm: Wasm) u32 { + var i: u32 = 0; + for (wasm.data_segments.keys()) |key| { + if (std.mem.eql(u8, key, ".bss") and !wasm.options.import_memory) continue; + i += 1; + } + return i; +} + +/// Flushes the `Wasm` construct into a final wasm binary by linking +/// the objects, ensuring the final binary file has no collisions. +pub fn flush(wasm: *Wasm) !void { + try wasm.parsePositionals(wasm.options.positionals); + try wasm.setupLinkerSymbols(); + for (wasm.objects.items, 0..) |_, obj_idx| { + try wasm.resolveSymbolsInObject(@as(u16, @intCast(obj_idx))); + } + try wasm.resolveSymbolsInArchives(); + try wasm.resolveLazySymbols(); + try wasm.setupInitFunctions(); + try wasm.checkUndefinedSymbols(); + for (wasm.objects.items, 0..) |*object, obj_idx| { + try object.parseIntoAtoms(@as(u16, @intCast(obj_idx)), wasm); + } + try wasm.validateFeatures(); + try wasm.setupStart(); + try wasm.mergeImports(); + try wasm.allocateAtoms(); + try wasm.setupMemory(); + try wasm.setupInitMemoryFunction(); + try wasm.setupTLSRelocationsFunction(); + wasm.mapFunctionTable(); + try wasm.mergeSections(); + try wasm.mergeTypes(); + try wasm.initializeCallCtorsFunction(); + try wasm.initializeTLSFunction(); + try wasm.setupExports(); + + try @import("Wasm/emit_wasm.zig").emit(wasm); +} + +/// Generic string table that duplicates strings +/// and converts them into offsets instead. +pub const StringTable = struct { + /// Table that maps string offsets, which is used to de-duplicate strings. + /// Rather than having the offset map to the data, the `StringContext` holds all bytes of the string. + /// The strings are stored as a contigious array where each string is zero-terminated. + string_table: std.HashMapUnmanaged( + u32, + void, + std.hash_map.StringIndexContext, + std.hash_map.default_max_load_percentage, + ) = .{}, + /// Holds the actual data of the string table. + string_data: std.ArrayListUnmanaged(u8) = .{}, + + /// Accepts a string and searches for a corresponding string. + /// When found, de-duplicates the string and returns the existing offset instead. 
+ /// When the string is not found in the `string_table`, a new entry will be inserted + /// and the new offset to its data will be returned. + pub fn put(table: *StringTable, allocator: Allocator, string: []const u8) !u32 { + const gop = try table.string_table.getOrPutContextAdapted( + allocator, + string, + std.hash_map.StringIndexAdapter{ .bytes = &table.string_data }, + .{ .bytes = &table.string_data }, + ); + if (gop.found_existing) { + const off = gop.key_ptr.*; + log.debug("reusing string '{s}' at offset 0x{x}", .{ string, off }); + return off; + } + + try table.string_data.ensureUnusedCapacity(allocator, string.len + 1); + const offset = @as(u32, @intCast(table.string_data.items.len)); + + log.debug("writing new string '{s}' at offset 0x{x}", .{ string, offset }); + + table.string_data.appendSliceAssumeCapacity(string); + table.string_data.appendAssumeCapacity(0); + + gop.key_ptr.* = offset; + + return offset; + } + + /// From a given offset, returns its corresponding string value. + /// Asserts offset does not exceed bounds. + pub fn get(table: StringTable, off: u32) []const u8 { + assert(off < table.string_data.items.len); + return mem.sliceTo(@as([*:0]const u8, @ptrCast(table.string_data.items.ptr + off)), 0); + } + + /// Returns the offset of a given string when it exists. + /// Will return null if the given string does not yet exist within the string table. + pub fn getOffset(table: *StringTable, string: []const u8) ?u32 { + return table.string_table.getKeyAdapted( + string, + std.hash_map.StringIndexAdapter{ .bytes = &table.string_data }, + ); + } + + /// Frees all resources of the string table. Any references pointing + /// to the strings will be invalid. + pub fn deinit(table: *StringTable, allocator: Allocator) void { + table.string_data.deinit(allocator); + table.string_table.deinit(allocator); + table.* = undefined; + } +}; + +fn resolveSymbolsInObject(wasm: *Wasm, object_index: u16) !void { + const object: Object = wasm.objects.items[object_index]; + log.debug("Resolving symbols in object: '{s}'", .{object.name}); + + for (object.symtable, 0..) 
|symbol, i| { + const sym_index = @as(u32, @intCast(i)); + const location: SymbolWithLoc = .{ + .file = object_index, + .sym_index = sym_index, + }; + const sym_name = object.string_table.get(symbol.name); + const sym_name_index = try wasm.string_table.put(wasm.base.allocator, sym_name); + + if (symbol.isLocal()) { + if (symbol.isUndefined()) { + log.err("Local symbols are not allowed to reference imports", .{}); + log.err(" symbol '{s}' defined in '{s}'", .{ sym_name, object.name }); + return error.UndefinedLocal; + } + try wasm.resolved_symbols.putNoClobber(wasm.base.allocator, location, {}); + continue; + } + + const maybe_existing = try wasm.global_symbols.getOrPut(wasm.base.allocator, sym_name_index); + if (!maybe_existing.found_existing) { + maybe_existing.value_ptr.* = location; + try wasm.resolved_symbols.putNoClobber(wasm.base.allocator, location, {}); + + if (symbol.isUndefined()) { + try wasm.undefs.putNoClobber(wasm.base.allocator, sym_name, location); + } + continue; + } + + const existing_loc = maybe_existing.value_ptr.*; + const existing_sym: *Symbol = existing_loc.getSymbol(wasm); + + const existing_file_path = if (existing_loc.file) |file| blk: { + break :blk wasm.objects.items[file].name; + } else wasm.options.emit.sub_path; + + if (!existing_sym.isUndefined()) outer: { + if (!symbol.isUndefined()) inner: { + if (symbol.isWeak()) { + break :inner; // ignore the new symbol (discard it) + } + if (existing_sym.isWeak()) { + break :outer; // existing is weak, while new one isn't. Replace it. + } + // both are defined and weak, we have a symbol collision. + log.err("symbol '{s}' defined multiple times", .{sym_name}); + log.err(" first definition in '{s}'", .{existing_file_path}); + log.err(" next definition in '{s}'", .{object.name}); + return error.SymbolCollision; + } + + try wasm.discarded.put(wasm.base.allocator, location, existing_loc); + continue; // Do not overwrite defined symbols with undefined symbols + } + + if (symbol.tag != existing_sym.tag) { + log.err("symbol '{s}' mismatching type '{s}", .{ sym_name, @tagName(symbol.tag) }); + log.err(" first definition in '{s}'", .{existing_file_path}); + log.err(" next definition in '{s}'", .{object.name}); + return error.SymbolMismatchingType; + } + + // only verify module/import name for function symbols + if (existing_sym.isUndefined() and symbol.isUndefined()) { + if (symbol.tag == .function) { + const file_index = existing_loc.file.?; + const obj = wasm.objects.items[file_index]; + const name_index = obj.findImport(symbol.tag.externalType(), existing_sym.index).module_name; + const existing_name = obj.string_table.get(name_index); + + const module_index = object.findImport(symbol.tag.externalType(), symbol.index).module_name; + const module_name = object.string_table.get(module_index); + if (!mem.eql(u8, existing_name, module_name)) { + log.err("symbol '{s}' module name mismatch. 
Expected '{s}', but found '{s}'", .{ + sym_name, + existing_name, + module_name, + }); + log.err(" first definition in '{s}'", .{existing_file_path}); + log.err(" next definition in '{s}'", .{object.name}); + return error.ModuleNameMismatch; + } + } + + try wasm.discarded.put(wasm.base.allocator, location, existing_loc); + continue; // both undefined so skip overwriting existing symbol and discard the new symbol + } + + if (existing_sym.tag == .global) { + const existing_ty = wasm.getGlobalType(existing_loc); + const new_ty = wasm.getGlobalType(location); + if (existing_ty.mutable != new_ty.mutable or existing_ty.valtype != new_ty.valtype) { + log.err("symbol '{s}' mismatching global types", .{sym_name}); + log.err(" first definition in '{s}'", .{existing_file_path}); + log.err(" next definition in '{s}'", .{object.name}); + return error.GlobalTypeMismatch; + } + } + + if (existing_sym.tag == .function) { + const existing_ty = wasm.getFunctionSignature(existing_loc); + const new_ty = wasm.getFunctionSignature(location); + if (!existing_ty.eql(new_ty)) { + log.err("symbol '{s}' mismatching function signatures.", .{sym_name}); + log.err(" expected signature {}, but found signature {}", .{ existing_ty, new_ty }); + log.err(" first definition in '{s}'", .{existing_file_path}); + log.err(" next definition in '{s}'", .{object.name}); + return error.FunctionSignatureMismatch; + } + } + + // when both symbols are weak, we skip overwriting unless the existing + // symbol is weak and the new one isn't, in which case we *do* overwrite it. + if (existing_sym.isWeak() and symbol.isWeak()) blk: { + if (existing_sym.isUndefined() and !symbol.isUndefined()) break :blk; + try wasm.discarded.put(wasm.base.allocator, location, existing_loc); + continue; + } + + // simply overwrite with the new symbol + log.debug("Overwriting symbol '{s}'", .{sym_name}); + log.debug(" old definition in '{s}'", .{existing_file_path}); + log.debug(" new definition in '{s}'", .{object.name}); + try wasm.discarded.putNoClobber(wasm.base.allocator, existing_loc, location); + maybe_existing.value_ptr.* = location; + try wasm.global_symbols.put(wasm.base.allocator, sym_name_index, location); + try wasm.resolved_symbols.put(wasm.base.allocator, location, {}); + assert(wasm.resolved_symbols.swapRemove(existing_loc)); + if (existing_sym.isUndefined()) { + _ = wasm.undefs.swapRemove(sym_name); + } + } +} + +/// Resolves the symbols in each archive file. +/// When resolved to a symbol from an object file, +/// this will result into loading the object file within +/// the archive file and linking with it. +fn resolveSymbolsInArchives(wasm: *Wasm) !void { + if (wasm.archives.items.len == 0) return; + + log.debug("Resolving symbols in archives", .{}); + var index: u32 = 0; + undef_loop: while (index < wasm.undefs.count()) { + const sym_name = wasm.undefs.keys()[index]; + + for (wasm.archives.items) |archive| { + const offset = archive.toc.get(sym_name) orelse { + // symbol does not exist in this archive + continue; + }; + + log.debug("Detected symbol '{s}' in archive '{s}', parsing objects..", .{ sym_name, archive.name }); + // Symbol is found in unparsed object file within current archive. + // Parse object and and resolve symbols again before we check remaining + // undefined symbols. 
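+            // Note that `index` is deliberately not advanced on this path: resolving
+            // the newly added object may have removed the current name from `undefs`
+            // (swapRemove moves another entry into its slot) or introduced further
+            // undefined symbols, so the same position is re-examined on the next pass.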
+ const object_file_index = @as(u16, @intCast(wasm.objects.items.len)); + var object = try archive.parseObject(wasm.base.allocator, offset.items[0]); + try wasm.objects.append(wasm.base.allocator, object); + try wasm.resolveSymbolsInObject(object_file_index); + + // continue loop for any remaining undefined symbols that still exist + // after resolving last object file + continue :undef_loop; + } + index += 1; + } +} + +/// Creates synthetic linker-symbols, but only if they are being referenced from +/// any object file. For instance, the `__heap_base` symbol will only be created, +/// if one or multiple undefined references exist. When none exist, the symbol will +/// not be created, ensuring we don't unneccesarily emit unreferenced symbols. +fn resolveLazySymbols(wasm: *Wasm) !void { + if (wasm.undefs.fetchSwapRemove("__heap_base")) |kv| { + const loc = try wasm.createSyntheticSymbol("__heap_base", .data); + try wasm.discarded.putNoClobber(wasm.base.allocator, kv.value, loc); + _ = wasm.resolved_symbols.swapRemove(loc); // we don't want to emit this symbol, only use it for relocations. + + const atom = try Atom.create(wasm.base.allocator); + atom.size = 0; + atom.sym_index = loc.sym_index; + atom.file = null; + // va/offset will be set during `setupMemory` + try wasm.symbol_atom.put(wasm.base.allocator, loc, atom); + } + + if (wasm.undefs.fetchSwapRemove("__heap_end")) |kv| { + const loc = try wasm.createSyntheticSymbol("__heap_end", .data); + try wasm.discarded.putNoClobber(wasm.base.allocator, kv.value, loc); + _ = wasm.resolved_symbols.swapRemove(loc); + + const atom = try Atom.create(wasm.base.allocator); + atom.size = 0; + atom.sym_index = loc.sym_index; + atom.file = null; + try wasm.symbol_atom.put(wasm.base.allocator, loc, atom); + } + + if (!wasm.options.shared_memory) { + if (wasm.undefs.fetchSwapRemove("__tls_base")) |kv| { + const loc = try wasm.createSyntheticSymbol("__tls_base", .global); + try wasm.discarded.putNoClobber(wasm.base.allocator, kv.value, loc); + } + } +} + +/// From a given symbol location, returns its `wasm.GlobalType`. +/// Asserts the Symbol represents a global. +fn getGlobalType(wasm: *const Wasm, loc: SymbolWithLoc) std.wasm.GlobalType { + const symbol = loc.getSymbol(wasm); + assert(symbol.tag == .global); + const is_undefined = symbol.isUndefined(); + if (loc.file) |file_index| { + const obj: Object = wasm.objects.items[file_index]; + if (is_undefined) { + return obj.findImport(.global, symbol.index).kind.global; + } + const import_global_count = obj.importedCountByKind(.global); + return obj.globals[symbol.index - import_global_count].global_type; + } + assert(!is_undefined); + return wasm.globals.items.items[symbol.index].global_type; +} + +/// From a given symbol location, returns its `wasm.Type`. +/// Asserts the Symbol represents a function. 
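+/// (Illustrative example: in an object that imports 3 functions, a defined
+/// function symbol with `index == 5` maps to `object.functions[2]`, whose
+/// `type_index` is then resolved through `object.func_types`.)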
+fn getFunctionSignature(wasm: *const Wasm, loc: SymbolWithLoc) std.wasm.Type { + const symbol = loc.getSymbol(wasm); + assert(symbol.tag == .function); + const is_undefined = symbol.isUndefined(); + if (loc.file) |file_index| { + const obj: Object = wasm.objects.items[file_index]; + if (is_undefined) { + const ty_index = obj.findImport(.function, symbol.index).kind.function; + return obj.func_types[ty_index]; + } + const import_function_count = obj.importedCountByKind(.function); + const type_index = obj.functions[symbol.index - import_function_count].type_index; + return obj.func_types[type_index]; + } + assert(!is_undefined); + return wasm.func_types.get(wasm.functions.items.values()[symbol.index].type_index).*; +} + +/// Assigns indexes to all indirect functions. +/// Starts at offset 1, where the value `0` represents an unresolved function pointer +/// or null-pointer +fn mapFunctionTable(wasm: *Wasm) void { + var it = wasm.elements.indirect_functions.valueIterator(); + var index: u32 = 1; + while (it.next()) |value_ptr| : (index += 1) { + value_ptr.* = index; + } +} + +/// Calculates the new indexes for symbols and their respective symbols +fn mergeSections(wasm: *Wasm) !void { + // first append the indirect function table if initialized + const function_pointers = wasm.elements.functionCount(); + if (function_pointers > 0 and !wasm.options.import_table) { + log.debug("Appending indirect function table", .{}); + const loc = wasm.findGlobalSymbol("__indirect_function_table").?; + const symbol = loc.getSymbol(wasm); + symbol.index = try wasm.tables.append( + wasm.base.allocator, + wasm.imports.tableCount(), + .{ + // index starts at 1, so add 1 extra element + .limits = .{ .flags = 0x1, .min = function_pointers + 1, .max = function_pointers + 1 }, + .reftype = .funcref, + }, + ); + } + + log.debug("Merging sections", .{}); + for (wasm.resolved_symbols.keys()) |sym_with_loc| { + const file_index = sym_with_loc.file orelse continue; // synthetic symbols do not need to be merged + const object = wasm.objects.items[file_index]; + const symbol: *Symbol = &object.symtable[sym_with_loc.sym_index]; + if (symbol.isUndefined() or (symbol.tag != .function and symbol.tag != .global and symbol.tag != .table)) { + // Skip undefined symbols as they go in the `import` section + // Also skip symbols that do not need to have a section merged. + continue; + } + + const offset = object.importedCountByKind(symbol.tag.externalType()); + const index = symbol.index - offset; + switch (symbol.tag) { + .function => { + const original_func = object.functions[index]; + symbol.index = try wasm.functions.append( + wasm.base.allocator, + .{ .file = file_index, .index = symbol.index }, + wasm.imports.functionCount(), + original_func, + ); + }, + .global => { + const original_global = object.globals[index]; + symbol.index = try wasm.globals.append( + wasm.base.allocator, + wasm.imports.globalCount(), + original_global, + ); + }, + .table => { + const original_table = object.tables[index]; + symbol.index = try wasm.tables.append( + wasm.base.allocator, + wasm.imports.tableCount(), + original_table, + ); + }, + else => unreachable, + } + } + log.debug("Merged ({d}) functions", .{wasm.functions.count()}); + log.debug("Merged ({d}) globals", .{wasm.globals.count()}); + log.debug("Merged ({d}) tables", .{wasm.tables.count()}); +} + +/// Merges function types of all object files into the final +/// 'types' section, while assigning the type index to the representing +/// section (import, export, function). 
+fn mergeTypes(wasm: *Wasm) !void { + log.debug("Merging types", .{}); + // A map to track which functions have already had their + // type inserted. If we do this for the same function multiple times, + // it will be overwritten with the incorrect type. + var dirty = std.AutoHashMap(u32, void).init(wasm.base.allocator); + try dirty.ensureUnusedCapacity(wasm.functions.count()); + defer dirty.deinit(); + + for (wasm.resolved_symbols.keys()) |sym_with_loc| { + const object = wasm.objects.items[sym_with_loc.file orelse continue]; // synthetic symbols do not need to be merged + const symbol: Symbol = object.symtable[sym_with_loc.sym_index]; + if (symbol.tag == .function) { + if (symbol.isUndefined()) { + log.debug("Adding type from extern function '{s}'", .{object.string_table.get(symbol.name)}); + const value = &wasm.imports.imported_functions.values()[symbol.index]; + value.type = try wasm.func_types.append(wasm.base.allocator, object.func_types[value.type]); + continue; + } else if (!dirty.contains(symbol.index)) { + log.debug("Adding type from function '{s}'", .{object.string_table.get(symbol.name)}); + const func = &wasm.functions.items.values()[symbol.index - wasm.imports.functionCount()]; + func.type_index = try wasm.func_types.append(wasm.base.allocator, object.func_types[func.type_index]); + dirty.putAssumeCapacity(symbol.index, {}); + } + } + } + log.debug("Completed merging and deduplicating types. Total count: ({d})", .{wasm.func_types.count()}); +} + +fn setupExports(wasm: *Wasm) !void { + log.debug("Building exports from symbols", .{}); + + // When importing memory option is false, + // we export the memory. + if (!wasm.options.import_memory) { + try wasm.exports.append(wasm.base.allocator, .{ .name = "memory", .kind = .memory, .index = 0 }); + } + + if (wasm.options.exports.len > 0) { + var failed_exports = try std.ArrayList([]const u8).initCapacity(wasm.base.allocator, wasm.options.exports.len); + defer failed_exports.deinit(); + + for (wasm.options.exports) |export_name| { + const loc = wasm.findGlobalSymbol(export_name) orelse { + failed_exports.appendAssumeCapacity(export_name); + continue; + }; + const symbol = loc.getSymbol(wasm); + symbol.setFlag(.WASM_SYM_EXPORTED); + } + + if (failed_exports.items.len > 0) { + for (failed_exports.items) |export_name| { + log.err("Failed to export symbol '{s}' using `--export`, symbol was not found", .{export_name}); + } + + return error.ExportedSymbolNotFound; + } + } + + for (wasm.resolved_symbols.keys()) |sym_loc| { + const symbol = sym_loc.getSymbol(wasm); + if (!symbol.isExported(wasm.options.export_dynamic)) continue; + + const name = sym_loc.getName(wasm); + const exported: std.wasm.Export = if (symbol.tag == .data) exp: { + const atom = wasm.symbol_atom.get(sym_loc).?; + const va = atom.getVA(wasm, symbol); + const offset = wasm.imports.globalCount(); + const global_index = try wasm.globals.append(wasm.base.allocator, offset, .{ + .global_type = .{ .valtype = .i32, .mutable = false }, + .init = .{ .i32_const = @as(i32, @intCast(va)) }, + }); + break :exp .{ + .name = name, + .kind = .global, + .index = global_index, + }; + } else .{ + .name = name, + .kind = symbol.tag.externalType(), + .index = symbol.index, + }; + + log.debug("Appending export from symbol '{s}' using name: '{s}' index: {d}", .{ + name, name, symbol.index, + }); + try wasm.exports.append(wasm.base.allocator, exported); + } + log.debug("Completed building exports. 
Total count: ({d})", .{wasm.exports.count()}); +} + +/// Creates symbols that are made by the linker, rather than the compiler/object file +/// TODO: We should support re-merging synthetic symbols so we can create the corresponding +/// symbol objects initially here and later update them. It makes them safer to use, for +/// insignificant performance degredation. +fn setupLinkerSymbols(wasm: *Wasm) !void { + // stack pointer symbol + { + const loc = try wasm.createSyntheticSymbol("__stack_pointer", .global); + const symbol = loc.getSymbol(wasm); + symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); + const global: std.wasm.Global = .{ + .init = .{ .i32_const = 0 }, + .global_type = .{ .valtype = .i32, .mutable = true }, + }; + symbol.index = try wasm.globals.append(wasm.base.allocator, 0, global); + } + + // indirect function table symbol + { + const loc = try wasm.createSyntheticSymbol("__indirect_function_table", .table); + const symbol = loc.getSymbol(wasm); + if (wasm.options.export_table) { + symbol.setFlag(.WASM_SYM_EXPORTED); + } else if (wasm.options.import_table) { + symbol.setUndefined(true); + } else { + symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); + } + // do need to create table here, as we only create it if there's any + // function pointers to be stored. This is done in `mergeSections` + } + + // __wasm_call_ctors + { + const loc = try wasm.createSyntheticSymbol("__wasm_call_ctors", .function); + const symbol = loc.getSymbol(wasm); + symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); + // We set the type and function index later so we do not need to merge them later. + } + + // shared-memory symbols for TLS support + if (wasm.options.shared_memory) { + // __tls_base + { + const loc = try wasm.createSyntheticSymbol("__tls_base", .global); + const symbol = loc.getSymbol(wasm); + symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); + } + // __tls_size + { + const loc = try wasm.createSyntheticSymbol("__tls_size", .global); + const symbol = loc.getSymbol(wasm); + symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); + } + // __tls_align + { + const loc = try wasm.createSyntheticSymbol("__tls_align", .global); + const symbol = loc.getSymbol(wasm); + symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); + } + + // __tls_init + { + const loc = try wasm.createSyntheticSymbol("__wasm_init_tls", .function); + const symbol = loc.getSymbol(wasm); + symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); + } + } +} + +/// For a given name, creates a new global synthetic symbol. +/// Leaves index undefined and the default flags (0). +fn createSyntheticSymbol(wasm: *Wasm, name: []const u8, tag: Symbol.Tag) !SymbolWithLoc { + const name_offset = try wasm.string_table.put(wasm.base.allocator, name); + const sym_index = @as(u32, @intCast(wasm.synthetic_symbols.count())); + const loc: SymbolWithLoc = .{ .sym_index = sym_index, .file = null }; + try wasm.synthetic_symbols.putNoClobber(wasm.base.allocator, name, .{ + .name = name_offset, + .flags = 0, + .tag = tag, + .index = undefined, + }); + try wasm.resolved_symbols.putNoClobber(wasm.base.allocator, loc, {}); + try wasm.global_symbols.put(wasm.base.allocator, name_offset, loc); + return loc; +} + +/// Tries to find a global symbol by its name. Returns null when not found, +/// and its location when it is found. +fn findGlobalSymbol(wasm: *Wasm, name: []const u8) ?SymbolWithLoc { + const offset = wasm.string_table.getOffset(name) orelse return null; + return wasm.global_symbols.get(offset); +} + +/// Verifies if we have any undefined, non-function symbols left. 
+/// Emits an error if one or multiple undefined references are found. +/// This will be disabled when the user passes `--import-symbols` +fn checkUndefinedSymbols(wasm: *const Wasm) !void { + if (wasm.options.import_symbols) return; + + var found_undefined_symbols = false; + for (wasm.undefs.values()) |undef| { + const symbol = undef.getSymbol(wasm); + if (symbol.tag == .data) { + found_undefined_symbols = true; + const file_name = wasm.objects.items[undef.file.?].name; + const obj = wasm.objects.items[undef.file.?]; + const name_index = if (symbol.tag == .function) name_index: { + break :name_index obj.findImport(symbol.tag.externalType(), symbol.index).name; + } else symbol.name; + const import_name = obj.string_table.get(name_index); + log.err("could not resolve undefined symbol '{s}'", .{import_name}); + log.err(" defined in '{s}'", .{file_name}); + } + } + if (found_undefined_symbols) { + return error.UndefinedSymbol; + } +} + +/// Obtains all initfuncs from each object file, verifies its function signature, +/// and then appends it to our final `init_funcs` list. +/// After all functions have been inserted, the functions will be ordered based +/// on their priority. +fn setupInitFunctions(wasm: *Wasm) !void { + for (wasm.objects.items, 0..) |object, file_index| { + try wasm.init_funcs.ensureUnusedCapacity(wasm.base.allocator, object.init_funcs.len); + for (object.init_funcs) |init_func| { + const symbol = object.symtable[init_func.symbol_index]; + const func_index = symbol.index - object.importedCountByKind(.function); + const func = object.functions[func_index]; + const ty = object.func_types[func.type_index]; + + if (ty.params.len != 0) { + log.err("constructor functions cannot take arguments: '{s}'", .{object.string_table.get(symbol.name)}); + return error.InvalidInitFunc; + } + log.debug("appended init func '{s}'\n", .{object.string_table.get(symbol.name)}); + wasm.init_funcs.appendAssumeCapacity(.{ + .index = init_func.symbol_index, + .file = @as(u16, @intCast(file_index)), + .priority = init_func.priority, + }); + } + } + + // sort the initfunctions based on their priority + std.sort.sort(InitFuncLoc, wasm.init_funcs.items, {}, struct { + fn lessThan(ctx: void, lhs: InitFuncLoc, rhs: InitFuncLoc) bool { + _ = ctx; + return lhs.priority < rhs.priority; + } + }.lessThan); +} + +fn setupInitMemoryFunction(wasm: *Wasm) !void { + // Passive segments are used to avoid memory being reinitialized on each + // thread's instantiation. These passive segments are initialized and + // dropped in __wasm_init_memory, which is registered as the start function + // We also initialize bss segments (using memory.fill) as part of this + // function. + if (!wasm.hasPassiveInitializationSegments()) { + return; + } + + const flag_address: u32 = if (wasm.options.shared_memory) address: { + // when we have passive initialization segments and shared memory + // `setupMemory` will create this symbol and set its virtual address. 
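+        // The flag is a 32-bit word in linear memory used below as a small state
+        // machine: 0 = memory not yet initialized, 1 = initialization in progress
+        // (other threads wait on it), 2 = initialization complete (passive segments
+        // may be dropped).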
+ const loc = wasm.findGlobalSymbol("__wasm_init_memory_flag").?; + const atom = wasm.symbol_atom.get(loc).?; + break :address atom.getVA(wasm, loc.getSymbol(wasm)); + } else 0; + + var function_body = std.ArrayList(u8).init(wasm.base.allocator); + defer function_body.deinit(); + const writer = function_body.writer(); + + // we have 0 locals + try leb.writeULEB128(writer, @as(u32, 0)); + + if (wasm.options.shared_memory) { + // destination blocks + // based on values we jump to corresponding label + try writer.writeByte(std.wasm.opcode(.block)); // $drop + try writer.writeByte(std.wasm.block_empty); // block type + + try writer.writeByte(std.wasm.opcode(.block)); // $wait + try writer.writeByte(std.wasm.block_empty); // block type + + try writer.writeByte(std.wasm.opcode(.block)); // $init + try writer.writeByte(std.wasm.block_empty); // block type + + // atomically check + try writer.writeByte(std.wasm.opcode(.i32_const)); + try leb.writeULEB128(writer, flag_address); + try writer.writeByte(std.wasm.opcode(.i32_const)); + try leb.writeULEB128(writer, @as(u32, 0)); + try writer.writeByte(std.wasm.opcode(.i32_const)); + try leb.writeULEB128(writer, @as(u32, 1)); + try writer.writeByte(0xfe); // atomic prefix (TODO: Add this to zig's std) + try leb.writeULEB128(writer, @as(u32, 0x48)); // i32.atomic.rmw.cmpxchg + try leb.writeULEB128(writer, @as(u32, 2)); // alignment + try leb.writeULEB128(writer, @as(u32, 0)); // offset + + // based on the value from the atomic check, jump to the label. + try writer.writeByte(std.wasm.opcode(.br_table)); + try leb.writeULEB128(writer, @as(u32, 2)); // length of the table (we have 3 blocks but because of the mandatory default the length is 2). + try leb.writeULEB128(writer, @as(u32, 0)); // $init + try leb.writeULEB128(writer, @as(u32, 1)); // $wait + try leb.writeULEB128(writer, @as(u32, 2)); // $drop + try writer.writeByte(std.wasm.opcode(.end)); + } + + var it = wasm.data_segments.iterator(); + var segment_index: u32 = 0; + while (it.next()) |entry| : (segment_index += 1) { + const segment: Segment = wasm.segments.items[entry.value_ptr.*]; + if (segment.needsPassiveInitialization(wasm.options.import_memory, entry.key_ptr.*)) { + // For passive BSS segments we can simple issue a memory.fill(0). + // For non-BSS segments we do a memory.init. Both these + // instructions take as their first argument the destination + // address. + try writer.writeByte(std.wasm.opcode(.i32_const)); + try leb.writeULEB128(writer, segment.offset); + + if (wasm.options.shared_memory and std.mem.eql(u8, entry.key_ptr.*, ".tdata")) { + // When we initialize the TLS segment we also set the `__tls_base` + // global. This allows the runtime to use this static copy of the + // TLS data for the first/main thread. 
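+                // i.e. emit roughly `i32.const <.tdata offset>` followed by
+                // `global.set $__tls_base` (the concrete global index is written below).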
+ try writer.writeByte(std.wasm.opcode(.i32_const)); + try leb.writeULEB128(writer, segment.offset); + try writer.writeByte(std.wasm.opcode(.global_set)); + const loc = wasm.findGlobalSymbol("__tls_base").?; + try leb.writeULEB128(writer, loc.getSymbol(wasm).index); + } + + try writer.writeByte(std.wasm.opcode(.i32_const)); + try leb.writeULEB128(writer, @as(u32, 0)); + try writer.writeByte(std.wasm.opcode(.i32_const)); + try leb.writeULEB128(writer, segment.size); + try writer.writeByte(std.wasm.opcode(.prefixed)); + if (std.mem.eql(u8, entry.key_ptr.*, ".bss")) { + // fill bss segment with zeroes + try leb.writeULEB128(writer, @intFromEnum(std.wasm.PrefixedOpcode.memory_fill)); + } else { + // initialize the segment + try leb.writeULEB128(writer, @intFromEnum(std.wasm.PrefixedOpcode.memory_init)); + try leb.writeULEB128(writer, segment_index); + } + try writer.writeByte(0); // memory index immediate + } + } + + if (wasm.options.shared_memory) { + // we set the init memory flag to value '2' + try writer.writeByte(std.wasm.opcode(.i32_const)); + try leb.writeULEB128(writer, flag_address); + try writer.writeByte(std.wasm.opcode(.i32_const)); + try leb.writeULEB128(writer, @as(u32, 2)); + try writer.writeByte(0xfe); // atomics prefix + try leb.writeULEB128(writer, @as(u32, 0x17)); // i32.atomic.store + try leb.writeULEB128(writer, @as(u32, 2)); // alignment + try leb.writeULEB128(writer, @as(u32, 0)); // offset + + // notify any waiters for segment initialization completion + try writer.writeByte(std.wasm.opcode(.i32_const)); + try leb.writeULEB128(writer, flag_address); + try writer.writeByte(std.wasm.opcode(.i32_const)); + try leb.writeILEB128(writer, @as(i32, -1)); // number of waiters + try writer.writeByte(0xfe); // atomics prefix + try leb.writeULEB128(writer, @as(u32, 0x0)); // memory.atomic.notify + try leb.writeULEB128(writer, @as(u32, 2)); // alignment + try leb.writeULEB128(writer, @as(u32, 0)); // offset + try writer.writeByte(std.wasm.opcode(.drop)); + + // branch and drop segments + try writer.writeByte(std.wasm.opcode(.br)); + try leb.writeULEB128(writer, @as(u32, 1)); + + // wait for thread to initialize memory segments + try writer.writeByte(std.wasm.opcode(.end)); // end $wait + try writer.writeByte(std.wasm.opcode(.i32_const)); + try leb.writeULEB128(writer, flag_address); + try writer.writeByte(std.wasm.opcode(.i32_const)); + try leb.writeULEB128(writer, @as(u32, 1)); // expected flag value + try writer.writeByte(std.wasm.opcode(.i32_const)); + try leb.writeILEB128(writer, @as(i32, -1)); // timeout + try writer.writeByte(0xfe); // atomics prefix + try leb.writeULEB128(writer, @as(u32, 0x1)); // memory.atomic.wait32 + try leb.writeULEB128(writer, @as(u32, 2)); // alignment + try leb.writeULEB128(writer, @as(u32, 0)); // offset + try writer.writeByte(std.wasm.opcode(.drop)); + + try writer.writeByte(std.wasm.opcode(.end)); // end $drop + } + + it.reset(); + segment_index = 0; + while (it.next()) |entry| : (segment_index += 1) { + const name = entry.key_ptr.*; + const segment: Segment = wasm.segments.items[entry.value_ptr.*]; + if (segment.needsPassiveInitialization(wasm.options.import_memory, name) and + !std.mem.eql(u8, name, ".bss")) + { + // The TLS region should not be dropped since its is needed + // during the initialization of each thread (__wasm_init_tls). 
+ if (wasm.options.shared_memory and std.mem.eql(u8, name, ".tdata")) { + continue; + } + + try writer.writeByte(std.wasm.opcode(.prefixed)); + try leb.writeULEB128(writer, @intFromEnum(std.wasm.PrefixedOpcode.data_drop)); + try leb.writeULEB128(writer, segment_index); + } + } + + // End of the function body + try writer.writeByte(std.wasm.opcode(.end)); + + try wasm.createSyntheticFunction( + "__wasm_init_memory", + std.wasm.Type{ .params = &.{}, .returns = &.{} }, + &function_body, + ); +} + +/// Constructs a synthetic function that performs runtime relocations for +/// TLS symbols. This function is called by `__wasm_init_tls`. +fn setupTLSRelocationsFunction(wasm: *Wasm) !void { + // When we have TLS GOT entries and shared memory is enabled, + // we must perform runtime relocations or else we don't create the function. + if (!(wasm.options.shared_memory and wasm.globals.requiresTLSReloc(wasm))) { + return; + } + + var function_body = std.ArrayList(u8).init(wasm.base.allocator); + defer function_body.deinit(); + const writer = function_body.writer(); + + // locals (we have none) + try writer.writeByte(0); + for (wasm.globals.got_symbols.items, 0..) |got_loc, got_index| { + const sym: *Symbol = got_loc.getSymbol(wasm); + if (!sym.isTLS()) continue; // only relocate TLS symbols + if (sym.tag == .data and sym.isDefined()) { + // get __tls_base + try writer.writeByte(std.wasm.opcode(.global_get)); + try leb.writeULEB128(writer, wasm.findGlobalSymbol("__tls_base").?.getSymbol(wasm).index); + + // add the virtual address of the symbol + try writer.writeByte(std.wasm.opcode(.i32_const)); + const va = wasm.symbol_atom.get(got_loc).?.getVA(wasm, sym); + try leb.writeULEB128(writer, va); + } else if (sym.tag == .function) { + @panic("TODO: relocate GOT entry of function"); + } else continue; + + try writer.writeByte(std.wasm.opcode(.i32_add)); + try writer.writeByte(std.wasm.opcode(.global_set)); + try leb.writeULEB128(writer, wasm.imports.globalCount() + wasm.globals.count() + @as(u32, @intCast(got_index))); + } + try writer.writeByte(std.wasm.opcode(.end)); + + try wasm.createSyntheticFunction( + "__wasm_apply_global_tls_relocs", + std.wasm.Type{ .params = &.{}, .returns = &.{} }, + &function_body, + ); +} + +fn initializeCallCtorsFunction(wasm: *Wasm) !void { + var function_body = std.ArrayList(u8).init(wasm.base.allocator); + defer function_body.deinit(); + const writer = function_body.writer(); + + // Write locals count (we have none) + try leb.writeULEB128(writer, @as(u32, 0)); + + // call constructors + const import_count = wasm.imports.functionCount(); + for (wasm.init_funcs.items) |init_func_loc| { + const symbol = init_func_loc.getSymbol(wasm); + const func = wasm.functions.items.values()[symbol.index - import_count]; + const ty = wasm.func_types.items.items[func.type_index]; + + // Call function by its function index + try writer.writeByte(std.wasm.opcode(.call)); + try leb.writeULEB128(writer, symbol.index); + + // drop all returned values from the stack as __wasm_call_ctors has no return value + for (ty.returns) |_| { + try writer.writeByte(std.wasm.opcode(.drop)); + } + } + + // End function body + try writer.writeByte(std.wasm.opcode(.end)); + + try wasm.createSyntheticFunction( + "__wasm_call_ctors", + std.wasm.Type{ .params = &.{}, .returns = &.{} }, + &function_body, + ); +} + +fn createSyntheticFunction( + wasm: *Wasm, + symbol_name: []const u8, + func_ty: std.wasm.Type, + function_body: *std.ArrayList(u8), +) !void { + const loc = wasm.findGlobalSymbol(symbol_name) orelse + try 
wasm.createSyntheticSymbol(symbol_name, .function); + + // Update the symbol + const symbol = loc.getSymbol(wasm); + // create type (() -> nil) + const ty_index = try wasm.func_types.append(wasm.base.allocator, func_ty); + // create function with above type + symbol.index = try wasm.functions.append( + wasm.base.allocator, + .{ .file = null, .index = loc.sym_index }, + wasm.imports.functionCount(), + .{ .type_index = ty_index }, + ); + + // create the atom that will be output into the final binary + const atom = try wasm.base.allocator.create(Atom); + errdefer wasm.base.allocator.destroy(atom); + atom.* = .{ + .size = @as(u32, @intCast(function_body.items.len)), + .offset = 0, + .sym_index = loc.sym_index, + .file = null, + .alignment = 1, + .next = null, + .prev = null, + .code = function_body.moveToUnmanaged(), + }; + try wasm.managed_atoms.append(wasm.base.allocator, atom); + try wasm.appendAtomAtIndex(wasm.base.allocator, wasm.code_section_index.?, atom); + try wasm.symbol_atom.putNoClobber(wasm.base.allocator, loc, atom); + atom.offset = atom.prev.?.offset + atom.prev.?.size; +} + +fn initializeTLSFunction(wasm: *Wasm) !void { + if (!wasm.options.shared_memory) return; + + var function_body = std.ArrayList(u8).init(wasm.base.allocator); + defer function_body.deinit(); + const writer = function_body.writer(); + + // locals + try writer.writeByte(0); + + // If there's a TLS segment, initialize it during runtime using the bulk-memory feature + if (wasm.data_segments.getIndex(".tdata")) |data_index| { + const segment_index = wasm.data_segments.entries.items(.value)[data_index]; + const segment = wasm.segments.items[segment_index]; + + const param_local: u32 = 0; + + try writer.writeByte(std.wasm.opcode(.local_get)); + try leb.writeULEB128(writer, param_local); + + const tls_base_loc = wasm.findGlobalSymbol("__tls_base").?; + try writer.writeByte(std.wasm.opcode(.global_get)); + try leb.writeULEB128(writer, tls_base_loc.getSymbol(wasm).index); + + // load stack values for the bulk-memory operation + { + try writer.writeByte(std.wasm.opcode(.local_get)); + try leb.writeULEB128(writer, param_local); + + try writer.writeByte(std.wasm.opcode(.i32_const)); + try leb.writeULEB128(writer, @as(u32, 0)); //segment offset + + try writer.writeByte(std.wasm.opcode(.i32_const)); + try leb.writeULEB128(writer, @as(u32, segment.size)); //segment offset + } + + // perform the bulk-memory operation to initialize the data segment + try writer.writeByte(std.wasm.opcode(.prefixed)); + try leb.writeULEB128(writer, @intFromEnum(std.wasm.PrefixedOpcode.memory_init)); + // segment immediate + try leb.writeULEB128(writer, @as(u32, @intCast(data_index))); + // memory index immediate (always 0) + try writer.writeByte(@as(u32, 0)); + } + + // If we have to perform any TLS relocations, call the corresponding function + // which performs all runtime TLS relocations. This is a synthetic function, + // generated by the linker. 
+ if (wasm.findGlobalSymbol("__wasm_apply_global_tls_relocs")) |loc| { + try writer.writeByte(std.wasm.opcode(.call)); + try leb.writeULEB128(writer, loc.getSymbol(wasm).index); + } + + try writer.writeByte(std.wasm.opcode(.end)); + + try wasm.createSyntheticFunction( + "__wasm_init_tls", + std.wasm.Type{ .params = &.{.i32}, .returns = &.{} }, + &function_body, + ); +} + +fn mergeImports(wasm: *Wasm) !void { + if (wasm.options.import_table and wasm.elements.functionCount() > 0) { + const loc = wasm.findGlobalSymbol("__indirect_function_table").?; + const symbol = loc.getSymbol(wasm); + symbol.index = wasm.imports.tableCount(); + try wasm.imports.imported_tables.putNoClobber(wasm.base.allocator, .{ + .module_name = "env", + .name = "__indirect_function_table", + }, .{ .index = symbol.index, .table = .{ + .limits = .{ .min = wasm.elements.functionCount(), .max = null }, + .reftype = .funcref, + } }); + try wasm.imports.imported_symbols.append(wasm.base.allocator, loc); + } + + for (wasm.resolved_symbols.keys()) |sym_with_loc| { + const symbol = sym_with_loc.getSymbol(wasm); + if (symbol.tag != .data) { + if (!symbol.requiresImport()) { + continue; + } + if (std.mem.eql(u8, sym_with_loc.getName(wasm), "__indirect_function_table")) { + continue; + } + log.debug("Symbol '{s}' will be imported", .{sym_with_loc.getName(wasm)}); + try wasm.imports.appendSymbol(wasm.base.allocator, wasm, sym_with_loc); + } + } +} + +/// Sets up the memory section of the wasm module, as well as the stack. +fn setupMemory(wasm: *Wasm) !void { + log.debug("Setting up memory layout", .{}); + const page_size = std.wasm.page_size; + const stack_size = wasm.options.stack_size orelse page_size; + const stack_alignment = 16; // wasm's stack alignment as specified by tool-convention + const heap_alignment = 16; // wasm's heap alignment as specified by tool-convention + + // Always place the stack at the start by default + // unless the user specified the global-base flag + var place_stack_first = true; + var memory_ptr: u64 = if (wasm.options.global_base) |base| blk: { + place_stack_first = false; + break :blk base; + } else 0; + + if (place_stack_first) { + memory_ptr = std.mem.alignForwardGeneric(u64, memory_ptr, stack_alignment); + memory_ptr += stack_size; + // We always put the stack pointer global at index 0 + wasm.globals.items.items[0].init.i32_const = @as(i32, @bitCast(@as(u32, @intCast(memory_ptr)))); + } + + var offset: u32 = @as(u32, @intCast(memory_ptr)); + var seg_it = wasm.data_segments.iterator(); + while (seg_it.next()) |entry| { + const segment: *Segment = &wasm.segments.items[entry.value_ptr.*]; + memory_ptr = std.mem.alignForwardGeneric(u64, memory_ptr, segment.alignment); + + // set TLS-related symbols + if (mem.eql(u8, entry.key_ptr.*, ".tdata")) { + const global_count = wasm.imports.globalCount(); + if (wasm.findGlobalSymbol("__tls_size")) |loc| { + const sym = loc.getSymbol(wasm); + sym.index = try wasm.globals.append(wasm.base.allocator, global_count, .{ + .global_type = .{ .valtype = .i32, .mutable = false }, + .init = .{ .i32_const = @as(i32, @intCast(segment.size)) }, + }); + } + if (wasm.findGlobalSymbol("__tls_align")) |loc| { + const sym = loc.getSymbol(wasm); + sym.index = try wasm.globals.append(wasm.base.allocator, global_count, .{ + .global_type = .{ .valtype = .i32, .mutable = false }, + .init = .{ .i32_const = @as(i32, @intCast(segment.alignment)) }, + }); + } + if (wasm.findGlobalSymbol("__tls_base")) |loc| { + const sym = loc.getSymbol(wasm); + sym.index = try 
wasm.globals.append(wasm.base.allocator, wasm.imports.globalCount(), .{ + .global_type = .{ .valtype = .i32, .mutable = wasm.options.shared_memory }, + .init = .{ .i32_const = if (wasm.options.shared_memory) @as(i32, 0) else @as(i32, @intCast(memory_ptr)) }, + }); + } + } + + memory_ptr += segment.size; + segment.offset = offset; + offset += segment.size; + } + + // create the memory init flag which is used by the init memory function + if (wasm.options.shared_memory and wasm.hasPassiveInitializationSegments()) { + // align to pointer size + memory_ptr = mem.alignForwardGeneric(u64, memory_ptr, 4); + const loc = try wasm.createSyntheticSymbol("__wasm_init_memory_flag", .data); + const atom = try Atom.create(wasm.base.allocator); + atom.size = 0; + atom.sym_index = loc.sym_index; + atom.file = null; + try wasm.symbol_atom.put(wasm.base.allocator, loc, atom); + atom.offset = @as(u32, @intCast(memory_ptr)); + memory_ptr += 4; + } + + if (!place_stack_first) { + memory_ptr = std.mem.alignForwardGeneric(u64, memory_ptr, stack_alignment); + memory_ptr += stack_size; + wasm.globals.items.items[0].init.i32_const = @as(i32, @bitCast(@as(u32, @intCast(memory_ptr)))); + } + + if (wasm.findGlobalSymbol("__heap_base")) |loc| { + const atom = wasm.symbol_atom.get(loc).?; + atom.offset = @as(u32, @intCast(mem.alignForwardGeneric(u64, memory_ptr, heap_alignment))); + } + + // Setup the max amount of pages + // For now we only support wasm32 by setting the maximum allowed memory size 2^32-1 + const max_memory_allowed: u64 = (1 << 32) - 1; + + if (wasm.options.initial_memory) |initial_memory| { + if (!std.mem.isAlignedGeneric(u64, initial_memory, page_size)) { + log.err("Initial memory must be {d}-byte aligned", .{page_size}); + return error.MissAlignment; + } + if (memory_ptr > initial_memory) { + log.err("Initial memory too small, must be at least {d} bytes", .{memory_ptr}); + return error.MemoryTooSmall; + } + if (initial_memory > max_memory_allowed) { + log.err("Initial memory exceeds maximum memory {d}", .{max_memory_allowed}); + return error.MemoryTooBig; + } + memory_ptr = initial_memory; + } + + memory_ptr = std.mem.alignForwardGeneric(u64, memory_ptr, page_size); + + // In case we do not import memory, but define it ourselves, + // set the minimum amount of pages on the memory section. 
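+    // (memory_ptr was aligned to the page size above, so this division is exact;
+    // e.g. with the 64 KiB wasm page size, a 131072-byte layout yields a minimum
+    // of 2 pages.)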
+ wasm.memories.limits.min = @as(u32, @intCast(memory_ptr / page_size)); + log.debug("Total memory pages: {d}", .{wasm.memories.limits.min}); + + if (wasm.findGlobalSymbol("__heap_end")) |loc| { + const atom = wasm.symbol_atom.get(loc).?; + atom.offset = @as(u32, @intCast(memory_ptr)); + } + + if (wasm.options.max_memory != null or wasm.options.shared_memory) { + const max_memory = wasm.options.max_memory orelse memory_ptr; + if (!std.mem.isAlignedGeneric(u64, max_memory, page_size)) { + log.err("Maximum memory must be {d}-byte aligned", .{page_size}); + return error.MissAlignment; + } + if (memory_ptr > max_memory) { + log.err("Maxmimum memory too small, must be at least {d} bytes", .{memory_ptr}); + return error.MemoryTooSmall; + } + if (max_memory > max_memory_allowed) { + log.err("Maximum memory exceeds maxmium amount {d}", .{max_memory_allowed}); + return error.MemoryTooBig; + } + wasm.memories.limits.max = @as(u32, @intCast(max_memory / page_size)); + wasm.memories.limits.setFlag(.WASM_LIMITS_FLAG_HAS_MAX); + if (wasm.options.shared_memory) { + wasm.memories.limits.setFlag(.WASM_LIMITS_FLAG_IS_SHARED); + } + log.debug("Maximum memory pages: {?d}", .{wasm.memories.limits.max}); + } +} + +/// From a given object's index and the index of the segment, returns the corresponding +/// index of the segment within the final data section. When the segment does not yet +/// exist, a new one will be initialized and appended. The new index will be returned in that case. +pub fn getMatchingSegment(wasm: *Wasm, gpa: Allocator, object_index: u16, relocatable_index: u32) !?u32 { + const object: Object = wasm.objects.items[object_index]; + const relocatable_data = object.relocatable_data[relocatable_index]; + const index = @as(u32, @intCast(wasm.segments.items.len)); + + switch (relocatable_data.type) { + .data => { + const segment_info = object.segment_info[relocatable_data.index]; + const segment_name = segment_info.outputName(wasm.options.merge_data_segments); + const result = try wasm.data_segments.getOrPut(gpa, segment_name); + if (!result.found_existing) { + result.value_ptr.* = index; + var flags: u32 = 0; + if (wasm.options.shared_memory) { + flags |= @intFromEnum(Segment.Flag.WASM_DATA_SEGMENT_IS_PASSIVE); + } + try wasm.segments.append(gpa, .{ + .alignment = 1, + .size = 0, + .offset = 0, + .flags = flags, + }); + return index; + } else return result.value_ptr.*; + }, + .code => return wasm.code_section_index orelse blk: { + wasm.code_section_index = index; + try wasm.appendDummySegment(gpa); + break :blk index; + }, + .debug => { + const debug_name = object.getDebugName(relocatable_data); + if (mem.eql(u8, debug_name, ".debug_info")) { + return wasm.debug_info_index orelse blk: { + wasm.debug_info_index = index; + try wasm.appendDummySegment(gpa); + break :blk index; + }; + } else if (mem.eql(u8, debug_name, ".debug_line")) { + return wasm.debug_line_index orelse blk: { + wasm.debug_line_index = index; + try wasm.appendDummySegment(gpa); + break :blk index; + }; + } else if (mem.eql(u8, debug_name, ".debug_loc")) { + return wasm.debug_loc_index orelse blk: { + wasm.debug_loc_index = index; + try wasm.appendDummySegment(gpa); + break :blk index; + }; + } else if (mem.eql(u8, debug_name, ".debug_ranges")) { + return wasm.debug_line_index orelse blk: { + wasm.debug_ranges_index = index; + try wasm.appendDummySegment(gpa); + break :blk index; + }; + } else if (mem.eql(u8, debug_name, ".debug_pubnames")) { + return wasm.debug_pubnames_index orelse blk: { + wasm.debug_pubnames_index = index; + 
try wasm.appendDummySegment(gpa); + break :blk index; + }; + } else if (mem.eql(u8, debug_name, ".debug_pubtypes")) { + return wasm.debug_pubtypes_index orelse blk: { + wasm.debug_pubtypes_index = index; + try wasm.appendDummySegment(gpa); + break :blk index; + }; + } else if (mem.eql(u8, debug_name, ".debug_abbrev")) { + return wasm.debug_abbrev_index orelse blk: { + wasm.debug_abbrev_index = index; + try wasm.appendDummySegment(gpa); + break :blk index; + }; + } else if (mem.eql(u8, debug_name, ".debug_str")) { + return wasm.debug_str_index orelse blk: { + wasm.debug_str_index = index; + try wasm.appendDummySegment(gpa); + break :blk index; + }; + } else { + log.warn("found unknown debug section '{s}'", .{debug_name}); + log.warn(" debug section will be skipped", .{}); + return null; + } + }, + } +} + +/// Appends a new segment with default field values +fn appendDummySegment(wasm: *Wasm, gpa: Allocator) !void { + try wasm.segments.append(gpa, .{ + .alignment = 1, + .size = 0, + .offset = 0, + .flags = 0, + }); +} + +/// From a given index, append the given `Atom` at the back of the linked list. +/// Simply inserts it into the map of atoms when it doesn't exist yet. +pub fn appendAtomAtIndex(wasm: *Wasm, gpa: Allocator, index: u32, atom: *Atom) !void { + if (wasm.atoms.getPtr(index)) |last| { + last.*.next = atom; + atom.prev = last.*; + last.* = atom; + } else { + try wasm.atoms.putNoClobber(gpa, index, atom); + } +} + +/// Sorts the data segments into the preferred order of: +/// - .rodata +/// - .data +/// - .text +/// - (.bss) +fn sortDataSegments(wasm: *Wasm, gpa: Allocator) !void { + var new_mapping: std.StringArrayHashMapUnmanaged(u32) = .{}; + try new_mapping.ensureUnusedCapacity(gpa, wasm.data_segments.count()); + errdefer new_mapping.deinit(gpa); + + const keys = try gpa.dupe([]const u8, wasm.data_segments.keys()); + defer gpa.free(keys); + + const SortContext = struct { + fn sort(_: void, lhs: []const u8, rhs: []const u8) bool { + return order(lhs) <= order(rhs); + } + + fn order(name: []const u8) u8 { + if (mem.startsWith(u8, name, ".rodata")) return 0; + if (mem.startsWith(u8, name, ".data")) return 1; + if (mem.startsWith(u8, name, ".text")) return 2; + return 3; + } + }; + + std.sort.sort([]const u8, keys, {}, SortContext.sort); + for (keys) |key| { + const segment_index = wasm.data_segments.get(key).?; + new_mapping.putAssumeCapacity(key, segment_index); + } + wasm.data_segments.deinit(gpa); + wasm.data_segments = new_mapping; +} + +fn allocateAtoms(wasm: *Wasm) !void { + // first sort the data segments + try wasm.sortDataSegments(wasm.base.allocator); + + var it = wasm.atoms.iterator(); + while (it.next()) |entry| { + const segment = &wasm.segments.items[entry.key_ptr.*]; + var atom: *Atom = entry.value_ptr.*.getFirst(); + var offset: u32 = 0; + while (true) { + const symbol_loc = atom.symbolLoc(); + if (wasm.code_section_index) |index| { + if (entry.key_ptr.* == index) { + if (!wasm.resolved_symbols.contains(symbol_loc)) { + atom = atom.next orelse break; + continue; + } + } + } + offset = std.mem.alignForwardGeneric(u32, offset, atom.alignment); + atom.offset = offset; + offset += atom.size; + atom = atom.next orelse break; + } + segment.size = std.mem.alignForwardGeneric(u32, offset, segment.alignment); + } +} + +fn setupStart(wasm: *Wasm) !void { + if (wasm.options.no_entry) return; + const entry_name = wasm.options.entry_name orelse "_start"; + const entry_loc = wasm.findGlobalSymbol(entry_name) orelse { + log.err("Entry symbol '{s}' does not exist, use '--no-entry' 
to suppress", .{entry_name}); + return error.MissingSymbol; + }; + + const symbol = entry_loc.getSymbol(wasm); + if (symbol.tag != .function) { + log.err("Entry symbol '{s}' is not a function", .{entry_name}); + return error.InvalidEntryKind; + } + // Simply export the symbol as the start function is reserved + // for synthetic symbols such as __wasm_start, __wasm_init_memory, and + // __wasm_apply_global_relocs + symbol.setFlag(.WASM_SYM_EXPORTED); +} + +fn validateFeatures(wasm: *Wasm) !void { + const infer = wasm.options.features.len == 0; // when the user did not define any features, we infer them from linked objects. + const known_features_count = types.known_features.kvs.len; + + var allowed: FeatureSet = .{}; + var used = [_]u17{0} ** known_features_count; + var disallowed = [_]u17{0} ** known_features_count; + var required = [_]u17{0} ** known_features_count; + + // when false, we fail linking. We only verify this after a loop to catch all invalid features. + var valid_feature_set = true; + + // When the user has given an explicit list of features to enable, + // we extract them and insert each into the 'allowed' list. + if (!infer) { + var it = std.mem.split(u8, wasm.options.features, ","); + while (it.next()) |feature_name| { + const feature = types.known_features.get(feature_name) orelse { + log.err("Unknown feature name '{s}' passed as option", .{feature_name}); + return error.UnknownFeature; + }; + allowed.enable(feature); + } + } + + // will be set to true when there's any TLS segment found in any of the object files + var has_tls = false; + + // extract all the used, disallowed and required features from each + // linked object file so we can test them. + for (wasm.objects.items, 0..) |object, object_index| { + for (object.features) |feature| { + const value = @as(u16, @intCast(object_index)) << 1 | @as(u1, 1); + switch (feature.prefix) { + .used => { + used[@intFromEnum(feature.tag)] = value; + }, + .disallowed => { + disallowed[@intFromEnum(feature.tag)] = value; + }, + .required => { + required[@intFromEnum(feature.tag)] = value; + used[@intFromEnum(feature.tag)] = value; + }, + } + } + + for (object.segment_info) |segment| { + if (segment.isTLS()) { + has_tls = true; + } + } + } + + // when we infer the features, we allow each feature found in the 'used' set + // and insert it into the 'allowed' set. When features are not inferred, + // we validate that a used feature is allowed. + for (used, 0..) 
|used_set, used_index| { + const is_enabled = @as(u1, @truncate(used_set)) != 0; + if (!is_enabled) continue; + const feature = @as(types.Feature.Tag, @enumFromInt(used_index)); + if (infer) { + allowed.enable(feature); + } else if (!allowed.isEnabled(feature)) { + log.err("feature '{}' not allowed, but used by linked object", .{feature}); + log.err(" defined in '{s}'", .{wasm.objects.items[used_set >> 1].name}); + valid_feature_set = false; + } + } + + if (!valid_feature_set) { + return error.InvalidFeatureSet; + } + + if (wasm.options.shared_memory) { + const disallowed_feature = disallowed[@intFromEnum(types.Feature.Tag.shared_mem)]; + if (@as(u1, @truncate(disallowed_feature)) != 0) { + log.err( + "--shared-memory is disallowed by '{s}' because it wasn't compiled with 'atomics' and 'bulk-memory' features enabled", + .{wasm.objects.items[disallowed_feature >> 1].name}, + ); + valid_feature_set = false; + } + + for ([_]types.Feature.Tag{ .atomics, .bulk_memory }) |feature| { + if (!allowed.isEnabled(feature)) { + log.err("feature '{}' is not used but is required for --shared-memory", .{feature}); + } + } + } + + if (has_tls) { + for ([_]types.Feature.Tag{ .atomics, .bulk_memory }) |feature| { + if (!allowed.isEnabled(feature)) { + log.err("feature '{}' is not used but is required for thread-local storage", .{feature}); + } + } + } + + // For each linked object, validate the required and disallowed features + for (wasm.objects.items) |object| { + var object_used_features = [_]bool{false} ** known_features_count; + for (object.features) |feature| { + if (feature.prefix == .disallowed) continue; // already defined in 'disallowed' set. + // from here a feature is always used + const disallowed_feature = disallowed[@intFromEnum(feature.tag)]; + if (@as(u1, @truncate(disallowed_feature)) != 0) { + log.err("feature '{}' is disallowed, but used by linked object", .{feature.tag}); + log.err(" disallowed by '{s}'", .{wasm.objects.items[disallowed_feature >> 1].name}); + log.err(" used in '{s}'", .{object.name}); + valid_feature_set = false; + } + + object_used_features[@intFromEnum(feature.tag)] = true; + } + + // validate the linked object file has each required feature + for (required, 0..) |required_feature, feature_index| { + const is_required = @as(u1, @truncate(required_feature)) != 0; + if (is_required and !object_used_features[feature_index]) { + log.err("feature '{}' is required but not used in linked object", .{(@as(types.Feature.Tag, @enumFromInt(feature_index)))}); + log.err(" required by '{s}'", .{wasm.objects.items[required_feature >> 1].name}); + log.err(" missing in '{s}'", .{object.name}); + valid_feature_set = false; + } + } + } + + if (!valid_feature_set) { + return error.InvalidFeatureSet; + } + + wasm.used_features = allowed; +} + +/// From a given unsigned integer, returns the size it takes +/// in bytes to store the integer using leb128-encoding. 
+pub fn getULEB128Size(uint_value: anytype) u32 { + const T = @TypeOf(uint_value); + const U = if (@typeInfo(T).Int.bits < 8) u8 else T; + var value = @as(U, @intCast(uint_value)); + + var size: u32 = 0; + while (value != 0) : (size += 1) { + value >>= 7; + } + return size; +} diff --git a/src/archive/archive/zld/Wasm/Archive.zig b/src/archive/archive/zld/Wasm/Archive.zig new file mode 100644 index 000000000000..1802ce0afd7a --- /dev/null +++ b/src/archive/archive/zld/Wasm/Archive.zig @@ -0,0 +1,223 @@ +const Archive = @This(); + +const std = @import("std"); +const assert = std.debug.assert; +const fs = std.fs; +const log = std.log.scoped(.wasm); +const mem = std.mem; + +const Allocator = mem.Allocator; +const Object = @import("Object.zig"); + +file: fs.File, +name: []const u8, + +header: ar_hdr = undefined, + +/// A list of long file names, delimited by a LF character (0x0a). +/// This is stored as a single slice of bytes, as the header-names +/// point to the character index of a file name, rather than the index +/// in the list. +long_file_names: []const u8 = &.{}, + +/// Parsed table of contents. +/// Each symbol name points to a list of all definition +/// sites within the current static archive. +toc: std.StringArrayHashMapUnmanaged(std.ArrayListUnmanaged(u32)) = .{}, + +// Archive files start with the ARMAG identifying string. Then follows a +// `struct ar_hdr', and as many bytes of member file data as its `ar_size' +// member indicates, for each member file. +/// String that begins an archive file. +const ARMAG: *const [SARMAG:0]u8 = "!\n"; +/// Size of that string. +const SARMAG: u4 = 8; + +/// String in ar_fmag at the end of each header. +const ARFMAG: *const [2:0]u8 = "`\n"; + +const ar_hdr = extern struct { + /// Member file name, sometimes / terminated. + ar_name: [16]u8, + + /// File date, decimal seconds since Epoch. + ar_date: [12]u8, + + /// User ID, in ASCII format. + ar_uid: [6]u8, + + /// Group ID, in ASCII format. + ar_gid: [6]u8, + + /// File mode, in ASCII octal. + ar_mode: [8]u8, + + /// File size, in ASCII decimal. + ar_size: [10]u8, + + /// Always contains ARFMAG. + ar_fmag: [2]u8, + + const NameOrIndex = union(enum) { + name: []const u8, + index: u32, + }; + + fn nameOrIndex(archive: ar_hdr) !NameOrIndex { + const value = getValue(&archive.ar_name); + const slash_index = mem.indexOfScalar(u8, value, '/') orelse return error.MalformedArchive; + const len = value.len; + if (slash_index == len - 1) { + // Name stored directly + return NameOrIndex{ .name = value }; + } else { + // Name follows the header directly and its length is encoded in + // the name field. 
+ const index = try std.fmt.parseInt(u32, value[slash_index + 1 ..], 10); + return NameOrIndex{ .index = index }; + } + } + + fn date(archive: ar_hdr) !u64 { + const value = getValue(&archive.ar_date); + return std.fmt.parseInt(u64, value, 10); + } + + fn size(archive: ar_hdr) !u32 { + const value = getValue(&archive.ar_size); + return std.fmt.parseInt(u32, value, 10); + } + + fn getValue(raw: []const u8) []const u8 { + return mem.trimRight(u8, raw, &[_]u8{@as(u8, 0x20)}); + } +}; + +pub fn deinit(archive: *Archive, allocator: Allocator) void { + archive.file.close(); + for (archive.toc.keys()) |*key| { + allocator.free(key.*); + } + for (archive.toc.values()) |*value| { + value.deinit(allocator); + } + archive.toc.deinit(allocator); + allocator.free(archive.long_file_names); +} + +pub fn parse(archive: *Archive, allocator: Allocator) !void { + const reader = archive.file.reader(); + + const magic = try reader.readBytesNoEof(SARMAG); + if (!mem.eql(u8, &magic, ARMAG)) { + log.debug("invalid magic: expected '{s}', found '{s}'", .{ ARMAG, magic }); + return error.NotArchive; + } + + archive.header = try reader.readStruct(ar_hdr); + if (!mem.eql(u8, &archive.header.ar_fmag, ARFMAG)) { + log.debug("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, archive.header.ar_fmag }); + return error.NotArchive; + } + + try archive.parseTableOfContents(allocator, reader); + try archive.parseNameTable(allocator, reader); +} + +fn parseName(archive: *const Archive, header: ar_hdr) ![]const u8 { + const name_or_index = try header.nameOrIndex(); + switch (name_or_index) { + .name => |name| return name, + .index => |index| { + const name = mem.sliceTo(archive.long_file_names[index..], 0x0a); + return mem.trimRight(u8, name, "/"); + }, + } +} + +fn parseTableOfContents(archive: *Archive, allocator: Allocator, reader: anytype) !void { + // size field can have extra spaces padded in front as well as the end, + // so we trim those first before parsing the ASCII value. + const size_trimmed = mem.trim(u8, &archive.header.ar_size, " "); + const sym_tab_size = try std.fmt.parseInt(u32, size_trimmed, 10); + + const num_symbols = try reader.readIntBig(u32); + const symbol_positions = try allocator.alloc(u32, num_symbols); + defer allocator.free(symbol_positions); + for (symbol_positions) |*index| { + index.* = try reader.readIntBig(u32); + } + + const sym_tab = try allocator.alloc(u8, sym_tab_size - 4 - (4 * num_symbols)); + defer allocator.free(sym_tab); + + reader.readNoEof(sym_tab) catch { + log.err("incomplete symbol table: expected symbol table of length 0x{x}", .{sym_tab.len}); + return error.MalformedArchive; + }; + + var i: usize = 0; + var pos: usize = 0; + while (i < num_symbols) : (i += 1) { + const string = mem.sliceTo(sym_tab[pos..], 0); + pos += string.len + 1; + if (string.len == 0) continue; + + const name = try allocator.dupe(u8, string); + errdefer allocator.free(name); + const gop = try archive.toc.getOrPut(allocator, name); + if (gop.found_existing) { + allocator.free(name); + } else { + gop.value_ptr.* = .{}; + } + try gop.value_ptr.append(allocator, symbol_positions[i]); + } +} + +fn parseNameTable(archive: *Archive, allocator: Allocator, reader: anytype) !void { + const header: ar_hdr = try reader.readStruct(ar_hdr); + if (!mem.eql(u8, &header.ar_fmag, ARFMAG)) { + log.err("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, header.ar_fmag }); + return error.MalformedArchive; + } + if (!mem.eql(u8, header.ar_name[0..2], "//")) { + log.err("invalid archive. 
Long name table missing", .{}); + return error.MalformedArchive; + } + const table_size = try header.size(); + const long_file_names = try allocator.alloc(u8, table_size); + errdefer allocator.free(long_file_names); + try reader.readNoEof(long_file_names); + archive.long_file_names = long_file_names; +} + +/// From a given file offset, starts reading for a file header. +/// When found, parses the object file into an `Object` and returns it. +pub fn parseObject(archive: Archive, allocator: Allocator, file_offset: u32) !Object { + try archive.file.seekTo(file_offset); + const reader = archive.file.reader(); + const header = try reader.readStruct(ar_hdr); + const current_offset = try archive.file.getPos(); + try archive.file.seekTo(0); + + if (!mem.eql(u8, &header.ar_fmag, ARFMAG)) { + log.err("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, header.ar_fmag }); + return error.MalformedArchive; + } + + const object_name = try archive.parseName(header); + const name = name: { + var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; + const path = try std.os.realpath(archive.name, &buffer); + break :name try std.fmt.allocPrint(allocator, "{s}({s})", .{ path, object_name }); + }; + defer allocator.free(name); + + const object_file = try std.fs.cwd().openFile(archive.name, .{}); + errdefer object_file.close(); + + const object_file_size = try header.size(); + try object_file.seekTo(current_offset); + return Object.create(allocator, object_file, name, object_file_size); +} diff --git a/src/archive/archive/zld/Wasm/Atom.zig b/src/archive/archive/zld/Wasm/Atom.zig new file mode 100644 index 000000000000..e2dd57d0911d --- /dev/null +++ b/src/archive/archive/zld/Wasm/Atom.zig @@ -0,0 +1,197 @@ +const Atom = @This(); + +const std = @import("std"); +const types = @import("types.zig"); +const Wasm = @import("../Wasm.zig"); +const Symbol = @import("Symbol.zig"); + +const leb = std.leb; +const log = std.log.scoped(.wasm); +const mem = std.mem; +const Allocator = mem.Allocator; + +/// Local symbol index +sym_index: u32, +/// Index into a list of object files +file: ?u16, +/// Size of the atom, used to calculate section sizes in the final binary +size: u32, +/// List of relocations belonging to this atom +relocs: std.ArrayListUnmanaged(types.Relocation) = .{}, +/// Contains the binary data of an atom, which can be non-relocated +code: std.ArrayListUnmanaged(u8) = .{}, +/// For code this is 1, for data this is set to the highest value of all segments +alignment: u32, +/// Offset into the section where the atom lives, this already accounts +/// for alignment. +offset: u32, + +/// Next atom in relation to this atom. +/// When null, this atom is the last atom +next: ?*Atom, +/// Previous atom in relation to this atom. +/// is null when this atom is the first in its order +prev: ?*Atom, + +/// Represents a default empty wasm `Atom` +pub const empty: Atom = .{ + .alignment = 0, + .file = null, + .next = null, + .offset = 0, + .prev = null, + .size = 0, + .sym_index = undefined, +}; + +/// Creates a new Atom with default fields +pub fn create(gpa: Allocator) !*Atom { + const atom = try gpa.create(Atom); + atom.* = .{ + .sym_index = undefined, + .alignment = 0, + .file = null, + .next = null, + .offset = 0, + .prev = null, + .size = 0, + }; + return atom; +} + +/// Frees all resources owned by this `Atom`. +/// Also destroys itatom, making any usage of this atom illegal. 
+pub fn deinit(atom: *Atom, gpa: Allocator) void { + atom.relocs.deinit(gpa); + atom.code.deinit(gpa); + gpa.destroy(atom); +} + +pub fn format(atom: Atom, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + writer.print("Atom{{ .sym_index = {d}, .alignment = {d}, .size = {d}, .offset = 0x{x:0>8} }}", .{ + atom.sym_index, + atom.alignment, + atom.size, + atom.offset, + }); +} + +/// Returns the first `Atom` from a given atom +pub fn getFirst(atom: *Atom) *Atom { + var tmp = atom; + while (tmp.prev) |prev| tmp = prev; + return tmp; +} + +/// Returns the location of the symbol that represents this `Atom` +pub fn symbolLoc(atom: *const Atom) Wasm.SymbolWithLoc { + return .{ .file = atom.file, .sym_index = atom.sym_index }; +} + +/// Returns the virtual address of the `Atom`. This is the address starting +/// from the first entry within a section. +pub fn getVA(atom: *const Atom, wasm_bin: *const Wasm, symbol: *const Symbol) u32 { + if (symbol.tag == .function) return atom.offset; + std.debug.assert(symbol.tag == .data); + const file_index = atom.file orelse return atom.offset; // offset contains VA for synthetic atoms + const merge_segment = wasm_bin.options.merge_data_segments; + const segment_info = wasm_bin.objects.items[file_index].segment_info; + const segment_name = segment_info[symbol.index].outputName(merge_segment); + const segment_index = wasm_bin.data_segments.get(segment_name).?; + const segment = wasm_bin.segments.items[segment_index]; + return segment.offset + atom.offset; +} + +/// Resolves the relocations within the atom, writing the new value +/// at the calculated offset. +pub fn resolveRelocs(atom: *Atom, wasm_bin: *const Wasm) void { + if (atom.relocs.items.len == 0) return; + + for (atom.relocs.items) |reloc| { + const value = atom.relocationValue(reloc, wasm_bin); + switch (reloc.relocation_type) { + .R_WASM_TABLE_INDEX_I32, + .R_WASM_FUNCTION_OFFSET_I32, + .R_WASM_GLOBAL_INDEX_I32, + .R_WASM_MEMORY_ADDR_I32, + .R_WASM_SECTION_OFFSET_I32, + => std.mem.writeIntLittle(u32, atom.code.items[reloc.offset..][0..4], @as(u32, @intCast(value))), + .R_WASM_TABLE_INDEX_I64, + .R_WASM_MEMORY_ADDR_I64, + => std.mem.writeIntLittle(u64, atom.code.items[reloc.offset..][0..8], value), + .R_WASM_GLOBAL_INDEX_LEB, + .R_WASM_EVENT_INDEX_LEB, + .R_WASM_FUNCTION_INDEX_LEB, + .R_WASM_MEMORY_ADDR_LEB, + .R_WASM_MEMORY_ADDR_SLEB, + .R_WASM_TABLE_INDEX_SLEB, + .R_WASM_TABLE_NUMBER_LEB, + .R_WASM_TYPE_INDEX_LEB, + .R_WASM_MEMORY_ADDR_TLS_SLEB, + => leb.writeUnsignedFixed(5, atom.code.items[reloc.offset..][0..5], @as(u32, @intCast(value))), + .R_WASM_MEMORY_ADDR_LEB64, + .R_WASM_MEMORY_ADDR_SLEB64, + .R_WASM_TABLE_INDEX_SLEB64, + .R_WASM_MEMORY_ADDR_TLS_SLEB64, + => leb.writeUnsignedFixed(10, atom.code.items[reloc.offset..][0..10], value), + } + } +} + +/// From a given `relocation` will return the new value to be written. +/// All values will be represented as a `u64` as all values can fit within it. +/// The final value must be casted to the correct size. 
+fn relocationValue(atom: *Atom, relocation: types.Relocation, wasm_bin: *const Wasm) u64 { + const target_loc = (Wasm.SymbolWithLoc{ .file = atom.file, .sym_index = relocation.index }).finalLoc(wasm_bin); + const symbol = target_loc.getSymbol(wasm_bin); + switch (relocation.relocation_type) { + .R_WASM_FUNCTION_INDEX_LEB => return symbol.index, + .R_WASM_TABLE_NUMBER_LEB => return symbol.index, + .R_WASM_TABLE_INDEX_I32, + .R_WASM_TABLE_INDEX_I64, + .R_WASM_TABLE_INDEX_SLEB, + .R_WASM_TABLE_INDEX_SLEB64, + => return wasm_bin.elements.indirect_functions.get(target_loc) orelse 0, + .R_WASM_TYPE_INDEX_LEB => { + const original_type = wasm_bin.objects.items[atom.file.?].func_types[relocation.index]; + return wasm_bin.func_types.find(original_type).?; + }, + .R_WASM_GLOBAL_INDEX_I32, + .R_WASM_GLOBAL_INDEX_LEB, + => return symbol.index, + .R_WASM_MEMORY_ADDR_I32, + .R_WASM_MEMORY_ADDR_I64, + .R_WASM_MEMORY_ADDR_LEB, + .R_WASM_MEMORY_ADDR_LEB64, + .R_WASM_MEMORY_ADDR_SLEB, + .R_WASM_MEMORY_ADDR_SLEB64, + => { + std.debug.assert(symbol.tag == .data); + if (symbol.isUndefined()) { + return 0; + } + const target_atom = wasm_bin.symbol_atom.get(target_loc).?; + const va = @as(i32, @intCast(target_atom.getVA(wasm_bin, symbol))); + return @as(u32, @intCast(va + relocation.addend)); + }, + .R_WASM_EVENT_INDEX_LEB => return symbol.index, + .R_WASM_SECTION_OFFSET_I32 => { + const target_atom = wasm_bin.symbol_atom.get(target_loc).?; + const rel_value = @as(i32, @intCast(target_atom.offset)) + relocation.addend; + return @as(u32, @intCast(rel_value)); + }, + .R_WASM_FUNCTION_OFFSET_I32 => { + const target_atom = wasm_bin.symbol_atom.get(target_loc).?; + const offset: u32 = 11 + Wasm.getULEB128Size(target_atom.size); // Header (11 bytes fixed-size) + body size (leb-encoded) + const rel_value = @as(i32, @intCast(target_atom.offset + offset)) + relocation.addend; + return @as(u32, @intCast(rel_value)); + }, + .R_WASM_MEMORY_ADDR_TLS_SLEB, + .R_WASM_MEMORY_ADDR_TLS_SLEB64, + => { + @panic("TODO: Implement TLS relocations"); + }, + } +} diff --git a/src/archive/archive/zld/Wasm/Object.zig b/src/archive/archive/zld/Wasm/Object.zig new file mode 100644 index 000000000000..331242e64bcb --- /dev/null +++ b/src/archive/archive/zld/Wasm/Object.zig @@ -0,0 +1,1010 @@ +//! Object represents a wasm object file. When initializing a new +//! `Object`, it will parse the contents of a given file handler, and verify +//! the data on correctness. The result can then be used by the linker. +const Object = @This(); + +const Atom = @import("Atom.zig"); +const types = @import("types.zig"); +const std = @import("std"); +const Wasm = @import("../Wasm.zig"); +const Symbol = @import("Symbol.zig"); + +const Allocator = std.mem.Allocator; +const leb = std.leb; +const meta = std.meta; + +const log = std.log.scoped(.wasm); + +/// Wasm spec version used for this `Object` +version: u32 = 0, +/// The file descriptor that represents the wasm object file. +file: ?std.fs.File = null, +/// Name (read path) of the object file. 
+name: []const u8, +/// Parsed type section +func_types: []const std.wasm.Type = &.{}, +/// A list of all imports for this module +imports: []const types.Import = &.{}, +/// Parsed function section +functions: []const std.wasm.Func = &.{}, +/// Parsed table section +tables: []const types.Table = &.{}, +/// Parsed memory section +memories: []const types.Memory = &.{}, +/// Parsed global section +globals: []const std.wasm.Global = &.{}, +/// Parsed export section +exports: []const types.Export = &.{}, +/// Parsed element section +elements: []const std.wasm.Element = &.{}, +/// Represents the function ID that must be called on startup. +/// This is `null` by default as runtimes may determine the startup +/// function themselves. This is essentially legacy. +start: ?u32 = null, +/// A slice of features that tell the linker what features are mandatory, +/// used (or therefore missing) and must generate an error when another +/// object uses features that are not supported by the other. +features: []const types.Feature = &.{}, +/// A table that maps the relocations we must perform where the key represents +/// the section that the list of relocations applies to. +relocations: std.AutoArrayHashMapUnmanaged(u32, []types.Relocation) = .{}, +/// Table of symbols belonging to this Object file +symtable: []Symbol = &.{}, +/// Extra metadata about the linking section, such as alignment of segments and their name +segment_info: []const types.Segment = &.{}, +/// A sequence of function initializers that must be called on startup +init_funcs: []const types.InitFunc = &.{}, +/// Comdat information +comdat_info: []const types.Comdat = &.{}, +/// Represents non-synthetic sections that can essentially be mem-cpy'd into place +/// after performing relocations. +relocatable_data: []const RelocatableData = &.{}, +/// String table for all strings required by the object file, such as symbol names, +/// import name, module name and export names. Each string will be deduplicated +/// and returns an offset into the table. +string_table: Wasm.StringTable = .{}, +/// All the names of each debug section found in the current object file. +/// Each name is terminated by a null-terminator. The name can be found, +/// from the `index` offset within the `RelocatableData`. +debug_names: [:0]const u8, +/// Contains the entire `producers` section as a single slice of bytes. +/// Must be parsed to extract its data. This is done so we only parse it +/// when its data is actually needed. +producers: []const u8 = &.{}, + +/// Represents a single item within a section (depending on its `type`) +const RelocatableData = struct { + /// The type of the relocatable data + type: enum { data, code, debug }, + /// Pointer to the data of the segment, where its length is written to `size` + data: [*]u8, + /// The size in bytes of the data representing the segment within the section + size: u32, + /// The index within the section itself, or in case of a debug section, + /// the offset within the `string_table`. + index: u32, + /// The offset within the section where the data starts + offset: u32, + /// Represents the index of the section it belongs to + section_index: u32, + + /// Returns the alignment of the segment, by retrieving it from the segment + /// meta data of the given object file. + /// NOTE: Alignment is encoded as a power of 2, so we shift the symbol's + /// alignment to retrieve the natural alignment. 
+ pub fn getAlignment(relocatable_data: RelocatableData, object: *const Object) u32 { + if (relocatable_data.type != .data) return 1; + const data_alignment = object.segment_info[relocatable_data.index].alignment; + if (data_alignment == 0) return 1; + // Decode from power of 2 to natural alignment + return @as(u32, 1) << @as(u5, @intCast(data_alignment)); + } + + /// Returns the symbol kind that corresponds to the relocatable section + pub fn getSymbolKind(relocatable_data: RelocatableData) Symbol.Tag { + return switch (relocatable_data.type) { + .data => .data, + .code => .function, + .debug => .section, + }; + } + + /// Returns the index within a section, or in case of a debug section, + /// returns the section index within the object file. + pub fn getIndex(relocatable_data: RelocatableData) u32 { + if (relocatable_data.type == .debug) return relocatable_data.section_index; + return relocatable_data.index; + } +}; + +pub const InitError = error{NotObjectFile} || ParseError || std.fs.File.ReadError; + +/// Initializes a new `Object` from a wasm object file. +/// This also parses and verifies the object file. +/// When a max size is given, will only parse up to the given size, +/// else will read until the end of the file. +pub fn create(gpa: Allocator, file: std.fs.File, name: []const u8, maybe_max_size: ?usize) InitError!Object { + var object: Object = .{ + .file = file, + .name = try gpa.dupe(u8, name), + .debug_names = &.{}, + }; + + var is_object_file: bool = false; + const size = maybe_max_size orelse size: { + errdefer gpa.free(object.name); + const stat = try file.stat(); + break :size @as(usize, @intCast(stat.size)); + }; + + const file_contents = try gpa.alloc(u8, size); + defer gpa.free(file_contents); + var file_reader = file.reader(); + var read: usize = 0; + while (read < size) { + const n = try file_reader.read(file_contents[read..]); + std.debug.assert(n != 0); + read += n; + } + var fbs = std.io.fixedBufferStream(file_contents); + + try object.parse(gpa, fbs.reader(), &is_object_file); + errdefer object.deinit(gpa); + if (!is_object_file) return error.NotObjectFile; + + return object; +} + +/// Frees all memory of `Object` at once. The given `Allocator` must be +/// the same allocator that was used when `init` was called. +pub fn deinit(object: *Object, gpa: Allocator) void { + if (object.file) |file| { + file.close(); + } + for (object.func_types) |func_ty| { + gpa.free(func_ty.params); + gpa.free(func_ty.returns); + } + gpa.free(object.func_types); + gpa.free(object.functions); + gpa.free(object.imports); + gpa.free(object.tables); + gpa.free(object.memories); + gpa.free(object.globals); + gpa.free(object.exports); + for (object.elements) |el| { + gpa.free(el.func_indexes); + } + gpa.free(object.elements); + gpa.free(object.features); + gpa.free(object.producers); + for (object.relocations.values()) |val| { + gpa.free(val); + } + object.relocations.deinit(gpa); + gpa.free(object.symtable); + gpa.free(object.comdat_info); + gpa.free(object.init_funcs); + for (object.segment_info) |info| { + gpa.free(info.name); + } + gpa.free(object.segment_info); + for (object.relocatable_data) |rel_data| { + gpa.free(rel_data.data[0..rel_data.size]); + } + gpa.free(object.relocatable_data); + object.string_table.deinit(gpa); + gpa.free(object.name); + object.* = undefined; +} + +/// Finds the import within the list of imports from a given kind and index of that kind. 
+/// Asserts the import exists +pub fn findImport(object: *const Object, import_kind: std.wasm.ExternalKind, index: u32) types.Import { + var i: u32 = 0; + return for (object.imports) |import| { + if (std.meta.activeTag(import.kind) == import_kind) { + if (i == index) return import; + i += 1; + } + } else unreachable; // Only existing imports are allowed to be found +} + +/// Counts the entries of imported `kind` and returns the result +pub fn importedCountByKind(object: *const Object, kind: std.wasm.ExternalKind) u32 { + var i: u32 = 0; + return for (object.imports) |imp| { + if (@as(std.wasm.ExternalKind, imp.kind) == kind) i += 1; + } else i; +} + +/// From a given `RelocatableDate`, find the corresponding debug section name +pub fn getDebugName(object: *const Object, relocatable_data: RelocatableData) []const u8 { + return object.string_table.get(relocatable_data.index); +} + +/// Checks if the object file is an MVP version. +/// When that's the case, we check if there's an import table definiton with its name +/// set to '__indirect_function_table". When that's also the case, +/// we initialize a new table symbol that corresponds to that import and return that symbol. +/// +/// When the object file is *NOT* MVP, we return `null`. +fn checkLegacyIndirectFunctionTable(object: *Object) !?Symbol { + var table_count: usize = 0; + for (object.symtable) |sym| { + if (sym.tag == .table) table_count += 1; + } + + const import_table_count = object.importedCountByKind(.table); + + // For each import table, we also have a symbol so this is not a legacy object file + if (import_table_count == table_count) return null; + + if (table_count != 0) { + log.err("Expected a table entry symbol for each of the {d} table(s), but instead got {d} symbols.", .{ + import_table_count, + table_count, + }); + return error.MissingTableSymbols; + } + + // MVP object files cannot have any table definitions, only imports (for the indirect function table). + if (object.tables.len > 0) { + log.err("Unexpected table definition without representing table symbols.", .{}); + return error.UnexpectedTable; + } + + if (import_table_count != 1) { + log.err("Found more than one table import, but no representing table symbols", .{}); + return error.MissingTableSymbols; + } + + var table_import: types.Import = for (object.imports) |imp| { + if (imp.kind == .table) { + break imp; + } + } else unreachable; + + if (!std.mem.eql(u8, object.string_table.get(table_import.name), "__indirect_function_table")) { + log.err("Non-indirect function table import '{s}' is missing a corresponding symbol", .{object.string_table.get(table_import.name)}); + return error.MissingTableSymbols; + } + + var table_symbol: Symbol = .{ + .flags = 0, + .name = table_import.name, + .tag = .table, + .index = 0, + }; + table_symbol.setFlag(.WASM_SYM_UNDEFINED); + table_symbol.setFlag(.WASM_SYM_NO_STRIP); + return table_symbol; +} + +/// Error set containing parsing errors. +/// Merged with reader's errorset by `Parser` +pub const ParseError = error{ + /// The magic byte is either missing or does not contain \0Asm + InvalidMagicByte, + /// The wasm version is either missing or does not match the supported version. + InvalidWasmVersion, + /// Expected the functype byte while parsing the Type section but did not find it. + ExpectedFuncType, + /// Missing an 'end' opcode when defining a constant expression. + MissingEndForExpression, + /// Missing an 'end' opcode at the end of a body expression. 
+ MissingEndForBody, + /// The size defined in the section code mismatches with the actual payload size. + MalformedSection, + /// Stream has reached the end. Unreachable for caller and must be handled internally + /// by the parser. + EndOfStream, + /// Ran out of memory when allocating. + OutOfMemory, + /// A non-zero flag was provided for comdat info + UnexpectedValue, + /// An import symbol contains an index to an import that does + /// not exist, or no imports were defined. + InvalidIndex, + /// The section "linking" contains a version that is not supported. + UnsupportedVersion, + /// When reading the data in leb128 compressed format, its value was overflown. + Overflow, + /// Found table definitions but no corresponding table symbols + MissingTableSymbols, + /// Did not expect a table definiton, but did find one + UnexpectedTable, + /// Object file contains a feature that is unknown to the linker + UnknownFeature, + /// The 'elemkind' found in the element section is unsupported. + /// Zld currently only supports funcrefs. + UnsupportedElemKind, +}; + +fn parse(object: *Object, gpa: Allocator, reader: anytype, is_object_file: *bool) Parser(@TypeOf(reader)).Error!void { + var parser = Parser(@TypeOf(reader)).init(object, reader); + return parser.parseObject(gpa, is_object_file); +} + +fn Parser(comptime ReaderType: type) type { + return struct { + const ObjectParser = @This(); + const Error = ReaderType.Error || ParseError; + + reader: std.io.CountingReader(ReaderType), + /// Object file we're building + object: *Object, + + fn init(object: *Object, reader: ReaderType) ObjectParser { + return .{ .object = object, .reader = std.io.countingReader(reader) }; + } + + /// Verifies that the first 4 bytes contains \0Asm + fn verifyMagicBytes(parser: *ObjectParser) Error!void { + var magic_bytes: [4]u8 = undefined; + + try parser.reader.reader().readNoEof(&magic_bytes); + if (!std.mem.eql(u8, &magic_bytes, &std.wasm.magic)) { + log.debug("Invalid magic bytes '{s}'", .{&magic_bytes}); + return error.InvalidMagicByte; + } + } + + fn parseObject(parser: *ObjectParser, gpa: Allocator, is_object_file: *bool) Error!void { + errdefer parser.object.deinit(gpa); + try parser.verifyMagicBytes(); + const version = try parser.reader.reader().readIntLittle(u32); + + parser.object.version = version; + var relocatable_data = std.ArrayList(RelocatableData).init(gpa); + var debug_names = std.ArrayList(u8).init(gpa); + + errdefer { + while (relocatable_data.popOrNull()) |rel_data| { + gpa.free(rel_data.data[0..rel_data.size]); + } else relocatable_data.deinit(); + gpa.free(debug_names.items); + debug_names.deinit(); + } + + var section_index: u32 = 0; + while (parser.reader.reader().readByte()) |byte| : (section_index += 1) { + const len = try readLeb(u32, parser.reader.reader()); + var limited_reader = std.io.limitedReader(parser.reader.reader(), len); + const reader = limited_reader.reader(); + switch (@as(std.wasm.Section, @enumFromInt(byte))) { + .custom => { + const name_len = try readLeb(u32, reader); + const name = try gpa.alloc(u8, name_len); + defer gpa.free(name); + try reader.readNoEof(name); + + if (std.mem.eql(u8, name, "linking")) { + is_object_file.* = true; + parser.object.relocatable_data = relocatable_data.items; // at this point no new relocatable sections will appear so we're free to store them. 
+ try parser.parseMetadata(gpa, @as(usize, @intCast(reader.context.bytes_left))); + } else if (std.mem.startsWith(u8, name, "reloc")) { + try parser.parseRelocations(gpa); + } else if (std.mem.eql(u8, name, "target_features")) { + try parser.parseFeatures(gpa); + } else if (std.mem.eql(u8, name, "producers")) { + const size = @as(u32, @intCast(reader.context.bytes_left)); + const content = try gpa.alloc(u8, size); + errdefer gpa.free(content); + try reader.readNoEof(content); + parser.object.producers = content; + } else if (std.mem.startsWith(u8, name, ".debug")) { + const debug_size = @as(u32, @intCast(reader.context.bytes_left)); + const debug_content = try gpa.alloc(u8, debug_size); + errdefer gpa.free(debug_content); + try reader.readNoEof(debug_content); + + try relocatable_data.append(.{ + .type = .debug, + .data = debug_content.ptr, + .size = debug_size, + .index = try parser.object.string_table.put(gpa, name), + .offset = 0, // debug sections only contain 1 entry, so no need to calculate offset + .section_index = section_index, + }); + } else { + try reader.skipBytes(reader.context.bytes_left, .{}); + } + }, + .type => { + for (try readVec(&parser.object.func_types, reader, gpa)) |*type_val| { + if ((try reader.readByte()) != std.wasm.function_type) return error.ExpectedFuncType; + + for (try readVec(&type_val.params, reader, gpa)) |*param| { + param.* = try readEnum(std.wasm.Valtype, reader); + } + + for (try readVec(&type_val.returns, reader, gpa)) |*result| { + result.* = try readEnum(std.wasm.Valtype, reader); + } + } + try assertEnd(reader); + }, + .import => { + for (try readVec(&parser.object.imports, reader, gpa)) |*import| { + const module_len = try readLeb(u32, reader); + const module_name = try gpa.alloc(u8, module_len); + defer gpa.free(module_name); + try reader.readNoEof(module_name); + + const name_len = try readLeb(u32, reader); + const name = try gpa.alloc(u8, name_len); + defer gpa.free(name); + try reader.readNoEof(name); + + const kind = try readEnum(std.wasm.ExternalKind, reader); + const kind_value: types.Import.Kind = switch (kind) { + .function => .{ .function = try readLeb(u32, reader) }, + .memory => .{ .memory = try readLimits(reader) }, + .global => .{ .global = .{ + .valtype = try readEnum(std.wasm.Valtype, reader), + .mutable = (try reader.readByte()) == 0x01, + } }, + .table => .{ .table = .{ + .reftype = try readEnum(std.wasm.RefType, reader), + .limits = try readLimits(reader), + } }, + }; + + import.* = .{ + .module_name = try parser.object.string_table.put(gpa, module_name), + .name = try parser.object.string_table.put(gpa, name), + .kind = kind_value, + }; + } + try assertEnd(reader); + }, + .function => { + for (try readVec(&parser.object.functions, reader, gpa)) |*func| { + func.* = .{ .type_index = try readLeb(u32, reader) }; + } + try assertEnd(reader); + }, + .table => { + for (try readVec(&parser.object.tables, reader, gpa)) |*table| { + table.* = .{ + .reftype = try readEnum(std.wasm.RefType, reader), + .limits = try readLimits(reader), + }; + } + try assertEnd(reader); + }, + .memory => { + for (try readVec(&parser.object.memories, reader, gpa)) |*memory| { + memory.* = .{ .limits = try readLimits(reader) }; + } + try assertEnd(reader); + }, + .global => { + for (try readVec(&parser.object.globals, reader, gpa)) |*global| { + global.* = .{ + .global_type = .{ + .valtype = try readEnum(std.wasm.Valtype, reader), + .mutable = (try reader.readByte()) == 0x01, + }, + .init = try readInit(reader), + }; + } + try assertEnd(reader); + }, + 
.@"export" => { + for (try readVec(&parser.object.exports, reader, gpa)) |*exp| { + const name_len = try readLeb(u32, reader); + const name = try gpa.alloc(u8, name_len); + defer gpa.free(name); + try reader.readNoEof(name); + exp.* = .{ + .name = try parser.object.string_table.put(gpa, name), + .kind = try readEnum(std.wasm.ExternalKind, reader), + .index = try readLeb(u32, reader), + }; + } + try assertEnd(reader); + }, + .start => { + parser.object.start = try readLeb(u32, reader); + try assertEnd(reader); + }, + .element => { + for (try readVec(&parser.object.elements, reader, gpa)) |*elem| { + const flags = try readLeb(u8, reader); + if (flags & 0x2 != 0) { + elem.table_index = try readLeb(u32, reader); + } else { + elem.table_index = flags; + } + elem.offset = try readInit(reader); + if (flags & 0x3 != 0) { + const elem_kind = try readLeb(u8, reader); + if (elem_kind != 0) { + return error.UnsupportedElemKind; + } + } + + for (try readVec(&elem.func_indexes, reader, gpa)) |*idx| { + idx.* = try readLeb(u32, reader); + } + } + try assertEnd(reader); + }, + .code => { + var start = reader.context.bytes_left; + var index: u32 = 0; + const count = try readLeb(u32, reader); + const imported_function_count = parser.object.importedCountByKind(.function); + while (index < count) : (index += 1) { + const code_len = try readLeb(u32, reader); + const offset = @as(u32, @intCast(start - reader.context.bytes_left)); + const data = try gpa.alloc(u8, code_len); + errdefer gpa.free(data); + try reader.readNoEof(data); + try relocatable_data.append(.{ + .type = .code, + .data = data.ptr, + .size = code_len, + .index = imported_function_count + index, + .offset = offset, + .section_index = section_index, + }); + } + }, + .data => { + var start = reader.context.bytes_left; + var index: u32 = 0; + const count = try readLeb(u32, reader); + while (index < count) : (index += 1) { + const flags = try readLeb(u32, reader); + const data_offset = try readInit(reader); + _ = flags; // TODO: Do we need to check flags to detect passive/active memory? + _ = data_offset; + const data_len = try readLeb(u32, reader); + const offset = @as(u32, @intCast(start - reader.context.bytes_left)); + const data = try gpa.alloc(u8, data_len); + errdefer gpa.free(data); + try reader.readNoEof(data); + try relocatable_data.append(.{ + .type = .data, + .data = data.ptr, + .size = data_len, + .index = index, + .offset = offset, + .section_index = section_index, + }); + } + }, + else => try parser.reader.reader().skipBytes(len, .{}), + } + } else |err| switch (err) { + error.EndOfStream => {}, // finished parsing the file + else => |e| return e, + } + parser.object.relocatable_data = try relocatable_data.toOwnedSlice(); + } + + /// Based on the "features" custom section, parses it into a list of + /// features that tell the linker what features were enabled and may be mandatory + /// to be able to link. + /// Logs an info message when an undefined feature is detected. 
+ fn parseFeatures(parser: *ObjectParser, gpa: Allocator) !void { + const reader = parser.reader.reader(); + for (try readVec(&parser.object.features, reader, gpa)) |*feature| { + const prefix = try readEnum(types.Feature.Prefix, reader); + const name_len = try leb.readULEB128(u32, reader); + const name = try gpa.alloc(u8, name_len); + defer gpa.free(name); + try reader.readNoEof(name); + + const tag = types.known_features.get(name) orelse { + log.err("Object file contains unknown feature: {s}", .{name}); + return error.UnknownFeature; + }; + feature.* = .{ + .prefix = prefix, + .tag = tag, + }; + } + } + + /// Parses a "reloc" custom section into a list of relocations. + /// The relocations are mapped into `Object` where the key is the section + /// they apply to. + fn parseRelocations(parser: *ObjectParser, gpa: Allocator) !void { + const reader = parser.reader.reader(); + const section = try leb.readULEB128(u32, reader); + const count = try leb.readULEB128(u32, reader); + const relocations = try gpa.alloc(types.Relocation, count); + errdefer gpa.free(relocations); + + log.debug("Found {d} relocations for section ({d})", .{ + count, + section, + }); + + for (relocations) |*relocation| { + const rel_type = try leb.readULEB128(u8, reader); + const rel_type_enum = @as(types.Relocation.RelocationType, @enumFromInt(rel_type)); + relocation.* = .{ + .relocation_type = rel_type_enum, + .offset = try leb.readULEB128(u32, reader), + .index = try leb.readULEB128(u32, reader), + .addend = if (rel_type_enum.addendIsPresent()) try leb.readILEB128(i32, reader) else 0, + }; + log.debug("Found relocation: type({s}) offset({d}) index({d}) addend({?d})", .{ + @tagName(relocation.relocation_type), + relocation.offset, + relocation.index, + relocation.addend, + }); + } + + try parser.object.relocations.putNoClobber(gpa, section, relocations); + } + + /// Parses the "linking" custom section. Versions that are not + /// supported will be an error. `payload_size` is required to be able + /// to calculate the subsections we need to parse, as that data is not + /// available within the section itparser. + fn parseMetadata(parser: *ObjectParser, gpa: Allocator, payload_size: usize) !void { + var limited = std.io.limitedReader(parser.reader.reader(), payload_size); + const limited_reader = limited.reader(); + + const version = try leb.readULEB128(u32, limited_reader); + log.debug("Link meta data version: {d}", .{version}); + if (version != 2) return error.UnsupportedVersion; + + while (limited.bytes_left > 0) { + try parser.parseSubsection(gpa, limited_reader); + } + } + + /// Parses a `spec.Subsection`. + /// The `reader` param for this is to provide a `LimitedReader`, which allows + /// us to only read until a max length. + /// + /// `parser` is used to provide access to other sections that may be needed, + /// such as access to the `import` section to find the name of a symbol. 
+ fn parseSubsection(parser: *ObjectParser, gpa: Allocator, reader: anytype) !void { + const sub_type = try leb.readULEB128(u8, reader); + log.debug("Found subsection: {s}", .{@tagName(@as(types.SubsectionType, @enumFromInt(sub_type)))}); + const payload_len = try leb.readULEB128(u32, reader); + if (payload_len == 0) return; + + var limited = std.io.limitedReader(reader, payload_len); + const limited_reader = limited.reader(); + + // every subsection contains a 'count' field + const count = try leb.readULEB128(u32, limited_reader); + + switch (@as(types.SubsectionType, @enumFromInt(sub_type))) { + .WASM_SEGMENT_INFO => { + const segments = try gpa.alloc(types.Segment, count); + errdefer gpa.free(segments); + for (segments) |*segment| { + const name_len = try leb.readULEB128(u32, reader); + const name = try gpa.alloc(u8, name_len); + errdefer gpa.free(name); + try reader.readNoEof(name); + segment.* = .{ + .name = name, + .alignment = try leb.readULEB128(u32, reader), + .flags = try leb.readULEB128(u32, reader), + }; + log.debug("Found segment: {s} align({d}) flags({b})", .{ + segment.name, + segment.alignment, + segment.flags, + }); + + // support legacy object files that specified being TLS by the name instead of the TLS flag. + if (!segment.isTLS() and (std.mem.startsWith(u8, segment.name, ".tdata") or std.mem.startsWith(u8, segment.name, ".tbss"))) { + // set the flag so we can simply check for the flag in the rest of the linker. + segment.flags |= @intFromEnum(types.Segment.Flags.WASM_SEG_FLAG_TLS); + } + } + parser.object.segment_info = segments; + }, + .WASM_INIT_FUNCS => { + const funcs = try gpa.alloc(types.InitFunc, count); + errdefer gpa.free(funcs); + for (funcs) |*func| { + func.* = .{ + .priority = try leb.readULEB128(u32, reader), + .symbol_index = try leb.readULEB128(u32, reader), + }; + log.debug("Found function - prio: {d}, index: {d}", .{ func.priority, func.symbol_index }); + } + parser.object.init_funcs = funcs; + }, + .WASM_COMDAT_INFO => { + const comdats = try gpa.alloc(types.Comdat, count); + errdefer gpa.free(comdats); + for (comdats) |*comdat| { + const name_len = try leb.readULEB128(u32, reader); + const name = try gpa.alloc(u8, name_len); + errdefer gpa.free(name); + try reader.readNoEof(name); + + const flags = try leb.readULEB128(u32, reader); + if (flags != 0) { + return error.UnexpectedValue; + } + + const symbol_count = try leb.readULEB128(u32, reader); + const symbols = try gpa.alloc(types.ComdatSym, symbol_count); + errdefer gpa.free(symbols); + for (symbols) |*symbol| { + symbol.* = .{ + .kind = @as(types.ComdatSym.Type, @enumFromInt(try leb.readULEB128(u8, reader))), + .index = try leb.readULEB128(u32, reader), + }; + } + + comdat.* = .{ + .name = name, + .flags = flags, + .symbols = symbols, + }; + } + + parser.object.comdat_info = comdats; + }, + .WASM_SYMBOL_TABLE => { + var symbols = try std.ArrayList(Symbol).initCapacity(gpa, count); + + var i: usize = 0; + while (i < count) : (i += 1) { + const symbol = symbols.addOneAssumeCapacity(); + symbol.* = try parser.parseSymbol(gpa, reader); + log.debug("Found symbol: type({s}) name({s}) flags(0b{b:0>8})", .{ + @tagName(symbol.tag), + parser.object.string_table.get(symbol.name), + symbol.flags, + }); + } + + // we found all symbols, check for indirect function table + // in case of an MVP object file + if (try parser.object.checkLegacyIndirectFunctionTable()) |symbol| { + try symbols.append(symbol); + log.debug("Found legacy indirect function table. 
Created symbol", .{}); + } + + parser.object.symtable = try symbols.toOwnedSlice(); + }, + } + } + + /// Parses the symbol information based on its kind, + /// requires access to `Object` to find the name of a symbol when it's + /// an import and flag `WASM_SYM_EXPLICIT_NAME` is not set. + fn parseSymbol(parser: *ObjectParser, gpa: Allocator, reader: anytype) !Symbol { + const tag = @as(Symbol.Tag, @enumFromInt(try leb.readULEB128(u8, reader))); + const flags = try leb.readULEB128(u32, reader); + var symbol: Symbol = .{ + .flags = flags, + .tag = tag, + .name = undefined, + .index = undefined, + }; + + switch (tag) { + .data => { + const name_len = try leb.readULEB128(u32, reader); + const name = try gpa.alloc(u8, name_len); + defer gpa.free(name); + try reader.readNoEof(name); + symbol.name = try parser.object.string_table.put(gpa, name); + + // Data symbols only have the following fields if the symbol is defined + if (symbol.isDefined()) { + symbol.index = try leb.readULEB128(u32, reader); + // @TODO: We should verify those values + _ = try leb.readULEB128(u32, reader); + _ = try leb.readULEB128(u32, reader); + } + }, + .section => { + symbol.index = try leb.readULEB128(u32, reader); + for (parser.object.relocatable_data) |data| { + if (data.section_index == symbol.index) { + symbol.name = data.index; + break; + } + } + }, + else => { + symbol.index = try leb.readULEB128(u32, reader); + const is_undefined = symbol.isUndefined(); + const explicit_name = symbol.hasFlag(.WASM_SYM_EXPLICIT_NAME); + symbol.name = if (!is_undefined or (is_undefined and explicit_name)) name: { + const name_len = try leb.readULEB128(u32, reader); + const name = try gpa.alloc(u8, name_len); + defer gpa.free(name); + try reader.readNoEof(name); + break :name try parser.object.string_table.put(gpa, name); + } else parser.object.findImport(symbol.tag.externalType(), symbol.index).name; + }, + } + return symbol; + } + }; +} + +/// First reads the count from the reader and then allocate +/// a slice of ptr child's element type. +fn readVec(ptr: anytype, reader: anytype, gpa: Allocator) ![]ElementType(@TypeOf(ptr)) { + const len = try readLeb(u32, reader); + const slice = try gpa.alloc(ElementType(@TypeOf(ptr)), len); + ptr.* = slice; + return slice; +} + +fn ElementType(comptime ptr: type) type { + return meta.Elem(meta.Child(ptr)); +} + +/// Uses either `readILEB128` or `readULEB128` depending on the +/// signedness of the given type `T`. +/// Asserts `T` is an integer. +fn readLeb(comptime T: type, reader: anytype) !T { + if (comptime std.meta.trait.isSignedInt(T)) { + return try leb.readILEB128(T, reader); + } else { + return try leb.readULEB128(T, reader); + } +} + +/// Reads an enum type from the given reader. +/// Asserts `T` is an enum +fn readEnum(comptime T: type, reader: anytype) !T { + switch (@typeInfo(T)) { + .Enum => |enum_type| return @as(T, @enumFromInt(try readLeb(enum_type.tag_type, reader))), + else => @compileError("T must be an enum. 
Instead was given type " ++ @typeName(T)), + } +} + +fn readLimits(reader: anytype) !types.Limits { + const flags = try readLeb(u1, reader); + const min = try readLeb(u32, reader); + return .{ + .flags = flags, + .min = min, + .max = if (flags == 0) null else try readLeb(u32, reader), + }; +} + +fn readInit(reader: anytype) !std.wasm.InitExpression { + const opcode = try reader.readByte(); + const init_expr: std.wasm.InitExpression = switch (@as(std.wasm.Opcode, @enumFromInt(opcode))) { + .i32_const => .{ .i32_const = try readLeb(i32, reader) }, + .global_get => .{ .global_get = try readLeb(u32, reader) }, + else => |tag| std.debug.panic("TODO: initexpression for other opcodes: {}", .{tag}), + }; + + if ((try readEnum(std.wasm.Opcode, reader)) != .end) return error.MissingEndForExpression; + return init_expr; +} + +fn assertEnd(reader: anytype) !void { + var buf: [1]u8 = undefined; + const len = try reader.read(&buf); + if (len != 0) return error.MalformedSection; + if (reader.context.bytes_left != 0) return error.MalformedSection; +} + +/// Parses an object file into atoms, for code and data sections +pub fn parseIntoAtoms(object: *Object, object_index: u16, wasm_bin: *Wasm) !void { + const Key = struct { + kind: Symbol.Tag, + index: u32, + }; + var symbol_for_segment = std.AutoArrayHashMap(Key, std.ArrayList(u32)).init(wasm_bin.base.allocator); + defer for (symbol_for_segment.values()) |*list| { + list.deinit(); + } else symbol_for_segment.deinit(); + + for (object.symtable, 0..) |symbol, symbol_index| { + switch (symbol.tag) { + .function, .data, .section => if (!symbol.isUndefined()) { + const gop = try symbol_for_segment.getOrPut(.{ .kind = symbol.tag, .index = symbol.index }); + const sym_idx = @as(u32, @intCast(symbol_index)); + if (!gop.found_existing) { + gop.value_ptr.* = std.ArrayList(u32).init(wasm_bin.base.allocator); + } + try gop.value_ptr.*.append(sym_idx); + }, + else => continue, + } + } + + for (object.relocatable_data, 0..) |relocatable_data, index| { + const final_index = (try wasm_bin.getMatchingSegment(wasm_bin.base.allocator, object_index, @as(u32, @intCast(index)))) orelse { + continue; // found unknown section, so skip parsing into atom as we do not know how to handle it. + }; + + const atom = try Atom.create(wasm_bin.base.allocator); + errdefer atom.deinit(wasm_bin.base.allocator); + + try wasm_bin.managed_atoms.append(wasm_bin.base.allocator, atom); + atom.file = object_index; + atom.size = relocatable_data.size; + atom.alignment = relocatable_data.getAlignment(object); + + const relocations: []types.Relocation = object.relocations.get(relocatable_data.section_index) orelse &.{}; + for (relocations) |relocation| { + if (isInbetween(relocatable_data.offset, atom.size, relocation.offset)) { + // set the offset relative to the offset of the segment itobject, + // rather than within the entire section. 
+ var reloc = relocation; + reloc.offset -= relocatable_data.offset; + try atom.relocs.append(wasm_bin.base.allocator, reloc); + } + + switch (relocation.relocation_type) { + .R_WASM_TABLE_INDEX_I32, + .R_WASM_TABLE_INDEX_I64, + .R_WASM_TABLE_INDEX_SLEB, + .R_WASM_TABLE_INDEX_SLEB64, + => { + try wasm_bin.elements.indirect_functions.put(wasm_bin.base.allocator, .{ + .file = object_index, + .sym_index = relocation.index, + }, 0); + }, + .R_WASM_GLOBAL_INDEX_I32, + .R_WASM_GLOBAL_INDEX_LEB, + => { + const sym = object.symtable[relocation.index]; + if (sym.tag != .global) { + try wasm_bin.globals.addGOTEntry( + wasm_bin.base.allocator, + .{ .file = object_index, .sym_index = relocation.index }, + ); + } + }, + else => {}, + } + } + + try atom.code.appendSlice(wasm_bin.base.allocator, relocatable_data.data[0..relocatable_data.size]); + + if (symbol_for_segment.getPtr(.{ + .kind = relocatable_data.getSymbolKind(), + .index = relocatable_data.getIndex(), + })) |symbols| { + atom.sym_index = symbols.pop(); + try wasm_bin.symbol_atom.putNoClobber(wasm_bin.base.allocator, atom.symbolLoc(), atom); + + // symbols referencing the same atom will be added as alias + // or as 'parent' when they are global. + while (symbols.popOrNull()) |idx| { + try wasm_bin.symbol_atom.putNoClobber(wasm_bin.base.allocator, .{ .file = atom.file, .sym_index = idx }, atom); + const alias_symbol = object.symtable[idx]; + if (alias_symbol.isGlobal()) { + atom.sym_index = idx; + } + } + } else { + // ensure we do not try to read the symbol index of an atom that's not represented by a symbol. + atom.sym_index = undefined; + } + + const segment: *Wasm.Segment = &wasm_bin.segments.items[final_index]; + if (relocatable_data.type == .data) { //code section and debug sections are 1-byte aligned + segment.alignment = std.math.max(segment.alignment, atom.alignment); + } + + try wasm_bin.appendAtomAtIndex(wasm_bin.base.allocator, final_index, atom); + } +} + +/// Verifies if a given value is in between a minimum -and maximum value. +/// The maxmimum value is calculated using the length, both start and end are inclusive. +inline fn isInbetween(min: u32, length: u32, value: u32) bool { + return value >= min and value <= min + length; +} diff --git a/src/archive/archive/zld/Wasm/Options.zig b/src/archive/archive/zld/Wasm/Options.zig new file mode 100644 index 000000000000..12bc42f98870 --- /dev/null +++ b/src/archive/archive/zld/Wasm/Options.zig @@ -0,0 +1,224 @@ +//! Options to pass to our linker which affects +//! the end result and tells the linker how to build the final binary. +const Options = @This(); + +const std = @import("std"); +const Zld = @import("../Zld.zig"); +const Wasm = @import("../Wasm.zig"); + +const mem = std.mem; +const Allocator = mem.Allocator; + +const usage = + \\Usage: {s} [options] [files...] 
-o [path] + \\ + \\Options: + \\-h, --help Print this help and exit + \\--debug-log [scope] Turn on debugging logs for [scope] (requires zld compiled with -Dlog) + \\-o [path] Output path of the binary + \\--entry Name of entry point symbol + \\--global-base= Value from where the global data will start + \\--import-symbols Allows references to undefined symbols + \\--import-memory Import memory from the host environment + \\--import-table Import function table from the host environment + \\--export-table Export function table to the host environment + \\--initial-memory= Initial size of the linear memory + \\--max-memory= Maximum size of the linear memory + \\--merge-data-segments[=false] Enable merging data segments (default=true) + \\--no-entry Do not output any entry point + \\--stack-first Place stack at start of linear memory instead of after data + \\--stack-size= Specifies the stack size in bytes + \\--features= Comma-delimited list of used features, inferred by object files if unset + \\--strip Strip all debug information and symbol names + \\--export-dynamic Dynamically export non-hidden symbols + \\--export= Force exporting a global symbol (fails when symbol does not exist) + \\--shared-memory Use shared linear memory (requires atomics and bulk memory) +; + +/// Result path of the binary +emit: Zld.Emit, +/// List of positionals (paths) of objects and archives +/// that may be linked into the final binary +positionals: []const []const u8, +/// When the entry name is different than `_start` +entry_name: ?[]const u8 = null, +/// Points to where the global data will start +global_base: ?u32 = null, +/// Allow undefined symbols to be imported into the linker. +/// By default the linker will emit an error instead when one or multiple +/// undefined references are found. +import_symbols: bool = false, +/// Tells the linker we will import memory from the host environment +import_memory: bool = false, +/// Tells the linker we will import the function table from the host environment +import_table: bool = false, +/// Tells the linker we will export the function table to the host environment +export_table: bool = false, +/// Sets the initial memory of the data section +/// Providing a value too low will result in a linking error. +initial_memory: ?u32 = null, +/// Sets the max memory for the data section. +/// Will result in a linking error when it's smaller than `initial_memory`m +/// or when the initial memory calculated by the linker is larger than the given maximum memory. +max_memory: ?u32 = null, +/// Tell the linker to merge data segments +/// i.e. all '.rodata' will be merged into a .rodata segment. +merge_data_segments: bool = true, +/// Tell the linker we do not require a starting entry +no_entry: bool = false, +/// Tell the linker to put the stack first, instead of after the data +stack_first: bool = false, +/// Specifies the size of the stack in bytes +stack_size: ?u32 = null, +/// Comma-delimited list of features to use. +/// When empty, the used features are inferred from the objects instead. +features: []const u8, +/// Strips all debug information and optional sections such as symbol names, +/// and the 'producers' section. +strip: bool = false, +/// Exports a symbol when it's defined, global and not hidden. +export_dynamic: bool = false, +/// Forcefully exports a symbol by its name, fails when the symbol +/// is unresolved. +exports: []const []const u8, +/// Enables shared linear memory. Requires to have the features +/// atomics and bulk-memory enabled. 
+shared_memory: bool = false, + +pub fn parseArgs(arena: Allocator, context: Zld.MainCtx) !Options { + if (context.args.len == 0) { + context.printSuccess(usage, .{context.cmd}); + } + + const args = context.args; + var positionals = std.ArrayList([]const u8).init(arena); + var entry_name: ?[]const u8 = null; + var global_base: ?u32 = null; + var import_symbols: bool = false; + var import_memory: bool = false; + var import_table: bool = false; + var export_table: bool = false; + var initial_memory: ?u32 = null; + var max_memory: ?u32 = null; + var merge_data_segments = true; + var no_entry = false; + var output_path: ?[]const u8 = null; + var stack_first = false; + var stack_size: ?u32 = null; + var features: ?[]const u8 = null; + var strip: ?bool = null; + var export_dynamic: bool = false; + var exports = std.ArrayList([]const u8).init(arena); + var shared_memory: bool = false; + + var i: usize = 0; + while (i < args.len) : (i += 1) { + const arg = args[i]; + if (mem.eql(u8, arg, "-h") or mem.eql(u8, arg, "--help")) { + context.printSuccess(usage, .{context.cmd}); + } else if (mem.eql(u8, arg, "--debug-log")) { + if (i + 1 >= args.len) context.printFailure("Missing scope for debug log", .{}); + i += 1; + try context.log_scopes.append(args[i]); + } else if (mem.eql(u8, arg, "--entry")) { + if (i + 1 >= args.len) context.printFailure("Missing entry name argument", .{}); + entry_name = args[i + 1]; + i += 1; + } else if (mem.startsWith(u8, arg, "--global-base")) { + const index = mem.indexOfScalar(u8, arg, '=') orelse context.printFailure("Missing '=' symbol and value for global base", .{}); + global_base = std.fmt.parseInt(u32, arg[index + 1 ..], 10) catch context.printFailure( + "Could not parse value '{s}' into integer", + .{arg[index + 1 ..]}, + ); + } else if (mem.eql(u8, arg, "--import-symbols")) { + import_symbols = true; + } else if (mem.eql(u8, arg, "--import-memory")) { + import_memory = true; + } else if (mem.eql(u8, arg, "--import-table")) { + import_table = true; + } else if (mem.eql(u8, arg, "--export-table")) { + export_table = true; + } else if (mem.startsWith(u8, arg, "--initial-memory")) { + const index = mem.indexOfScalar(u8, arg, '=') orelse context.printFailure("Missing '=' symbol and value for initial memory", .{}); + initial_memory = std.fmt.parseInt(u32, arg[index + 1 ..], 10) catch context.printFailure( + "Could not parse value '{s}' into integer", + .{arg[index + 1 ..]}, + ); + } else if (mem.startsWith(u8, arg, "--max-memory")) { + const index = mem.indexOfScalar(u8, arg, '=') orelse context.printFailure("Missing '=' symbol and value for max memory", .{}); + max_memory = std.fmt.parseInt(u32, arg[index + 1 ..], 10) catch context.printFailure( + "Could not parse value '{s}' into integer", + .{arg[index + 1 ..]}, + ); + } else if (mem.startsWith(u8, arg, "--merge-data-segments")) { + merge_data_segments = true; + if (mem.indexOfScalar(u8, arg, '=')) |index| { + if (mem.eql(u8, arg[index + 1 ..], "false")) { + merge_data_segments = false; + } + } + } else if (mem.eql(u8, arg, "--no-entry")) { + no_entry = true; + } else if (mem.eql(u8, arg, "--stack-first")) { + stack_first = true; + } else if (mem.startsWith(u8, arg, "--stack-size")) { + const index = mem.indexOfScalar(u8, arg, '=') orelse context.printFailure("Missing '=' symbol and value for stack size", .{}); + stack_size = std.fmt.parseInt(u32, arg[index + 1 ..], 10) catch context.printFailure( + "Could not parse value '{s}' into integer", + .{arg[index + 1 ..]}, + ); + } else if (mem.eql(u8, arg, "-o")) { + if 
(i + 1 >= args.len) context.printFailure("Missing output file argument", .{}); + output_path = args[i + 1]; + i += 1; + } else if (mem.startsWith(u8, arg, "--features")) { + const index = mem.indexOfScalar(u8, arg, '=') orelse context.printFailure("Missing '=' symbol and value for features list", .{}); + features = arg[index + 1 ..]; + i += 1; + } else if (mem.eql(u8, arg, "--strip")) { + strip = true; + } else if (mem.eql(u8, arg, "--export-dynamic")) { + export_dynamic = true; + } else if (mem.startsWith(u8, arg, "--export")) { + const index = mem.indexOfScalar(u8, arg, '=') orelse context.printFailure("Missing '=' symbol and value for symbol name", .{}); + try exports.append(arg[index + 1 ..]); + } else if (mem.eql(u8, arg, "--shared-memory")) { + shared_memory = true; + } else { + try positionals.append(arg); + } + } + + if (positionals.items.len == 0) { + context.printFailure("Expected one or more object files, none were given", .{}); + } + + if (output_path == null) { + context.printFailure("Missing output path", .{}); + } + + return Options{ + .emit = .{ + .directory = std.fs.cwd(), + .sub_path = output_path.?, + }, + .positionals = positionals.items, + .entry_name = entry_name, + .global_base = global_base, + .import_symbols = import_symbols, + .import_memory = import_memory, + .import_table = import_table, + .export_table = export_table, + .initial_memory = initial_memory, + .max_memory = max_memory, + .merge_data_segments = merge_data_segments, + .no_entry = no_entry, + .stack_first = stack_first, + .stack_size = stack_size, + .features = features orelse &.{}, + .strip = strip orelse false, + .export_dynamic = export_dynamic, + .exports = exports.items, + .shared_memory = shared_memory, + }; +} diff --git a/src/archive/archive/zld/Wasm/Symbol.zig b/src/archive/archive/zld/Wasm/Symbol.zig new file mode 100644 index 000000000000..ccbcb49b8760 --- /dev/null +++ b/src/archive/archive/zld/Wasm/Symbol.zig @@ -0,0 +1,173 @@ +//! Represents a wasm symbol. Containing all of its properties, +//! as well as providing helper methods to determine its functionality +//! and how it will/must be linked. +//! The name of the symbol can be found by providing the offset, found +//! on the `name` field, to a string table in the wasm binary or object file. +const Symbol = @This(); + +const std = @import("std"); +const types = @import("types.zig"); + +/// Bitfield containings flags for a symbol +/// Can contain any of the flags defined in `Flag` +flags: u32, +/// Symbol name, when the symbol is undefined the name will be taken from the import. +/// Note: This is an index into the string table. +name: u32, +/// Index into the list of objects based on set `tag` +/// NOTE: This will be set to `undefined` when `tag` is `data` +/// and the symbol is undefined. +index: u32, +/// Represents the kind of the symbol, such as a function or global. +tag: Tag, + +pub const Tag = enum { + function, + data, + global, + section, + event, + table, + + /// From a given symbol tag, returns the `ExternalType` + /// Asserts the given tag can be represented as an external type. + pub fn externalType(tag: Tag) std.wasm.ExternalKind { + return switch (tag) { + .function => .function, + .global => .global, + .data => .memory, + .section => unreachable, // Not an external type + .event => unreachable, // Not an external type + .table => .table, + }; + } +}; + +pub const Flag = enum(u32) { + /// Indicates a weak symbol. 
+ /// When linking multiple modules defining the same symbol, all weak definitions are discarded + /// in favourite of the strong definition. When no strong definition exists, all weak but one definiton is discarded. + /// If multiple definitions remain, we get an error: symbol collision. + WASM_SYM_BINDING_WEAK = 0x1, + /// Indicates a local, non-exported, non-module-linked symbol. + /// The names of local symbols are not required to be unique, unlike non-local symbols. + WASM_SYM_BINDING_LOCAL = 0x2, + /// Represents the binding of a symbol, indicating if it's local or not, and weak or not. + WASM_SYM_BINDING_MASK = 0x3, + /// Indicates a hidden symbol. Hidden symbols will not be exported to the link result, but may + /// link to other modules. + WASM_SYM_VISIBILITY_HIDDEN = 0x4, + /// Indicates an undefined symbol. For non-data symbols, this must match whether the symbol is + /// an import or is defined. For data symbols however, determines whether a segment is specified. + WASM_SYM_UNDEFINED = 0x10, + /// Indicates a symbol of which its intention is to be exported from the wasm module to the host environment. + /// This differs from the visibility flag as this flag affects the static linker. + WASM_SYM_EXPORTED = 0x20, + /// Indicates the symbol uses an explicit symbol name, rather than reusing the name from a wasm import. + /// Allows remapping imports from foreign WASM modules into local symbols with a different name. + WASM_SYM_EXPLICIT_NAME = 0x40, + /// Indicates the symbol is to be included in the linker output, regardless of whether it is used or has any references to it. + WASM_SYM_NO_STRIP = 0x80, + /// Indicates a symbol is TLS + WASM_SYM_TLS = 0x100, +}; + +/// Verifies if the given symbol should be imported from the +/// host environment or not +pub fn requiresImport(symbol: Symbol) bool { + if (symbol.tag == .data) return false; + if (!symbol.isUndefined()) return false; + if (symbol.isWeak()) return false; + // if (symbol.isDefined() and symbol.isWeak()) return true; //TODO: Only when building shared lib + + return true; +} + +pub fn hasFlag(symbol: Symbol, flag: Flag) bool { + return symbol.flags & @intFromEnum(flag) != 0; +} + +pub fn setFlag(symbol: *Symbol, flag: Flag) void { + symbol.flags |= @intFromEnum(flag); +} + +pub fn isUndefined(symbol: Symbol) bool { + return symbol.flags & @intFromEnum(Flag.WASM_SYM_UNDEFINED) != 0; +} + +pub fn setUndefined(symbol: *Symbol, is_undefined: bool) void { + if (is_undefined) { + symbol.setFlag(.WASM_SYM_UNDEFINED); + } else { + symbol.flags &= ~@intFromEnum(Flag.WASM_SYM_UNDEFINED); + } +} + +pub fn setGlobal(symbol: *Symbol, is_global: bool) void { + if (is_global) { + symbol.flags &= ~@intFromEnum(Flag.WASM_SYM_BINDING_LOCAL); + } else { + symbol.setFlag(.WASM_SYM_BINDING_LOCAL); + } +} + +pub fn isTLS(symbol: Symbol) bool { + return symbol.flags & @intFromEnum(Flag.WASM_SYM_TLS) != 0; +} + +pub fn isDefined(symbol: Symbol) bool { + return !symbol.isUndefined(); +} + +pub fn isVisible(symbol: Symbol) bool { + return symbol.flags & @intFromEnum(Flag.WASM_SYM_VISIBILITY_HIDDEN) == 0; +} + +pub fn isLocal(symbol: Symbol) bool { + return symbol.flags & @intFromEnum(Flag.WASM_SYM_BINDING_LOCAL) != 0; +} + +pub fn isGlobal(symbol: Symbol) bool { + return symbol.flags & @intFromEnum(Flag.WASM_SYM_BINDING_LOCAL) == 0; +} + +pub fn isHidden(symbol: Symbol) bool { + return symbol.flags & @intFromEnum(Flag.WASM_SYM_VISIBILITY_HIDDEN) != 0; +} + +pub fn isNoStrip(symbol: Symbol) bool { + return symbol.flags & 
@intFromEnum(Flag.WASM_SYM_NO_STRIP) != 0; +} + +pub fn isExported(symbol: Symbol, is_dynamic: bool) bool { + if (symbol.isUndefined() or symbol.isLocal()) return false; + if (is_dynamic and symbol.isVisible()) return true; + return symbol.hasFlag(.WASM_SYM_EXPORTED); +} + +pub fn isWeak(symbol: Symbol) bool { + return symbol.flags & @intFromEnum(Flag.WASM_SYM_BINDING_WEAK) != 0; +} + +/// Formats the symbol into human-readable text +pub fn format(symbol: Symbol, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + + const kind_fmt: u8 = switch (symbol.tag) { + .function => 'F', + .data => 'D', + .global => 'G', + .section => 'S', + .event => 'E', + .table => 'T', + }; + const visible: []const u8 = if (symbol.isVisible()) "yes" else "no"; + const binding: []const u8 = if (symbol.isLocal()) "local" else "global"; + const undef: []const u8 = if (symbol.isUndefined()) "undefined" else ""; + + try writer.print( + "{c} binding={s} visible={s} id={d} name_offset={d} {s}", + .{ kind_fmt, binding, visible, symbol.index, symbol.name, undef }, + ); +} diff --git a/src/archive/archive/zld/Wasm/emit_wasm.zig b/src/archive/archive/zld/Wasm/emit_wasm.zig new file mode 100644 index 000000000000..678bcbcfde31 --- /dev/null +++ b/src/archive/archive/zld/Wasm/emit_wasm.zig @@ -0,0 +1,667 @@ +//! Writes all the wasm sections that are valid +//! to the final binary file that was passed to the `Wasm` object. +//! When a section contains no entries, the section will not be emitted. + +const Object = @import("Object.zig"); +const std = @import("std"); +const Symbol = @import("Symbol.zig"); +const types = @import("types.zig"); +const Wasm = @import("../Wasm.zig"); +const Atom = @import("Atom.zig"); + +const fs = std.fs; +const leb = std.leb; +const log = std.log.scoped(.wasm); + +/// Writes the given `Wasm` object into a binary file as-is. 
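// Illustrative sketch, not part of the patch: how the Symbol flag helpers
// defined above compose. Assumes the Symbol file above is importable as
// "Symbol.zig"; the flag values mirror the wasm linking convention.
const std = @import("std");
const Symbol = @import("Symbol.zig");

test "weak hidden symbol is not exported" {
    var sym: Symbol = .{ .flags = 0, .name = 0, .index = 0, .tag = .function };
    sym.setFlag(.WASM_SYM_BINDING_WEAK);
    sym.setFlag(.WASM_SYM_VISIBILITY_HIDDEN);
    try std.testing.expect(sym.isWeak());
    try std.testing.expect(!sym.isVisible());
    // hidden symbols stay unexported even when exporting dynamically
    try std.testing.expect(!sym.isExported(true));
}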
+pub fn emit(wasm: *Wasm) !void { + const file = wasm.base.file; + const writer = file.writer(); + + // magic bytes and wasm version + try emitWasmHeader(writer); + + // emit sections + if (wasm.func_types.count() != 0) { + log.debug("Writing 'Types' section ({d})", .{wasm.func_types.count()}); + const offset = try reserveSectionHeader(file); + for (wasm.func_types.items.items) |type_entry| { + try emitType(type_entry, writer); + } + try emitSectionHeader(file, offset, .type, wasm.func_types.count()); + } + if (wasm.imports.symbolCount() != 0 or wasm.options.import_memory) { + const count = wasm.imports.symbolCount() + @intFromBool(wasm.options.import_memory); + log.debug("Writing 'Imports' section ({d})", .{count}); + const offset = try reserveSectionHeader(file); + + if (wasm.options.import_memory) { + const mem_import: types.Import = .{ + .module_name = wasm.string_table.getOffset("env").?, + .name = wasm.string_table.getOffset("memory").?, + .kind = .{ .memory = wasm.memories.limits }, + }; + try emitImport(wasm, mem_import, writer); + } + + for (wasm.imports.symbols()) |sym_with_loc| { + try emitImportSymbol(wasm, sym_with_loc, writer); + } + + // TODO: Also emit GOT symbols + try emitSectionHeader(file, offset, .import, count); + } + if (wasm.functions.count() != 0) { + log.debug("Writing 'Functions' section ({d})", .{wasm.functions.count()}); + const offset = try reserveSectionHeader(file); + for (wasm.functions.items.values()) |func| { + try emitFunction(func, writer); + } + try emitSectionHeader(file, offset, .function, wasm.functions.count()); + } + if (wasm.tables.count() != 0) { + log.debug("Writing 'Tables' section ({d})", .{wasm.tables.count()}); + const offset = try reserveSectionHeader(file); + for (wasm.tables.items.items) |table| { + try emitTable(table, writer); + } + try emitSectionHeader(file, offset, .table, wasm.tables.count()); + } + if (!wasm.options.import_memory) { + log.debug("Writing 'Memory' section", .{}); + const offset = try reserveSectionHeader(file); + try emitLimits(wasm.memories.limits, writer); + try emitSectionHeader(file, offset, .memory, 1); + } + if (wasm.globals.count() != 0) { + log.debug("Writing 'Globals' section ({d})", .{wasm.globals.count()}); + const offset = try reserveSectionHeader(file); + for (wasm.globals.items.items) |global| { + try emitGlobal(global, writer); + } + try emitSectionHeader(file, offset, .global, wasm.globals.count()); + } + if (wasm.exports.count() != 0) { + log.debug("Writing 'Exports' section ({d})", .{wasm.exports.count()}); + const offset = try reserveSectionHeader(file); + for (wasm.exports.items.items) |exported| { + try emitExport(exported, writer); + } + try emitSectionHeader(file, offset, .@"export", wasm.exports.count()); + } + + if (wasm.entry) |entry_index| { + const offset = try reserveSectionHeader(file); + try emitSectionHeader(file, offset, .start, entry_index); + } + + if (wasm.elements.functionCount() != 0) { + log.debug("Writing 'Element' section (1)", .{}); + const offset = try reserveSectionHeader(file); + try emitElement(wasm, writer); + try emitSectionHeader(file, offset, .element, 1); + } + + const data_count = wasm.dataCount(); + if (data_count > 0 and wasm.options.shared_memory) { + const offset = try reserveSectionHeader(file); + try emitSectionHeader(file, offset, .data_count, data_count); + } + + if (wasm.code_section_index) |index| { + log.debug("Writing 'Code' section ({d})", .{wasm.functions.count()}); + const offset = try reserveSectionHeader(file); + var atom = 
wasm.atoms.get(index).?.getFirst(); + + // The code section must be sorted in line with the function order. + var sorted_atoms = try std.ArrayList(*Atom).initCapacity(wasm.base.allocator, wasm.functions.count()); + defer sorted_atoms.deinit(); + + while (true) { + if (wasm.resolved_symbols.contains(atom.symbolLoc())) { + atom.resolveRelocs(wasm); + sorted_atoms.appendAssumeCapacity(atom); + } + atom = atom.next orelse break; + } + + const atom_sort_fn = struct { + fn sort(ctx: *const Wasm, lhs: *const Atom, rhs: *const Atom) bool { + const lhs_sym = lhs.symbolLoc().getSymbol(ctx); + const rhs_sym = rhs.symbolLoc().getSymbol(ctx); + return lhs_sym.index < rhs_sym.index; + } + }.sort; + + std.sort.sort(*Atom, sorted_atoms.items, wasm, atom_sort_fn); + for (sorted_atoms.items) |sorted_atom| { + try leb.writeULEB128(writer, sorted_atom.size); + try writer.writeAll(sorted_atom.code.items); + } + try emitSectionHeader(file, offset, .code, wasm.functions.count()); + } + + if (data_count != 0) { + log.debug("Writing 'Data' section ({d})", .{data_count}); + const offset = try reserveSectionHeader(file); + + var it = wasm.data_segments.iterator(); + while (it.next()) |entry| { + // do not output the 'bss' section + if (std.mem.eql(u8, entry.key_ptr.*, ".bss") and !wasm.options.import_memory) continue; + const atom_index = entry.value_ptr.*; + var atom = wasm.atoms.getPtr(atom_index).?.*.getFirst(); + const segment: Wasm.Segment = wasm.segments.items[atom_index]; + + try leb.writeULEB128(writer, segment.flags); + if (segment.flags & @intFromEnum(Wasm.Segment.Flag.WASM_DATA_SEGMENT_HAS_MEMINDEX) != 0) { + try leb.writeULEB128(writer, @as(u32, 0)); // memory is always index 0 as we only have 1 memory entry + } + if (!segment.isPassive()) { + try emitInitExpression(.{ .i32_const = @as(i32, @bitCast(segment.offset)) }, writer); + } + try leb.writeULEB128(writer, segment.size); + + var current_offset: u32 = 0; + while (true) { + atom.resolveRelocs(wasm); + // TODO: Verify if this is faster than allocating segment's size + // Setting all zeroes, memcopy all segments and then writing. + if (current_offset != atom.offset) { + const diff = atom.offset - current_offset; + try writer.writeByteNTimes(0, diff); + current_offset += diff; + } + std.debug.assert(current_offset == atom.offset); + std.debug.assert(atom.code.items.len == atom.size); + try writer.writeAll(atom.code.items); + + current_offset += atom.size; + if (atom.next) |next| { + atom = next; + } else { + // Also make sure that if the last atom has extra bytes, we write 0's. 
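// Illustrative sketch, not part of the patch: emit() reserves a fixed-size
// section header up front, streams the payload, and then back-patches the
// header with writeUnsignedFixed, so the 5-byte ULEB128 size field can hold
// any u32 no matter how small the final value turns out to be.
const std = @import("std");
const leb = std.leb;

test "fixed-width ULEB128 round-trips small values" {
    var size_field: [5]u8 = undefined;
    // patched in once the payload length is known
    leb.writeUnsignedFixed(5, &size_field, 300);
    var fbs = std.io.fixedBufferStream(&size_field);
    try std.testing.expectEqual(@as(u64, 300), try leb.readULEB128(u64, fbs.reader()));
}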
+ if (current_offset != segment.size) { + try writer.writeByteNTimes(0, segment.size - current_offset); + current_offset += segment.size - current_offset; + } + break; + } + } + // when the last atom was unresolved and we skipped writing last few 0's so do it now + if (current_offset != segment.size) { + try writer.writeByteNTimes(0, segment.size - current_offset); + current_offset += segment.size - current_offset; + } + } + + try emitSectionHeader(file, offset, .data, data_count); + } + + if (!wasm.options.strip) { + // names section + const func_count: u32 = wasm.functions.count() + wasm.imports.functionCount(); + const global_count: u32 = wasm.globals.count() + wasm.imports.globalCount(); + // we must de-duplicate symbols that point to the same function + var funcs = std.AutoArrayHashMap(u32, Wasm.SymbolWithLoc).init(wasm.base.allocator); + defer funcs.deinit(); + try funcs.ensureUnusedCapacity(func_count); + var globals = try std.ArrayList(Wasm.SymbolWithLoc).initCapacity(wasm.base.allocator, global_count); + defer globals.deinit(); + + for (wasm.resolved_symbols.keys()) |sym_with_loc| { + const symbol = sym_with_loc.getSymbol(wasm); + switch (symbol.tag) { + .function => { + const gop = try funcs.getOrPut(symbol.index); + if (!gop.found_existing) { + gop.value_ptr.* = sym_with_loc; + } + }, + .global => globals.appendAssumeCapacity(sym_with_loc), + else => {}, // do not emit 'names' section for other symbols + } + } + + std.sort.sort(Wasm.SymbolWithLoc, funcs.values(), wasm, lessThan); + std.sort.sort(Wasm.SymbolWithLoc, globals.items, wasm, lessThan); + + const offset = try reserveCustomSectionHeader(file); + try leb.writeULEB128(writer, @as(u32, @intCast("name".len))); + try writer.writeAll("name"); + + try emitNameSection(wasm, 0x01, wasm.base.allocator, funcs.values(), writer); + try emitNameSection(wasm, 0x07, wasm.base.allocator, globals.items, writer); + try emitDataNamesSection(wasm, wasm.base.allocator, writer); + try emitCustomHeader(file, offset); + + try emitDebugSections(file, wasm, wasm.base.allocator, writer); + try emitProducerSection(file, wasm, wasm.base.allocator, writer); + } + try emitFeaturesSection(file, wasm, writer); +} + +/// Sorts symbols based on the index of the object they target +fn lessThan(wasm: *const Wasm, lhs: Wasm.SymbolWithLoc, rhs: Wasm.SymbolWithLoc) bool { + const lhs_sym = lhs.getSymbol(wasm); + const rhs_sym = rhs.getSymbol(wasm); + return lhs_sym.index < rhs_sym.index; +} + +fn emitSymbol(wasm: *const Wasm, loc: Wasm.SymbolWithLoc, writer: anytype) !void { + const symbol = loc.getSymbol(wasm); + const name = loc.getName(wasm); + try leb.writeULEB128(writer, symbol.index); + try leb.writeULEB128(writer, @as(u32, @intCast(name.len))); + try writer.writeAll(name); +} + +fn emitNameSection(wasm: *const Wasm, name_type: u8, gpa: std.mem.Allocator, items: []const Wasm.SymbolWithLoc, writer: anytype) !void { + var section_list = std.ArrayList(u8).init(gpa); + defer section_list.deinit(); + const sec_writer = section_list.writer(); + + try leb.writeULEB128(sec_writer, @as(u32, @intCast(items.len))); + for (items) |sym_loc| try emitSymbol(wasm, sym_loc, sec_writer); + try leb.writeULEB128(writer, name_type); + try leb.writeULEB128(writer, @as(u32, @intCast(section_list.items.len))); + try writer.writeAll(section_list.items); +} + +fn emitDataNamesSection(wasm: *Wasm, gpa: std.mem.Allocator, writer: anytype) !void { + var section_list = std.ArrayList(u8).init(gpa); + defer section_list.deinit(); + const sec_writer = section_list.writer(); + + try 
leb.writeULEB128(sec_writer, wasm.dataCount()); + for (wasm.data_segments.keys(), 0..) |key, index| { + if (std.mem.eql(u8, key, ".bss") and !wasm.options.import_memory) continue; + try leb.writeULEB128(sec_writer, @as(u32, @intCast(index))); + try leb.writeULEB128(sec_writer, @as(u32, @intCast(key.len))); + try sec_writer.writeAll(key); + } + try leb.writeULEB128(writer, @as(u8, 0x09)); + try leb.writeULEB128(writer, @as(u32, @intCast(section_list.items.len))); + try writer.writeAll(section_list.items); +} + +fn emitWasmHeader(writer: anytype) !void { + try writer.writeAll(&std.wasm.magic); + try writer.writeIntLittle(u32, 1); // version +} + +/// Reserves enough space within the file to write our section header. +/// Returns the offset into the file where the header will be written. +fn reserveSectionHeader(file: fs.File) !u64 { + // section id, section byte size, section entry count + const header_size = 1 + 5 + 5; + try file.seekBy(header_size); + return (try file.getPos()); +} + +fn reserveCustomSectionHeader(file: fs.File) !u64 { + const header_size = 1 + 5; + try file.seekBy(header_size); + return (try file.getPos()); +} + +/// Emits the actual section header at the given `offset`. +/// Will write the section id, the section byte length, as well as the section entry count. +/// The amount of bytes is calculated using the current position, minus the offset (and reserved header bytes). +fn emitSectionHeader(file: fs.File, offset: u64, section_type: std.wasm.Section, entries: usize) !void { + // section id, section byte size, section entry count + var buf: [1 + 5 + 5]u8 = undefined; + buf[0] = @intFromEnum(section_type); + + const pos = try file.getPos(); + const byte_size = pos + 5 - offset; // +5 due to 'entries' also being part of byte size + leb.writeUnsignedFixed(5, buf[1..6], @as(u32, @intCast(byte_size))); + leb.writeUnsignedFixed(5, buf[6..], @as(u32, @intCast(entries))); + try file.pwriteAll(&buf, offset - buf.len); + log.debug("Written section '{s}' offset=0x{x:0>8} size={d} count={d}", .{ + @tagName(section_type), + offset - buf.len, + byte_size, + entries, + }); +} + +fn emitCustomHeader(file: fs.File, offset: u64) !void { + var buf: [1 + 5]u8 = undefined; + buf[0] = 0; // 0 = 'custom' section + const pos = try file.getPos(); + const byte_size = pos - offset; + leb.writeUnsignedFixed(5, buf[1..6], @as(u32, @intCast(byte_size))); + try file.pwriteAll(&buf, offset - buf.len); +} + +fn emitType(type_entry: std.wasm.Type, writer: anytype) !void { + log.debug("Writing type {}", .{type_entry}); + try leb.writeULEB128(writer, @as(u8, 0x60)); //functype + try leb.writeULEB128(writer, @as(u32, @intCast(type_entry.params.len))); + for (type_entry.params) |para_ty| { + try leb.writeULEB128(writer, @intFromEnum(para_ty)); + } + try leb.writeULEB128(writer, @as(u32, @intCast(type_entry.returns.len))); + for (type_entry.returns) |ret_ty| { + try leb.writeULEB128(writer, @intFromEnum(ret_ty)); + } +} + +fn emitImportSymbol(wasm: *Wasm, sym_loc: Wasm.SymbolWithLoc, writer: anytype) !void { + const symbol = sym_loc.getSymbol(wasm).*; + + const import: types.Import = switch (symbol.tag) { + .function => import: { + const value = wasm.imports.imported_functions.values()[symbol.index]; + const key = wasm.imports.imported_functions.keys()[symbol.index]; + std.debug.assert(value.index == symbol.index); + break :import .{ + .kind = .{ .function = value.type }, + .module_name = try wasm.string_table.put(wasm.base.allocator, key.module_name), + .name = try wasm.string_table.put(wasm.base.allocator, 
key.name), + }; + }, + .global => import: { + const value = wasm.imports.imported_globals.values()[symbol.index]; + const key = wasm.imports.imported_globals.keys()[symbol.index]; + std.debug.assert(value.index == symbol.index); + break :import .{ + .kind = .{ .global = value.global }, + .module_name = try wasm.string_table.put(wasm.base.allocator, key.module_name), + .name = try wasm.string_table.put(wasm.base.allocator, key.name), + }; + }, + .table => import: { + const value = wasm.imports.imported_tables.values()[symbol.index]; + const key = wasm.imports.imported_tables.keys()[symbol.index]; + std.debug.assert(value.index == symbol.index); + break :import .{ + .kind = .{ .table = value.table }, + .module_name = try wasm.string_table.put(wasm.base.allocator, key.module_name), + .name = try wasm.string_table.put(wasm.base.allocator, key.name), + }; + }, + else => unreachable, + }; + + try emitImport(wasm, import, writer); +} + +fn emitImport(wasm: *Wasm, import_entry: types.Import, writer: anytype) !void { + const module_name = wasm.string_table.get(import_entry.module_name); + try leb.writeULEB128(writer, @as(u32, @intCast(module_name.len))); + try writer.writeAll(module_name); + + const name = wasm.string_table.get(import_entry.name); + try leb.writeULEB128(writer, @as(u32, @intCast(name.len))); + try writer.writeAll(name); + + try leb.writeULEB128(writer, @intFromEnum(import_entry.kind)); + switch (import_entry.kind) { + .function => |type_index| try leb.writeULEB128(writer, type_index), + .table => |table| try emitTable(table, writer), + .global => |global| { + try leb.writeULEB128(writer, @intFromEnum(global.valtype)); + try leb.writeULEB128(writer, @intFromBool(global.mutable)); + }, + .memory => |mem| try emitLimits(mem, writer), + } +} + +fn emitFunction(func: std.wasm.Func, writer: anytype) !void { + try leb.writeULEB128(writer, func.type_index); +} + +fn emitTable(table: types.Table, writer: anytype) !void { + try leb.writeULEB128(writer, @intFromEnum(table.reftype)); + try emitLimits(table.limits, writer); +} + +fn emitLimits(limits: types.Limits, writer: anytype) !void { + try leb.writeULEB128(writer, limits.flags); + try leb.writeULEB128(writer, limits.min); + if (limits.max) |max| { + try leb.writeULEB128(writer, max); + } +} + +fn emitGlobal(global: std.wasm.Global, writer: anytype) !void { + try leb.writeULEB128(writer, @intFromEnum(global.global_type.valtype)); + try leb.writeULEB128(writer, @intFromBool(global.global_type.mutable)); + try emitInitExpression(global.init, writer); +} + +fn emitInitExpression(init: std.wasm.InitExpression, writer: anytype) !void { + switch (init) { + .i32_const => |val| { + try leb.writeULEB128(writer, std.wasm.opcode(.i32_const)); + try leb.writeILEB128(writer, val); + }, + .global_get => |index| { + try leb.writeULEB128(writer, std.wasm.opcode(.global_get)); + try leb.writeULEB128(writer, index); + }, + else => @panic("TODO: Other init expression emission"), + } + try leb.writeULEB128(writer, std.wasm.opcode(.end)); +} + +fn emitExport(exported: std.wasm.Export, writer: anytype) !void { + try leb.writeULEB128(writer, @as(u32, @intCast(exported.name.len))); + try writer.writeAll(exported.name); + try leb.writeULEB128(writer, @intFromEnum(exported.kind)); + try leb.writeULEB128(writer, exported.index); +} + +fn emitElement(wasm: *Wasm, writer: anytype) !void { + // passive, with implicit 0-index table + var flags: u32 = 0; + try leb.writeULEB128(writer, flags); + // Start the function table at index 1 + try emitInitExpression(.{ .i32_const 
= 1 }, writer); + try leb.writeULEB128(writer, wasm.elements.functionCount()); + var it = wasm.elements.indirect_functions.keyIterator(); + while (it.next()) |key_ptr| { + try leb.writeULEB128(writer, key_ptr.*.getSymbol(wasm).index); + } +} + +const ProducerField = struct { + value: []const u8, + version: []const u8, + + const Context = struct { + pub fn hash(ctx: Context, field: ProducerField) u32 { + _ = ctx; + var hasher = std.hash.Wyhash.init(0); + hasher.update(field.value); + hasher.update(field.version); + return @as(u32, @truncate(hasher.final())); + } + + pub fn eql(ctx: Context, lhs: ProducerField, rhs: ProducerField, index: usize) bool { + _ = ctx; + _ = index; + return std.mem.eql(u8, lhs.value, rhs.value) and std.mem.eql(u8, lhs.version, rhs.version); + } + }; +}; + +fn emitProducerSection(file: fs.File, wasm: *const Wasm, gpa: std.mem.Allocator, writer: anytype) !void { + const header_offset = try reserveCustomSectionHeader(file); + + var languages_map = std.ArrayHashMap(ProducerField, void, ProducerField.Context, false).init(gpa); + defer for (languages_map.keys()) |key| { + gpa.free(key.value); + gpa.free(key.version); + } else languages_map.deinit(); + + var processed_map = std.ArrayHashMap(ProducerField, void, ProducerField.Context, false).init(gpa); + defer for (processed_map.keys()) |key| { + gpa.free(key.value); + gpa.free(key.version); + } else processed_map.deinit(); + + try processed_map.put(.{ .value = try gpa.dupe(u8, "Zld"), .version = try gpa.dupe(u8, "0.1") }, {}); + + for (wasm.objects.items) |object| { + if (object.producers.len != 0) { + var fbs = std.io.fixedBufferStream(object.producers); + const reader = fbs.reader(); + + const field_count = try leb.readULEB128(u32, reader); + var field_index: u32 = 0; + while (field_index < field_count) : (field_index += 1) { + const field_name_len = try leb.readULEB128(u32, reader); + const field_name = try gpa.alloc(u8, field_name_len); + defer gpa.free(field_name); + try reader.readNoEof(field_name); + + const value_count = try leb.readULEB128(u32, reader); + var value_index: u32 = 0; + while (value_index < value_count) : (value_index += 1) { + const name_len = try leb.readULEB128(u32, reader); + const name = try gpa.alloc(u8, name_len); + errdefer gpa.free(name); + try reader.readNoEof(name); + + const version_len = try leb.readULEB128(u32, reader); + const version = try gpa.alloc(u8, version_len); + errdefer gpa.free(version); + try reader.readNoEof(version); + + log.debug("parsed producer field", .{}); + log.debug(" value '{s}'", .{name}); + log.debug(" version '{s}'", .{version}); + + if (std.mem.eql(u8, field_name, "language")) { + try languages_map.put(.{ .value = name, .version = version }, {}); + } else if (std.mem.eql(u8, field_name, "processed-by")) { + try processed_map.put(.{ .value = name, .version = version }, {}); + } else { + log.err("Invalid field name '{s}' in 'producers' section", .{field_name}); + log.err(" referenced in '{s}'", .{object.name}); + } + } + } + } + } + + const producers = "producers"; + try leb.writeULEB128(writer, @as(u32, @intCast(producers.len))); + try writer.writeAll(producers); + + var fields_count: u32 = 1; // always have a processed-by field + const languages_count = @as(u32, @intCast(languages_map.count())); + + if (languages_count > 0) { + fields_count += 1; + } + + try leb.writeULEB128(writer, @as(u32, fields_count)); + + if (languages_count > 0) { + const language = "language"; + try leb.writeULEB128(writer, @as(u32, @intCast(language.len))); + try 
writer.writeAll(language); + + try leb.writeULEB128(writer, languages_count); + + for (languages_map.keys()) |field| { + try leb.writeULEB128(writer, @as(u32, @intCast(field.value.len))); + try writer.writeAll(field.value); + + try leb.writeULEB128(writer, @as(u32, @intCast(field.version.len))); + try writer.writeAll(field.version); + } + } + + // processed-by field (this is never empty as it's always populated by Zld itself) + { + const processed_by = "processed-by"; + try leb.writeULEB128(writer, @as(u32, @intCast(processed_by.len))); + try writer.writeAll(processed_by); + + try leb.writeULEB128(writer, @as(u32, @intCast(processed_map.count()))); + + // versioned name + for (processed_map.keys()) |field| { + try leb.writeULEB128(writer, @as(u32, @intCast(field.value.len))); // len of "Zld" + try writer.writeAll(field.value); + + try leb.writeULEB128(writer, @as(u32, @intCast(field.version.len))); + try writer.writeAll(field.version); + } + } + + try emitCustomHeader(file, header_offset); +} + +fn emitFeaturesSection(file: fs.File, wasm: *const Wasm, writer: anytype) !void { + const used_count = wasm.used_features.count(); + if (used_count == 0) return; // when no features are used, we omit the entire section + const header_offset = try reserveCustomSectionHeader(file); + + const target_features = "target_features"; + try leb.writeULEB128(writer, @as(u32, @intCast(target_features.len))); + try writer.writeAll(target_features); + + try leb.writeULEB128(writer, used_count); + var it = wasm.used_features.iterator(); + while (it.next()) |feature_tag| { + if (wasm.used_features.isEnabled(feature_tag)) { + const feature: types.Feature = .{ .prefix = .used, .tag = feature_tag }; + try leb.writeULEB128(writer, @intFromEnum(feature.prefix)); + var buf: [100]u8 = undefined; + const feature_name = try std.fmt.bufPrint(&buf, "{}", .{feature.tag}); + try leb.writeULEB128(writer, @as(u32, @intCast(feature_name.len))); + try writer.writeAll(feature_name); + } + } + + try emitCustomHeader(file, header_offset); +} + +fn emitDebugSections(file: fs.File, wasm: *const Wasm, gpa: std.mem.Allocator, writer: anytype) !void { + var debug_bytes = std.ArrayList(u8).init(gpa); + defer debug_bytes.deinit(); + + const DebugSection = struct { + name: []const u8, + index: ?u32, + }; + + const debug_sections: []const DebugSection = &.{ + .{ .name = ".debug_info", .index = wasm.debug_info_index }, + .{ .name = ".debug_pubtypes", .index = wasm.debug_pubtypes_index }, + .{ .name = ".debug_abbrev", .index = wasm.debug_abbrev_index }, + .{ .name = ".debug_line", .index = wasm.debug_line_index }, + .{ .name = ".debug_str", .index = wasm.debug_str_index }, + .{ .name = ".debug_pubnames", .index = wasm.debug_pubnames_index }, + .{ .name = ".debug_loc", .index = wasm.debug_loc_index }, + .{ .name = ".debug_ranges", .index = wasm.debug_ranges_index }, + }; + + for (debug_sections) |item| { + if (item.index) |index| { + const segment = wasm.segments.items[index]; + if (segment.size == 0) continue; + try debug_bytes.ensureUnusedCapacity(segment.size); + var atom = wasm.atoms.get(index).?.getFirst(); + while (true) { + atom.resolveRelocs(wasm); + debug_bytes.appendSliceAssumeCapacity(atom.code.items); + atom = atom.next orelse break; + } + const header_offset = try reserveCustomSectionHeader(file); + try leb.writeULEB128(writer, @as(u32, @intCast(item.name.len))); + try writer.writeAll(item.name); + + try writer.writeAll(debug_bytes.items); + + try emitCustomHeader(file, header_offset); + debug_bytes.clearRetainingCapacity(); + } + 
} +} diff --git a/src/archive/archive/zld/Wasm/sections.zig b/src/archive/archive/zld/Wasm/sections.zig new file mode 100644 index 000000000000..ab91ad14c682 --- /dev/null +++ b/src/archive/archive/zld/Wasm/sections.zig @@ -0,0 +1,372 @@ +//! Contains the definiton and logic for all the +//! output sections required to build the final file. +const std = @import("std"); +const Symbol = @import("Symbol.zig"); +const Object = @import("Object.zig"); +const types = @import("types.zig"); +const Wasm = @import("../Wasm.zig"); +const Allocator = std.mem.Allocator; + +const log = std.log.scoped(.wasm); + +/// Output function section, holding a list of all +/// function with indexes to their type +pub const Functions = struct { + /// Holds the list of function type indexes. + /// The list is built from merging all defined functions into this single list. + /// Once appended, it becomes immutable and should not be mutated outside this list. + items: std.AutoArrayHashMapUnmanaged(struct { file: ?u16, index: u32 }, std.wasm.Func) = .{}, + + /// Adds a new function to the section while also setting the function index + /// of the `Func` itself. + pub fn append(self: *Functions, gpa: Allocator, ref: struct { file: ?u16, index: u32 }, offset: u32, func: std.wasm.Func) !u32 { + const gop = try self.items.getOrPut( + gpa, + .{ .file = ref.file, .index = ref.index }, + ); + if (!gop.found_existing) { + gop.value_ptr.* = func; + } + return @as(u32, @intCast(gop.index)) + offset; + } + + /// Returns the count of entires within the function section + pub fn count(self: *Functions) u32 { + return @as(u32, @intCast(self.items.count())); + } + + pub fn deinit(self: *Functions, gpa: Allocator) void { + self.items.deinit(gpa); + self.* = undefined; + } +}; + +/// Output import section, containing all the various import types +pub const Imports = struct { + /// Table where the key is represented by an import. + /// Each entry represents and imported function where the value contains the index of the function + /// as well as the index of the type. + imported_functions: std.ArrayHashMapUnmanaged( + ImportKey, + struct { index: u32, type: u32 }, + ImportKey.Ctx, + true, + ) = .{}, + /// Table where the key is represented by an import. + /// Each entry represents an imported global from the host environment and maps to the index + /// within this map. + imported_globals: std.ArrayHashMapUnmanaged( + ImportKey, + struct { index: u32, global: std.wasm.GlobalType }, + ImportKey.Ctx, + true, + ) = .{}, + /// Table where the key is represented by an import. + /// Each entry represents an imported table from the host environment and maps to the index + /// within this map. + imported_tables: std.ArrayHashMapUnmanaged( + ImportKey, + struct { index: u32, table: types.Table }, + ImportKey.Ctx, + true, + ) = .{}, + /// A list of symbols representing objects that have been imported. 
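// Illustrative sketch, not part of the patch: Functions.append above returns
// `gop.index + offset` because imported functions occupy the first slots of
// the final wasm function index space; `offset` stands for the import count.
const std = @import("std");

test "defined functions are numbered after the imports" {
    const gpa = std.testing.allocator;
    var defined = std.AutoArrayHashMap(u32, void).init(gpa);
    defer defined.deinit();

    const import_count: u32 = 2; // pretend two functions were imported
    const gop = try defined.getOrPut(0xdead); // stand-in for a (file, index) key
    const final_index = @as(u32, @intCast(gop.index)) + import_count;
    try std.testing.expectEqual(@as(u32, 2), final_index);
}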
+ imported_symbols: std.ArrayListUnmanaged(Wasm.SymbolWithLoc) = .{}, + + const ImportKey = struct { + module_name: []const u8, + name: []const u8, + + const Ctx = struct { + pub fn hash(ctx: Ctx, key: ImportKey) u32 { + _ = ctx; + const hashFunc = std.hash.autoHash; + var hasher = std.hash.Wyhash.init(0); + hashFunc(&hasher, key.module_name.len); + hashFunc(&hasher, key.module_name.ptr); + hashFunc(&hasher, key.name.len); + hashFunc(&hasher, key.name.ptr); + return @as(u32, @truncate(hasher.final())); + } + + pub fn eql(ctx: Ctx, lhs: ImportKey, rhs: ImportKey, index: usize) bool { + _ = ctx; + _ = index; + return std.mem.eql(u8, lhs.name, rhs.name) and + std.mem.eql(u8, lhs.module_name, rhs.module_name); + } + }; + }; + + const max_load = std.hash_map.default_max_load_percentage; + + /// Appends an import symbol into the list of imports. Based on the type, also appends it + /// to their respective import list (such as imported_functions) + /// + /// NOTE: The given symbol must reside within the given `Object`. + pub fn appendSymbol( + self: *Imports, + gpa: Allocator, + wasm: *const Wasm, + sym_with_loc: Wasm.SymbolWithLoc, + ) !void { + const object: *Object = &wasm.objects.items[sym_with_loc.file.?]; + const symbol = &object.symtable[sym_with_loc.sym_index]; + const import = object.findImport(symbol.tag.externalType(), symbol.index); + const module_name = object.string_table.get(import.module_name); + const import_name = object.string_table.get(import.name); + + switch (symbol.tag) { + .function => { + const ret = try self.imported_functions.getOrPut(gpa, .{ + .module_name = module_name, + .name = import_name, + }); + if (!ret.found_existing) { + try self.imported_symbols.append(gpa, sym_with_loc); + ret.value_ptr.* = .{ + .index = self.functionCount() - 1, + .type = import.kind.function, + }; + } + symbol.index = ret.value_ptr.*.index; + log.debug("Imported function '{s}' at index ({d})", .{ import_name, symbol.index }); + }, + .global => { + const ret = try self.imported_globals.getOrPut(gpa, .{ + .module_name = module_name, + .name = import_name, + }); + if (!ret.found_existing) { + try self.imported_symbols.append(gpa, sym_with_loc); + ret.value_ptr.* = .{ + .index = self.globalCount() - 1, + .global = import.kind.global, + }; + } + symbol.index = ret.value_ptr.*.index; + log.debug("Imported global '{s}' at index ({d})", .{ import_name, symbol.index }); + }, + .table => { + const ret = try self.imported_tables.getOrPut(gpa, .{ + .module_name = module_name, + .name = import_name, + }); + if (!ret.found_existing) { + try self.imported_symbols.append(gpa, sym_with_loc); + ret.value_ptr.* = .{ + .index = self.tableCount() - 1, + .table = import.kind.table, + }; + } + symbol.index = ret.value_ptr.*.index; + log.debug("Imported table '{s}' at index ({d})", .{ import_name, symbol.index }); + }, + else => unreachable, // programmer error: Given symbol cannot be imported + } + } + + /// Returns the count of functions that have been imported (so far) + pub fn functionCount(self: Imports) u32 { + return @as(u32, @intCast(self.imported_functions.count())); + } + + /// Returns the count of tables that have been imported (so far) + pub fn tableCount(self: Imports) u32 { + return @as(u32, @intCast(self.imported_tables.count())); + } + + /// Returns the count of globals that have been imported (so far) + pub fn globalCount(self: Imports) u32 { + return @as(u32, @intCast(self.imported_globals.count())); + } + + pub fn deinit(self: *Imports, gpa: Allocator) void { + 
self.imported_functions.deinit(gpa); + self.imported_globals.deinit(gpa); + self.imported_tables.deinit(gpa); + self.imported_symbols.deinit(gpa); + self.* = undefined; + } + + /// Returns a slice to pointers to symbols that have been imported + pub fn symbols(self: Imports) []const Wasm.SymbolWithLoc { + return self.imported_symbols.items; + } + + /// Returns the count of symbols which have been imported + pub fn symbolCount(self: Imports) u32 { + return @as(u32, @intCast(self.imported_symbols.items.len)); + } +}; + +/// Represents the output global section, containing a list of globals +pub const Globals = struct { + /// A list of `wasm.Global`s + /// Once appended to this list, they should no longer be mutated + items: std.ArrayListUnmanaged(std.wasm.Global) = .{}, + /// List of internal GOT symbols + got_symbols: std.ArrayListUnmanaged(Wasm.SymbolWithLoc) = .{}, + + /// Appends a new global and sets the `global_idx` on the global based on the + /// current count of globals and the given `offset`. + pub fn append(globals: *Globals, gpa: Allocator, offset: u32, global: std.wasm.Global) !u32 { + const index = offset + @as(u32, @intCast(globals.items.items.len)); + try globals.items.append(gpa, global); + return index; + } + + /// Appends a new entry to the internal GOT + pub fn addGOTEntry(globals: *Globals, gpa: Allocator, loc: Wasm.SymbolWithLoc) !void { + try globals.got_symbols.append(gpa, loc); + } + + /// Returns true for when any GOT entry is a TLS symbol + pub fn requiresTLSReloc(globals: *const Globals, wasm: *const Wasm) bool { + for (globals.got_symbols.items) |loc| { + if (loc.getSymbol(wasm).isTLS()) return true; + } + return false; + } + + /// Returns the total amount of globals of the global section + pub fn count(globals: Globals) u32 { + return @as(u32, @intCast(globals.items.items.len)); + } + + /// Creates a new linker-defined global with the given mutability and value type. + /// Also appends the new global to the output global section and returns a pointer + /// to the newly created global. + /// + /// This will automatically set `init` to `null` and can manually be updated at a later point using + /// the returned pointer. + pub fn create(globals: *Globals, gpa: Allocator, mutability: enum { mutable, immutable }, valtype: types.ValueType) !*types.Global { + const index = globals.count(); + try globals.items.append(gpa, .{ + .valtype = valtype, + .mutable = mutability == .mutable, + .init = null, + .global_idx = index, + }); + return &globals.items.items[index]; + } + + pub fn deinit(globals: *Globals, gpa: Allocator) void { + globals.items.deinit(gpa); + globals.got_symbols.deinit(gpa); + globals.* = undefined; + } +}; + +/// Represents the type section, containing a list of +/// wasm signature types. +pub const Types = struct { + /// A list of `wasm.FuncType`, when appending to + /// this list, duplicates will be removed. + items: std.ArrayListUnmanaged(std.wasm.Type) = .{}, + + /// Checks if a given type is already present within the list of types. + /// If not, the given type will be appended to the list. + /// In all cases, this will return the index within the list of types. + pub fn append(self: *Types, gpa: Allocator, func_type: std.wasm.Type) !u32 { + return self.find(func_type) orelse { + const index = self.count(); + try self.items.append(gpa, func_type); + return index; + }; + } + + /// Returns a pointer to the function type at given `index` + /// Asserts the index is within bounds. 
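// Illustrative sketch, not part of the patch: Types.append above deduplicates
// signatures, so two functions with the same (params, returns) pair share one
// type index. Assumes this file is importable as "sections.zig".
const std = @import("std");
const sections = @import("sections.zig");

test "identical signatures share a type index" {
    const gpa = std.testing.allocator;
    var func_types: sections.Types = .{};
    defer func_types.deinit(gpa);

    const sig: std.wasm.Type = .{ .params = &.{.i32}, .returns = &.{.i32} };
    const first = try func_types.append(gpa, sig);
    const second = try func_types.append(gpa, sig);
    try std.testing.expectEqual(first, second);
    try std.testing.expectEqual(@as(u32, 1), func_types.count());
}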
+ pub fn get(self: Types, index: u32) *std.wasm.Type { + return &self.items.items[index]; + } + + /// Checks if any type (read: function signature) already exists within + /// the type section. When it does exist, it will return its index + /// otherwise, returns `null`. + pub fn find(self: Types, func_type: std.wasm.Type) ?u32 { + return for (self.items.items, 0..) |ty, index| { + if (ty.eql(func_type)) { + return @as(u32, @intCast(index)); + } + } else null; + } + + /// Returns the amount of entries in the type section + pub fn count(self: Types) u32 { + return @as(u32, @intCast(self.items.items.len)); + } + + pub fn deinit(self: *Types, gpa: Allocator) void { + self.items.deinit(gpa); + self.* = undefined; + } +}; + +/// Represents the table section, containing a list +/// of tables, as well as the definition of linker-defined +/// tables such as the indirect function table +pub const Tables = struct { + /// The list of tables that have been merged from all + /// object files. This does not include any linker-defined + /// tables. Once inserted in this list, the object becomes immutable. + items: std.ArrayListUnmanaged(types.Table) = .{}, + + /// Appends a new table to the list of tables and sets its index to + /// the position within the list of tables. + pub fn append(self: *Tables, gpa: Allocator, offset: u32, table: types.Table) !u32 { + const index = offset + self.count(); + try self.items.append(gpa, table); + return index; + } + + /// Returns the amount of entries in the table section + pub fn count(self: Tables) u32 { + return @as(u32, @intCast(self.items.items.len)); + } + + pub fn deinit(self: *Tables, gpa: Allocator) void { + self.items.deinit(gpa); + self.* = undefined; + } +}; + +/// Represents the exports section, built from explicit exports +/// from all object files, as well as global defined symbols that are +/// non-hidden. +pub const Exports = struct { + /// List of exports, containing both merged exports + /// as linker-defined exports such as __stack_pointer. + items: std.ArrayListUnmanaged(std.wasm.Export) = .{}, + + /// Appends a given `wasm.Export` to the list of output exports. + pub fn append(self: *Exports, gpa: Allocator, exp: std.wasm.Export) !void { + try self.items.append(gpa, exp); + } + + /// Returns the amount of entries in the export section + pub fn count(self: Exports) u32 { + return @as(u32, @intCast(self.items.items.len)); + } + + pub fn deinit(self: *Exports, gpa: Allocator) void { + self.items.deinit(gpa); + self.* = undefined; + } +}; + +pub const Elements = struct { + /// A list of symbols for indirect function calls where the key + /// represents the symbol location, and the value represents the index into the table. + indirect_functions: std.AutoHashMapUnmanaged(Wasm.SymbolWithLoc, u32) = .{}, + + pub fn functionCount(self: Elements) u32 { + return @as(u32, @intCast(self.indirect_functions.count())); + } + + pub fn deinit(self: *Elements, gpa: Allocator) void { + self.indirect_functions.deinit(gpa); + self.* = undefined; + } +}; diff --git a/src/archive/archive/zld/Wasm/types.zig b/src/archive/archive/zld/Wasm/types.zig new file mode 100644 index 000000000000..81b789c4eea7 --- /dev/null +++ b/src/archive/archive/zld/Wasm/types.zig @@ -0,0 +1,304 @@ +//! This file contains all constants and related to wasm's object format. 
+ +const std = @import("std"); + +pub const Relocation = struct { + /// Represents the type of the `Relocation` + relocation_type: RelocationType, + /// Offset of the value to rewrite relative to the relevant section's contents. + /// When `offset` is zero, its position is immediately after the id and size of the section. + offset: u32, + /// The index of the symbol used. + /// When the type is `R_WASM_TYPE_INDEX_LEB`, it represents the index of the type. + index: u32, + /// Addend to add to the address. + /// This field is only non-zero for `R_WASM_MEMORY_ADDR_*`, `R_WASM_FUNCTION_OFFSET_I32` and `R_WASM_SECTION_OFFSET_I32`. + addend: i32 = 0, + + /// All possible relocation types currently existing. + /// This enum is exhaustive as the spec is WIP and new types + /// can be added which means that a generated binary will be invalid, + /// so instead we will show an error in such cases. + pub const RelocationType = enum(u8) { + R_WASM_FUNCTION_INDEX_LEB = 0, + R_WASM_TABLE_INDEX_SLEB = 1, + R_WASM_TABLE_INDEX_I32 = 2, + R_WASM_MEMORY_ADDR_LEB = 3, + R_WASM_MEMORY_ADDR_SLEB = 4, + R_WASM_MEMORY_ADDR_I32 = 5, + R_WASM_TYPE_INDEX_LEB = 6, + R_WASM_GLOBAL_INDEX_LEB = 7, + R_WASM_FUNCTION_OFFSET_I32 = 8, + R_WASM_SECTION_OFFSET_I32 = 9, + R_WASM_EVENT_INDEX_LEB = 10, + R_WASM_GLOBAL_INDEX_I32 = 13, + R_WASM_MEMORY_ADDR_LEB64 = 14, + R_WASM_MEMORY_ADDR_SLEB64 = 15, + R_WASM_MEMORY_ADDR_I64 = 16, + R_WASM_TABLE_INDEX_SLEB64 = 18, + R_WASM_TABLE_INDEX_I64 = 19, + R_WASM_TABLE_NUMBER_LEB = 20, + R_WASM_MEMORY_ADDR_TLS_SLEB = 21, + R_WASM_MEMORY_ADDR_TLS_SLEB64 = 25, + + /// Returns true for relocation types where the `addend` field is present. + pub fn addendIsPresent(self: RelocationType) bool { + return switch (self) { + .R_WASM_MEMORY_ADDR_LEB, + .R_WASM_MEMORY_ADDR_SLEB, + .R_WASM_MEMORY_ADDR_I32, + .R_WASM_MEMORY_ADDR_LEB64, + .R_WASM_MEMORY_ADDR_SLEB64, + .R_WASM_MEMORY_ADDR_I64, + .R_WASM_FUNCTION_OFFSET_I32, + .R_WASM_SECTION_OFFSET_I32, + => true, + else => false, + }; + } + }; + + /// Verifies the relocation type of a given `Relocation` and returns + /// true when the relocation references a function call or address to a function. + pub fn isFunction(self: Relocation) bool { + return switch (self.relocation_type) { + .R_WASM_FUNCTION_INDEX_LEB, + .R_WASM_TABLE_INDEX_SLEB, + => true, + else => false, + }; + } + + /// Returns true when the relocation represents a table index relocatable + pub fn isTableIndex(self: Relocation) bool { + return switch (self.relocation_type) { + .R_WASM_TABLE_INDEX_I32, + .R_WASM_TABLE_INDEX_I64, + .R_WASM_TABLE_INDEX_SLEB, + .R_WASM_TABLE_INDEX_SLEB64, + => true, + else => false, + }; + } + + pub fn format(self: Relocation, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try writer.print("{s} offset=0x{x:0>6} symbol={d}", .{ + @tagName(self.relocation_type), + self.offset, + self.index, + }); + } +}; + +/// Unlike the `Import` object defined by the wasm spec, and existing +/// in the std.wasm namespace, this construct saves the 'module name' and 'name' +/// of the import using offsets into a string table, rather than the slices itself. +/// This saves us (potentially) 24 bytes per import on 64bit machines. 
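// Illustrative sketch, not part of the patch: how a relocation entry of the
// wasm linking convention maps onto the Relocation struct above — a type
// byte, ULEB128 offset and index, and an SLEB128 addend only when
// addendIsPresent() says so. The real parser lives in the object reader;
// this assumes the module above is imported as "types.zig".
const std = @import("std");
const types = @import("types.zig");

fn readRelocation(reader: anytype) !types.Relocation {
    const tag: types.Relocation.RelocationType = @enumFromInt(try reader.readByte());
    var reloc: types.Relocation = .{
        .relocation_type = tag,
        .offset = try std.leb.readULEB128(u32, reader),
        .index = try std.leb.readULEB128(u32, reader),
    };
    if (tag.addendIsPresent()) {
        reloc.addend = try std.leb.readILEB128(i32, reader);
    }
    return reloc;
}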
+pub const Import = struct { + module_name: u32, + name: u32, + kind: Kind, + + pub const Kind = union(std.wasm.ExternalKind) { + function: u32, + table: Table, + memory: Limits, + global: std.wasm.GlobalType, + }; +}; + +pub const Memory = struct { + limits: Limits, +}; + +pub const Table = struct { + limits: Limits, + reftype: std.wasm.RefType, +}; + +/// Unlike the `Export` object defined by the wasm spec, and existing +/// in the std.wasm namespace, this construct saves the 'name' +/// of the export using offsets into a string table, rather than the slice itself. +/// This saves us (potentially) 12 bytes per export on 64bit machines. +pub const Export = struct { + name: u32, + index: u32, + kind: std.wasm.ExternalKind, +}; + +pub const SubsectionType = enum(u8) { + WASM_SEGMENT_INFO = 5, + WASM_INIT_FUNCS = 6, + WASM_COMDAT_INFO = 7, + WASM_SYMBOL_TABLE = 8, +}; + +pub const Segment = struct { + /// Segment's name, encoded as UTF-8 bytes. + name: []const u8, + /// The required alignment of the segment, encoded as a power of 2 + alignment: u32, + /// Bitfield containing flags for a segment + flags: u32, + + pub fn isTLS(segment: Segment) bool { + return segment.flags & @intFromEnum(Flags.WASM_SEG_FLAG_TLS) != 0; + } + + /// Returns the name as how it will be output into the final object + /// file or binary. When `merge_segments` is true, this will return the + /// short name. i.e. ".rodata". When false, it returns the entire name instead. + pub fn outputName(segment: Segment, merge_segments: bool) []const u8 { + if (segment.isTLS()) { + return ".tdata"; + } else if (!merge_segments) { + return segment.name; + } else if (std.mem.startsWith(u8, segment.name, ".rodata.")) { + return ".rodata"; + } else if (std.mem.startsWith(u8, segment.name, ".text.")) { + return ".text"; + } else if (std.mem.startsWith(u8, segment.name, ".data.")) { + return ".data"; + } else if (std.mem.startsWith(u8, segment.name, ".bss.")) { + return ".bss"; + } + return segment.name; + } + + pub const Flags = enum(u32) { + WASM_SEG_FLAG_STRINGS = 0x1, + WASM_SEG_FLAG_TLS = 0x2, + }; +}; + +pub const InitFunc = struct { + /// Priority of the init function + priority: u32, + /// The symbol index of init function (not the function index). + symbol_index: u32, +}; + +pub const Comdat = struct { + name: []const u8, + /// Must be zero, no flags are currently defined by the tool-convention. + flags: u32, + symbols: []const ComdatSym, +}; + +pub const ComdatSym = struct { + kind: Type, + /// Index of the data segment/function/global/event/table within a WASM module. + /// The object must not be an import. + index: u32, + + pub const Type = enum(u8) { + WASM_COMDAT_DATA = 0, + WASM_COMDAT_FUNCTION = 1, + WASM_COMDAT_GLOBAL = 2, + WASM_COMDAT_EVENT = 3, + WASM_COMDAT_TABLE = 4, + WASM_COMDAT_SECTION = 5, + }; +}; + +pub const Feature = struct { + /// Provides information about the usage of the feature. + /// - '0x2b' (+): Object uses this feature, and the link fails if feature is not in the allowed set. + /// - '0x2d' (-): Object does not use this feature, and the link fails if this feature is in the allowed set. + /// - '0x3d' (=): Object uses this feature, and the link fails if this feature is not in the allowed set, + /// or if any object does not use this feature. + prefix: Prefix, + /// Type of the feature, must be unique in the sequence of features. 
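// Illustrative sketch, not part of the patch: Segment.outputName above folds
// per-symbol section names into their short form when data segments are
// merged, while TLS segments always land in ".tdata". Assumes the module
// above is imported as "types.zig".
const std = @import("std");
const types = @import("types.zig");

test "segment name merging" {
    const seg: types.Segment = .{ .name = ".rodata.str1.1", .alignment = 1, .flags = 0 };
    try std.testing.expectEqualStrings(".rodata", seg.outputName(true));
    try std.testing.expectEqualStrings(".rodata.str1.1", seg.outputName(false));
}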
+ tag: Tag, + + /// Unlike `std.Target.wasm.Feature` this also contains linker-features such as shared-mem + pub const Tag = enum { + atomics, + bulk_memory, + exception_handling, + extended_const, + multivalue, + mutable_globals, + nontrapping_fptoint, + reference_types, + relaxed_simd, + sign_ext, + simd128, + tail_call, + shared_mem, + + /// From a given cpu feature, returns its linker feature + pub fn fromCpuFeature(feature: std.Target.wasm.Feature) Tag { + return @as(Tag, @enumFromInt(@intFromEnum(feature))); + } + + pub fn format(tag: Tag, comptime fmt: []const u8, opt: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = opt; + try writer.writeAll(switch (tag) { + .atomics => "atomics", + .bulk_memory => "bulk-memory", + .exception_handling => "exception-handling", + .extended_const => "extended-const", + .multivalue => "multivalue", + .mutable_globals => "mutable-globals", + .nontrapping_fptoint => "nontrapping-fptoint", + .reference_types => "reference-types", + .relaxed_simd => "relaxed-simd", + .sign_ext => "sign-ext", + .simd128 => "simd128", + .tail_call => "tail-call", + .shared_mem => "shared-mem", + }); + } + }; + + pub const Prefix = enum(u8) { + used = '+', + disallowed = '-', + required = '=', + }; + + pub fn format(feature: Feature, comptime fmt: []const u8, opt: std.fmt.FormatOptions, writer: anytype) !void { + _ = opt; + _ = fmt; + try writer.print("{c} {}", .{ feature.prefix, feature.tag }); + } +}; + +pub const known_features = std.ComptimeStringMap(Feature.Tag, .{ + .{ "atomics", .atomics }, + .{ "bulk-memory", .bulk_memory }, + .{ "exception-handling", .exception_handling }, + .{ "extended-const", .extended_const }, + .{ "multivalue", .multivalue }, + .{ "mutable-globals", .mutable_globals }, + .{ "nontrapping-fptoint", .nontrapping_fptoint }, + .{ "reference-types", .reference_types }, + .{ "relaxed-simd", .relaxed_simd }, + .{ "sign-ext", .sign_ext }, + .{ "simd128", .simd128 }, + .{ "tail-call", .tail_call }, + .{ "shared-mem", .shared_mem }, +}); + +pub const Limits = struct { + flags: u32 = 0, + min: u32, + max: ?u32, + + pub const Flags = enum(u32) { + WASM_LIMITS_FLAG_HAS_MAX = 0x1, + WASM_LIMITS_FLAG_IS_SHARED = 0x2, + }; + + pub fn hasFlag(limits: Limits, flag: Flags) bool { + return limits & @intFromEnum(flag) != 0; + } + + pub fn setFlag(limits: *Limits, flag: Flags) void { + limits.flags |= @intFromEnum(flag); + } +}; diff --git a/src/archive/archive/zld/Zld.zig b/src/archive/archive/zld/Zld.zig new file mode 100644 index 000000000000..3a4f2d9c16a2 --- /dev/null +++ b/src/archive/archive/zld/Zld.zig @@ -0,0 +1,142 @@ +const Zld = @This(); + +const std = @import("std"); +const fs = std.fs; +const io = std.io; +const mem = std.mem; +const process = std.process; +const trace = @import("tracy.zig").trace; + +const Allocator = mem.Allocator; +const CrossTarget = std.zig.CrossTarget; +pub const Elf = @import("Elf.zig"); +pub const MachO = @import("MachO.zig"); +pub const Coff = @import("Coff.zig"); +pub const Wasm = @import("Wasm.zig"); +const ThreadPool = @import("ThreadPool.zig"); + +tag: Tag, +allocator: Allocator, +file: fs.File, +thread_pool: *ThreadPool, + +pub const Tag = enum { + coff, + elf, + macho, + wasm, +}; + +pub const Emit = struct { + directory: fs.Dir, + sub_path: []const u8, +}; + +pub const OutputMode = enum { + exe, + lib, +}; + +pub const SystemLib = struct { + needed: bool = false, + weak: bool = false, +}; + +pub const LinkObject = struct { + path: []const u8, + must_link: bool = false, +}; + +pub const Options = union { + 
elf: Elf.Options, + macho: MachO.Options, + coff: Coff.Options, + wasm: Wasm.Options, +}; + +pub const MainCtx = struct { + gpa: Allocator, + cmd: []const u8, + args: []const []const u8, + log_scopes: *std.ArrayList([]const u8), + + pub fn printSuccess(ctx: MainCtx, comptime format: []const u8, args: anytype) noreturn { + ret: { + const msg = std.fmt.allocPrint(ctx.gpa, format, args) catch break :ret; + std.io.getStdOut().writeAll(msg) catch {}; + } + std.process.exit(0); + } + + pub fn printFailure(ctx: MainCtx, comptime format: []const u8, args: anytype) noreturn { + ret: { + const msg = std.fmt.allocPrint(ctx.gpa, format, args) catch break :ret; + std.io.getStdErr().writeAll(msg) catch {}; + } + std.process.exit(1); + } +}; + +pub fn main(tag: Tag, ctx: MainCtx) !void { + const tracy = trace(@src()); + defer tracy.end(); + + var arena_allocator = std.heap.ArenaAllocator.init(ctx.gpa); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + const opts: Options = switch (tag) { + .elf => .{ .elf = try Elf.Options.parseArgs(arena, ctx) }, + .macho => .{ .macho = try MachO.Options.parseArgs(arena, ctx) }, + .coff => .{ .coff = try Coff.Options.parseArgs(arena, ctx) }, + .wasm => .{ .wasm = try Wasm.Options.parseArgs(arena, ctx) }, + }; + + var thread_pool: ThreadPool = undefined; + try thread_pool.init(ctx.gpa); + defer thread_pool.deinit(); + + const zld = try openPath(ctx.gpa, tag, opts, &thread_pool); + defer zld.deinit(); + + try zld.flush(); +} + +pub fn openPath(allocator: Allocator, tag: Tag, options: Options, thread_pool: *ThreadPool) !*Zld { + return switch (tag) { + .macho => &(try MachO.openPath(allocator, options.macho, thread_pool)).base, + .elf => &(try Elf.openPath(allocator, options.elf, thread_pool)).base, + .coff => &(try Coff.openPath(allocator, options.coff, thread_pool)).base, + .wasm => &(try Wasm.openPath(allocator, options.wasm, thread_pool)).base, + }; +} + +pub fn deinit(base: *Zld) void { + switch (base.tag) { + .elf => @fieldParentPtr(Elf, "base", base).deinit(), + .macho => @fieldParentPtr(MachO, "base", base).deinit(), + .coff => @fieldParentPtr(Coff, "base", base).deinit(), + .wasm => @fieldParentPtr(Wasm, "base", base).deinit(), + } + base.allocator.destroy(base); +} + +pub fn flush(base: *Zld) !void { + switch (base.tag) { + .elf => try @fieldParentPtr(Elf, "base", base).flush(), + .macho => try @fieldParentPtr(MachO, "base", base).flush(), + .coff => try @fieldParentPtr(Coff, "base", base).flush(), + .wasm => try @fieldParentPtr(Wasm, "base", base).flush(), + } + base.closeFiles(); +} + +fn closeFiles(base: *const Zld) void { + switch (base.tag) { + .elf => @fieldParentPtr(Elf, "base", base).closeFiles(), + .macho => @fieldParentPtr(MachO, "base", base).closeFiles(), + .coff => @fieldParentPtr(Coff, "base", base).closeFiles(), + .wasm => @fieldParentPtr(Wasm, "base", base).closeFiles(), + } + base.file.close(); +} diff --git a/src/archive/archive/zld/aarch64.zig b/src/archive/archive/zld/aarch64.zig new file mode 100644 index 000000000000..4dd49e8d67cc --- /dev/null +++ b/src/archive/archive/zld/aarch64.zig @@ -0,0 +1,1943 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const DW = std.dwarf; +const assert = std.debug.assert; +const testing = std.testing; + +// zig fmt: off + +/// General purpose registers in the AArch64 instruction set +pub const Register = enum(u7) { + // 64-bit registers + x0, x1, x2, x3, x4, x5, x6, x7, + x8, x9, x10, x11, x12, x13, x14, x15, + x16, x17, x18, x19, x20, x21, x22, x23, + 
x24, x25, x26, x27, x28, x29, x30, xzr, + + // 32-bit registers + w0, w1, w2, w3, w4, w5, w6, w7, + w8, w9, w10, w11, w12, w13, w14, w15, + w16, w17, w18, w19, w20, w21, w22, w23, + w24, w25, w26, w27, w28, w29, w30, wzr, + + // Stack pointer + sp, wsp, + + pub fn id(self: Register) u6 { + return switch (@intFromEnum(self)) { + 0...63 => return @as(u6, @as(u5, @truncate(@intFromEnum(self)))), + 64...65 => 32, + else => unreachable, + }; + } + + pub fn enc(self: Register) u5 { + return switch (@intFromEnum(self)) { + 0...63 => return @as(u5, @truncate( @intFromEnum(self))), + 64...65 => 31, + else => unreachable, + }; + } + + /// Returns the bit-width of the register. + pub fn size(self: Register) u7 { + return switch (@intFromEnum(self)) { + 0...31 => 64, + 32...63 => 32, + 64 => 64, + 65 => 32, + else => unreachable, + }; + } + + /// Convert from any register to its 64 bit alias. + pub fn to64(self: Register) Register { + return switch (@intFromEnum(self)) { + 0...31 => self, + 32...63 => @enumFromInt(@intFromEnum(self) - 32), + 64 => .sp, + 65 => .sp, + else => unreachable, + }; + } + + /// Convert from any register to its 32 bit alias. + pub fn to32(self: Register) Register { + return switch (@intFromEnum(self)) { + 0...31 => @enumFromInt(@intFromEnum(self) + 32), + 32...63 => self, + 64 => .wsp, + 65 => .wsp, + else => unreachable, + }; + } + + pub fn dwarfLocOp(self: Register) u8 { + return @as(u8, self.enc()) + DW.OP.reg0; + } +}; + +// zig fmt: on + +test "Register.enc" { + try testing.expectEqual(@as(u5, 0), Register.x0.enc()); + try testing.expectEqual(@as(u5, 0), Register.w0.enc()); + + try testing.expectEqual(@as(u5, 31), Register.xzr.enc()); + try testing.expectEqual(@as(u5, 31), Register.wzr.enc()); + + try testing.expectEqual(@as(u5, 31), Register.sp.enc()); + try testing.expectEqual(@as(u5, 31), Register.sp.enc()); +} + +test "Register.size" { + try testing.expectEqual(@as(u7, 64), Register.x19.size()); + try testing.expectEqual(@as(u7, 32), Register.w3.size()); +} + +test "Register.to64/to32" { + try testing.expectEqual(Register.x0, Register.w0.to64()); + try testing.expectEqual(Register.x0, Register.x0.to64()); + + try testing.expectEqual(Register.w3, Register.w3.to32()); + try testing.expectEqual(Register.w3, Register.x3.to32()); +} + +// zig fmt: off + +/// Scalar floating point registers in the aarch64 instruction set +pub const FloatingPointRegister = enum(u8) { + // 128-bit registers + q0, q1, q2, q3, q4, q5, q6, q7, + q8, q9, q10, q11, q12, q13, q14, q15, + q16, q17, q18, q19, q20, q21, q22, q23, + q24, q25, q26, q27, q28, q29, q30, q31, + + // 64-bit registers + d0, d1, d2, d3, d4, d5, d6, d7, + d8, d9, d10, d11, d12, d13, d14, d15, + d16, d17, d18, d19, d20, d21, d22, d23, + d24, d25, d26, d27, d28, d29, d30, d31, + + // 32-bit registers + s0, s1, s2, s3, s4, s5, s6, s7, + s8, s9, s10, s11, s12, s13, s14, s15, + s16, s17, s18, s19, s20, s21, s22, s23, + s24, s25, s26, s27, s28, s29, s30, s31, + + // 16-bit registers + h0, h1, h2, h3, h4, h5, h6, h7, + h8, h9, h10, h11, h12, h13, h14, h15, + h16, h17, h18, h19, h20, h21, h22, h23, + h24, h25, h26, h27, h28, h29, h30, h31, + + // 8-bit registers + b0, b1, b2, b3, b4, b5, b6, b7, + b8, b9, b10, b11, b12, b13, b14, b15, + b16, b17, b18, b19, b20, b21, b22, b23, + b24, b25, b26, b27, b28, b29, b30, b31, + + pub fn id(self: FloatingPointRegister) u5 { + return @truncate(@intFromEnum(self)); + } + + /// Returns the bit-width of the register. 
+ pub fn size(self: FloatingPointRegister) u8 { + return switch (@intFromEnum(self)) { + 0...31 => 128, + 32...63 => 64, + 64...95 => 32, + 96...127 => 16, + 128...159 => 8, + else => unreachable, + }; + } + + /// Convert from any register to its 128 bit alias. + pub fn to128(self: FloatingPointRegister) FloatingPointRegister { + return @enumFromInt(self.id()); + } + + /// Convert from any register to its 64 bit alias. + pub fn to64(self: FloatingPointRegister) FloatingPointRegister { + return @enumFromInt(@as(u8, self.id()) + 32); + } + + /// Convert from any register to its 32 bit alias. + pub fn to32(self: FloatingPointRegister) FloatingPointRegister { + return @enumFromInt(@as(u8, self.id()) + 64); + } + + /// Convert from any register to its 16 bit alias. + pub fn to16(self: FloatingPointRegister) FloatingPointRegister { + return @enumFromInt(@as(u8, self.id()) + 96); + } + + /// Convert from any register to its 8 bit alias. + pub fn to8(self: FloatingPointRegister) FloatingPointRegister { + return @enumFromInt(@as(u8, self.id()) + 128); + } +}; + +// zig fmt: on + +test "FloatingPointRegister.id" { + try testing.expectEqual(@as(u5, 0), FloatingPointRegister.b0.id()); + try testing.expectEqual(@as(u5, 0), FloatingPointRegister.h0.id()); + try testing.expectEqual(@as(u5, 0), FloatingPointRegister.s0.id()); + try testing.expectEqual(@as(u5, 0), FloatingPointRegister.d0.id()); + try testing.expectEqual(@as(u5, 0), FloatingPointRegister.q0.id()); + + try testing.expectEqual(@as(u5, 2), FloatingPointRegister.q2.id()); + try testing.expectEqual(@as(u5, 31), FloatingPointRegister.d31.id()); +} + +test "FloatingPointRegister.size" { + try testing.expectEqual(@as(u8, 128), FloatingPointRegister.q1.size()); + try testing.expectEqual(@as(u8, 64), FloatingPointRegister.d2.size()); + try testing.expectEqual(@as(u8, 32), FloatingPointRegister.s3.size()); + try testing.expectEqual(@as(u8, 16), FloatingPointRegister.h4.size()); + try testing.expectEqual(@as(u8, 8), FloatingPointRegister.b5.size()); +} + +test "FloatingPointRegister.toX" { + try testing.expectEqual(FloatingPointRegister.q1, FloatingPointRegister.q1.to128()); + try testing.expectEqual(FloatingPointRegister.q2, FloatingPointRegister.b2.to128()); + try testing.expectEqual(FloatingPointRegister.q3, FloatingPointRegister.h3.to128()); + + try testing.expectEqual(FloatingPointRegister.d0, FloatingPointRegister.q0.to64()); + try testing.expectEqual(FloatingPointRegister.s1, FloatingPointRegister.d1.to32()); + try testing.expectEqual(FloatingPointRegister.h2, FloatingPointRegister.s2.to16()); + try testing.expectEqual(FloatingPointRegister.b3, FloatingPointRegister.h3.to8()); +} + +/// Represents an instruction in the AArch64 instruction set +pub const Instruction = union(enum) { + move_wide_immediate: packed struct { + rd: u5, + imm16: u16, + hw: u2, + fixed: u6 = 0b100101, + opc: u2, + sf: u1, + }, + pc_relative_address: packed struct { + rd: u5, + immhi: u19, + fixed: u5 = 0b10000, + immlo: u2, + op: u1, + }, + load_store_register: packed struct { + rt: u5, + rn: u5, + offset: u12, + opc: u2, + op1: u2, + v: u1, + fixed: u3 = 0b111, + size: u2, + }, + load_store_register_pair: packed struct { + rt1: u5, + rn: u5, + rt2: u5, + imm7: u7, + load: u1, + encoding: u2, + fixed: u5 = 0b101_0_0, + opc: u2, + }, + load_literal: packed struct { + rt: u5, + imm19: u19, + fixed: u6 = 0b011_0_00, + opc: u2, + }, + exception_generation: packed struct { + ll: u2, + op2: u3, + imm16: u16, + opc: u3, + fixed: u8 = 0b1101_0100, + }, + 
unconditional_branch_register: packed struct { + op4: u5, + rn: u5, + op3: u6, + op2: u5, + opc: u4, + fixed: u7 = 0b1101_011, + }, + unconditional_branch_immediate: packed struct { + imm26: u26, + fixed: u5 = 0b00101, + op: u1, + }, + no_operation: packed struct { + fixed: u32 = 0b1101010100_0_00_011_0010_0000_000_11111, + }, + logical_shifted_register: packed struct { + rd: u5, + rn: u5, + imm6: u6, + rm: u5, + n: u1, + shift: u2, + fixed: u5 = 0b01010, + opc: u2, + sf: u1, + }, + add_subtract_immediate: packed struct { + rd: u5, + rn: u5, + imm12: u12, + sh: u1, + fixed: u6 = 0b100010, + s: u1, + op: u1, + sf: u1, + }, + logical_immediate: packed struct { + rd: u5, + rn: u5, + imms: u6, + immr: u6, + n: u1, + fixed: u6 = 0b100100, + opc: u2, + sf: u1, + }, + bitfield: packed struct { + rd: u5, + rn: u5, + imms: u6, + immr: u6, + n: u1, + fixed: u6 = 0b100110, + opc: u2, + sf: u1, + }, + add_subtract_shifted_register: packed struct { + rd: u5, + rn: u5, + imm6: u6, + rm: u5, + fixed_1: u1 = 0b0, + shift: u2, + fixed_2: u5 = 0b01011, + s: u1, + op: u1, + sf: u1, + }, + add_subtract_extended_register: packed struct { + rd: u5, + rn: u5, + imm3: u3, + option: u3, + rm: u5, + fixed: u8 = 0b01011_00_1, + s: u1, + op: u1, + sf: u1, + }, + conditional_branch: struct { + cond: u4, + o0: u1, + imm19: u19, + o1: u1, + fixed: u7 = 0b0101010, + }, + compare_and_branch: struct { + rt: u5, + imm19: u19, + op: u1, + fixed: u6 = 0b011010, + sf: u1, + }, + conditional_select: struct { + rd: u5, + rn: u5, + op2: u2, + cond: u4, + rm: u5, + fixed: u8 = 0b11010100, + s: u1, + op: u1, + sf: u1, + }, + data_processing_3_source: packed struct { + rd: u5, + rn: u5, + ra: u5, + o0: u1, + rm: u5, + op31: u3, + fixed: u5 = 0b11011, + op54: u2, + sf: u1, + }, + data_processing_2_source: packed struct { + rd: u5, + rn: u5, + opcode: u6, + rm: u5, + fixed_1: u8 = 0b11010110, + s: u1, + fixed_2: u1 = 0b0, + sf: u1, + }, + + pub const Condition = enum(u4) { + /// Integer: Equal + /// Floating point: Equal + eq, + /// Integer: Not equal + /// Floating point: Not equal or unordered + ne, + /// Integer: Carry set + /// Floating point: Greater than, equal, or unordered + cs, + /// Integer: Carry clear + /// Floating point: Less than + cc, + /// Integer: Minus, negative + /// Floating point: Less than + mi, + /// Integer: Plus, positive or zero + /// Floating point: Greater than, equal, or unordered + pl, + /// Integer: Overflow + /// Floating point: Unordered + vs, + /// Integer: No overflow + /// Floating point: Ordered + vc, + /// Integer: Unsigned higher + /// Floating point: Greater than, or unordered + hi, + /// Integer: Unsigned lower or same + /// Floating point: Less than or equal + ls, + /// Integer: Signed greater than or equal + /// Floating point: Greater than or equal + ge, + /// Integer: Signed less than + /// Floating point: Less than, or unordered + lt, + /// Integer: Signed greater than + /// Floating point: Greater than + gt, + /// Integer: Signed less than or equal + /// Floating point: Less than, equal, or unordered + le, + /// Integer: Always + /// Floating point: Always + al, + /// Integer: Always + /// Floating point: Always + nv, + + /// Converts a std.math.CompareOperator into a condition flag, + /// i.e. returns the condition that is true iff the result of the + /// comparison is true. 
Assumes signed comparison + pub fn fromCompareOperatorSigned(op: std.math.CompareOperator) Condition { + return switch (op) { + .gte => .ge, + .gt => .gt, + .neq => .ne, + .lt => .lt, + .lte => .le, + .eq => .eq, + }; + } + + /// Converts a std.math.CompareOperator into a condition flag, + /// i.e. returns the condition that is true iff the result of the + /// comparison is true. Assumes unsigned comparison + pub fn fromCompareOperatorUnsigned(op: std.math.CompareOperator) Condition { + return switch (op) { + .gte => .cs, + .gt => .hi, + .neq => .ne, + .lt => .cc, + .lte => .ls, + .eq => .eq, + }; + } + + /// Returns the condition which is true iff the given condition is + /// false (if such a condition exists) + pub fn negate(cond: Condition) Condition { + return switch (cond) { + .eq => .ne, + .ne => .eq, + .cs => .cc, + .cc => .cs, + .mi => .pl, + .pl => .mi, + .vs => .vc, + .vc => .vs, + .hi => .ls, + .ls => .hi, + .ge => .lt, + .lt => .ge, + .gt => .le, + .le => .gt, + .al => unreachable, + .nv => unreachable, + }; + } + }; + + pub fn toU32(self: Instruction) u32 { + return switch (self) { + .move_wide_immediate => |v| @bitCast(v), + .pc_relative_address => |v| @bitCast(v), + .load_store_register => |v| @bitCast(v), + .load_store_register_pair => |v| @bitCast(v), + .load_literal => |v| @bitCast(v), + .exception_generation => |v| @bitCast(v), + .unconditional_branch_register => |v| @bitCast(v), + .unconditional_branch_immediate => |v| @bitCast(v), + .no_operation => |v| @bitCast(v), + .logical_shifted_register => |v| @bitCast(v), + .add_subtract_immediate => |v| @bitCast(v), + .logical_immediate => |v| @bitCast(v), + .bitfield => |v| @bitCast(v), + .add_subtract_shifted_register => |v| @bitCast(v), + .add_subtract_extended_register => |v| @bitCast(v), + // TODO once packed structs work, this can be refactored + .conditional_branch => |v| @as(u32, v.cond) | (@as(u32, v.o0) << 4) | (@as(u32, v.imm19) << 5) | (@as(u32, v.o1) << 24) | (@as(u32, v.fixed) << 25), + .compare_and_branch => |v| @as(u32, v.rt) | (@as(u32, v.imm19) << 5) | (@as(u32, v.op) << 24) | (@as(u32, v.fixed) << 25) | (@as(u32, v.sf) << 31), + .conditional_select => |v| @as(u32, v.rd) | @as(u32, v.rn) << 5 | @as(u32, v.op2) << 10 | @as(u32, v.cond) << 12 | @as(u32, v.rm) << 16 | @as(u32, v.fixed) << 21 | @as(u32, v.s) << 29 | @as(u32, v.op) << 30 | @as(u32, v.sf) << 31, + .data_processing_3_source => |v| @bitCast(v), + .data_processing_2_source => |v| @bitCast(v), + }; + } + + fn moveWideImmediate( + opc: u2, + rd: Register, + imm16: u16, + shift: u6, + ) Instruction { + assert(shift % 16 == 0); + assert(!(rd.size() == 32 and shift > 16)); + assert(!(rd.size() == 64 and shift > 48)); + + return Instruction{ + .move_wide_immediate = .{ + .rd = rd.enc(), + .imm16 = imm16, + .hw = @intCast(shift / 16), + .opc = opc, + .sf = switch (rd.size()) { + 32 => 0, + 64 => 1, + else => unreachable, // unexpected register size + }, + }, + }; + } + + fn pcRelativeAddress(rd: Register, imm21: i21, op: u1) Instruction { + assert(rd.size() == 64); + const imm21_u :u32= @bitCast( imm21); + return Instruction{ + .pc_relative_address = .{ + .rd = rd.enc(), + .immlo = @truncate(imm21_u), + .immhi = @truncate(imm21_u >> 2), + .op = op, + }, + }; + } + + pub const LoadStoreOffsetImmediate = union(enum) { + post_index: i9, + pre_index: i9, + unsigned: u12, + }; + + pub const LoadStoreOffsetRegister = struct { + rm: u5, + shift: union(enum) { + uxtw: u2, + lsl: u2, + sxtw: u2, + sxtx: u2, + }, + }; + + /// Represents the offset operand of a load or 
store instruction. + /// Data can be loaded from memory with either an immediate offset + /// or an offset that is stored in some register. + pub const LoadStoreOffset = union(enum) { + immediate: LoadStoreOffsetImmediate, + register: LoadStoreOffsetRegister, + + pub const none = LoadStoreOffset{ + .immediate = .{ .unsigned = 0 }, + }; + + pub fn toU12(self: LoadStoreOffset) u12 { + return switch (self) { + .immediate => |imm_type| switch (imm_type) { + .post_index => |v| (@as(u12,@intCast(@as(u9, @bitCast(v)))) << 2) + 1, + .pre_index => |v| (@as(u12,@intCast(@as(u9, @bitCast(v)))) << 2) + 3, + .unsigned => |v| v, + }, + .register => |r| switch (r.shift) { + .uxtw => |v| (@as(u12, @intCast(r.rm)) << 6) + (@as(u12,@intCast(v)) << 2) + 16 + 2050, + .lsl => |v| (@as(u12, @intCast(r.rm)) << 6) + (@as(u12,@intCast(v)) << 2) + 24 + 2050, + .sxtw => |v| (@as(u12, @intCast(r.rm)) << 6) + (@as(u12,@intCast(v)) << 2) + 48 + 2050, + .sxtx => |v| (@as(u12, @intCast(r.rm)) << 6) + (@as(u12,@intCast(v)) << 2) + 56 + 2050, + }, + }; + } + + pub fn imm(offset: u12) LoadStoreOffset { + return .{ + .immediate = .{ .unsigned = offset }, + }; + } + + pub fn imm_post_index(offset: i9) LoadStoreOffset { + return .{ + .immediate = .{ .post_index = offset }, + }; + } + + pub fn imm_pre_index(offset: i9) LoadStoreOffset { + return .{ + .immediate = .{ .pre_index = offset }, + }; + } + + pub fn reg(rm: Register) LoadStoreOffset { + return .{ + .register = .{ + .rm = rm.enc(), + .shift = .{ + .lsl = 0, + }, + }, + }; + } + + pub fn reg_uxtw(rm: Register, shift: u2) LoadStoreOffset { + assert(rm.size() == 32 and (shift == 0 or shift == 2)); + return .{ + .register = .{ + .rm = rm.enc(), + .shift = .{ + .uxtw = shift, + }, + }, + }; + } + + pub fn reg_lsl(rm: Register, shift: u2) LoadStoreOffset { + assert(rm.size() == 64 and (shift == 0 or shift == 3)); + return .{ + .register = .{ + .rm = rm.enc(), + .shift = .{ + .lsl = shift, + }, + }, + }; + } + + pub fn reg_sxtw(rm: Register, shift: u2) LoadStoreOffset { + assert(rm.size() == 32 and (shift == 0 or shift == 2)); + return .{ + .register = .{ + .rm = rm.enc(), + .shift = .{ + .sxtw = shift, + }, + }, + }; + } + + pub fn reg_sxtx(rm: Register, shift: u2) LoadStoreOffset { + assert(rm.size() == 64 and (shift == 0 or shift == 3)); + return .{ + .register = .{ + .rm = rm.enc(), + .shift = .{ + .sxtx = shift, + }, + }, + }; + } + }; + + /// Which kind of load/store to perform + const LoadStoreVariant = enum { + /// 32 bits or 64 bits + str, + /// 8 bits, zero-extended + strb, + /// 16 bits, zero-extended + strh, + /// 32 bits or 64 bits + ldr, + /// 8 bits, zero-extended + ldrb, + /// 16 bits, zero-extended + ldrh, + /// 8 bits, sign extended + ldrsb, + /// 16 bits, sign extended + ldrsh, + /// 32 bits, sign extended + ldrsw, + }; + + fn loadStoreRegister( + rt: Register, + rn: Register, + offset: LoadStoreOffset, + variant: LoadStoreVariant, + ) Instruction { + assert(rn.size() == 64); + assert(rn.id() != Register.xzr.id()); + + const off = offset.toU12(); + + const op1: u2 = blk: { + switch (offset) { + .immediate => |imm| switch (imm) { + .unsigned => break :blk 0b01, + else => {}, + }, + else => {}, + } + break :blk 0b00; + }; + + const opc: u2 = blk: { + switch (variant) { + .ldr, .ldrh, .ldrb => break :blk 0b01, + .str, .strh, .strb => break :blk 0b00, + .ldrsb, + .ldrsh, + => switch (rt.size()) { + 32 => break :blk 0b11, + 64 => break :blk 0b10, + else => unreachable, // unexpected register size + }, + .ldrsw => break :blk 0b10, + } + }; + + const size: u2 = 
blk: { + switch (variant) { + .ldr, .str => switch (rt.size()) { + 32 => break :blk 0b10, + 64 => break :blk 0b11, + else => unreachable, // unexpected register size + }, + .ldrsw => break :blk 0b10, + .ldrh, .ldrsh, .strh => break :blk 0b01, + .ldrb, .ldrsb, .strb => break :blk 0b00, + } + }; + + return Instruction{ + .load_store_register = .{ + .rt = rt.enc(), + .rn = rn.enc(), + .offset = off, + .opc = opc, + .op1 = op1, + .v = 0, + .size = size, + }, + }; + } + + fn loadStoreRegisterPair( + rt1: Register, + rt2: Register, + rn: Register, + offset: i9, + encoding: u2, + load: bool, + ) Instruction { + assert(rn.size() == 64); + assert(rn.id() != Register.xzr.id()); + + switch (rt1.size()) { + 32 => { + assert(-256 <= offset and offset <= 252); + const imm7 :u7 = @truncate(@as(u9, @bitCast(offset >> 2))); + return Instruction{ + .load_store_register_pair = .{ + .rt1 = rt1.enc(), + .rn = rn.enc(), + .rt2 = rt2.enc(), + .imm7 = imm7, + .load = @intFromBool(load), + .encoding = encoding, + .opc = 0b00, + }, + }; + }, + 64 => { + assert(-512 <= offset and offset <= 504); + const imm7 : u7 = @truncate(@as( u9, @bitCast(offset >> 3))); + return Instruction{ + .load_store_register_pair = .{ + .rt1 = rt1.enc(), + .rn = rn.enc(), + .rt2 = rt2.enc(), + .imm7 = imm7, + .load = @intFromBool(load), + .encoding = encoding, + .opc = 0b10, + }, + }; + }, + else => unreachable, // unexpected register size + } + } + + fn loadLiteral(rt: Register, imm19: u19) Instruction { + return Instruction{ + .load_literal = .{ + .rt = rt.enc(), + .imm19 = imm19, + .opc = switch (rt.size()) { + 32 => 0b00, + 64 => 0b01, + else => unreachable, // unexpected register size + }, + }, + }; + } + + fn exceptionGeneration( + opc: u3, + op2: u3, + ll: u2, + imm16: u16, + ) Instruction { + return Instruction{ + .exception_generation = .{ + .ll = ll, + .op2 = op2, + .imm16 = imm16, + .opc = opc, + }, + }; + } + + fn unconditionalBranchRegister( + opc: u4, + op2: u5, + op3: u6, + rn: Register, + op4: u5, + ) Instruction { + assert(rn.size() == 64); + + return Instruction{ + .unconditional_branch_register = .{ + .op4 = op4, + .rn = rn.enc(), + .op3 = op3, + .op2 = op2, + .opc = opc, + }, + }; + } + + fn unconditionalBranchImmediate( + op: u1, + offset: i28, + ) Instruction { + return Instruction{ + .unconditional_branch_immediate = .{ + .imm26 = @bitCast(@as(i26, @intCast(offset >> 2))), + .op = op, + }, + }; + } + + pub const LogicalShiftedRegisterShift = enum(u2) { lsl, lsr, asr, ror }; + + fn logicalShiftedRegister( + opc: u2, + n: u1, + rd: Register, + rn: Register, + rm: Register, + shift: LogicalShiftedRegisterShift, + amount: u6, + ) Instruction { + assert(rd.size() == rn.size()); + assert(rd.size() == rm.size()); + if (rd.size() == 32) assert(amount < 32); + + return Instruction{ + .logical_shifted_register = .{ + .rd = rd.enc(), + .rn = rn.enc(), + .imm6 = amount, + .rm = rm.enc(), + .n = n, + .shift = @intFromEnum(shift), + .opc = opc, + .sf = switch (rd.size()) { + 32 => 0b0, + 64 => 0b1, + else => unreachable, + }, + }, + }; + } + + fn addSubtractImmediate( + op: u1, + s: u1, + rd: Register, + rn: Register, + imm12: u12, + shift: bool, + ) Instruction { + assert(rd.size() == rn.size()); + assert(rn.id() != Register.xzr.id()); + + return Instruction{ + .add_subtract_immediate = .{ + .rd = rd.enc(), + .rn = rn.enc(), + .imm12 = imm12, + .sh = @intFromBool(shift), + .s = s, + .op = op, + .sf = switch (rd.size()) { + 32 => 0b0, + 64 => 0b1, + else => unreachable, // unexpected register size + }, + }, + }; + } + + fn 
logicalImmediate(
+        opc: u2,
+        rd: Register,
+        rn: Register,
+        imms: u6,
+        immr: u6,
+        n: u1,
+    ) Instruction {
+        assert(rd.size() == rn.size());
+        assert(!(rd.size() == 32 and n != 0));
+
+        return Instruction{
+            .logical_immediate = .{
+                .rd = rd.enc(),
+                .rn = rn.enc(),
+                .imms = imms,
+                .immr = immr,
+                .n = n,
+                .opc = opc,
+                .sf = switch (rd.size()) {
+                    32 => 0b0,
+                    64 => 0b1,
+                    else => unreachable, // unexpected register size
+                },
+            },
+        };
+    }
+
+    fn bitfield(
+        opc: u2,
+        n: u1,
+        rd: Register,
+        rn: Register,
+        immr: u6,
+        imms: u6,
+    ) Instruction {
+        assert(rd.size() == rn.size());
+        assert(!(rd.size() == 64 and n != 1));
+        assert(!(rd.size() == 32 and (n != 0 or immr >> 5 != 0 or imms >> 5 != 0)));
+
+        return Instruction{
+            .bitfield = .{
+                .rd = rd.enc(),
+                .rn = rn.enc(),
+                .imms = imms,
+                .immr = immr,
+                .n = n,
+                .opc = opc,
+                .sf = switch (rd.size()) {
+                    32 => 0b0,
+                    64 => 0b1,
+                    else => unreachable, // unexpected register size
+                },
+            },
+        };
+    }
+
+    pub const AddSubtractShiftedRegisterShift = enum(u2) { lsl, lsr, asr, _ };
+
+    fn addSubtractShiftedRegister(
+        op: u1,
+        s: u1,
+        shift: AddSubtractShiftedRegisterShift,
+        rd: Register,
+        rn: Register,
+        rm: Register,
+        imm6: u6,
+    ) Instruction {
+        assert(rd.size() == rn.size());
+        assert(rd.size() == rm.size());
+
+        return Instruction{
+            .add_subtract_shifted_register = .{
+                .rd = rd.enc(),
+                .rn = rn.enc(),
+                .imm6 = imm6,
+                .rm = rm.enc(),
+                .shift = @intFromEnum(shift),
+                .s = s,
+                .op = op,
+                .sf = switch (rd.size()) {
+                    32 => 0b0,
+                    64 => 0b1,
+                    else => unreachable, // unexpected register size
+                },
+            },
+        };
+    }
+
+    pub const AddSubtractExtendedRegisterOption = enum(u3) {
+        uxtb,
+        uxth,
+        uxtw,
+        uxtx, // serves also as lsl
+        sxtb,
+        sxth,
+        sxtw,
+        sxtx,
+    };
+
+    fn addSubtractExtendedRegister(
+        op: u1,
+        s: u1,
+        rd: Register,
+        rn: Register,
+        rm: Register,
+        extend: AddSubtractExtendedRegisterOption,
+        imm3: u3,
+    ) Instruction {
+        return Instruction{
+            .add_subtract_extended_register = .{
+                .rd = rd.enc(),
+                .rn = rn.enc(),
+                .imm3 = imm3,
+                .option = @intFromEnum(extend),
+                .rm = rm.enc(),
+                .s = s,
+                .op = op,
+                .sf = switch (rd.size()) {
+                    32 => 0b0,
+                    64 => 0b1,
+                    else => unreachable, // unexpected register size
+                },
+            },
+        };
+    }
+
+    fn conditionalBranch(
+        o0: u1,
+        o1: u1,
+        cond: Condition,
+        offset: i21,
+    ) Instruction {
+        assert(offset & 0b11 == 0b00);
+
+        return Instruction{
+            .conditional_branch = .{
+                .cond = @intFromEnum(cond),
+                .o0 = o0,
+                .imm19 = @bitCast(@as(i19, @intCast(offset >> 2))),
+                .o1 = o1,
+            },
+        };
+    }
+
+    fn compareAndBranch(
+        op: u1,
+        rt: Register,
+        offset: i21,
+    ) Instruction {
+        assert(offset & 0b11 == 0b00);
+
+        return Instruction{
+            .compare_and_branch = .{
+                .rt = rt.enc(),
+                .imm19 = @bitCast(@as(i19, @intCast(offset >> 2))),
+                .op = op,
+                .sf = switch (rt.size()) {
+                    32 => 0b0,
+                    64 => 0b1,
+                    else => unreachable, // unexpected register size
+                },
+            },
+        };
+    }
+
+    fn conditionalSelect(
+        op2: u2,
+        op: u1,
+        s: u1,
+        rd: Register,
+        rn: Register,
+        rm: Register,
+        cond: Condition,
+    ) Instruction {
+        assert(rd.size() == rn.size());
+        assert(rd.size() == rm.size());
+
+        return Instruction{
+            .conditional_select = .{
+                .rd = rd.enc(),
+                .rn = rn.enc(),
+                .op2 = op2,
+                .cond = @intFromEnum(cond),
+                .rm = rm.enc(),
+                .s = s,
+                .op = op,
+                .sf = switch (rd.size()) {
+                    32 => 0b0,
+                    64 => 0b1,
+                    else => unreachable, // unexpected register size
+                },
+            },
+        };
+    }
+
+    fn dataProcessing3Source(
+        op54: u2,
+        op31: u3,
+        o0: u1,
+        rd: Register,
+        rn: Register,
+        rm: Register,
+
ra: Register, + ) Instruction { + return Instruction{ + .data_processing_3_source = .{ + .rd = rd.enc(), + .rn = rn.enc(), + .ra = ra.enc(), + .o0 = o0, + .rm = rm.enc(), + .op31 = op31, + .op54 = op54, + .sf = switch (rd.size()) { + 32 => 0b0, + 64 => 0b1, + else => unreachable, // unexpected register size + }, + }, + }; + } + + fn dataProcessing2Source( + s: u1, + opcode: u6, + rd: Register, + rn: Register, + rm: Register, + ) Instruction { + assert(rd.size() == rn.size()); + assert(rd.size() == rm.size()); + + return Instruction{ + .data_processing_2_source = .{ + .rd = rd.enc(), + .rn = rn.enc(), + .opcode = opcode, + .rm = rm.enc(), + .s = s, + .sf = switch (rd.size()) { + 32 => 0b0, + 64 => 0b1, + else => unreachable, // unexpected register size + }, + }, + }; + } + + // Helper functions for assembly syntax functions + + // Move wide (immediate) + + pub fn movn(rd: Register, imm16: u16, shift: u6) Instruction { + return moveWideImmediate(0b00, rd, imm16, shift); + } + + pub fn movz(rd: Register, imm16: u16, shift: u6) Instruction { + return moveWideImmediate(0b10, rd, imm16, shift); + } + + pub fn movk(rd: Register, imm16: u16, shift: u6) Instruction { + return moveWideImmediate(0b11, rd, imm16, shift); + } + + // PC relative address + + pub fn adr(rd: Register, imm21: i21) Instruction { + return pcRelativeAddress(rd, imm21, 0b0); + } + + pub fn adrp(rd: Register, imm21: i21) Instruction { + return pcRelativeAddress(rd, imm21, 0b1); + } + + // Load or store register + + pub fn ldrLiteral(rt: Register, literal: u19) Instruction { + return loadLiteral(rt, literal); + } + + pub fn ldr(rt: Register, rn: Register, offset: LoadStoreOffset) Instruction { + return loadStoreRegister(rt, rn, offset, .ldr); + } + + pub fn ldrh(rt: Register, rn: Register, offset: LoadStoreOffset) Instruction { + return loadStoreRegister(rt, rn, offset, .ldrh); + } + + pub fn ldrb(rt: Register, rn: Register, offset: LoadStoreOffset) Instruction { + return loadStoreRegister(rt, rn, offset, .ldrb); + } + + pub fn ldrsb(rt: Register, rn: Register, offset: LoadStoreOffset) Instruction { + return loadStoreRegister(rt, rn, offset, .ldrsb); + } + + pub fn ldrsh(rt: Register, rn: Register, offset: LoadStoreOffset) Instruction { + return loadStoreRegister(rt, rn, offset, .ldrsh); + } + + pub fn ldrsw(rt: Register, rn: Register, offset: LoadStoreOffset) Instruction { + return loadStoreRegister(rt, rn, offset, .ldrsw); + } + + pub fn str(rt: Register, rn: Register, offset: LoadStoreOffset) Instruction { + return loadStoreRegister(rt, rn, offset, .str); + } + + pub fn strh(rt: Register, rn: Register, offset: LoadStoreOffset) Instruction { + return loadStoreRegister(rt, rn, offset, .strh); + } + + pub fn strb(rt: Register, rn: Register, offset: LoadStoreOffset) Instruction { + return loadStoreRegister(rt, rn, offset, .strb); + } + + // Load or store pair of registers + + pub const LoadStorePairOffset = struct { + encoding: enum(u2) { + post_index = 0b01, + signed = 0b10, + pre_index = 0b11, + }, + offset: i9, + + pub fn none() LoadStorePairOffset { + return .{ .encoding = .signed, .offset = 0 }; + } + + pub fn post_index(imm: i9) LoadStorePairOffset { + return .{ .encoding = .post_index, .offset = imm }; + } + + pub fn pre_index(imm: i9) LoadStorePairOffset { + return .{ .encoding = .pre_index, .offset = imm }; + } + + pub fn signed(imm: i9) LoadStorePairOffset { + return .{ .encoding = .signed, .offset = imm }; + } + }; + + pub fn ldp(rt1: Register, rt2: Register, rn: Register, offset: LoadStorePairOffset) Instruction { + 
return loadStoreRegisterPair(rt1, rt2, rn, offset.offset, @intFromEnum(offset.encoding), true); + } + + pub fn ldnp(rt1: Register, rt2: Register, rn: Register, offset: i9) Instruction { + return loadStoreRegisterPair(rt1, rt2, rn, offset, 0, true); + } + + pub fn stp(rt1: Register, rt2: Register, rn: Register, offset: LoadStorePairOffset) Instruction { + return loadStoreRegisterPair(rt1, rt2, rn, offset.offset, @intFromEnum(offset.encoding), false); + } + + pub fn stnp(rt1: Register, rt2: Register, rn: Register, offset: i9) Instruction { + return loadStoreRegisterPair(rt1, rt2, rn, offset, 0, false); + } + + // Exception generation + + pub fn svc(imm16: u16) Instruction { + return exceptionGeneration(0b000, 0b000, 0b01, imm16); + } + + pub fn hvc(imm16: u16) Instruction { + return exceptionGeneration(0b000, 0b000, 0b10, imm16); + } + + pub fn smc(imm16: u16) Instruction { + return exceptionGeneration(0b000, 0b000, 0b11, imm16); + } + + pub fn brk(imm16: u16) Instruction { + return exceptionGeneration(0b001, 0b000, 0b00, imm16); + } + + pub fn hlt(imm16: u16) Instruction { + return exceptionGeneration(0b010, 0b000, 0b00, imm16); + } + + // Unconditional branch (register) + + pub fn br(rn: Register) Instruction { + return unconditionalBranchRegister(0b0000, 0b11111, 0b000000, rn, 0b00000); + } + + pub fn blr(rn: Register) Instruction { + return unconditionalBranchRegister(0b0001, 0b11111, 0b000000, rn, 0b00000); + } + + pub fn ret(rn: ?Register) Instruction { + return unconditionalBranchRegister(0b0010, 0b11111, 0b000000, rn orelse .x30, 0b00000); + } + + // Unconditional branch (immediate) + + pub fn b(offset: i28) Instruction { + return unconditionalBranchImmediate(0, offset); + } + + pub fn bl(offset: i28) Instruction { + return unconditionalBranchImmediate(1, offset); + } + + // Nop + + pub fn nop() Instruction { + return Instruction{ .no_operation = .{} }; + } + + // Logical (shifted register) + + pub fn andShiftedRegister( + rd: Register, + rn: Register, + rm: Register, + shift: LogicalShiftedRegisterShift, + amount: u6, + ) Instruction { + return logicalShiftedRegister(0b00, 0b0, rd, rn, rm, shift, amount); + } + + pub fn bicShiftedRegister( + rd: Register, + rn: Register, + rm: Register, + shift: LogicalShiftedRegisterShift, + amount: u6, + ) Instruction { + return logicalShiftedRegister(0b00, 0b1, rd, rn, rm, shift, amount); + } + + pub fn orrShiftedRegister( + rd: Register, + rn: Register, + rm: Register, + shift: LogicalShiftedRegisterShift, + amount: u6, + ) Instruction { + return logicalShiftedRegister(0b01, 0b0, rd, rn, rm, shift, amount); + } + + pub fn ornShiftedRegister( + rd: Register, + rn: Register, + rm: Register, + shift: LogicalShiftedRegisterShift, + amount: u6, + ) Instruction { + return logicalShiftedRegister(0b01, 0b1, rd, rn, rm, shift, amount); + } + + pub fn eorShiftedRegister( + rd: Register, + rn: Register, + rm: Register, + shift: LogicalShiftedRegisterShift, + amount: u6, + ) Instruction { + return logicalShiftedRegister(0b10, 0b0, rd, rn, rm, shift, amount); + } + + pub fn eonShiftedRegister( + rd: Register, + rn: Register, + rm: Register, + shift: LogicalShiftedRegisterShift, + amount: u6, + ) Instruction { + return logicalShiftedRegister(0b10, 0b1, rd, rn, rm, shift, amount); + } + + pub fn andsShiftedRegister( + rd: Register, + rn: Register, + rm: Register, + shift: LogicalShiftedRegisterShift, + amount: u6, + ) Instruction { + return logicalShiftedRegister(0b11, 0b0, rd, rn, rm, shift, amount); + } + + pub fn bicsShiftedRegister( + rd: Register, + rn: 
Register, + rm: Register, + shift: LogicalShiftedRegisterShift, + amount: u6, + ) Instruction { + return logicalShiftedRegister(0b11, 0b1, rd, rn, rm, shift, amount); + } + + // Add/subtract (immediate) + + pub fn add(rd: Register, rn: Register, imm: u12, shift: bool) Instruction { + return addSubtractImmediate(0b0, 0b0, rd, rn, imm, shift); + } + + pub fn adds(rd: Register, rn: Register, imm: u12, shift: bool) Instruction { + return addSubtractImmediate(0b0, 0b1, rd, rn, imm, shift); + } + + pub fn sub(rd: Register, rn: Register, imm: u12, shift: bool) Instruction { + return addSubtractImmediate(0b1, 0b0, rd, rn, imm, shift); + } + + pub fn subs(rd: Register, rn: Register, imm: u12, shift: bool) Instruction { + return addSubtractImmediate(0b1, 0b1, rd, rn, imm, shift); + } + + // Logical (immediate) + + pub fn andImmediate(rd: Register, rn: Register, imms: u6, immr: u6, n: u1) Instruction { + return logicalImmediate(0b00, rd, rn, imms, immr, n); + } + + pub fn orrImmediate(rd: Register, rn: Register, imms: u6, immr: u6, n: u1) Instruction { + return logicalImmediate(0b01, rd, rn, imms, immr, n); + } + + pub fn eorImmediate(rd: Register, rn: Register, imms: u6, immr: u6, n: u1) Instruction { + return logicalImmediate(0b10, rd, rn, imms, immr, n); + } + + pub fn andsImmediate(rd: Register, rn: Register, imms: u6, immr: u6, n: u1) Instruction { + return logicalImmediate(0b11, rd, rn, imms, immr, n); + } + + // Bitfield + + pub fn sbfm(rd: Register, rn: Register, immr: u6, imms: u6) Instruction { + const n: u1 = switch (rd.size()) { + 32 => 0b0, + 64 => 0b1, + else => unreachable, // unexpected register size + }; + return bitfield(0b00, n, rd, rn, immr, imms); + } + + pub fn bfm(rd: Register, rn: Register, immr: u6, imms: u6) Instruction { + const n: u1 = switch (rd.size()) { + 32 => 0b0, + 64 => 0b1, + else => unreachable, // unexpected register size + }; + return bitfield(0b01, n, rd, rn, immr, imms); + } + + pub fn ubfm(rd: Register, rn: Register, immr: u6, imms: u6) Instruction { + const n: u1 = switch (rd.size()) { + 32 => 0b0, + 64 => 0b1, + else => unreachable, // unexpected register size + }; + return bitfield(0b10, n, rd, rn, immr, imms); + } + + pub fn asrImmediate(rd: Register, rn: Register, shift: u6) Instruction { + const imms :u6 = @intCast(rd.size() - 1); + return sbfm(rd, rn, shift, imms); + } + + pub fn sbfx(rd: Register, rn: Register, lsb: u6, width: u7) Instruction { + return sbfm(rd, rn, lsb, @intCast(lsb + width - 1)); + } + + pub fn sxtb(rd: Register, rn: Register) Instruction { + return sbfm(rd, rn, 0, 7); + } + + pub fn sxth(rd: Register, rn: Register) Instruction { + return sbfm(rd, rn, 0, 15); + } + + pub fn sxtw(rd: Register, rn: Register) Instruction { + assert(rd.size() == 64); + return sbfm(rd, rn, 0, 31); + } + + pub fn lslImmediate(rd: Register, rn: Register, shift: u6) Instruction { + const size :u6 = @intCast(rd.size() - 1); + return ubfm(rd, rn, size - shift + 1, size - shift); + } + + pub fn lsrImmediate(rd: Register, rn: Register, shift: u6) Instruction { + const imms :u6 = @intCast(rd.size() - 1); + return ubfm(rd, rn, shift, imms); + } + + pub fn ubfx(rd: Register, rn: Register, lsb: u6, width: u7) Instruction { + return ubfm(rd, rn, lsb, @intCast(lsb + width - 1)); + } + + pub fn uxtb(rd: Register, rn: Register) Instruction { + return ubfm(rd, rn, 0, 7); + } + + pub fn uxth(rd: Register, rn: Register) Instruction { + return ubfm(rd, rn, 0, 15); + } + + // Add/subtract (shifted register) + + pub fn addShiftedRegister( + rd: Register, + rn: Register, + 
rm: Register, + shift: AddSubtractShiftedRegisterShift, + imm6: u6, + ) Instruction { + return addSubtractShiftedRegister(0b0, 0b0, shift, rd, rn, rm, imm6); + } + + pub fn addsShiftedRegister( + rd: Register, + rn: Register, + rm: Register, + shift: AddSubtractShiftedRegisterShift, + imm6: u6, + ) Instruction { + return addSubtractShiftedRegister(0b0, 0b1, shift, rd, rn, rm, imm6); + } + + pub fn subShiftedRegister( + rd: Register, + rn: Register, + rm: Register, + shift: AddSubtractShiftedRegisterShift, + imm6: u6, + ) Instruction { + return addSubtractShiftedRegister(0b1, 0b0, shift, rd, rn, rm, imm6); + } + + pub fn subsShiftedRegister( + rd: Register, + rn: Register, + rm: Register, + shift: AddSubtractShiftedRegisterShift, + imm6: u6, + ) Instruction { + return addSubtractShiftedRegister(0b1, 0b1, shift, rd, rn, rm, imm6); + } + + // Add/subtract (extended register) + + pub fn addExtendedRegister( + rd: Register, + rn: Register, + rm: Register, + extend: AddSubtractExtendedRegisterOption, + imm3: u3, + ) Instruction { + return addSubtractExtendedRegister(0b0, 0b0, rd, rn, rm, extend, imm3); + } + + pub fn addsExtendedRegister( + rd: Register, + rn: Register, + rm: Register, + extend: AddSubtractExtendedRegisterOption, + imm3: u3, + ) Instruction { + return addSubtractExtendedRegister(0b0, 0b1, rd, rn, rm, extend, imm3); + } + + pub fn subExtendedRegister( + rd: Register, + rn: Register, + rm: Register, + extend: AddSubtractExtendedRegisterOption, + imm3: u3, + ) Instruction { + return addSubtractExtendedRegister(0b1, 0b0, rd, rn, rm, extend, imm3); + } + + pub fn subsExtendedRegister( + rd: Register, + rn: Register, + rm: Register, + extend: AddSubtractExtendedRegisterOption, + imm3: u3, + ) Instruction { + return addSubtractExtendedRegister(0b1, 0b1, rd, rn, rm, extend, imm3); + } + + // Conditional branch + + pub fn bCond(cond: Condition, offset: i21) Instruction { + return conditionalBranch(0b0, 0b0, cond, offset); + } + + // Compare and branch + + pub fn cbz(rt: Register, offset: i21) Instruction { + return compareAndBranch(0b0, rt, offset); + } + + pub fn cbnz(rt: Register, offset: i21) Instruction { + return compareAndBranch(0b1, rt, offset); + } + + // Conditional select + + pub fn csel(rd: Register, rn: Register, rm: Register, cond: Condition) Instruction { + return conditionalSelect(0b00, 0b0, 0b0, rd, rn, rm, cond); + } + + pub fn csinc(rd: Register, rn: Register, rm: Register, cond: Condition) Instruction { + return conditionalSelect(0b01, 0b0, 0b0, rd, rn, rm, cond); + } + + pub fn csinv(rd: Register, rn: Register, rm: Register, cond: Condition) Instruction { + return conditionalSelect(0b00, 0b1, 0b0, rd, rn, rm, cond); + } + + pub fn csneg(rd: Register, rn: Register, rm: Register, cond: Condition) Instruction { + return conditionalSelect(0b01, 0b1, 0b0, rd, rn, rm, cond); + } + + // Data processing (3 source) + + pub fn madd(rd: Register, rn: Register, rm: Register, ra: Register) Instruction { + return dataProcessing3Source(0b00, 0b000, 0b0, rd, rn, rm, ra); + } + + pub fn smaddl(rd: Register, rn: Register, rm: Register, ra: Register) Instruction { + assert(rd.size() == 64 and rn.size() == 32 and rm.size() == 32 and ra.size() == 64); + return dataProcessing3Source(0b00, 0b001, 0b0, rd, rn, rm, ra); + } + + pub fn umaddl(rd: Register, rn: Register, rm: Register, ra: Register) Instruction { + assert(rd.size() == 64 and rn.size() == 32 and rm.size() == 32 and ra.size() == 64); + return dataProcessing3Source(0b00, 0b101, 0b0, rd, rn, rm, ra); + } + + pub fn msub(rd: 
Register, rn: Register, rm: Register, ra: Register) Instruction { + return dataProcessing3Source(0b00, 0b000, 0b1, rd, rn, rm, ra); + } + + pub fn mul(rd: Register, rn: Register, rm: Register) Instruction { + return madd(rd, rn, rm, .xzr); + } + + pub fn smull(rd: Register, rn: Register, rm: Register) Instruction { + return smaddl(rd, rn, rm, .xzr); + } + + pub fn smulh(rd: Register, rn: Register, rm: Register) Instruction { + assert(rd.size() == 64); + return dataProcessing3Source(0b00, 0b010, 0b0, rd, rn, rm, .xzr); + } + + pub fn umull(rd: Register, rn: Register, rm: Register) Instruction { + return umaddl(rd, rn, rm, .xzr); + } + + pub fn umulh(rd: Register, rn: Register, rm: Register) Instruction { + assert(rd.size() == 64); + return dataProcessing3Source(0b00, 0b110, 0b0, rd, rn, rm, .xzr); + } + + pub fn mneg(rd: Register, rn: Register, rm: Register) Instruction { + return msub(rd, rn, rm, .xzr); + } + + // Data processing (2 source) + + pub fn lslv(rd: Register, rn: Register, rm: Register) Instruction { + return dataProcessing2Source(0b0, 0b001000, rd, rn, rm); + } + + pub fn lsrv(rd: Register, rn: Register, rm: Register) Instruction { + return dataProcessing2Source(0b0, 0b001001, rd, rn, rm); + } + + pub fn asrv(rd: Register, rn: Register, rm: Register) Instruction { + return dataProcessing2Source(0b0, 0b001010, rd, rn, rm); + } + + pub const asrRegister = asrv; + pub const lslRegister = lslv; + pub const lsrRegister = lsrv; +}; + +test { + testing.refAllDecls(@This()); +} + +test "serialize instructions" { + const Testcase = struct { + inst: Instruction, + expected: u32, + }; + + const testcases = [_]Testcase{ + .{ // orr x0, xzr, x1 + .inst = Instruction.orrShiftedRegister(.x0, .xzr, .x1, .lsl, 0), + .expected = 0b1_01_01010_00_0_00001_000000_11111_00000, + }, + .{ // orn x0, xzr, x1 + .inst = Instruction.ornShiftedRegister(.x0, .xzr, .x1, .lsl, 0), + .expected = 0b1_01_01010_00_1_00001_000000_11111_00000, + }, + .{ // movz x1, #4 + .inst = Instruction.movz(.x1, 4, 0), + .expected = 0b1_10_100101_00_0000000000000100_00001, + }, + .{ // movz x1, #4, lsl 16 + .inst = Instruction.movz(.x1, 4, 16), + .expected = 0b1_10_100101_01_0000000000000100_00001, + }, + .{ // movz x1, #4, lsl 32 + .inst = Instruction.movz(.x1, 4, 32), + .expected = 0b1_10_100101_10_0000000000000100_00001, + }, + .{ // movz x1, #4, lsl 48 + .inst = Instruction.movz(.x1, 4, 48), + .expected = 0b1_10_100101_11_0000000000000100_00001, + }, + .{ // movz w1, #4 + .inst = Instruction.movz(.w1, 4, 0), + .expected = 0b0_10_100101_00_0000000000000100_00001, + }, + .{ // movz w1, #4, lsl 16 + .inst = Instruction.movz(.w1, 4, 16), + .expected = 0b0_10_100101_01_0000000000000100_00001, + }, + .{ // svc #0 + .inst = Instruction.svc(0), + .expected = 0b1101_0100_000_0000000000000000_00001, + }, + .{ // svc #0x80 ; typical on Darwin + .inst = Instruction.svc(0x80), + .expected = 0b1101_0100_000_0000000010000000_00001, + }, + .{ // ret + .inst = Instruction.ret(null), + .expected = 0b1101_011_00_10_11111_0000_00_11110_00000, + }, + .{ // bl #0x10 + .inst = Instruction.bl(0x10), + .expected = 0b1_00101_00_0000_0000_0000_0000_0000_0100, + }, + .{ // ldr x2, [x1] + .inst = Instruction.ldr(.x2, .x1, Instruction.LoadStoreOffset.none), + .expected = 0b11_111_0_01_01_000000000000_00001_00010, + }, + .{ // ldr x2, [x1, #1]! 
+ .inst = Instruction.ldr(.x2, .x1, Instruction.LoadStoreOffset.imm_pre_index(1)), + .expected = 0b11_111_0_00_01_0_000000001_11_00001_00010, + }, + .{ // ldr x2, [x1], #-1 + .inst = Instruction.ldr(.x2, .x1, Instruction.LoadStoreOffset.imm_post_index(-1)), + .expected = 0b11_111_0_00_01_0_111111111_01_00001_00010, + }, + .{ // ldr x2, [x1], (x3) + .inst = Instruction.ldr(.x2, .x1, Instruction.LoadStoreOffset.reg(.x3)), + .expected = 0b11_111_0_00_01_1_00011_011_0_10_00001_00010, + }, + .{ // ldr x2, label + .inst = Instruction.ldrLiteral(.x2, 0x1), + .expected = 0b01_011_0_00_0000000000000000001_00010, + }, + .{ // ldrh x7, [x4], #0xaa + .inst = Instruction.ldrh(.x7, .x4, Instruction.LoadStoreOffset.imm_post_index(0xaa)), + .expected = 0b01_111_0_00_01_0_010101010_01_00100_00111, + }, + .{ // ldrb x9, [x15, #0xff]! + .inst = Instruction.ldrb(.x9, .x15, Instruction.LoadStoreOffset.imm_pre_index(0xff)), + .expected = 0b00_111_0_00_01_0_011111111_11_01111_01001, + }, + .{ // str x2, [x1] + .inst = Instruction.str(.x2, .x1, Instruction.LoadStoreOffset.none), + .expected = 0b11_111_0_01_00_000000000000_00001_00010, + }, + .{ // str x2, [x1], (x3) + .inst = Instruction.str(.x2, .x1, Instruction.LoadStoreOffset.reg(.x3)), + .expected = 0b11_111_0_00_00_1_00011_011_0_10_00001_00010, + }, + .{ // strh w0, [x1] + .inst = Instruction.strh(.w0, .x1, Instruction.LoadStoreOffset.none), + .expected = 0b01_111_0_01_00_000000000000_00001_00000, + }, + .{ // strb w8, [x9] + .inst = Instruction.strb(.w8, .x9, Instruction.LoadStoreOffset.none), + .expected = 0b00_111_0_01_00_000000000000_01001_01000, + }, + .{ // adr x2, #0x8 + .inst = Instruction.adr(.x2, 0x8), + .expected = 0b0_00_10000_0000000000000000010_00010, + }, + .{ // adr x2, -#0x8 + .inst = Instruction.adr(.x2, -0x8), + .expected = 0b0_00_10000_1111111111111111110_00010, + }, + .{ // adrp x2, #0x8 + .inst = Instruction.adrp(.x2, 0x8), + .expected = 0b1_00_10000_0000000000000000010_00010, + }, + .{ // adrp x2, -#0x8 + .inst = Instruction.adrp(.x2, -0x8), + .expected = 0b1_00_10000_1111111111111111110_00010, + }, + .{ // stp x1, x2, [sp, #8] + .inst = Instruction.stp(.x1, .x2, .sp, Instruction.LoadStorePairOffset.signed(8)), + .expected = 0b10_101_0_010_0_0000001_00010_11111_00001, + }, + .{ // ldp x1, x2, [sp, #8] + .inst = Instruction.ldp(.x1, .x2, .sp, Instruction.LoadStorePairOffset.signed(8)), + .expected = 0b10_101_0_010_1_0000001_00010_11111_00001, + }, + .{ // stp x1, x2, [sp, #-16]! 
+ .inst = Instruction.stp(.x1, .x2, .sp, Instruction.LoadStorePairOffset.pre_index(-16)), + .expected = 0b10_101_0_011_0_1111110_00010_11111_00001, + }, + .{ // ldp x1, x2, [sp], #16 + .inst = Instruction.ldp(.x1, .x2, .sp, Instruction.LoadStorePairOffset.post_index(16)), + .expected = 0b10_101_0_001_1_0000010_00010_11111_00001, + }, + .{ // and x0, x4, x2 + .inst = Instruction.andShiftedRegister(.x0, .x4, .x2, .lsl, 0), + .expected = 0b1_00_01010_00_0_00010_000000_00100_00000, + }, + .{ // and x0, x4, x2, lsl #0x8 + .inst = Instruction.andShiftedRegister(.x0, .x4, .x2, .lsl, 0x8), + .expected = 0b1_00_01010_00_0_00010_001000_00100_00000, + }, + .{ // add x0, x10, #10 + .inst = Instruction.add(.x0, .x10, 10, false), + .expected = 0b1_0_0_100010_0_0000_0000_1010_01010_00000, + }, + .{ // subs x0, x5, #11, lsl #12 + .inst = Instruction.subs(.x0, .x5, 11, true), + .expected = 0b1_1_1_100010_1_0000_0000_1011_00101_00000, + }, + .{ // b.hi #-4 + .inst = Instruction.bCond(.hi, -4), + .expected = 0b0101010_0_1111111111111111111_0_1000, + }, + .{ // cbz x10, #40 + .inst = Instruction.cbz(.x10, 40), + .expected = 0b1_011010_0_0000000000000001010_01010, + }, + .{ // add x0, x1, x2, lsl #5 + .inst = Instruction.addShiftedRegister(.x0, .x1, .x2, .lsl, 5), + .expected = 0b1_0_0_01011_00_0_00010_000101_00001_00000, + }, + .{ // csinc x1, x2, x4, eq + .inst = Instruction.csinc(.x1, .x2, .x4, .eq), + .expected = 0b1_0_0_11010100_00100_0000_0_1_00010_00001, + }, + .{ // mul x1, x4, x9 + .inst = Instruction.mul(.x1, .x4, .x9), + .expected = 0b1_00_11011_000_01001_0_11111_00100_00001, + }, + .{ // eor x3, x5, #1 + .inst = Instruction.eorImmediate(.x3, .x5, 0b000000, 0b000000, 0b1), + .expected = 0b1_10_100100_1_000000_000000_00101_00011, + }, + .{ // lslv x6, x9, x10 + .inst = Instruction.lslv(.x6, .x9, .x10), + .expected = 0b1_0_0_11010110_01010_0010_00_01001_00110, + }, + .{ // lsl x4, x2, #42 + .inst = Instruction.lslImmediate(.x4, .x2, 42), + .expected = 0b1_10_100110_1_010110_010101_00010_00100, + }, + .{ // lsl x4, x2, #63 + .inst = Instruction.lslImmediate(.x4, .x2, 63), + .expected = 0b1_10_100110_1_000001_000000_00010_00100, + }, + .{ // lsr x4, x2, #42 + .inst = Instruction.lsrImmediate(.x4, .x2, 42), + .expected = 0b1_10_100110_1_101010_111111_00010_00100, + }, + .{ // lsr x4, x2, #63 + .inst = Instruction.lsrImmediate(.x4, .x2, 63), + .expected = 0b1_10_100110_1_111111_111111_00010_00100, + }, + .{ // umull x0, w0, w1 + .inst = Instruction.umull(.x0, .w0, .w1), + .expected = 0b1_00_11011_1_01_00001_0_11111_00000_00000, + }, + .{ // smull x0, w0, w1 + .inst = Instruction.smull(.x0, .w0, .w1), + .expected = 0b1_00_11011_0_01_00001_0_11111_00000_00000, + }, + .{ // tst x0, #0xffffffff00000000 + .inst = Instruction.andsImmediate(.xzr, .x0, 0b011111, 0b100000, 0b1), + .expected = 0b1_11_100100_1_100000_011111_00000_11111, + }, + .{ // umulh x0, x1, x2 + .inst = Instruction.umulh(.x0, .x1, .x2), + .expected = 0b1_00_11011_1_10_00010_0_11111_00001_00000, + }, + .{ // smulh x0, x1, x2 + .inst = Instruction.smulh(.x0, .x1, .x2), + .expected = 0b1_00_11011_0_10_00010_0_11111_00001_00000, + }, + .{ // adds x0, x1, x2, sxtx + .inst = Instruction.addsExtendedRegister(.x0, .x1, .x2, .sxtx, 0), + .expected = 0b1_0_1_01011_00_1_00010_111_000_00001_00000, + }, + }; + + for (testcases) |case| { + const actual = case.inst.toU32(); + try testing.expectEqual(case.expected, actual); + } +} diff --git a/src/archive/archive/zld/main.zig b/src/archive/archive/zld/main.zig new file mode 100644 index 
000000000000..d53bcf578ad7 --- /dev/null +++ b/src/archive/archive/zld/main.zig @@ -0,0 +1,83 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const build_options = @import("build_options"); +const mem = std.mem; + +const Allocator = mem.Allocator; +const Zld = @import("Zld.zig"); + +const gpa = std.heap.c_allocator; + +const usage = + \\zld is a generic linker driver. + \\Call + \\ ELF: ld.zld, ld + \\ MachO: ld64.zld, ld64 + \\ COFF: link-dl + \\ Wasm: wasm-zld +; + +var log_scopes: std.ArrayList([]const u8) = std.ArrayList([]const u8).init(gpa); + +pub const std_options = struct { + pub fn logFn( + comptime level: std.log.Level, + comptime scope: @TypeOf(.EnumLiteral), + comptime format: []const u8, + args: anytype, + ) void { + // Hide debug messages unless: + // * logging enabled with `-Dlog`. + // * the --debug-log arg for the scope has been provided + if (@intFromEnum(level) > @intFromEnum(std.options.log_level) or + @intFromEnum(level) > @intFromEnum(std.log.Level.info)) + { + if (!build_options.enable_logging) return; + + const scope_name = @tagName(scope); + for (log_scopes.items) |log_scope| { + if (mem.eql(u8, log_scope, scope_name)) break; + } else return; + } + + // We only recognize 4 log levels in this application. + const level_txt = switch (level) { + .err => "error", + .warn => "warning", + .info => "info", + .debug => "debug", + }; + const prefix1 = level_txt; + const prefix2 = if (scope == .default) ": " else "(" ++ @tagName(scope) ++ "): "; + + // Print the message to stderr, silently ignoring any errors + std.debug.print(prefix1 ++ prefix2 ++ format ++ "\n", args); + } +}; + +pub fn main() !void { + const all_args = try std.process.argsAlloc(gpa); + defer std.process.argsFree(gpa, all_args); + + const cmd = std.fs.path.basename(all_args[0]); + const tag: Zld.Tag = blk: { + if (mem.eql(u8, cmd, "ld.zld") or mem.eql(u8, cmd, "ld")) { + break :blk .elf; + } else if (mem.eql(u8, cmd, "ld64.zld") or mem.eql(u8, cmd, "ld64")) { + break :blk .macho; + } else if (mem.eql(u8, cmd, "link-zld")) { + break :blk .coff; + } else if (mem.eql(u8, cmd, "wasm-zld")) { + break :blk .wasm; + } else { + std.io.getStdOut().writeAll(usage) catch {}; + std.process.exit(0); + } + }; + return Zld.main(tag, .{ + .gpa = gpa, + .cmd = cmd, + .args = all_args[1..], + .log_scopes = &log_scopes, + }); +} diff --git a/src/archive/archive/zld/strtab.zig b/src/archive/archive/zld/strtab.zig new file mode 100644 index 000000000000..e236d27a765f --- /dev/null +++ b/src/archive/archive/zld/strtab.zig @@ -0,0 +1,113 @@ +const std = @import("std"); +const mem = std.mem; + +const Allocator = mem.Allocator; +const StringIndexAdapter = std.hash_map.StringIndexAdapter; +const StringIndexContext = std.hash_map.StringIndexContext; + +pub fn StringTable(comptime log_scope: @Type(.EnumLiteral)) type { + return struct { + const Self = @This(); + + const log = std.log.scoped(log_scope); + + buffer: std.ArrayListUnmanaged(u8) = .{}, + table: std.HashMapUnmanaged(u32, bool, StringIndexContext, std.hash_map.default_max_load_percentage) = .{}, + + pub fn deinit(self: *Self, gpa: Allocator) void { + self.buffer.deinit(gpa); + self.table.deinit(gpa); + } + + pub fn toOwnedSlice(self: *Self, gpa: Allocator) Allocator.Error![]const u8 { + const result = try self.buffer.toOwnedSlice(gpa); + self.table.clearRetainingCapacity(); + return result; + } + + pub const PrunedResult = struct { + buffer: []const u8, + idx_map: std.AutoHashMap(u32, u32), + }; + + pub fn toPrunedResult(self: *Self, gpa: Allocator) 
!PrunedResult { + var buffer = std.ArrayList(u8).init(gpa); + defer buffer.deinit(); + try buffer.ensureTotalCapacity(self.buffer.items.len); + buffer.appendAssumeCapacity(0); + + var idx_map = std.AutoHashMap(u32, u32).init(gpa); + errdefer idx_map.deinit(); + try idx_map.ensureTotalCapacity(self.table.count()); + + var it = self.table.iterator(); + while (it.next()) |entry| { + const off = entry.key_ptr.*; + const save = entry.value_ptr.*; + if (!save) continue; + const new_off = @as(u32, @intCast(buffer.items.len)); + buffer.appendSliceAssumeCapacity(self.getAssumeExists(off)); + idx_map.putAssumeCapacityNoClobber(off, new_off); + } + + self.buffer.clearRetainingCapacity(); + self.table.clearRetainingCapacity(); + + return PrunedResult{ + .buffer = buffer.toOwnedSlice(), + .idx_map = idx_map, + }; + } + + pub fn insert(self: *Self, gpa: Allocator, string: []const u8) !u32 { + const gop = try self.table.getOrPutContextAdapted(gpa, @as([]const u8, string), StringIndexAdapter{ + .bytes = &self.buffer, + }, StringIndexContext{ + .bytes = &self.buffer, + }); + if (gop.found_existing) { + const off = gop.key_ptr.*; + gop.value_ptr.* = true; + log.debug("reusing string '{s}' at offset 0x{x}", .{ string, off }); + return off; + } + + try self.buffer.ensureUnusedCapacity(gpa, string.len + 1); + const new_off = @as(u32, @intCast(self.buffer.items.len)); + + log.debug("writing new string '{s}' at offset 0x{x}", .{ string, new_off }); + + self.buffer.appendSliceAssumeCapacity(string); + self.buffer.appendAssumeCapacity(0); + + gop.key_ptr.* = new_off; + gop.value_ptr.* = true; + + return new_off; + } + + pub fn delete(self: *Self, string: []const u8) void { + const value_ptr = self.table.getPtrAdapted(@as([]const u8, string), StringIndexAdapter{ + .bytes = &self.buffer, + }) orelse return; + value_ptr.* = false; + log.debug("marked '{s}' for deletion", .{string}); + } + + pub fn getOffset(self: *Self, string: []const u8) ?u32 { + return self.table.getKeyAdapted(string, StringIndexAdapter{ + .bytes = &self.buffer, + }); + } + + pub fn get(self: Self, off: u32) ?[]const u8 { + log.debug("getting string at 0x{x}", .{off}); + if (off >= self.buffer.items.len) return null; + return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.buffer.items.ptr + off)), 0); + } + + pub fn getAssumeExists(self: Self, off: u32) []const u8 { + return self.get(off) orelse unreachable; + } + }; +} diff --git a/src/archive/archive/zld/tapi.zig b/src/archive/archive/zld/tapi.zig new file mode 100644 index 000000000000..c97332984fb3 --- /dev/null +++ b/src/archive/archive/zld/tapi.zig @@ -0,0 +1,168 @@ +const std = @import("std"); +const fs = std.fs; +const mem = std.mem; +const log = std.log.scoped(.tapi); + +const Allocator = mem.Allocator; +const Yaml = @import("tapi/yaml.zig").Yaml; + +const VersionField = union(enum) { + string: []const u8, + float: f64, + int: u64, +}; + +pub const TbdV3 = struct { + archs: []const []const u8, + uuids: []const []const u8, + platform: []const u8, + install_name: []const u8, + current_version: ?VersionField, + compatibility_version: ?VersionField, + objc_constraint: ?[]const u8, + parent_umbrella: ?[]const u8, + exports: ?[]const struct { + archs: []const []const u8, + allowable_clients: ?[]const []const u8, + re_exports: ?[]const []const u8, + symbols: ?[]const []const u8, + weak_symbols: ?[]const []const u8, + objc_classes: ?[]const []const u8, + objc_ivars: ?[]const []const u8, + objc_eh_types: ?[]const []const u8, + }, +}; + +pub const TbdV4 = struct { + tbd_version: u3, + targets: 
[]const []const u8, + uuids: []const struct { + target: []const u8, + value: []const u8, + }, + install_name: []const u8, + current_version: ?VersionField, + compatibility_version: ?VersionField, + reexported_libraries: ?[]const struct { + targets: []const []const u8, + libraries: []const []const u8, + }, + parent_umbrella: ?[]const struct { + targets: []const []const u8, + umbrella: []const u8, + }, + exports: ?[]const struct { + targets: []const []const u8, + symbols: ?[]const []const u8, + weak_symbols: ?[]const []const u8, + objc_classes: ?[]const []const u8, + objc_ivars: ?[]const []const u8, + objc_eh_types: ?[]const []const u8, + }, + reexports: ?[]const struct { + targets: []const []const u8, + symbols: ?[]const []const u8, + weak_symbols: ?[]const []const u8, + objc_classes: ?[]const []const u8, + objc_ivars: ?[]const []const u8, + objc_eh_types: ?[]const []const u8, + }, + allowable_clients: ?[]const struct { + targets: []const []const u8, + clients: []const []const u8, + }, + objc_classes: ?[]const []const u8, + objc_ivars: ?[]const []const u8, + objc_eh_types: ?[]const []const u8, +}; + +pub const Tbd = union(enum) { + v3: TbdV3, + v4: TbdV4, + + pub fn currentVersion(self: Tbd) ?VersionField { + return switch (self) { + .v3 => |v3| v3.current_version, + .v4 => |v4| v4.current_version, + }; + } + + pub fn compatibilityVersion(self: Tbd) ?VersionField { + return switch (self) { + .v3 => |v3| v3.compatibility_version, + .v4 => |v4| v4.compatibility_version, + }; + } + + pub fn installName(self: Tbd) []const u8 { + return switch (self) { + .v3 => |v3| v3.install_name, + .v4 => |v4| v4.install_name, + }; + } +}; + +pub const LibStub = struct { + /// Underlying memory for stub's contents. + yaml: Yaml, + + /// Typed contents of the tbd file. + inner: []Tbd, + + pub fn loadFromFile(allocator: Allocator, file: fs.File) !LibStub { + const source = try file.readToEndAlloc(allocator, std.math.maxInt(u32)); + defer allocator.free(source); + + var lib_stub = LibStub{ + .yaml = try Yaml.load(allocator, source), + .inner = undefined, + }; + + // TODO revisit this logic in the hope of simplifying it. + lib_stub.inner = blk: { + err: { + log.debug("trying to parse as []TbdV4", .{}); + const inner = lib_stub.yaml.parse([]TbdV4) catch break :err; + var out = try lib_stub.yaml.arena.allocator().alloc(Tbd, inner.len); + for (inner, 0..) |doc, i| { + out[i] = .{ .v4 = doc }; + } + break :blk out; + } + + err: { + log.debug("trying to parse as TbdV4", .{}); + const inner = lib_stub.yaml.parse(TbdV4) catch break :err; + var out = try lib_stub.yaml.arena.allocator().alloc(Tbd, 1); + out[0] = .{ .v4 = inner }; + break :blk out; + } + + err: { + log.debug("trying to parse as []TbdV3", .{}); + const inner = lib_stub.yaml.parse([]TbdV3) catch break :err; + var out = try lib_stub.yaml.arena.allocator().alloc(Tbd, inner.len); + for (inner, 0..) 
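The Tbd union above hides the v3/v4 split behind small accessors such as installName(). A simplified stand-in (with far fewer fields than the real TbdV3/TbdV4) shows the dispatch pattern; `Stub` is purely illustrative.

```zig
// Simplified stand-in for the Tbd accessor pattern above; `Stub` is not the
// real type and carries only one field per variant for illustration.
const std = @import("std");

const Stub = union(enum) {
    v3: struct { install_name: []const u8 },
    v4: struct { install_name: []const u8 },

    fn installName(self: Stub) []const u8 {
        return switch (self) {
            .v3 => |v3| v3.install_name,
            .v4 => |v4| v4.install_name,
        };
    }
};

test "accessor dispatches over the active variant" {
    const stub = Stub{ .v4 = .{ .install_name = "/usr/lib/libSystem.B.dylib" } };
    try std.testing.expectEqualStrings("/usr/lib/libSystem.B.dylib", stub.installName());
}
```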
|doc, i| { + out[i] = .{ .v3 = doc }; + } + break :blk out; + } + + err: { + log.debug("trying to parse as TbdV3", .{}); + const inner = lib_stub.yaml.parse(TbdV3) catch break :err; + var out = try lib_stub.yaml.arena.allocator().alloc(Tbd, 1); + out[0] = .{ .v3 = inner }; + break :blk out; + } + + return error.NotLibStub; + }; + + return lib_stub; + } + + pub fn deinit(self: *LibStub) void { + self.yaml.deinit(); + } +}; diff --git a/src/archive/archive/zld/tapi/Tokenizer.zig b/src/archive/archive/zld/tapi/Tokenizer.zig new file mode 100644 index 000000000000..23fd8a389492 --- /dev/null +++ b/src/archive/archive/zld/tapi/Tokenizer.zig @@ -0,0 +1,460 @@ +const Tokenizer = @This(); + +const std = @import("std"); +const log = std.log.scoped(.tapi); +const testing = std.testing; + +buffer: []const u8, +index: usize = 0, + +pub const Token = struct { + id: Id, + start: usize, + end: usize, + + pub const Id = enum { + Eof, + + NewLine, + DocStart, // --- + DocEnd, // ... + SeqItemInd, // - + MapValueInd, // : + FlowMapStart, // { + FlowMapEnd, // } + FlowSeqStart, // [ + FlowSeqEnd, // ] + + Comma, + Space, + Tab, + Comment, // # + Alias, // * + Anchor, // & + Tag, // ! + SingleQuote, // ' + DoubleQuote, // " + + Literal, + }; +}; + +pub const TokenIndex = usize; + +pub const TokenIterator = struct { + buffer: []const Token, + pos: TokenIndex = 0, + + pub fn next(self: *TokenIterator) Token { + const token = self.buffer[self.pos]; + self.pos += 1; + return token; + } + + pub fn peek(self: TokenIterator) ?Token { + if (self.pos >= self.buffer.len) return null; + return self.buffer[self.pos]; + } + + pub fn reset(self: *TokenIterator) void { + self.pos = 0; + } + + pub fn seekTo(self: *TokenIterator, pos: TokenIndex) void { + self.pos = pos; + } + + pub fn seekBy(self: *TokenIterator, offset: isize) void { + const new_pos = @as(isize, @bitCast(self.pos)) + offset; + if (new_pos < 0) { + self.pos = 0; + } else { + self.pos = @as(usize, @intCast(new_pos)); + } + } +}; + +pub fn next(self: *Tokenizer) Token { + var result = Token{ + .id = .Eof, + .start = self.index, + .end = undefined, + }; + + var state: union(enum) { + Start, + NewLine, + Space, + Tab, + Hyphen: usize, + Dot: usize, + Literal, + } = .Start; + + while (self.index < self.buffer.len) : (self.index += 1) { + const c = self.buffer[self.index]; + switch (state) { + .Start => switch (c) { + ' ' => { + state = .Space; + }, + '\t' => { + state = .Tab; + }, + '\n' => { + result.id = .NewLine; + self.index += 1; + break; + }, + '\r' => { + state = .NewLine; + }, + '-' => { + state = .{ .Hyphen = 1 }; + }, + '.' => { + state = .{ .Dot = 1 }; + }, + ',' => { + result.id = .Comma; + self.index += 1; + break; + }, + '#' => { + result.id = .Comment; + self.index += 1; + break; + }, + '*' => { + result.id = .Alias; + self.index += 1; + break; + }, + '&' => { + result.id = .Anchor; + self.index += 1; + break; + }, + '!' 
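A hypothetical consumer of LibStub.loadFromFile above, assuming tapi.zig is importable by its path; the function name, the idea of printing install names, and the caller-supplied .tbd path are illustrative assumptions, not patch code.

```zig
// Hypothetical usage sketch for LibStub from tapi.zig above.
// Function name, path argument, and printing are illustrative assumptions.
const std = @import("std");
const LibStub = @import("tapi.zig").LibStub;

pub fn printInstallNames(gpa: std.mem.Allocator, path: []const u8) !void {
    const file = try std.fs.cwd().openFile(path, .{});
    defer file.close();

    // loadFromFile tries []TbdV4, TbdV4, []TbdV3, then TbdV3 before giving up.
    var lib_stub = try LibStub.loadFromFile(gpa, file);
    defer lib_stub.deinit();

    for (lib_stub.inner) |tbd| {
        std.debug.print("{s}\n", .{tbd.installName()});
    }
}
```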
=> { + result.id = .Tag; + self.index += 1; + break; + }, + '\'' => { + result.id = .SingleQuote; + self.index += 1; + break; + }, + '"' => { + result.id = .DoubleQuote; + self.index += 1; + break; + }, + '[' => { + result.id = .FlowSeqStart; + self.index += 1; + break; + }, + ']' => { + result.id = .FlowSeqEnd; + self.index += 1; + break; + }, + ':' => { + result.id = .MapValueInd; + self.index += 1; + break; + }, + '{' => { + result.id = .FlowMapStart; + self.index += 1; + break; + }, + '}' => { + result.id = .FlowMapEnd; + self.index += 1; + break; + }, + else => { + state = .Literal; + }, + }, + .Space => switch (c) { + ' ' => {}, + else => { + result.id = .Space; + break; + }, + }, + .Tab => switch (c) { + '\t' => {}, + else => { + result.id = .Tab; + break; + }, + }, + .NewLine => switch (c) { + '\n' => { + result.id = .NewLine; + self.index += 1; + break; + }, + else => {}, // TODO this should be an error condition + }, + .Hyphen => |*count| switch (c) { + ' ' => { + result.id = .SeqItemInd; + self.index += 1; + break; + }, + '-' => { + count.* += 1; + + if (count.* == 3) { + result.id = .DocStart; + self.index += 1; + break; + } + }, + else => { + state = .Literal; + }, + }, + .Dot => |*count| switch (c) { + '.' => { + count.* += 1; + + if (count.* == 3) { + result.id = .DocEnd; + self.index += 1; + break; + } + }, + else => { + state = .Literal; + }, + }, + .Literal => switch (c) { + '\r', '\n', ' ', '\'', '"', ',', ':', ']', '}' => { + result.id = .Literal; + break; + }, + else => { + result.id = .Literal; + }, + }, + } + } + + if (state == .Literal and result.id == .Eof) { + result.id = .Literal; + } + + result.end = self.index; + + log.debug("{any}", .{result}); + log.debug(" | {s}", .{self.buffer[result.start..result.end]}); + + return result; +} + +fn testExpected(source: []const u8, expected: []const Token.Id) !void { + var tokenizer = Tokenizer{ + .buffer = source, + }; + + var token_len: usize = 0; + for (expected) |exp| { + token_len += 1; + const token = tokenizer.next(); + try testing.expectEqual(exp, token.id); + } + + while (tokenizer.next().id != .Eof) { + token_len += 1; // consume all tokens + } + + try testing.expectEqual(expected.len, token_len); +} + +test "empty doc" { + try testExpected("", &[_]Token.Id{.Eof}); +} + +test "empty doc with explicit markers" { + try testExpected( + \\--- + \\... 
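To show how a consumer drives the tokenizer above, a short hedged test feeds it a single mapping and walks the token stream to Eof; the import path is an assumption.

```zig
// Illustrative driver for the Tokenizer above; the import path is an assumption.
const std = @import("std");
const Tokenizer = @import("Tokenizer.zig");

test "token stream for a single mapping" {
    var tokenizer = Tokenizer{ .buffer = "key: value" };
    const expected = [_]Tokenizer.Token.Id{ .Literal, .MapValueInd, .Space, .Literal, .Eof };
    for (expected) |id| {
        try std.testing.expectEqual(id, tokenizer.next().id);
    }
}
```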
+ , &[_]Token.Id{ + .DocStart, .NewLine, .DocEnd, .Eof, + }); +} + +test "sequence of values" { + try testExpected( + \\- 0 + \\- 1 + \\- 2 + , &[_]Token.Id{ + .SeqItemInd, + .Literal, + .NewLine, + .SeqItemInd, + .Literal, + .NewLine, + .SeqItemInd, + .Literal, + .Eof, + }); +} + +test "sequence of sequences" { + try testExpected( + \\- [ val1, val2] + \\- [val3, val4 ] + , &[_]Token.Id{ + .SeqItemInd, + .FlowSeqStart, + .Space, + .Literal, + .Comma, + .Space, + .Literal, + .FlowSeqEnd, + .NewLine, + .SeqItemInd, + .FlowSeqStart, + .Literal, + .Comma, + .Space, + .Literal, + .Space, + .FlowSeqEnd, + .Eof, + }); +} + +test "mappings" { + try testExpected( + \\key1: value1 + \\key2: value2 + , &[_]Token.Id{ + .Literal, + .MapValueInd, + .Space, + .Literal, + .NewLine, + .Literal, + .MapValueInd, + .Space, + .Literal, + .Eof, + }); +} + +test "inline mapped sequence of values" { + try testExpected( + \\key : [ val1, + \\ val2 ] + , &[_]Token.Id{ + .Literal, + .Space, + .MapValueInd, + .Space, + .FlowSeqStart, + .Space, + .Literal, + .Comma, + .Space, + .NewLine, + .Space, + .Literal, + .Space, + .FlowSeqEnd, + .Eof, + }); +} + +test "part of tbd" { + try testExpected( + \\--- !tapi-tbd + \\tbd-version: 4 + \\targets: [ x86_64-macos ] + \\ + \\uuids: + \\ - target: x86_64-macos + \\ value: F86CC732-D5E4-30B5-AA7D-167DF5EC2708 + \\ + \\install-name: '/usr/lib/libSystem.B.dylib' + \\... + , &[_]Token.Id{ + .DocStart, + .Space, + .Tag, + .Literal, + .NewLine, + .Literal, + .MapValueInd, + .Space, + .Literal, + .NewLine, + .Literal, + .MapValueInd, + .Space, + .FlowSeqStart, + .Space, + .Literal, + .Space, + .FlowSeqEnd, + .NewLine, + .NewLine, + .Literal, + .MapValueInd, + .NewLine, + .Space, + .SeqItemInd, + .Literal, + .MapValueInd, + .Space, + .Literal, + .NewLine, + .Space, + .Literal, + .MapValueInd, + .Space, + .Literal, + .NewLine, + .NewLine, + .Literal, + .MapValueInd, + .Space, + .SingleQuote, + .Literal, + .SingleQuote, + .NewLine, + .DocEnd, + .Eof, + }); +} + +test "Unindented list" { + try testExpected( + \\b: + \\- foo: 1 + \\c: 1 + , &[_]Token.Id{ + .Literal, + .MapValueInd, + .NewLine, + .SeqItemInd, + .Literal, + .MapValueInd, + .Space, + .Literal, + .NewLine, + .Literal, + .MapValueInd, + .Space, + .Literal, + }); +} diff --git a/src/archive/archive/zld/tapi/parse.zig b/src/archive/archive/zld/tapi/parse.zig new file mode 100644 index 000000000000..eb7bb2a0cfc9 --- /dev/null +++ b/src/archive/archive/zld/tapi/parse.zig @@ -0,0 +1,690 @@ +const std = @import("std"); +const assert = std.debug.assert; +const log = std.log.scoped(.tapi); +const mem = std.mem; +const testing = std.testing; + +const Allocator = mem.Allocator; +const Tokenizer = @import("Tokenizer.zig"); +const Token = Tokenizer.Token; +const TokenIndex = Tokenizer.TokenIndex; +const TokenIterator = Tokenizer.TokenIterator; + +pub const ParseError = error{ + MalformedYaml, + NestedDocuments, + UnexpectedTag, + UnexpectedEof, + UnexpectedToken, + Unhandled, +} || Allocator.Error; + +pub const Node = struct { + tag: Tag, + tree: *const Tree, + + pub const Tag = enum { + doc, + map, + list, + value, + }; + + pub fn cast(self: *const Node, comptime T: type) ?*const T { + if (self.tag != T.base_tag) { + return null; + } + return @fieldParentPtr(T, "base", self); + } + + pub fn deinit(self: *Node, allocator: Allocator) void { + switch (self.tag) { + .doc => @fieldParentPtr(Node.Doc, "base", self).deinit(allocator), + .map => @fieldParentPtr(Node.Map, "base", self).deinit(allocator), + .list => @fieldParentPtr(Node.List, 
"base", self).deinit(allocator), + .value => @fieldParentPtr(Node.Value, "base", self).deinit(allocator), + } + } + + pub fn format( + self: *const Node, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + return switch (self.tag) { + .doc => @fieldParentPtr(Node.Doc, "base", self).format(fmt, options, writer), + .map => @fieldParentPtr(Node.Map, "base", self).format(fmt, options, writer), + .list => @fieldParentPtr(Node.List, "base", self).format(fmt, options, writer), + .value => @fieldParentPtr(Node.Value, "base", self).format(fmt, options, writer), + }; + } + + pub const Doc = struct { + base: Node = Node{ .tag = Tag.doc, .tree = undefined }, + start: ?TokenIndex = null, + end: ?TokenIndex = null, + directive: ?TokenIndex = null, + value: ?*Node = null, + + pub const base_tag: Node.Tag = .doc; + + pub fn deinit(self: *Doc, allocator: Allocator) void { + if (self.value) |node| { + node.deinit(allocator); + allocator.destroy(node); + } + } + + pub fn format( + self: *const Doc, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = options; + _ = fmt; + if (self.directive) |id| { + try std.fmt.format(writer, "{{ ", .{}); + const directive = self.base.tree.tokens[id]; + try std.fmt.format(writer, ".directive = {s}, ", .{ + self.base.tree.source[directive.start..directive.end], + }); + } + if (self.value) |node| { + try std.fmt.format(writer, "{}", .{node}); + } + if (self.directive != null) { + try std.fmt.format(writer, " }}", .{}); + } + } + }; + + pub const Map = struct { + base: Node = Node{ .tag = Tag.map, .tree = undefined }, + start: ?TokenIndex = null, + end: ?TokenIndex = null, + values: std.ArrayListUnmanaged(Entry) = .{}, + + pub const base_tag: Node.Tag = .map; + + pub const Entry = struct { + key: TokenIndex, + value: *Node, + }; + + pub fn deinit(self: *Map, allocator: Allocator) void { + for (self.values.items) |entry| { + entry.value.deinit(allocator); + allocator.destroy(entry.value); + } + self.values.deinit(allocator); + } + + pub fn format( + self: *const Map, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = options; + _ = fmt; + try std.fmt.format(writer, "{{ ", .{}); + for (self.values.items) |entry| { + const key = self.base.tree.tokens[entry.key]; + try std.fmt.format(writer, "{s} => {}, ", .{ + self.base.tree.source[key.start..key.end], + entry.value, + }); + } + return std.fmt.format(writer, " }}", .{}); + } + }; + + pub const List = struct { + base: Node = Node{ .tag = Tag.list, .tree = undefined }, + start: ?TokenIndex = null, + end: ?TokenIndex = null, + values: std.ArrayListUnmanaged(*Node) = .{}, + + pub const base_tag: Node.Tag = .list; + + pub fn deinit(self: *List, allocator: Allocator) void { + for (self.values.items) |node| { + node.deinit(allocator); + allocator.destroy(node); + } + self.values.deinit(allocator); + } + + pub fn format( + self: *const List, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = options; + _ = fmt; + try std.fmt.format(writer, "[ ", .{}); + for (self.values.items) |node| { + try std.fmt.format(writer, "{}, ", .{node}); + } + return std.fmt.format(writer, " ]", .{}); + } + }; + + pub const Value = struct { + base: Node = Node{ .tag = Tag.value, .tree = undefined }, + start: ?TokenIndex = null, + end: ?TokenIndex = null, + + pub const base_tag: Node.Tag = .value; + + pub fn deinit(self: *Value, allocator: Allocator) void { + _ = self; + _ = 
allocator; + } + + pub fn format( + self: *const Value, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = options; + _ = fmt; + const start = self.base.tree.tokens[self.start.?]; + const end = self.base.tree.tokens[self.end.?]; + return std.fmt.format(writer, "{s}", .{ + self.base.tree.source[start.start..end.end], + }); + } + }; +}; + +pub const LineCol = struct { + line: usize, + col: usize, +}; + +pub const Tree = struct { + allocator: Allocator, + source: []const u8, + tokens: []Token, + line_cols: std.AutoHashMap(TokenIndex, LineCol), + docs: std.ArrayListUnmanaged(*Node) = .{}, + + pub fn init(allocator: Allocator) Tree { + return .{ + .allocator = allocator, + .source = undefined, + .tokens = undefined, + .line_cols = std.AutoHashMap(TokenIndex, LineCol).init(allocator), + }; + } + + pub fn deinit(self: *Tree) void { + self.allocator.free(self.tokens); + self.line_cols.deinit(); + for (self.docs.items) |doc| { + doc.deinit(self.allocator); + self.allocator.destroy(doc); + } + self.docs.deinit(self.allocator); + } + + pub fn parse(self: *Tree, source: []const u8) !void { + var tokenizer = Tokenizer{ .buffer = source }; + var tokens = std.ArrayList(Token).init(self.allocator); + defer tokens.deinit(); + + var line: usize = 0; + var prev_line_last_col: usize = 0; + + while (true) { + const token = tokenizer.next(); + const tok_id = tokens.items.len; + try tokens.append(token); + + try self.line_cols.putNoClobber(tok_id, .{ + .line = line, + .col = token.start - prev_line_last_col, + }); + + switch (token.id) { + .Eof => break, + .NewLine => { + line += 1; + prev_line_last_col = token.end; + }, + else => {}, + } + } + + self.source = source; + self.tokens = try tokens.toOwnedSlice(); + + var it = TokenIterator{ .buffer = self.tokens }; + var parser = Parser{ + .allocator = self.allocator, + .tree = self, + .token_it = &it, + .line_cols = &self.line_cols, + }; + + while (true) { + if (parser.token_it.peek() == null) return; + + const pos = parser.token_it.pos; + const token = parser.token_it.next(); + + log.debug("Next token: {}, {}", .{ pos, token }); + + switch (token.id) { + .Space, .Comment, .NewLine => {}, + .Eof => break, + else => { + const doc = try parser.doc(pos); + try self.docs.append(self.allocator, &doc.base); + }, + } + } + } +}; + +const Parser = struct { + allocator: Allocator, + tree: *Tree, + token_it: *TokenIterator, + line_cols: *const std.AutoHashMap(TokenIndex, LineCol), + + fn doc(self: *Parser, start: TokenIndex) ParseError!*Node.Doc { + const node = try self.allocator.create(Node.Doc); + errdefer self.allocator.destroy(node); + node.* = .{ .start = start }; + node.base.tree = self.tree; + + self.token_it.seekTo(start); + + log.debug("Doc start: {}, {}", .{ start, self.tree.tokens[start] }); + + const explicit_doc: bool = if (self.eatToken(.DocStart)) |_| explicit_doc: { + if (self.eatToken(.Tag)) |_| { + node.directive = try self.expectToken(.Literal); + } + _ = try self.expectToken(.NewLine); + break :explicit_doc true; + } else false; + + while (true) { + const pos = self.token_it.pos; + const token = self.token_it.next(); + + log.debug("Next token: {}, {}", .{ pos, token }); + + switch (token.id) { + .Tag => { + return error.UnexpectedTag; + }, + .Literal, .SingleQuote, .DoubleQuote => { + _ = try self.expectToken(.MapValueInd); + const map_node = try self.map(pos); + node.value = &map_node.base; + }, + .SeqItemInd => { + const list_node = try self.list(pos); + node.value = &list_node.base; + }, + .FlowSeqStart 
=> { + const list_node = try self.list_bracketed(pos); + node.value = &list_node.base; + }, + .DocEnd => { + if (explicit_doc) break; + return error.UnexpectedToken; + }, + .DocStart, .Eof => { + self.token_it.seekBy(-1); + break; + }, + else => { + return error.UnexpectedToken; + }, + } + } + + node.end = self.token_it.pos - 1; + + log.debug("Doc end: {}, {}", .{ node.end.?, self.tree.tokens[node.end.?] }); + + return node; + } + + fn map(self: *Parser, start: TokenIndex) ParseError!*Node.Map { + const node = try self.allocator.create(Node.Map); + errdefer self.allocator.destroy(node); + node.* = .{ .start = start }; + node.base.tree = self.tree; + + self.token_it.seekTo(start); + + log.debug("Map start: {}, {}", .{ start, self.tree.tokens[start] }); + + const col = self.getCol(start); + + while (true) { + self.eatCommentsAndSpace(); + + // Parse key. + const key_pos = self.token_it.pos; + if (self.getCol(key_pos) != col) { + break; + } + + const key = self.token_it.next(); + switch (key.id) { + .Literal => {}, + else => { + self.token_it.seekBy(-1); + break; + }, + } + + log.debug("Map key: {}, '{s}'", .{ key, self.tree.source[key.start..key.end] }); + + // Separator + _ = try self.expectToken(.MapValueInd); + + // Parse value. + const value: *Node = value: { + if (self.eatToken(.NewLine)) |_| { + self.eatCommentsAndSpace(); + + // Explicit, complex value such as list or map. + const value_pos = self.token_it.pos; + const value = self.token_it.next(); + switch (value.id) { + .Literal, .SingleQuote, .DoubleQuote => { + // Assume nested map. + const map_node = try self.map(value_pos); + break :value &map_node.base; + }, + .SeqItemInd => { + // Assume list of values. + const list_node = try self.list(value_pos); + break :value &list_node.base; + }, + else => { + log.err("{}", .{key}); + return error.Unhandled; + }, + } + } else { + self.eatCommentsAndSpace(); + + const value_pos = self.token_it.pos; + const value = self.token_it.next(); + switch (value.id) { + .Literal, .SingleQuote, .DoubleQuote => { + // Assume leaf value. + const leaf_node = try self.leaf_value(value_pos); + break :value &leaf_node.base; + }, + .FlowSeqStart => { + const list_node = try self.list_bracketed(value_pos); + break :value &list_node.base; + }, + else => { + log.err("{}", .{key}); + return error.Unhandled; + }, + } + } + }; + log.debug("Map value: {}", .{value}); + + try node.values.append(self.allocator, .{ + .key = key_pos, + .value = value, + }); + + _ = self.eatToken(.NewLine); + } + + node.end = self.token_it.pos - 1; + + log.debug("Map end: {}, {}", .{ node.end.?, self.tree.tokens[node.end.?] 
}); + + return node; + } + + fn list(self: *Parser, start: TokenIndex) ParseError!*Node.List { + const node = try self.allocator.create(Node.List); + errdefer self.allocator.destroy(node); + node.* = .{ + .start = start, + }; + node.base.tree = self.tree; + + self.token_it.seekTo(start); + + log.debug("List start: {}, {}", .{ start, self.tree.tokens[start] }); + + const col = self.getCol(start); + + while (true) { + self.eatCommentsAndSpace(); + + if (self.getCol(self.token_it.pos) != col) { + break; + } + _ = self.eatToken(.SeqItemInd) orelse { + break; + }; + + const pos = self.token_it.pos; + const token = self.token_it.next(); + const value: *Node = value: { + switch (token.id) { + .Literal, .SingleQuote, .DoubleQuote => { + if (self.eatToken(.MapValueInd)) |_| { + // nested map + const map_node = try self.map(pos); + break :value &map_node.base; + } else { + // standalone (leaf) value + const leaf_node = try self.leaf_value(pos); + break :value &leaf_node.base; + } + }, + .FlowSeqStart => { + const list_node = try self.list_bracketed(pos); + break :value &list_node.base; + }, + else => { + log.err("{}", .{token}); + return error.Unhandled; + }, + } + }; + try node.values.append(self.allocator, value); + + _ = self.eatToken(.NewLine); + } + + node.end = self.token_it.pos - 1; + + log.debug("List end: {}, {}", .{ node.end.?, self.tree.tokens[node.end.?] }); + + return node; + } + + fn list_bracketed(self: *Parser, start: TokenIndex) ParseError!*Node.List { + const node = try self.allocator.create(Node.List); + errdefer self.allocator.destroy(node); + node.* = .{ .start = start }; + node.base.tree = self.tree; + + self.token_it.seekTo(start); + + log.debug("List start: {}, {}", .{ start, self.tree.tokens[start] }); + + _ = try self.expectToken(.FlowSeqStart); + + while (true) { + _ = self.eatToken(.NewLine); + self.eatCommentsAndSpace(); + + const pos = self.token_it.pos; + const token = self.token_it.next(); + + log.debug("Next token: {}, {}", .{ pos, token }); + + const value: *Node = value: { + switch (token.id) { + .FlowSeqStart => { + const list_node = try self.list_bracketed(pos); + break :value &list_node.base; + }, + .FlowSeqEnd => { + break; + }, + .Literal, .SingleQuote, .DoubleQuote => { + const leaf_node = try self.leaf_value(pos); + _ = self.eatToken(.Comma); + // TODO newline + break :value &leaf_node.base; + }, + else => { + log.err("{}", .{token}); + return error.Unhandled; + }, + } + }; + try node.values.append(self.allocator, value); + } + + node.end = self.token_it.pos - 1; + + log.debug("List end: {}, {}", .{ node.end.?, self.tree.tokens[node.end.?] }); + + return node; + } + + fn leaf_value(self: *Parser, start: TokenIndex) ParseError!*Node.Value { + const node = try self.allocator.create(Node.Value); + errdefer self.allocator.destroy(node); + node.* = .{ .start = start }; + node.base.tree = self.tree; + + self.token_it.seekTo(start); + + log.debug("Leaf start: {}, {}", .{ node.start.?, self.tree.tokens[node.start.?] }); + + parse: { + if (self.eatToken(.SingleQuote)) |_| { + node.start = node.start.? + 1; + while (true) { + const tok = self.token_it.next(); + switch (tok.id) { + .SingleQuote => { + node.end = self.token_it.pos - 2; + break :parse; + }, + .NewLine => return error.UnexpectedToken, + else => {}, + } + } + } + + if (self.eatToken(.DoubleQuote)) |_| { + node.start = node.start.? 
+ 1; + while (true) { + const tok = self.token_it.next(); + switch (tok.id) { + .DoubleQuote => { + node.end = self.token_it.pos - 2; + break :parse; + }, + .NewLine => return error.UnexpectedToken, + else => {}, + } + } + } + + // TODO handle multiline strings in new block scope + while (true) { + const tok = self.token_it.next(); + switch (tok.id) { + .Literal => {}, + .Space => { + const trailing = self.token_it.pos - 2; + self.eatCommentsAndSpace(); + if (self.token_it.peek()) |peek| { + if (peek.id != .Literal) { + node.end = trailing; + break; + } + } + }, + else => { + self.token_it.seekBy(-1); + node.end = self.token_it.pos - 1; + break; + }, + } + } + } + + log.debug("Leaf end: {}, {}", .{ node.end.?, self.tree.tokens[node.end.?] }); + + return node; + } + + fn eatCommentsAndSpace(self: *Parser) void { + while (true) { + _ = self.token_it.peek() orelse return; + const token = self.token_it.next(); + switch (token.id) { + .Comment, .Space => {}, + else => { + self.token_it.seekBy(-1); + break; + }, + } + } + } + + fn eatToken(self: *Parser, id: Token.Id) ?TokenIndex { + while (true) { + const pos = self.token_it.pos; + _ = self.token_it.peek() orelse return null; + const token = self.token_it.next(); + switch (token.id) { + .Comment, .Space => continue, + else => |next_id| if (next_id == id) { + return pos; + } else { + self.token_it.seekTo(pos); + return null; + }, + } + } + } + + fn expectToken(self: *Parser, id: Token.Id) ParseError!TokenIndex { + return self.eatToken(id) orelse error.UnexpectedToken; + } + + fn getLine(self: *Parser, index: TokenIndex) usize { + return self.line_cols.get(index).?.line; + } + + fn getCol(self: *Parser, index: TokenIndex) usize { + return self.line_cols.get(index).?.col; + } +}; + +test { + _ = @import("parse/test.zig"); +} diff --git a/src/archive/archive/zld/tapi/parse/test.zig b/src/archive/archive/zld/tapi/parse/test.zig new file mode 100644 index 000000000000..b310a5c0bd05 --- /dev/null +++ b/src/archive/archive/zld/tapi/parse/test.zig @@ -0,0 +1,558 @@ +const std = @import("std"); +const mem = std.mem; +const testing = std.testing; +const parse = @import("../parse.zig"); + +const Node = parse.Node; +const Tree = parse.Tree; + +test "explicit doc" { + const source = + \\--- !tapi-tbd + \\tbd-version: 4 + \\abc-version: 5 + \\... 
+ ; + + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.start.?, 0); + try testing.expectEqual(doc.end.?, tree.tokens.len - 2); + + const directive = tree.tokens[doc.directive.?]; + try testing.expectEqual(directive.id, .Literal); + try testing.expect(mem.eql(u8, "tapi-tbd", tree.source[directive.start..directive.end])); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .map); + + const map = doc.value.?.cast(Node.Map).?; + try testing.expectEqual(map.start.?, 5); + try testing.expectEqual(map.end.?, 14); + try testing.expectEqual(map.values.items.len, 2); + + { + const entry = map.values.items[0]; + + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .Literal); + try testing.expect(mem.eql(u8, "tbd-version", tree.source[key.start..key.end])); + + const value = entry.value.cast(Node.Value).?; + const value_tok = tree.tokens[value.start.?]; + try testing.expectEqual(value_tok.id, .Literal); + try testing.expect(mem.eql(u8, "4", tree.source[value_tok.start..value_tok.end])); + } + + { + const entry = map.values.items[1]; + + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .Literal); + try testing.expect(mem.eql(u8, "abc-version", tree.source[key.start..key.end])); + + const value = entry.value.cast(Node.Value).?; + const value_tok = tree.tokens[value.start.?]; + try testing.expectEqual(value_tok.id, .Literal); + try testing.expect(mem.eql(u8, "5", tree.source[value_tok.start..value_tok.end])); + } +} + +test "leaf in quotes" { + const source = + \\key1: no quotes + \\key2: 'single quoted' + \\key3: "double quoted" + ; + + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.start.?, 0); + try testing.expectEqual(doc.end.?, tree.tokens.len - 2); + try testing.expect(doc.directive == null); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .map); + + const map = doc.value.?.cast(Node.Map).?; + try testing.expectEqual(map.start.?, 0); + try testing.expectEqual(map.end.?, tree.tokens.len - 2); + try testing.expectEqual(map.values.items.len, 3); + + { + const entry = map.values.items[0]; + + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .Literal); + try testing.expect(mem.eql( + u8, + "key1", + tree.source[key.start..key.end], + )); + + const value = entry.value.cast(Node.Value).?; + const start = tree.tokens[value.start.?]; + const end = tree.tokens[value.end.?]; + try testing.expectEqual(start.id, .Literal); + try testing.expectEqual(end.id, .Literal); + try testing.expect(mem.eql( + u8, + "no quotes", + tree.source[start.start..end.end], + )); + } +} + +test "nested maps" { + const source = + \\key1: + \\ key1_1 : value1_1 + \\ key1_2 : value1_2 + \\key2 : value2 + ; + + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.start.?, 0); + try testing.expectEqual(doc.end.?, tree.tokens.len - 2); + try testing.expect(doc.directive == null); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .map); + + const map = 
doc.value.?.cast(Node.Map).?; + try testing.expectEqual(map.start.?, 0); + try testing.expectEqual(map.end.?, tree.tokens.len - 2); + try testing.expectEqual(map.values.items.len, 2); + + { + const entry = map.values.items[0]; + + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .Literal); + try testing.expect(mem.eql(u8, "key1", tree.source[key.start..key.end])); + + const nested_map = entry.value.cast(Node.Map).?; + try testing.expectEqual(nested_map.start.?, 4); + try testing.expectEqual(nested_map.end.?, 16); + try testing.expectEqual(nested_map.values.items.len, 2); + + { + const nested_entry = nested_map.values.items[0]; + + const nested_key = tree.tokens[nested_entry.key]; + try testing.expectEqual(nested_key.id, .Literal); + try testing.expect(mem.eql( + u8, + "key1_1", + tree.source[nested_key.start..nested_key.end], + )); + + const nested_value = nested_entry.value.cast(Node.Value).?; + const nested_value_tok = tree.tokens[nested_value.start.?]; + try testing.expectEqual(nested_value_tok.id, .Literal); + try testing.expect(mem.eql( + u8, + "value1_1", + tree.source[nested_value_tok.start..nested_value_tok.end], + )); + } + + { + const nested_entry = nested_map.values.items[1]; + + const nested_key = tree.tokens[nested_entry.key]; + try testing.expectEqual(nested_key.id, .Literal); + try testing.expect(mem.eql( + u8, + "key1_2", + tree.source[nested_key.start..nested_key.end], + )); + + const nested_value = nested_entry.value.cast(Node.Value).?; + const nested_value_tok = tree.tokens[nested_value.start.?]; + try testing.expectEqual(nested_value_tok.id, .Literal); + try testing.expect(mem.eql( + u8, + "value1_2", + tree.source[nested_value_tok.start..nested_value_tok.end], + )); + } + } + + { + const entry = map.values.items[1]; + + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .Literal); + try testing.expect(mem.eql(u8, "key2", tree.source[key.start..key.end])); + + const value = entry.value.cast(Node.Value).?; + const value_tok = tree.tokens[value.start.?]; + try testing.expectEqual(value_tok.id, .Literal); + try testing.expect(mem.eql( + u8, + "value2", + tree.source[value_tok.start..value_tok.end], + )); + } +} + +test "map of list of values" { + const source = + \\ints: + \\ - 0 + \\ - 1 + \\ - 2 + ; + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.start.?, 0); + try testing.expectEqual(doc.end.?, tree.tokens.len - 2); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .map); + + const map = doc.value.?.cast(Node.Map).?; + try testing.expectEqual(map.start.?, 0); + try testing.expectEqual(map.end.?, tree.tokens.len - 2); + try testing.expectEqual(map.values.items.len, 1); + + const entry = map.values.items[0]; + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .Literal); + try testing.expect(mem.eql(u8, "ints", tree.source[key.start..key.end])); + + const value = entry.value.cast(Node.List).?; + try testing.expectEqual(value.start.?, 4); + try testing.expectEqual(value.end.?, tree.tokens.len - 2); + try testing.expectEqual(value.values.items.len, 3); + + { + const elem = value.values.items[0].cast(Node.Value).?; + const leaf = tree.tokens[elem.start.?]; + try testing.expectEqual(leaf.id, .Literal); + try testing.expect(mem.eql(u8, "0", tree.source[leaf.start..leaf.end])); + } + + { + const elem = 
value.values.items[1].cast(Node.Value).?; + const leaf = tree.tokens[elem.start.?]; + try testing.expectEqual(leaf.id, .Literal); + try testing.expect(mem.eql(u8, "1", tree.source[leaf.start..leaf.end])); + } + + { + const elem = value.values.items[2].cast(Node.Value).?; + const leaf = tree.tokens[elem.start.?]; + try testing.expectEqual(leaf.id, .Literal); + try testing.expect(mem.eql(u8, "2", tree.source[leaf.start..leaf.end])); + } +} + +test "map of list of maps" { + const source = + \\key1: + \\- key2 : value2 + \\- key3 : value3 + \\- key4 : value4 + ; + + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.start.?, 0); + try testing.expectEqual(doc.end.?, tree.tokens.len - 2); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .map); + + const map = doc.value.?.cast(Node.Map).?; + try testing.expectEqual(map.start.?, 0); + try testing.expectEqual(map.end.?, tree.tokens.len - 2); + try testing.expectEqual(map.values.items.len, 1); + + const entry = map.values.items[0]; + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .Literal); + try testing.expect(mem.eql(u8, "key1", tree.source[key.start..key.end])); + + const value = entry.value.cast(Node.List).?; + try testing.expectEqual(value.start.?, 3); + try testing.expectEqual(value.end.?, tree.tokens.len - 2); + try testing.expectEqual(value.values.items.len, 3); + + { + const elem = value.values.items[0].cast(Node.Map).?; + const nested = elem.values.items[0]; + const nested_key = tree.tokens[nested.key]; + try testing.expectEqual(nested_key.id, .Literal); + try testing.expect(mem.eql(u8, "key2", tree.source[nested_key.start..nested_key.end])); + + const nested_v = nested.value.cast(Node.Value).?; + const leaf = tree.tokens[nested_v.start.?]; + try testing.expectEqual(leaf.id, .Literal); + try testing.expect(mem.eql(u8, "value2", tree.source[leaf.start..leaf.end])); + } + + { + const elem = value.values.items[1].cast(Node.Map).?; + const nested = elem.values.items[0]; + const nested_key = tree.tokens[nested.key]; + try testing.expectEqual(nested_key.id, .Literal); + try testing.expect(mem.eql(u8, "key3", tree.source[nested_key.start..nested_key.end])); + + const nested_v = nested.value.cast(Node.Value).?; + const leaf = tree.tokens[nested_v.start.?]; + try testing.expectEqual(leaf.id, .Literal); + try testing.expect(mem.eql(u8, "value3", tree.source[leaf.start..leaf.end])); + } + + { + const elem = value.values.items[2].cast(Node.Map).?; + const nested = elem.values.items[0]; + const nested_key = tree.tokens[nested.key]; + try testing.expectEqual(nested_key.id, .Literal); + try testing.expect(mem.eql(u8, "key4", tree.source[nested_key.start..nested_key.end])); + + const nested_v = nested.value.cast(Node.Value).?; + const leaf = tree.tokens[nested_v.start.?]; + try testing.expectEqual(leaf.id, .Literal); + try testing.expect(mem.eql(u8, "value4", tree.source[leaf.start..leaf.end])); + } +} + +test "list of lists" { + const source = + \\- [name , hr, avg ] + \\- [Mark McGwire , 65, 0.278] + \\- [Sammy Sosa , 63, 0.288] + ; + + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.start.?, 0); + try testing.expectEqual(doc.end.?, tree.tokens.len 
- 2); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .list); + + const list = doc.value.?.cast(Node.List).?; + try testing.expectEqual(list.start.?, 0); + try testing.expectEqual(list.end.?, tree.tokens.len - 2); + try testing.expectEqual(list.values.items.len, 3); + + { + try testing.expectEqual(list.values.items[0].tag, .list); + const nested = list.values.items[0].cast(Node.List).?; + try testing.expectEqual(nested.values.items.len, 3); + + { + try testing.expectEqual(nested.values.items[0].tag, .value); + const value = nested.values.items[0].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "name", tree.source[leaf.start..leaf.end])); + } + + { + try testing.expectEqual(nested.values.items[1].tag, .value); + const value = nested.values.items[1].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "hr", tree.source[leaf.start..leaf.end])); + } + + { + try testing.expectEqual(nested.values.items[2].tag, .value); + const value = nested.values.items[2].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "avg", tree.source[leaf.start..leaf.end])); + } + } + + { + try testing.expectEqual(list.values.items[1].tag, .list); + const nested = list.values.items[1].cast(Node.List).?; + try testing.expectEqual(nested.values.items.len, 3); + + { + try testing.expectEqual(nested.values.items[0].tag, .value); + const value = nested.values.items[0].cast(Node.Value).?; + const start = tree.tokens[value.start.?]; + const end = tree.tokens[value.end.?]; + try testing.expect(mem.eql(u8, "Mark McGwire", tree.source[start.start..end.end])); + } + + { + try testing.expectEqual(nested.values.items[1].tag, .value); + const value = nested.values.items[1].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "65", tree.source[leaf.start..leaf.end])); + } + + { + try testing.expectEqual(nested.values.items[2].tag, .value); + const value = nested.values.items[2].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "0.278", tree.source[leaf.start..leaf.end])); + } + } + + { + try testing.expectEqual(list.values.items[2].tag, .list); + const nested = list.values.items[2].cast(Node.List).?; + try testing.expectEqual(nested.values.items.len, 3); + + { + try testing.expectEqual(nested.values.items[0].tag, .value); + const value = nested.values.items[0].cast(Node.Value).?; + const start = tree.tokens[value.start.?]; + const end = tree.tokens[value.end.?]; + try testing.expect(mem.eql(u8, "Sammy Sosa", tree.source[start.start..end.end])); + } + + { + try testing.expectEqual(nested.values.items[1].tag, .value); + const value = nested.values.items[1].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "63", tree.source[leaf.start..leaf.end])); + } + + { + try testing.expectEqual(nested.values.items[2].tag, .value); + const value = nested.values.items[2].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "0.288", tree.source[leaf.start..leaf.end])); + } + } +} + +test "inline list" { + const source = + \\[name , hr, avg ] + ; + + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.start.?, 0); + try 
testing.expectEqual(doc.end.?, tree.tokens.len - 2); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .list); + + const list = doc.value.?.cast(Node.List).?; + try testing.expectEqual(list.start.?, 0); + try testing.expectEqual(list.end.?, tree.tokens.len - 2); + try testing.expectEqual(list.values.items.len, 3); + + { + try testing.expectEqual(list.values.items[0].tag, .value); + const value = list.values.items[0].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "name", tree.source[leaf.start..leaf.end])); + } + + { + try testing.expectEqual(list.values.items[1].tag, .value); + const value = list.values.items[1].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "hr", tree.source[leaf.start..leaf.end])); + } + + { + try testing.expectEqual(list.values.items[2].tag, .value); + const value = list.values.items[2].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "avg", tree.source[leaf.start..leaf.end])); + } +} + +test "inline list as mapping value" { + const source = + \\key : [ + \\ name , + \\ hr, avg ] + ; + + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.start.?, 0); + try testing.expectEqual(doc.end.?, tree.tokens.len - 2); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .map); + + const map = doc.value.?.cast(Node.Map).?; + try testing.expectEqual(map.start.?, 0); + try testing.expectEqual(map.end.?, tree.tokens.len - 2); + try testing.expectEqual(map.values.items.len, 1); + + const entry = map.values.items[0]; + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .Literal); + try testing.expect(mem.eql(u8, "key", tree.source[key.start..key.end])); + + const list = entry.value.cast(Node.List).?; + try testing.expectEqual(list.start.?, 4); + try testing.expectEqual(list.end.?, tree.tokens.len - 2); + try testing.expectEqual(list.values.items.len, 3); + + { + try testing.expectEqual(list.values.items[0].tag, .value); + const value = list.values.items[0].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "name", tree.source[leaf.start..leaf.end])); + } + + { + try testing.expectEqual(list.values.items[1].tag, .value); + const value = list.values.items[1].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "hr", tree.source[leaf.start..leaf.end])); + } + + { + try testing.expectEqual(list.values.items[2].tag, .value); + const value = list.values.items[2].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "avg", tree.source[leaf.start..leaf.end])); + } +} diff --git a/src/archive/archive/zld/tapi/yaml.zig b/src/archive/archive/zld/tapi/yaml.zig new file mode 100644 index 000000000000..d4136b35d3da --- /dev/null +++ b/src/archive/archive/zld/tapi/yaml.zig @@ -0,0 +1,727 @@ +const std = @import("std"); +const assert = std.debug.assert; +const math = std.math; +const mem = std.mem; +const testing = std.testing; +const log = std.log.scoped(.tapi); + +const Allocator = mem.Allocator; +const ArenaAllocator = std.heap.ArenaAllocator; + +pub const Tokenizer = @import("Tokenizer.zig"); +pub const parse = @import("parse.zig"); + +const Node = parse.Node; +const 
Tree = parse.Tree; +const ParseError = parse.ParseError; + +pub const YamlError = error{ + UnexpectedNodeType, + OutOfMemory, +} || ParseError || std.fmt.ParseIntError; + +pub const ValueType = enum { + empty, + int, + float, + string, + list, + map, +}; + +pub const List = []Value; +pub const Map = std.StringArrayHashMap(Value); + +pub const Value = union(ValueType) { + empty, + int: i64, + float: f64, + string: []const u8, + list: List, + map: Map, + + pub fn asInt(self: Value) !i64 { + if (self != .int) return error.TypeMismatch; + return self.int; + } + + pub fn asFloat(self: Value) !f64 { + if (self != .float) return error.TypeMismatch; + return self.float; + } + + pub fn asString(self: Value) ![]const u8 { + if (self != .string) return error.TypeMismatch; + return self.string; + } + + pub fn asList(self: Value) !List { + if (self != .list) return error.TypeMismatch; + return self.list; + } + + pub fn asMap(self: Value) !Map { + if (self != .map) return error.TypeMismatch; + return self.map; + } + + const StringifyArgs = struct { + indentation: usize = 0, + should_inline_first_key: bool = false, + }; + + pub const StringifyError = std.os.WriteError; + + pub fn stringify(self: Value, writer: anytype, args: StringifyArgs) StringifyError!void { + switch (self) { + .empty => return, + .int => |int| return writer.print("{}", .{int}), + .float => |float| return writer.print("{d}", .{float}), + .string => |string| return writer.print("{s}", .{string}), + .list => |list| { + const len = list.len; + if (len == 0) return; + + const first = list[0]; + if (first.is_compound()) { + for (list, 0..) |elem, i| { + try writer.writeByteNTimes(' ', args.indentation); + try writer.writeAll("- "); + try elem.stringify(writer, .{ + .indentation = args.indentation + 2, + .should_inline_first_key = true, + }); + if (i < len - 1) { + try writer.writeByte('\n'); + } + } + return; + } + + try writer.writeAll("[ "); + for (list, 0..) |elem, i| { + try elem.stringify(writer, args); + if (i < len - 1) { + try writer.writeAll(", "); + } + } + try writer.writeAll(" ]"); + }, + .map => |map| { + const keys = map.keys(); + const len = keys.len; + if (len == 0) return; + + for (keys, 0..) 
|key, i| { + if (!args.should_inline_first_key or i != 0) { + try writer.writeByteNTimes(' ', args.indentation); + } + try writer.print("{s}: ", .{key}); + + const value = map.get(key) orelse unreachable; + const should_inline = blk: { + if (!value.is_compound()) break :blk true; + if (value == .list and value.list.len > 0 and !value.list[0].is_compound()) break :blk true; + break :blk false; + }; + + if (should_inline) { + try value.stringify(writer, args); + } else { + try writer.writeByte('\n'); + try value.stringify(writer, .{ + .indentation = args.indentation + 4, + }); + } + + if (i < len - 1) { + try writer.writeByte('\n'); + } + } + }, + } + } + + fn is_compound(self: Value) bool { + return switch (self) { + .list, .map => true, + else => false, + }; + } + + fn fromNode(arena: Allocator, tree: *const Tree, node: *const Node, type_hint: ?ValueType) YamlError!Value { + if (node.cast(Node.Doc)) |doc| { + const inner = doc.value orelse { + // empty doc + return Value{ .empty = {} }; + }; + return Value.fromNode(arena, tree, inner, null); + } else if (node.cast(Node.Map)) |map| { + var out_map = std.StringArrayHashMap(Value).init(arena); + try out_map.ensureUnusedCapacity(map.values.items.len); + + for (map.values.items) |entry| { + const key_tok = tree.tokens[entry.key]; + const key = try arena.dupe(u8, tree.source[key_tok.start..key_tok.end]); + const value = try Value.fromNode(arena, tree, entry.value, null); + + out_map.putAssumeCapacityNoClobber(key, value); + } + + return Value{ .map = out_map }; + } else if (node.cast(Node.List)) |list| { + var out_list = std.ArrayList(Value).init(arena); + try out_list.ensureUnusedCapacity(list.values.items.len); + + if (list.values.items.len > 0) { + const hint = if (list.values.items[0].cast(Node.Value)) |value| hint: { + const start = tree.tokens[value.start.?]; + const end = tree.tokens[value.end.?]; + const raw = tree.source[start.start..end.end]; + _ = std.fmt.parseInt(i64, raw, 10) catch { + _ = std.fmt.parseFloat(f64, raw) catch { + break :hint ValueType.string; + }; + break :hint ValueType.float; + }; + break :hint ValueType.int; + } else null; + + for (list.values.items) |elem| { + const value = try Value.fromNode(arena, tree, elem, hint); + out_list.appendAssumeCapacity(value); + } + } + + return Value{ .list = try out_list.toOwnedSlice() }; + } else if (node.cast(Node.Value)) |value| { + const start = tree.tokens[value.start.?]; + const end = tree.tokens[value.end.?]; + const raw = tree.source[start.start..end.end]; + + if (type_hint) |hint| { + return switch (hint) { + .int => Value{ .int = try std.fmt.parseInt(i64, raw, 10) }, + .float => Value{ .float = try std.fmt.parseFloat(f64, raw) }, + .string => Value{ .string = try arena.dupe(u8, raw) }, + else => unreachable, + }; + } + + try_int: { + // TODO infer base for int + const int = std.fmt.parseInt(i64, raw, 10) catch break :try_int; + return Value{ .int = int }; + } + try_float: { + const float = std.fmt.parseFloat(f64, raw) catch break :try_float; + return Value{ .float = float }; + } + return Value{ .string = try arena.dupe(u8, raw) }; + } else { + log.err("Unexpected node type: {}", .{node.tag}); + return error.UnexpectedNodeType; + } + } +}; + +pub const Yaml = struct { + arena: ArenaAllocator, + tree: ?Tree = null, + docs: std.ArrayList(Value), + + pub fn deinit(self: *Yaml) void { + self.arena.deinit(); + } + + pub fn stringify(self: Yaml, writer: anytype) !void { + for (self.docs.items) |doc| { + // if (doc.directive) |directive| { + // try writer.print("--- !{s}\n", 
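Value.fromNode above infers scalar types by trying an integer parse, then a float parse, then falling back to keeping the raw bytes as a string. A hedged test sketch of that inference (the import path is an assumption):

```zig
// Sketch of the scalar inference in Value.fromNode above: int, then float,
// then string. The import path is an assumption.
const std = @import("std");
const Yaml = @import("yaml.zig").Yaml;

test "untyped scalars are inferred as int, float, or string" {
    var yaml = try Yaml.load(std.testing.allocator, "a: 1\nb: 2.5\nc: text");
    defer yaml.deinit();

    const map = yaml.docs.items[0].map;
    try std.testing.expectEqual(@as(i64, 1), map.get("a").?.int);
    try std.testing.expectEqual(@as(f64, 2.5), map.get("b").?.float);
    try std.testing.expectEqualStrings("text", map.get("c").?.string);
}
```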
.{directive}); + // } + try doc.stringify(writer, .{}); + // if (doc.directive != null) { + // try writer.writeAll("...\n"); + // } + } + } + + pub fn load(allocator: Allocator, source: []const u8) !Yaml { + var arena = ArenaAllocator.init(allocator); + const arena_allocator = arena.allocator(); + + var tree = Tree.init(arena_allocator); + try tree.parse(source); + + var docs = std.ArrayList(Value).init(arena_allocator); + try docs.ensureUnusedCapacity(tree.docs.items.len); + + for (tree.docs.items) |node| { + const value = try Value.fromNode(arena_allocator, &tree, node, null); + docs.appendAssumeCapacity(value); + } + + return Yaml{ + .arena = arena, + .tree = tree, + .docs = docs, + }; + } + + pub const Error = error{ + Unimplemented, + TypeMismatch, + StructFieldMissing, + ArraySizeMismatch, + UntaggedUnion, + UnionTagMissing, + Overflow, + OutOfMemory, + }; + + pub fn parse(self: *Yaml, comptime T: type) Error!T { + if (self.docs.items.len == 0) { + if (@typeInfo(T) == .Void) return {}; + return error.TypeMismatch; + } + + if (self.docs.items.len == 1) { + return self.parseValue(T, self.docs.items[0]); + } + + switch (@typeInfo(T)) { + .Array => |info| { + var parsed: T = undefined; + for (self.docs.items, 0..) |doc, i| { + parsed[i] = try self.parseValue(info.child, doc); + } + return parsed; + }, + .Pointer => |info| { + switch (info.size) { + .Slice => { + var parsed = try self.arena.allocator().alloc(info.child, self.docs.items.len); + for (self.docs.items, 0..) |doc, i| { + parsed[i] = try self.parseValue(info.child, doc); + } + return parsed; + }, + else => return error.TypeMismatch, + } + }, + .Union => return error.Unimplemented, + else => return error.TypeMismatch, + } + } + + fn parseValue(self: *Yaml, comptime T: type, value: Value) Error!T { + return switch (@typeInfo(T)) { + .Int => math.cast(T, try value.asInt()) orelse error.Overflow, + .Float => math.lossyCast(T, try value.asFloat()), + .Struct => self.parseStruct(T, try value.asMap()), + .Union => self.parseUnion(T, value), + .Array => self.parseArray(T, try value.asList()), + .Pointer => { + if (value.asList()) |list| { + return self.parsePointer(T, .{ .list = list }); + } else |_| { + return self.parsePointer(T, .{ .string = try value.asString() }); + } + }, + .Void => error.TypeMismatch, + .Optional => unreachable, + else => error.Unimplemented, + }; + } + + fn parseUnion(self: *Yaml, comptime T: type, value: Value) Error!T { + const union_info = @typeInfo(T).Union; + + if (union_info.tag_type) |_| { + inline for (union_info.fields) |field| { + if (self.parseValue(field.type, value)) |u_value| { + return @unionInit(T, field.name, u_value); + } else |err| { + if (@as(@TypeOf(err) || error{TypeMismatch}, err) != error.TypeMismatch) return err; + } + } + } else return error.UntaggedUnion; + + return error.UnionTagMissing; + } + + fn parseOptional(self: *Yaml, comptime T: type, value: ?Value) Error!T { + const unwrapped = value orelse return null; + const opt_info = @typeInfo(T).Optional; + return @as(T, try self.parseValue(opt_info.child, unwrapped)); + } + + fn parseStruct(self: *Yaml, comptime T: type, map: Map) Error!T { + const struct_info = @typeInfo(T).Struct; + var parsed: T = undefined; + + inline for (struct_info.fields) |field| { + const value: ?Value = map.get(field.name) orelse blk: { + const field_name = try mem.replaceOwned(u8, self.arena.allocator(), field.name, "_", "-"); + break :blk map.get(field_name); + }; + + if (@typeInfo(field.type) == .Optional) { + @field(parsed, field.name) = try 
self.parseOptional(field.type, value); + continue; + } + + const unwrapped = value orelse { + log.debug("missing struct field: {s}: {s}", .{ field.name, @typeName(field.type) }); + return error.StructFieldMissing; + }; + @field(parsed, field.name) = try self.parseValue(field.type, unwrapped); + } + + return parsed; + } + + fn parsePointer(self: *Yaml, comptime T: type, value: Value) Error!T { + const ptr_info = @typeInfo(T).Pointer; + const arena = self.arena.allocator(); + + switch (ptr_info.size) { + .Slice => { + const child_info = @typeInfo(ptr_info.child); + if (child_info == .Int and child_info.Int.bits == 8) { + return value.asString(); + } + + var parsed = try arena.alloc(ptr_info.child, value.list.len); + for (value.list, 0..) |elem, i| { + parsed[i] = try self.parseValue(ptr_info.child, elem); + } + return parsed; + }, + else => return error.Unimplemented, + } + } + + fn parseArray(self: *Yaml, comptime T: type, list: List) Error!T { + const array_info = @typeInfo(T).Array; + if (array_info.len != list.len) return error.ArraySizeMismatch; + + var parsed: T = undefined; + for (list, 0..) |elem, i| { + parsed[i] = try self.parseValue(array_info.child, elem); + } + + return parsed; + } +}; + +test { + testing.refAllDecls(@This()); +} + +test "simple list" { + const source = + \\- a + \\- b + \\- c + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectEqual(yaml.docs.items.len, 1); + + const list = yaml.docs.items[0].list; + try testing.expectEqual(list.len, 3); + + try testing.expect(mem.eql(u8, list[0].string, "a")); + try testing.expect(mem.eql(u8, list[1].string, "b")); + try testing.expect(mem.eql(u8, list[2].string, "c")); +} + +test "simple list typed as array of strings" { + const source = + \\- a + \\- b + \\- c + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectEqual(yaml.docs.items.len, 1); + + const arr = try yaml.parse([3][]const u8); + try testing.expectEqual(arr.len, 3); + try testing.expect(mem.eql(u8, arr[0], "a")); + try testing.expect(mem.eql(u8, arr[1], "b")); + try testing.expect(mem.eql(u8, arr[2], "c")); +} + +test "simple list typed as array of ints" { + const source = + \\- 0 + \\- 1 + \\- 2 + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectEqual(yaml.docs.items.len, 1); + + const arr = try yaml.parse([3]u8); + try testing.expectEqual(arr.len, 3); + try testing.expectEqual(arr[0], 0); + try testing.expectEqual(arr[1], 1); + try testing.expectEqual(arr[2], 2); +} + +test "list of mixed sign integer" { + const source = + \\- 0 + \\- -1 + \\- 2 + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectEqual(yaml.docs.items.len, 1); + + const arr = try yaml.parse([3]i8); + try testing.expectEqual(arr.len, 3); + try testing.expectEqual(arr[0], 0); + try testing.expectEqual(arr[1], -1); + try testing.expectEqual(arr[2], 2); +} + +test "simple map untyped" { + const source = + \\a: 0 + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectEqual(yaml.docs.items.len, 1); + + const map = yaml.docs.items[0].map; + try testing.expect(map.contains("a")); + try testing.expectEqual(map.get("a").?.int, 0); +} + +test "simple map untyped with a list of maps" { + const source = + \\a: 0 + \\b: + \\ - foo: 1 + \\ bar: 2 + \\ - foo: 3 + \\ bar: 4 + \\c: 1 + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer 
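parseStruct above falls back to replacing underscores with dashes when a field name is not found verbatim, which is how struct fields like tbd_version line up with YAML keys like tbd-version. A hedged test sketch of that fallback (the import path is an assumption):

```zig
// Sketch of the underscore-to-dash fallback in Yaml.parseStruct above.
// The import path is an assumption.
const std = @import("std");
const Yaml = @import("yaml.zig").Yaml;

test "underscored struct fields match dashed YAML keys" {
    var yaml = try Yaml.load(std.testing.allocator, "install-name: '/usr/lib/libc.dylib'");
    defer yaml.deinit();

    const parsed = try yaml.parse(struct { install_name: []const u8 });
    try std.testing.expectEqualStrings("/usr/lib/libc.dylib", parsed.install_name);
}
```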
yaml.deinit(); + + try testing.expectEqual(yaml.docs.items.len, 1); + + const map = yaml.docs.items[0].map; + try testing.expect(map.contains("a")); + try testing.expect(map.contains("b")); + try testing.expect(map.contains("c")); + try testing.expectEqual(map.get("a").?.int, 0); + try testing.expectEqual(map.get("c").?.int, 1); + try testing.expectEqual(map.get("b").?.list[0].map.get("foo").?.int, 1); + try testing.expectEqual(map.get("b").?.list[0].map.get("bar").?.int, 2); + try testing.expectEqual(map.get("b").?.list[1].map.get("foo").?.int, 3); + try testing.expectEqual(map.get("b").?.list[1].map.get("bar").?.int, 4); +} + +test "simple map untyped with a list of maps. no indent" { + const source = + \\b: + \\- foo: 1 + \\c: 1 + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectEqual(yaml.docs.items.len, 1); + + const map = yaml.docs.items[0].map; + try testing.expect(map.contains("b")); + try testing.expect(map.contains("c")); + try testing.expectEqual(map.get("c").?.int, 1); + try testing.expectEqual(map.get("b").?.list[0].map.get("foo").?.int, 1); +} + +test "simple map untyped with a list of maps. no indent 2" { + const source = + \\a: 0 + \\b: + \\- foo: 1 + \\ bar: 2 + \\- foo: 3 + \\ bar: 4 + \\c: 1 + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectEqual(yaml.docs.items.len, 1); + + const map = yaml.docs.items[0].map; + try testing.expect(map.contains("a")); + try testing.expect(map.contains("b")); + try testing.expect(map.contains("c")); + try testing.expectEqual(map.get("a").?.int, 0); + try testing.expectEqual(map.get("c").?.int, 1); + try testing.expectEqual(map.get("b").?.list[0].map.get("foo").?.int, 1); + try testing.expectEqual(map.get("b").?.list[0].map.get("bar").?.int, 2); + try testing.expectEqual(map.get("b").?.list[1].map.get("foo").?.int, 3); + try testing.expectEqual(map.get("b").?.list[1].map.get("bar").?.int, 4); +} + +test "simple map typed" { + const source = + \\a: 0 + \\b: hello there + \\c: 'wait, what?' + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + const simple = try yaml.parse(struct { a: usize, b: []const u8, c: []const u8 }); + try testing.expectEqual(simple.a, 0); + try testing.expect(mem.eql(u8, simple.b, "hello there")); + try testing.expect(mem.eql(u8, simple.c, "wait, what?")); +} + +test "typed nested structs" { + const source = + \\a: + \\ b: hello there + \\ c: 'wait, what?' + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + const simple = try yaml.parse(struct { + a: struct { + b: []const u8, + c: []const u8, + }, + }); + try testing.expect(mem.eql(u8, simple.a.b, "hello there")); + try testing.expect(mem.eql(u8, simple.a.c, "wait, what?")); +} + +test "multidoc typed as a slice of structs" { + const source = + \\--- + \\a: 0 + \\--- + \\a: 1 + \\... + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + { + const result = try yaml.parse([2]struct { a: usize }); + try testing.expectEqual(result.len, 2); + try testing.expectEqual(result[0].a, 0); + try testing.expectEqual(result[1].a, 1); + } + + { + const result = try yaml.parse([]struct { a: usize }); + try testing.expectEqual(result.len, 2); + try testing.expectEqual(result[0].a, 0); + try testing.expectEqual(result[1].a, 1); + } +} + +test "multidoc typed as a struct is an error" { + const source = + \\--- + \\a: 0 + \\--- + \\b: 1 + \\... 
+ ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectError(Yaml.Error.TypeMismatch, yaml.parse(struct { a: usize })); + try testing.expectError(Yaml.Error.TypeMismatch, yaml.parse(struct { b: usize })); + try testing.expectError(Yaml.Error.TypeMismatch, yaml.parse(struct { a: usize, b: usize })); +} + +test "multidoc typed as a slice of structs with optionals" { + const source = + \\--- + \\a: 0 + \\c: 1.0 + \\--- + \\a: 1 + \\b: different field + \\... + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + const result = try yaml.parse([]struct { a: usize, b: ?[]const u8, c: ?f16 }); + try testing.expectEqual(result.len, 2); + + try testing.expectEqual(result[0].a, 0); + try testing.expect(result[0].b == null); + try testing.expect(result[0].c != null); + try testing.expectEqual(result[0].c.?, 1.0); + + try testing.expectEqual(result[1].a, 1); + try testing.expect(result[1].b != null); + try testing.expect(mem.eql(u8, result[1].b.?, "different field")); + try testing.expect(result[1].c == null); +} + +test "empty yaml can be represented as void" { + const source = ""; + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + const result = try yaml.parse(void); + try testing.expect(@TypeOf(result) == void); +} + +test "nonempty yaml cannot be represented as void" { + const source = + \\a: b + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectError(Yaml.Error.TypeMismatch, yaml.parse(void)); +} + +test "typed array size mismatch" { + const source = + \\- 0 + \\- 0 + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectError(Yaml.Error.ArraySizeMismatch, yaml.parse([1]usize)); + try testing.expectError(Yaml.Error.ArraySizeMismatch, yaml.parse([5]usize)); +} diff --git a/src/archive/archive/zld/test.zig b/src/archive/archive/zld/test.zig new file mode 100644 index 000000000000..70fa34a8bf15 --- /dev/null +++ b/src/archive/archive/zld/test.zig @@ -0,0 +1,394 @@ +const std = @import("std"); +const build_options = @import("build_options"); +const builtin = std.builtin; +const mem = std.mem; +const testing = std.testing; +const process = std.process; +const log = std.log.scoped(.tests); + +const Allocator = mem.Allocator; +const ChildProcess = std.ChildProcess; +const Target = std.Target; +const CrossTarget = std.zig.CrossTarget; +const tmpDir = testing.tmpDir; +const ThreadPool = @import("ThreadPool.zig"); +const Zld = @import("Zld.zig"); + +const gpa = testing.allocator; + +test "unit" { + _ = @import("Zld.zig"); +} + +test "end-to-end" { + var ctx = TestContext.init(); + defer ctx.deinit(); + + try @import("end_to_end_tests").addCases(&ctx); + try ctx.run(); +} + +pub const TestContext = struct { + cases: std.ArrayList(Case), + + pub const Case = struct { + name: []const u8, + target: CrossTarget, + input_files: std.ArrayList(InputFile), + expected_out: ExpectedOutput = .{}, + + const ExpectedOutput = struct { + stdout: ?[]const u8 = null, + stderr: ?[]const u8 = null, + }; + + const InputFile = struct { + const FileType = enum { + Header, + C, + Cpp, + Zig, + }; + + filetype: FileType, + basename: []const u8, + contents: []const u8, + + /// Caller own the memory. 
+ fn getFilename(self: InputFile, allocator: Allocator) ![]u8 { + const ext = switch (self.filetype) { + .Header => ".h", + .C => ".c", + .Cpp => ".cpp", + .Zig => ".zig", + }; + return std.fmt.allocPrint(allocator, "{s}{s}", .{ self.basename, ext }); + } + }; + + pub fn init(allocator: Allocator, name: []const u8, target: CrossTarget) Case { + var input_files = std.ArrayList(InputFile).init(allocator); + return .{ + .name = name, + .target = target, + .input_files = input_files, + }; + } + + pub fn deinit(self: *Case) void { + self.input_files.deinit(); + } + + pub fn addInput(self: *Case, filename: []const u8, contents: []const u8) !void { + const ext = std.fs.path.extension(filename); + const filetype: InputFile.FileType = blk: { + if (mem.eql(u8, ".h", ext)) { + break :blk .Header; + } else if (mem.eql(u8, ".c", ext)) { + break :blk .C; + } else if (mem.eql(u8, ".cpp", ext)) { + break :blk .Cpp; + } else if (mem.eql(u8, ".zig", ext)) { + break :blk .Zig; + } else { + log.warn("skipping file; unknown filetype detected with extension '{s}'", .{ext}); + return; + } + }; + const index = mem.lastIndexOf(u8, filename, ext).?; + const basename = filename[0..index]; + try self.input_files.append(.{ + .filetype = filetype, + .basename = basename, + .contents = contents, + }); + } + + pub fn expectedStdout(self: *Case, expected_stdout: []const u8) void { + self.expected_out.stdout = expected_stdout; + } + + pub fn expectedStderr(self: *Case, expected_stderr: []const u8) void { + self.expected_out.stderr = expected_stderr; + } + }; + + pub fn init() TestContext { + var cases = std.ArrayList(Case).init(gpa); + return .{ .cases = cases }; + } + + pub fn deinit(self: *TestContext) void { + for (self.cases.items) |*case| { + case.deinit(); + } + self.cases.deinit(); + } + + pub fn addCase(self: *TestContext, name: []const u8, target: CrossTarget) !*Case { + const idx = self.cases.items.len; + try self.cases.append(Case.init(gpa, name, target)); + return &self.cases.items[idx]; + } + + pub fn run(self: *TestContext) !void { + var arena_allocator = std.heap.ArenaAllocator.init(gpa); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + for (self.cases.items) |case| { + var tmp = tmpDir(.{}); + defer tmp.cleanup(); + + const cwd = try std.fs.path.join(arena, &[_][]const u8{ + "zig-cache", "tmp", &tmp.sub_path, + }); + + var objects = std.ArrayList(Zld.LinkObject).init(arena); + + const target_triple = try std.fmt.allocPrint(arena, "{s}-{s}-{s}", .{ + @tagName(case.target.cpu_arch.?), + @tagName(case.target.os_tag.?), + @tagName(case.target.abi.?), + }); + + var requires_crts: bool = true; + + for (case.input_files.items) |input_file| { + const input_filename = try input_file.getFilename(arena); + try tmp.dir.writeFile(input_filename, input_file.contents); + + var argv = std.ArrayList([]const u8).init(arena); + try argv.append("zig"); + + switch (input_file.filetype) { + .C => { + try argv.append("cc"); + try argv.append("-c"); + }, + .Cpp => { + try argv.append("c++"); + try argv.append("-c"); + }, + .Zig => { + try argv.append("build-obj"); + requires_crts = false; + }, + .Header => continue, + } + + try argv.append("-target"); + try argv.append(target_triple); + + try argv.append(input_filename); + + const output_filename = try std.fmt.allocPrint(arena, "{s}.o", .{input_file.basename}); + + if (input_file.filetype != .Zig) { + try argv.append("-o"); + try argv.append(output_filename); + } + + const output_file_path = try std.fs.path.join(arena, &[_][]const u8{ + cwd, 
output_filename, + }); + try objects.append(.{ .path = output_file_path, .must_link = false }); + + const result = try std.ChildProcess.exec(.{ + .allocator = arena, + .argv = argv.items, + .cwd = cwd, + }); + if (result.stdout.len != 0) { + log.warn("unexpected compiler stdout: {s}", .{result.stdout}); + } + if (result.stderr.len != 0) { + log.warn("unexpected compiler stderr: {s}", .{result.stderr}); + } + if (result.term != .Exited or result.term.Exited != 0) { + log.err("{s}", .{result.stderr}); + try printInvocation(argv.items); + return error.CompileError; + } + } + + // compiler_rt + const compiler_rt_path = try std.fs.path.join(arena, &[_][]const u8{ + "test", "assets", target_triple, "libcompiler_rt.a", + }); + try objects.append(.{ .path = compiler_rt_path, .must_link = false }); + + if (case.target.getAbi() == .musl) { + if (requires_crts) { + // crt1 + const crt1_path = try std.fs.path.join(arena, &[_][]const u8{ + "test", "assets", target_triple, "crt1.o", + }); + try objects.append(.{ .path = crt1_path, .must_link = true }); + // crti + const crti_path = try std.fs.path.join(arena, &[_][]const u8{ + "test", "assets", target_triple, "crti.o", + }); + try objects.append(.{ .path = crti_path, .must_link = true }); + // crtn + const crtn_path = try std.fs.path.join(arena, &[_][]const u8{ + "test", "assets", target_triple, "crtn.o", + }); + try objects.append(.{ .path = crtn_path, .must_link = true }); + } + // libc + const libc_path = try std.fs.path.join(arena, &[_][]const u8{ + "test", "assets", target_triple, "libc.a", + }); + try objects.append(.{ .path = libc_path, .must_link = false }); + } + + const output_path = try std.fs.path.join(arena, &[_][]const u8{ + "zig-cache", "tmp", &tmp.sub_path, "a.out", + }); + + var libs = std.StringArrayHashMap(Zld.SystemLib).init(arena); + var lib_dirs = std.ArrayList([]const u8).init(arena); + var frameworks = std.StringArrayHashMap(Zld.SystemLib).init(arena); + var framework_dirs = std.ArrayList([]const u8).init(arena); + + const host = try std.zig.system.NativeTargetInfo.detect(.{}); + const target_info = try std.zig.system.NativeTargetInfo.detect(case.target); + var syslibroot: ?[]const u8 = null; + + if (case.target.isDarwin()) { + try libs.put("System", .{}); + try lib_dirs.append("/usr/lib"); + try framework_dirs.append("/System/Library/Frameworks"); + + if (std.zig.system.darwin.isDarwinSDKInstalled(arena)) { + if (std.zig.system.darwin.getDarwinSDK(arena, host.target)) |sdk| { + syslibroot = sdk.path; + } + } + } + + const tag: Zld.Tag = switch (case.target.os_tag.?) 
{ + .macos, + .ios, + .watchos, + .tvos, + => .macho, + .linux => .elf, + .windows => .coff, + else => unreachable, + }; + var opts: Zld.Options = switch (tag) { + .macho => .{ .macho = .{ + .emit = .{ + .directory = std.fs.cwd(), + .sub_path = output_path, + }, + .dynamic = true, + .target = case.target, + .platform_version = target_info.target.os.version_range.semver.min, + .sdk_version = target_info.target.os.version_range.semver.min, + .output_mode = .exe, + .syslibroot = syslibroot, + .positionals = objects.items, + .libs = libs, + .frameworks = frameworks, + .lib_dirs = lib_dirs.items, + .framework_dirs = framework_dirs.items, + .rpath_list = &[0][]const u8{}, + .dead_strip = true, + } }, + .elf => .{ .elf = .{ + .emit = .{ + .directory = std.fs.cwd(), + .sub_path = output_path, + }, + .target = case.target, + .output_mode = .exe, + .positionals = objects.items, + .libs = libs, + .lib_dirs = lib_dirs.items, + .rpath_list = &[0][]const u8{}, + .gc_sections = true, + } }, + .coff => .{ .coff = .{ + .emit = .{ + .directory = std.fs.cwd(), + .sub_path = output_path, + }, + .target = case.target, + .output_mode = .exe, + .positionals = objects.items, + .libs = libs, + .lib_dirs = &[0][]const u8{}, + } }, + .wasm => @panic("TODO"), + }; + + var thread_pool: ThreadPool = undefined; + try thread_pool.init(gpa); + defer thread_pool.deinit(); + + const zld = try Zld.openPath(gpa, tag, opts, &thread_pool); + defer zld.deinit(); + + var argv = std.ArrayList([]const u8).init(arena); + outer: { + switch (host.getExternalExecutor(target_info, .{})) { + .native => { + try zld.flush(); + try argv.append("./a.out"); + }, + .qemu => |qemu_bin_name| if (build_options.enable_qemu) { + try zld.flush(); + try argv.append(qemu_bin_name); + try argv.append("./a.out"); + } else { + break :outer; + }, + else => { + // TODO simply pass the test + break :outer; + }, + } + + const result = try std.ChildProcess.exec(.{ + .allocator = arena, + .argv = argv.items, + .cwd = cwd, + }); + + if (case.expected_out.stdout != null or case.expected_out.stderr != null) { + if (case.expected_out.stderr) |err| { + const pass = mem.eql(u8, result.stderr, err); + if (!pass) + log.err("STDERR: Test '{s}' failed\nExpected: '{s}'\nGot: '{s}'", .{ case.name, err, result.stderr }); + try testing.expect(pass); + } + if (case.expected_out.stdout) |out| { + const pass = mem.eql(u8, result.stdout, out); + if (!pass) + log.err("STDOUT: Test '{s}' failed\nExpected: '{s}'\nGot: '{s}'", .{ case.name, out, result.stdout }); + try testing.expect(pass); + } + continue; + } + if (result.stderr.len != 0) { + log.warn("unexpected exe stderr: {s}", .{result.stderr}); + } + if (result.term != .Exited or result.term.Exited != 0) { + log.err("{s}", .{result.stderr}); + try printInvocation(argv.items); + return error.ExeError; + } + log.warn("exe was run, but no expected output was provided", .{}); + } + } + } +}; + +fn printInvocation(argv: []const []const u8) !void { + const full_inv = try std.mem.join(gpa, " ", argv); + defer gpa.free(full_inv); + log.err("The following command failed:\n{s}", .{full_inv}); +} diff --git a/src/archive/archive/zld/tracy.zig b/src/archive/archive/zld/tracy.zig new file mode 100644 index 000000000000..ce85c6db410f --- /dev/null +++ b/src/archive/archive/zld/tracy.zig @@ -0,0 +1,308 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const build_options = @import("build_options"); + +pub const enable = if (builtin.is_test) false else build_options.enable_tracy; +pub const enable_allocation = false; // 
enable and build_options.enable_tracy_allocation; +pub const enable_callstack = false; //enable and build_options.enable_tracy_callstack; + +// TODO: make this configurable +const callstack_depth = 10; + +const ___tracy_c_zone_context = extern struct { + id: u32, + active: c_int, + + pub inline fn end(self: @This()) void { + ___tracy_emit_zone_end(self); + } + + pub inline fn addText(self: @This(), text: []const u8) void { + ___tracy_emit_zone_text(self, text.ptr, text.len); + } + + pub inline fn setName(self: @This(), name: []const u8) void { + ___tracy_emit_zone_name(self, name.ptr, name.len); + } + + pub inline fn setColor(self: @This(), color: u32) void { + ___tracy_emit_zone_color(self, color); + } + + pub inline fn setValue(self: @This(), value: u64) void { + ___tracy_emit_zone_value(self, value); + } +}; + +pub const Ctx = if (enable) ___tracy_c_zone_context else struct { + pub inline fn end(self: @This()) void { + _ = self; + } + + pub inline fn addText(self: @This(), text: []const u8) void { + _ = self; + _ = text; + } + + pub inline fn setName(self: @This(), name: []const u8) void { + _ = self; + _ = name; + } + + pub inline fn setColor(self: @This(), color: u32) void { + _ = self; + _ = color; + } + + pub inline fn setValue(self: @This(), value: u64) void { + _ = self; + _ = value; + } +}; + +pub inline fn trace(comptime src: std.builtin.SourceLocation) Ctx { + if (!enable) return .{}; + + if (enable_callstack) { + return ___tracy_emit_zone_begin_callstack(&.{ + .name = null, + .function = src.fn_name.ptr, + .file = src.file.ptr, + .line = src.line, + .color = 0, + }, callstack_depth, 1); + } else { + return ___tracy_emit_zone_begin(&.{ + .name = null, + .function = src.fn_name.ptr, + .file = src.file.ptr, + .line = src.line, + .color = 0, + }, 1); + } +} + +pub inline fn traceNamed(comptime src: std.builtin.SourceLocation, comptime name: [:0]const u8) Ctx { + if (!enable) return .{}; + + if (enable_callstack) { + return ___tracy_emit_zone_begin_callstack(&.{ + .name = name.ptr, + .function = src.fn_name.ptr, + .file = src.file.ptr, + .line = src.line, + .color = 0, + }, callstack_depth, 1); + } else { + return ___tracy_emit_zone_begin(&.{ + .name = name.ptr, + .function = src.fn_name.ptr, + .file = src.file.ptr, + .line = src.line, + .color = 0, + }, 1); + } +} + +pub fn tracyAllocator(allocator: std.mem.Allocator) TracyAllocator(null) { + return TracyAllocator(null).init(allocator); +} + +pub fn TracyAllocator(comptime name: ?[:0]const u8) type { + return struct { + parent_allocator: std.mem.Allocator, + + const Self = @This(); + + pub fn init(parent_allocator: std.mem.Allocator) Self { + return .{ + .parent_allocator = parent_allocator, + }; + } + + pub fn allocator(self: *Self) std.mem.Allocator { + return std.mem.Allocator.init(self, allocFn, resizeFn, freeFn); + } + + fn allocFn(self: *Self, len: usize, ptr_align: u29, len_align: u29, ret_addr: usize) std.mem.Allocator.Error![]u8 { + const result = self.parent_allocator.rawAlloc(len, ptr_align, len_align, ret_addr); + if (result) |data| { + if (data.len != 0) { + if (name) |n| { + allocNamed(data.ptr, data.len, n); + } else { + alloc(data.ptr, data.len); + } + } + } else |_| { + messageColor("allocation failed", 0xFF0000); + } + return result; + } + + fn resizeFn(self: *Self, buf: []u8, buf_align: u29, new_len: usize, len_align: u29, ret_addr: usize) ?usize { + if (self.parent_allocator.rawResize(buf, buf_align, new_len, len_align, ret_addr)) |resized_len| { + if (name) |n| { + freeNamed(buf.ptr, n); + allocNamed(buf.ptr, 
resized_len, n); + } else { + free(buf.ptr); + alloc(buf.ptr, resized_len); + } + + return resized_len; + } + + // during normal operation the compiler hits this case thousands of times due to this + // emitting messages for it is both slow and causes clutter + return null; + } + + fn freeFn(self: *Self, buf: []u8, buf_align: u29, ret_addr: usize) void { + self.parent_allocator.rawFree(buf, buf_align, ret_addr); + // this condition is to handle free being called on an empty slice that was never even allocated + // example case: `std.process.getSelfExeSharedLibPaths` can return `&[_][:0]u8{}` + if (buf.len != 0) { + if (name) |n| { + freeNamed(buf.ptr, n); + } else { + free(buf.ptr); + } + } + } + }; +} + +// This function only accepts comptime known strings, see `messageCopy` for runtime strings +pub inline fn message(comptime msg: [:0]const u8) void { + if (!enable) return; + ___tracy_emit_messageL(msg.ptr, if (enable_callstack) callstack_depth else 0); +} + +// This function only accepts comptime known strings, see `messageColorCopy` for runtime strings +pub inline fn messageColor(comptime msg: [:0]const u8, color: u32) void { + if (!enable) return; + ___tracy_emit_messageLC(msg.ptr, color, if (enable_callstack) callstack_depth else 0); +} + +pub inline fn messageCopy(msg: []const u8) void { + if (!enable) return; + ___tracy_emit_message(msg.ptr, msg.len, if (enable_callstack) callstack_depth else 0); +} + +pub inline fn messageColorCopy(msg: [:0]const u8, color: u32) void { + if (!enable) return; + ___tracy_emit_messageC(msg.ptr, msg.len, color, if (enable_callstack) callstack_depth else 0); +} + +pub inline fn frameMark() void { + if (!enable) return; + ___tracy_emit_frame_mark(null); +} + +pub inline fn frameMarkNamed(comptime name: [:0]const u8) void { + if (!enable) return; + ___tracy_emit_frame_mark(name.ptr); +} + +pub inline fn namedFrame(comptime name: [:0]const u8) Frame(name) { + frameMarkStart(name); + return .{}; +} + +pub fn Frame(comptime name: [:0]const u8) type { + return struct { + pub fn end(_: @This()) void { + frameMarkEnd(name); + } + }; +} + +inline fn frameMarkStart(comptime name: [:0]const u8) void { + if (!enable) return; + ___tracy_emit_frame_mark_start(name.ptr); +} + +inline fn frameMarkEnd(comptime name: [:0]const u8) void { + if (!enable) return; + ___tracy_emit_frame_mark_end(name.ptr); +} + +extern fn ___tracy_emit_frame_mark_start(name: [*:0]const u8) void; +extern fn ___tracy_emit_frame_mark_end(name: [*:0]const u8) void; + +inline fn alloc(ptr: [*]u8, len: usize) void { + if (!enable) return; + + if (enable_callstack) { + ___tracy_emit_memory_alloc_callstack(ptr, len, callstack_depth, 0); + } else { + ___tracy_emit_memory_alloc(ptr, len, 0); + } +} + +inline fn allocNamed(ptr: [*]u8, len: usize, comptime name: [:0]const u8) void { + if (!enable) return; + + if (enable_callstack) { + ___tracy_emit_memory_alloc_callstack_named(ptr, len, callstack_depth, 0, name.ptr); + } else { + ___tracy_emit_memory_alloc_named(ptr, len, 0, name.ptr); + } +} + +inline fn free(ptr: [*]u8) void { + if (!enable) return; + + if (enable_callstack) { + ___tracy_emit_memory_free_callstack(ptr, callstack_depth, 0); + } else { + ___tracy_emit_memory_free(ptr, 0); + } +} + +inline fn freeNamed(ptr: [*]u8, comptime name: [:0]const u8) void { + if (!enable) return; + + if (enable_callstack) { + ___tracy_emit_memory_free_callstack_named(ptr, callstack_depth, 0, name.ptr); + } else { + ___tracy_emit_memory_free_named(ptr, 0, name.ptr); + } +} + +extern fn ___tracy_emit_zone_begin( + 
srcloc: *const ___tracy_source_location_data,
+    active: c_int,
+) ___tracy_c_zone_context;
+extern fn ___tracy_emit_zone_begin_callstack(
+    srcloc: *const ___tracy_source_location_data,
+    depth: c_int,
+    active: c_int,
+) ___tracy_c_zone_context;
+extern fn ___tracy_emit_zone_text(ctx: ___tracy_c_zone_context, txt: [*]const u8, size: usize) void;
+extern fn ___tracy_emit_zone_name(ctx: ___tracy_c_zone_context, txt: [*]const u8, size: usize) void;
+extern fn ___tracy_emit_zone_color(ctx: ___tracy_c_zone_context, color: u32) void;
+extern fn ___tracy_emit_zone_value(ctx: ___tracy_c_zone_context, value: u64) void;
+extern fn ___tracy_emit_zone_end(ctx: ___tracy_c_zone_context) void;
+extern fn ___tracy_emit_memory_alloc(ptr: *const anyopaque, size: usize, secure: c_int) void;
+extern fn ___tracy_emit_memory_alloc_callstack(ptr: *const anyopaque, size: usize, depth: c_int, secure: c_int) void;
+extern fn ___tracy_emit_memory_free(ptr: *const anyopaque, secure: c_int) void;
+extern fn ___tracy_emit_memory_free_callstack(ptr: *const anyopaque, depth: c_int, secure: c_int) void;
+extern fn ___tracy_emit_memory_alloc_named(ptr: *const anyopaque, size: usize, secure: c_int, name: [*:0]const u8) void;
+extern fn ___tracy_emit_memory_alloc_callstack_named(ptr: *const anyopaque, size: usize, depth: c_int, secure: c_int, name: [*:0]const u8) void;
+extern fn ___tracy_emit_memory_free_named(ptr: *const anyopaque, secure: c_int, name: [*:0]const u8) void;
+extern fn ___tracy_emit_memory_free_callstack_named(ptr: *const anyopaque, depth: c_int, secure: c_int, name: [*:0]const u8) void;
+extern fn ___tracy_emit_message(txt: [*]const u8, size: usize, callstack: c_int) void;
+extern fn ___tracy_emit_messageL(txt: [*:0]const u8, callstack: c_int) void;
+extern fn ___tracy_emit_messageC(txt: [*]const u8, size: usize, color: u32, callstack: c_int) void;
+extern fn ___tracy_emit_messageLC(txt: [*:0]const u8, color: u32, callstack: c_int) void;
+extern fn ___tracy_emit_frame_mark(name: ?[*:0]const u8) void;
+
+const ___tracy_source_location_data = extern struct {
+    name: ?[*:0]const u8,
+    function: [*:0]const u8,
+    file: [*:0]const u8,
+    line: u32,
+    color: u32,
+};
diff --git a/src/archive/main.zig b/src/archive/main.zig
new file mode 100644
index 000000000000..3da9aa052299
--- /dev/null
+++ b/src/archive/main.zig
@@ -0,0 +1,649 @@
+const std = @import("std");
+const builtin = @import("builtin");
+const build_options = @import("build_options");
+const trace = @import("tracy.zig").trace;
+const fs = std.fs;
+const io = std.io;
+const mem = std.mem;
+const logger = std.log.scoped(.archive_main);
+const process = std.process;
+
+pub const Archive = @import("archive/Archive.zig");
+
+pub const zar_overview =
+    \\Zig Archiver
+    \\
+    \\Usage:
+    \\  zar [options] [-]<operation>[modifiers] [relpos] [count] <archive> [files]
+    \\
+    \\Description:
+    \\  The Zig Archiver is the self-hosted implementation of the ar utility
+    \\  that originated from Unix, created as a drop-in replacement for
+    \\  llvm's implementation of ar (llvm ar).
+    \\
+    \\  For more information on archivers and their usage, see:
+    \\    - https://en.wikipedia.org/wiki/Ar_(Unix)
+    \\    - https://www.freebsd.org/cgi/man.cgi?query=ar&sektion=1
+    \\    - https://llvm.org/docs/CommandGuide/llvm-ar.html
+    \\
+    \\Options:
+    \\  --format=<format>
+    \\      Can be default, gnu, darwin or bsd. This determines the format used to
+    \\      serialise an archive; it is ignored when parsing archives, as the type
+    \\      is always inferred there. When creating an archive the host machine is
+    \\      used to infer the format if one is not specified.
+    \\  --thin
+    \\      Create and modify thin archives. By default archives aren't thin. Thin
+    \\      archives are converted to regular archives when modified without this
+    \\      option.
+    \\  --version
+    \\      Print program version details and exit.
+    \\  -h, --help
+    \\      Print (this) help text and exit.
+    \\
+    \\Ignored for compatibility:
+    \\  --plugin=<plugin>
+    \\
+    \\Operations:
+    \\  r - replace/insert [files] in <archive>, create archive if it does not exist.
+    \\  d - delete [files] from <archive>.
+    \\  m - move [files] in <archive>.
+    \\  p - print contents of files in <archive>.
+    \\  q - quick append [files] to <archive>.
+    \\  s - act as ranlib.
+    \\  t - display filenames in <archive>.
+    \\  x - extract [files] from <archive>.
+    \\  S - show symbols in the <archive>.
+    \\
+    \\Modifiers:
+    \\  a - Put [files] after the archive member named by [relpos]. (r, m)
+    \\  b - Put [files] before the archive member named by [relpos]. (r, m)
+    \\  c - Disable creation warning if inserting files to new archive. (r, q)
+    \\  D - Use zero for timestamps, GIDs and UIDs in archived files (enabled by
+    \\      default). (r, q, s)
+    \\  h - Display this help text and exit. (alias for --help)
+    \\  i - Put [files] before the archive member named by [relpos]. (r, m)
+    \\  l - Ignored for compatibility.
+    \\  L - When quick appending an archive to an archive, append members. (q)
+    \\  N - Delete the [count]th instance of duplicate member with [name]. (d)
+    \\  o - Preserve the archived modification times on extraction. (x)
+    \\  O - Display member offsets inside the archive. (?)
+    \\  P - Use full paths when matching member names. Default for thin archives.
+    \\  r - Create sorted symbol table.
+    \\  R - Do not create sorted symbol table.
+    \\  s - Generate symbol table, enabled by default. (i.e. as if using ranlib)
+    \\  S - Do not generate symbol table.
+    \\  T - Create and modify thin archives. (alias for --thin)
+    \\  u - Only update archive contents if [files] have more recent timestamps
+    \\      than it.
+    \\  U - Use real timestamps, GIDs and UIDs for archived files.
+    \\  v - Print verbose output, depending on operation:
+    \\      S: show file names that symbols belong to.
+    \\  V - Display the version and exit.
+    \\
+    \\Note, in the case of conflicting modifiers, the last one listed always takes
+    \\precedence.
+    \\
+;
+
+pub const ranlib_overview =
+    \\Zig Ranlib
+    \\
+    \\Usage: zar ranlib [options] -[modifiers] <archive>
+    \\
+    \\Options:
+    \\  -v, --version
+    \\      Print program version details and exit.
+    \\  -h, --help
+    \\      Print (this) help text and exit.
+    \\
+    \\Modifiers:
+    \\  D - Use zero for timestamps, GIDs and UIDs in archived files (enabled by default).
+    \\  U - Use real timestamps, GIDs and UIDs for archived files.
+    \\
+    \\Note, in the case of conflicting modifiers, the last one listed always takes precedence.
+ \\ +; + +pub const zar_error_prefix = "\x1B[1;31merror\x1B[0m: "; +pub const ranlib_error_prefix = "\x1B[1;31merror\x1B[0m: "; + +pub const full_zar_error_prefix = zar_overview ++ "\n" ++ zar_error_prefix; +pub const full_ranlib_error_prefix = ranlib_overview ++ "\n" ++ ranlib_error_prefix; + +const version = "0.0.1"; //build_options.version; + +const version_details = + \\zar {s} (https://github.com/moosichu/zar): + \\ zar version {s} + \\ {s} build + \\ default archive type: {s} + \\ host: {s}-{s}-{s} + \\ +; + +pub const full_logging = builtin.mode == .Debug; +pub const debug_errors = builtin.mode == .Debug; +pub const log_level: std.log.Level = if (full_logging) .debug else .warn; + +pub const Mode = enum { ar, ranlib }; + +pub var mode: Mode = .ar; + +fn printHelp(stdout: fs.File.Writer) void { + _ = switch (mode) { + .ar => stdout.print(zar_overview, .{}), + .ranlib => stdout.print(ranlib_overview, .{}), + } catch {}; +} + +fn printVersion(stdout: fs.File.Writer) void { + const target = builtin.target; + const default_archive_type = @tagName(Archive.getDefaultArchiveTypeFromHost()); + stdout.print(version_details, .{ @tagName(mode), version, @tagName(builtin.mode), default_archive_type, @tagName(target.cpu.arch), @tagName(target.os.tag), @tagName(target.abi) }) catch {}; +} + +// For the release standalone program, we just want to display concise errors +// to the end-user, but during development we want them to show up as part of +// the regular logging flow. +pub fn log( + comptime level: std.log.Level, + comptime scope: @TypeOf(.EnumLiteral), + comptime format: []const u8, + args: anytype, +) void { + const scope_prefix = "(" ++ @tagName(scope) ++ "): "; + + const prefix = comptime level.asText() ++ scope_prefix; + + std.debug.getStderrMutex().lock(); + defer std.debug.getStderrMutex().unlock(); + const stderr = std.io.getStdErr().writer(); + if (full_logging) { + nosuspend stderr.print(prefix ++ format ++ "\n", args) catch return; + } else { + if (mode == .ranlib) { + nosuspend stderr.print(ranlib_error_prefix ++ format ++ "\n", args) catch return; + } else { + nosuspend stderr.print(zar_error_prefix ++ format ++ "\n", args) catch return; + } + } +} + +// We want to show program zar_overview if invalid argument combination is passed +// through to the program be the user, we do this often enough that it's worth +// having a procedure for it. 
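+// For example (illustrative invocation, derived from the behaviour above and the
+// expectations in src/archive/test.zig): in a Debug build an invalid invocation
+// such as `zar j` is reported through the scoped logger as
+// "error(archive_main): 'j' is not a valid operation.", whereas a release build
+// prints the full zar_overview text followed by the coloured "error: " prefix
+// and the same message.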
+fn printArgumentError(comptime errorString: []const u8, args: anytype) void {
+    if (full_logging) {
+        logger.err(errorString, args);
+    } else {
+        std.debug.getStderrMutex().lock();
+        defer std.debug.getStderrMutex().unlock();
+        const stderr = std.io.getStdErr().writer();
+        if (mode == .ranlib) {
+            nosuspend stderr.print(full_ranlib_error_prefix ++ errorString ++ "\n", args) catch return;
+        } else {
+            nosuspend stderr.print(full_zar_error_prefix ++ errorString ++ "\n", args) catch return;
+        }
+    }
+}
+
+fn checkOptionalArgsBounds(
+    args: []const []const u8,
+    index: u32,
+    comptime missing_argument: []const u8,
+    comptime for_modifier: []const u8,
+) bool {
+    if (index >= args.len or args[index].len < 1 or args[index][0] == '-') {
+        printArgumentError(missing_argument ++ " must be provided for " ++ for_modifier ++ " modifier.", .{});
+        return false;
+    }
+    return true;
+}
+
+fn openOrCreateFile(cwd: fs.Dir, archive_path: []const u8, print_creation_warning: bool, created: *bool) !fs.File {
+    created.* = false;
+    const open_file_handle = cwd.openFile(archive_path, .{ .mode = .read_write }) catch |err| switch (err) {
+        error.FileNotFound => {
+            created.* = true;
+            if (print_creation_warning) {
+                logger.warn("Creating new archive as none exists at path provided\n", .{});
+            }
+            const create_file_handle = try Archive.handleFileIoError(.creating, archive_path, cwd.createFile(archive_path, .{ .read = true }));
+            return create_file_handle;
+        },
+        else => {
+            return Archive.printFileIoError(.opening, archive_path, err);
+        },
+    };
+    return open_file_handle;
+}
+
+fn processModifier(modifier_char: u8, modifiers: *Archive.Modifiers) bool {
+    // TODO: make sure modifiers are only allowed for their supported mode of
+    // operation!
+    switch (mode) {
+        .ar => switch (modifier_char) {
+            'a' => modifiers.move_setting = .{ .before = null },
+            'b', 'i' => modifiers.move_setting = .{ .after = null },
+            'c' => modifiers.create = true,
+            'D' => modifiers.use_real_timestamps_and_ids = false,
+            'h' => modifiers.help = true,
+            'l' => {}, // ignored for compatibility
+            'L' => modifiers.quick_append_members = true,
+            'N' => modifiers.instance_to_delete = 0,
+            'o' => modifiers.preserve_original_dates = true,
+            'O' => unreachable, // TODO: implement this!
+            'P' => modifiers.use_full_paths_when_matching = true,
+            'r' => modifiers.sort_symbol_table = .set_true,
+            'R' => modifiers.sort_symbol_table = .set_false,
+            's' => modifiers.build_symbol_table = true,
+            'S' => modifiers.build_symbol_table = false,
+            'T' => modifiers.thin_archives = true,
+            'u' => modifiers.update_only = true,
+            'U' => modifiers.use_real_timestamps_and_ids = true,
+            'v' => modifiers.verbose = true,
+            'V' => modifiers.show_version = true,
+            // TODO: Ensure all modifiers we need to handle are handled!
+            else => {
+                printArgumentError("'{c}' is not a valid modifier.", .{modifier_char});
+                return false;
+            },
+        },
+        .ranlib => switch (modifier_char) {
+            'U' => modifiers.use_real_timestamps_and_ids = true,
+            'D' => modifiers.use_real_timestamps_and_ids = false,
+            // ranlib will always prioritise whichever of these modifiers comes first;
+            // this is in contrast to ar, which always shows help over version if it's present.
+            // Matching this specific behaviour may be overkill, but we would rather
+            // aggressively match llvm on this front as much as possible by default.
+            // TODO: write tests for these cases of ordering modifiers
+            'v' => if (!modifiers.help) {
+                modifiers.show_version = true;
+            },
+            'h' => if (!modifiers.show_version) {
+                modifiers.help = true;
+            },
+            else => {
+                printArgumentError("'{c}' is not a valid option.", .{modifier_char});
+                return false;
+            },
+        },
+    }
+    return true;
+}
+
+// pub fn main() anyerror!void {
+//     const tracy = trace(@src());
+//     defer tracy.end();
+
+//     var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+//     defer arena.deinit();
+
+//     var allocator = arena.allocator();
+//     const args = process.argsAlloc(allocator) catch |err| if (debug_errors) {
+//         return err;
+//     } else {
+//         logger.err("Unknown error occurred.", .{});
+//         return;
+//     };
+
+//     const cwd = fs.cwd();
+//     archiveMain(cwd, allocator, args) catch |err| {
+//         handleArchiveError(err) catch |e| if (debug_errors) {
+//             return e;
+//         } else {
+//             logger.err("Unknown error occurred.", .{});
+//         };
+//     };
+// }
+
+pub fn linkAsArchive(archive_path: []const u8, file_names_ptr: []const [*:0]const u8, archive_type: Archive.ArchiveType) !void {
+    var modifiers: Archive.Modifiers = .{};
+    modifiers.build_symbol_table = true;
+    modifiers.create = true;
+
+    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+    defer arena.deinit();
+
+    var allocator = arena.allocator();
+
+    const cwd = fs.cwd();
+
+    var created = false;
+    const file = try openOrCreateFile(cwd, archive_path, !modifiers.create, &created);
+    defer file.close();
+
+    var files = std.ArrayList([]const u8).init(allocator);
+    defer files.deinit();
+    for (file_names_ptr) |file_name_z| {
+        const file_name = file_name_z[0..std.mem.len(file_name_z)];
+        try files.append(file_name);
+    }
+
+    var archive = try Archive.create(cwd, file, archive_path, archive_type, modifiers, created);
+    try archive.parse(allocator);
+    try archive.insertFiles(allocator, files.items);
+    try archive.finalize(allocator);
+}
+
+pub fn archiveMain(cwd: fs.Dir, allocator: anytype, args: []const []const u8) (Archive.UnhandledError || Archive.HandledError)!void {
+    // const tracy_zone = ztracy.zoneNC(@src(), "ArchiveMain", 0x00_ff_00_00, 1);
+    // defer tracy_zone.end();
+
+    // skip the executable name
+    const stdout = io.getStdOut().writer();
+    const stderr = io.getStdErr().writer();
+
+    var arg_index: u32 = 1;
+
+    var archive_type = Archive.ArchiveType.ambiguous;
+
+    // Check if we are in ranlib mode!
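+    // For example (illustrative): `zar ranlib libfoo.a` takes the branch below,
+    // after which the operation defaults to .ranlib and only the ranlib modifier
+    // set handled by processModifier (D, U, v, h) applies.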
+ mode = mode: { + if (arg_index < args.len) { + if (mem.eql(u8, "ranlib", args[arg_index])) { + arg_index = arg_index + 1; + break :mode .ranlib; + } + } + break :mode .ar; + }; + + var modifiers: Archive.Modifiers = .{}; + var operation: Archive.Operation = if (mode == .ranlib) .ranlib else .undefined; + var found_archive_path: ?[]const u8 = null; + var files = std.ArrayList([]const u8).init(allocator); + defer files.deinit(); + while (arg_index < args.len) { + defer arg_index += 1; + + if (arg_index >= args.len) { + break; + } + + var current_arg = args[arg_index]; + { + const format_string_prefix = "--format="; + const plugin_string_prefix = "--plugin="; + const help_string = "--help"; + const version_string = "--version"; + const thin_string = "--thin"; + if (mode == .ar and mem.startsWith(u8, current_arg, format_string_prefix)) { + const format_string = current_arg[format_string_prefix.len..]; + if (mem.eql(u8, format_string, "default")) { + archive_type = Archive.ArchiveType.ambiguous; + } else if (mem.eql(u8, format_string, "bsd")) { + archive_type = .bsd; + } else if (mem.eql(u8, format_string, "darwin")) { + archive_type = .darwin; + } else if (mem.eql(u8, format_string, "gnu")) { + archive_type = .gnu; + } else { + logger.err("Invalid format {s}", .{format_string}); + return Archive.HandledError.UnknownFormat; + } + continue; + } else if (mode == .ar and mem.startsWith(u8, current_arg, plugin_string_prefix)) { + // Ignored for compatability! + continue; + } else if (mode == .ar and mem.eql(u8, current_arg, thin_string)) { + modifiers.thin_archives = true; + continue; + } else if (current_arg.len == 0) { + continue; + } else if (mem.eql(u8, current_arg, help_string)) { + printHelp(stdout); + return; + } else if (mem.eql(u8, current_arg, version_string)) { + printVersion(stdout); + return; + } + } + + var modifier_slice: []const u8 = ""; + if (operation == .undefined) { + operation = operation: { + const operation_slice = slice: { + // the operation may start with a hyphen - so slice it! + var arg_slice = current_arg[0..]; + if (arg_slice[0] == '-') { + if (arg_slice.len == 1) { + printArgumentError("A valid operation must be provided - only hyphen found.", .{}); + return; + } + + arg_slice = arg_slice[1..]; + } + + break :slice arg_slice; + }; + + modifier_slice = operation_slice[1..]; + + // Process Operation + switch (operation_slice[0]) { + 'r' => break :operation .insert, + 'd' => break :operation .delete, + 'm' => break :operation .move, + 'p' => break :operation .print_contents, + 'q' => break :operation .quick_append, + 's' => break :operation .ranlib, + 't' => break :operation .print_names, + 'x' => break :operation .extract, + 'S' => break :operation .print_symbols, + else => { + printArgumentError("'{c}' is not a valid operation.", .{operation_slice[0]}); + return; + }, + } + }; + } else if (current_arg[0] == '-') { + if (current_arg.len > 1) { + modifier_slice = current_arg[1..]; + } + } else if (found_archive_path == null) { + found_archive_path = current_arg; + continue; + } else { + try files.append(current_arg); + continue; + } + + for (modifier_slice) |modifier_char| { + if (!processModifier(modifier_char, &modifiers)) { + return; + } + } + + // TODO: Figure out how to deal with multiple of these following settings! + + // Process [relpos] if needed! + switch (modifiers.move_setting) { + .end => {}, // do nothing! 
+ .before => |before| if (before == null) { + arg_index += 1; + if (!checkOptionalArgsBounds(args, arg_index, "A [relpos]", "a, b or i")) return; + modifiers.move_setting.before = args[arg_index]; + }, + .after => |after| if (after == null) { + arg_index += 1; + if (!checkOptionalArgsBounds(args, arg_index, "A [relpos]", "a, b or i")) return; + modifiers.move_setting.after = args[arg_index]; + }, + } + + // Process [count] if needed! + if (modifiers.instance_to_delete == 0) { + arg_index += 1; + if (!checkOptionalArgsBounds(args, arg_index, "An [count]", "N")) return; + modifiers.instance_to_delete = std.fmt.parseUnsigned(u32, args[arg_index], 10) catch { + logger.err("[count] must be a positive number, received '{s}'.", .{args[arg_index]}); + return; + }; + } + } + + if (modifiers.help) { + printHelp(stdout); + return; + } + + if (modifiers.show_version) { + printVersion(stdout); + return; + } + + if (modifiers.move_setting != .end) { + // TODO: Implement this! + return error.TODO; + } + + if (modifiers.instance_to_delete > 1) { + // TODO: Implement this! + return error.TODO; + } + + if (modifiers.use_full_paths_when_matching) { + // TODO: Implement this! + return error.TODO; + } + + if (modifiers.thin_archives) { + // TODO: support thing archives! + return error.TODO; + } + + if (operation == .undefined) { + logger.err("An operation must be provided.", .{}); + return; + } + + const archive_path = archive_path: { + if (found_archive_path) |archive_path| { + break :archive_path archive_path; + } + + logger.err("An archive must be provided.", .{}); + return; + }; + + switch (operation) { + .insert => { + var created = false; + const file = try openOrCreateFile(cwd, archive_path, !modifiers.create, &created); + defer file.close(); + + var archive = try Archive.create(cwd, file, archive_path, archive_type, modifiers, created); + try archive.parse(allocator); + try archive.insertFiles(allocator, files.items); + try archive.finalize(allocator); + }, + .delete => { + var created = false; + const file = try openOrCreateFile(cwd, archive_path, !modifiers.create, &created); + defer file.close(); + + var archive = try Archive.create(cwd, file, archive_path, archive_type, modifiers, created); + try archive.parse(allocator); + try archive.deleteFiles(files.items); + try archive.finalize(allocator); + }, + .print_names => { + const file = try Archive.handleFileIoError(.opening, archive_path, cwd.openFile(archive_path, .{})); + defer file.close(); + + var archive = try Archive.create(cwd, file, archive_path, archive_type, modifiers, false); + try archive.parse(allocator); + for (archive.files.items) |parsed_file| { + stdout.print("{s}\n", .{parsed_file.name}) catch {}; + } + }, + .print_contents => { + const file = try Archive.handleFileIoError(.opening, archive_path, cwd.openFile(archive_path, .{})); + defer file.close(); + + var archive = try Archive.create(cwd, file, archive_path, archive_type, modifiers, false); + try archive.parse(allocator); + for (archive.files.items) |parsed_file| { + parsed_file.contents.write(stdout, stderr) catch {}; + } + }, + .print_symbols => { + const file = try Archive.handleFileIoError(.opening, archive_path, cwd.openFile(archive_path, .{})); + defer file.close(); + + var archive = try Archive.create(cwd, file, archive_path, archive_type, modifiers, false); + try archive.parse(allocator); + for (archive.symbols.items) |symbol| { + if (modifiers.verbose) { + if (symbol.file_index == Archive.invalid_file_index) { + stdout.print("?: {s}\n", .{symbol.name}) catch {}; + } 
else { + stdout.print("{s}: {s}\n", .{ archive.files.items[symbol.file_index].name, symbol.name }) catch {}; + } + } else { + stdout.print("{s}\n", .{symbol.name}) catch {}; + } + } + }, + .move => { + var created = false; + const file = try openOrCreateFile(cwd, archive_path, !modifiers.create, &created); + defer file.close(); + + var archive = try Archive.create(cwd, file, archive_path, archive_type, modifiers, created); + try archive.parse(allocator); + try archive.moveFiles(files.items); + try archive.finalize(allocator); + }, + .quick_append => { + logger.err("quick append still needs to be implemented!\n", .{}); + // TODO: ensure modifiers.quick_append_members is respected! + return error.TODO; + }, + .ranlib => { + const file = try Archive.handleFileIoError(.opening, archive_path, cwd.openFile(archive_path, .{ .mode = .read_write })); + defer file.close(); + var archive = try Archive.create(cwd, file, archive_path, archive_type, modifiers, false); + try archive.parse(allocator); + try archive.finalize(allocator); + }, + .extract => { + logger.err("extract still needs to be implemented!\n", .{}); + if (modifiers.preserve_original_dates) { + return error.TODO; // make sure this is implemented! + } + return error.TODO; + }, + .undefined => { + // This case is already handled earlier! + unreachable; + }, + } +} + +fn handleArchiveError(err: (Archive.HandledError || Archive.UnhandledError)) !void { + { + // we can ignore these errors because we log context specific + // information about them at the time that they are thrown. + const fields = comptime std.meta.fields(Archive.HandledError); + inline for (fields) |field| { + if (@field(Archive.HandledError, field.name) == err) { + return; + } + } + } + + const unhandled_err: Archive.UnhandledError = @errorCast(err); + + switch (unhandled_err) { + // These are errors which already have appropraite log messages printed + Archive.ParseError.NotArchive => logger.err("Provided file is not an archive.", .{}), + Archive.ParseError.MalformedArchive, Archive.ParseError.Overflow, Archive.ParseError.InvalidCharacter => logger.err("Malformed archive provided.", .{}), + error.OutOfMemory => logger.err("Program ran out of memory.", .{}), + error.TODO => logger.err("Unimplemented feature encountered (TODO error)", .{}), + } + + if (debug_errors) return err; +} diff --git a/src/archive/test.zig b/src/archive/test.zig new file mode 100644 index 000000000000..10109026738d --- /dev/null +++ b/src/archive/test.zig @@ -0,0 +1,620 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const fs = std.fs; +const io = std.io; +const mem = std.mem; +const testing = std.testing; +const logger = std.log.scoped(.tests); +const trace = @import("tracy.zig").trace; +const Allocator = std.mem.Allocator; + +const Archive = @import("archive/Archive.zig"); +const main = @import("main.zig"); + +const path_to_zar = "../../../zig-out/bin/zar"; + +const llvm_ar_archive_name = "llvm-ar-archive.a"; +const zig_ar_archive_name = "zig-ar-archive.a"; + +const no_files = [_][]const u8{}; +const no_symbols = [_][][]const u8{}; +const no_dir = "test/data/none"; + +// Testing TODOs: +// - Create symbol comparison tests (generate source files procedurally) +// - Add testing matrices for testing different combinations of arguments & modifiers +// - Create end-to-end tests that check stdout of parsing functionality (not just archive generation) +// Including variatns of modifiers +// - Create end-to-end tests that check extracted files are the same +// - Create "stress" tests 
that go beyond the basic tests & auto-generate a massive amount of +// archive input that can be tested against. +// - Test the failure cases (and see how we handle them) +// - Add parsing tests using archives created by native archivers on appropriate platforms +// - Fuzz test +// - Test weird combinations and try to match llvm-ar output +// - Test multiple os/format combinations (i.e. bsd style archives) +// - Test ranlib functionality +// - Test bad inputs/ +// - Test performance +// - Don't redo work between tests (compiling same files, running llvm ar multiple times). + +// Allows us to invoke zar as a program, just to really confirm it works +// end-to-end. +const always_invoke_zar_as_child_process = false; + +test "Test Archive Text Basic" { + const test1_dir = "test/data/test1"; + const test1_names = [_][]const u8{ "input1.txt", "input2.txt" }; + const test1_symbols = no_symbols; + var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); + defer arena.deinit(); + try doStandardTests(arena.allocator(), test1_dir, &test1_names, &test1_symbols); +} + +test "Test Archive Text With Long Filenames" { + // Due to the fixed-size limits for filenames in the standard ar format, + // this tests that the different ar-type specific extensions for dealing + // with that properly work. + const test2_dir = "test/data/test2"; + const test2_names = [_][]const u8{ "input1.txt", "input2.txt", "input3_that_is_also_a_much_longer_file_name.txt", "input4_that_is_also_a_much_longer_file_name.txt" }; + const test2_symbols = no_symbols; + var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); + defer arena.deinit(); + try doStandardTests(arena.allocator(), test2_dir, &test2_names, &test2_symbols); +} + +test "Test Archive With Symbols Basic" { + const test4_names = [_][]const u8{"input1.o"}; + const test4_symbols = [_][]const []const u8{ + &[_][]const u8{ "input1_symbol1", "input1_symbol2" }, + }; + var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); + defer arena.deinit(); + try doStandardTests(arena.allocator(), no_dir, &test4_names, &test4_symbols); +} + +test "Test Archive With Long Names And Symbols" { + const test5_names = [_][]const u8{ "input1.o", "input2.o", "input3_that_is_also_a_much_longer_file_name.o" }; + const test5_symbols = [_][]const []const u8{ + &[_][]const u8{ "input1_symbol1", "input1_symbol2" }, + &[_][]const u8{ "input2_symbol1", "input2_symbol2_that_is_also_longer_symbol", "input2_symbol3" }, + &[_][]const u8{ "input3_that_is_also_a_much_longer_file_name_symbol1", "input3_symbol2_that_is_also_longer_symbol", "input3_symbol3_that_is_also_longer_symbol" }, + }; + var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); + defer arena.deinit(); + try doStandardTests(arena.allocator(), no_dir, &test5_names, &test5_symbols); +} + +test "Test Archive Stress Test" { + // Generate 55 different files with an arbitrary number of symbols + const test6_filecount = 55; + const test6_symcount = 15; + var test6_names: [test6_filecount][]u8 = undefined; + var test6_symbols: [test6_filecount][][]u8 = undefined; + var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); + defer arena.deinit(); + try initialiseTestData(arena.allocator(), &test6_names, &test6_symbols, test6_symcount); + try doStandardTests(arena.allocator(), no_dir, &test6_names, &test6_symbols); +} + +test "Test Archive Sorted" { + // Confirm that our archives default files & their symbols in the correct way + // for each target. 
+ const test_sort_names = [_][]const u8{ "dddd.o", "eeee.o", "ccccc.o", "aaaaaaaa.o", "aa.o", "cccc.o", "aaaa.o", "bbbb.o", "cc.o", "bb.o", "zz.o" }; + const test_sort = [_][]const []const u8{ + &[_][]const u8{ "ddd", "aaa" }, + &[_][]const u8{ "cccc", "ddd", "aaaa" }, + &[_][]const u8{ "z", "aa", "a" }, + &[_][]const u8{ "agsg", "ssss", "aaaa" }, + &[_][]const u8{ "_1_2_3", "__1", "_00000" }, + &[_][]const u8{ "AA", "aa", "BB" }, + &[_][]const u8{ "aa", "AA", "BB" }, + &[_][]const u8{ "BB", "AA", "aa" }, + &[_][]const u8{ "_123", "_22", "_12" }, + &[_][]const u8{ "bB", "aB", "cB" }, + &[_][]const u8{ "_11", "_12", "_13" }, + }; + // TODO: remove redundancy maybe by excluding parsing component of this test? + var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); + defer arena.deinit(); + try doStandardTests(arena.allocator(), no_dir, &test_sort_names, &test_sort); +} + +test "Test Argument Errors" { + if (builtin.target.os.tag == .windows) { + return; + } + const allocator = std.testing.allocator; + var test_dir_info = try TestDirInfo.getInfo(); + defer test_dir_info.cleanup(); + + var argv = std.ArrayList([]const u8).init(allocator); + defer argv.deinit(); + try argv.append(path_to_zar); + + { + try argv.resize(1); + const expected_out: ExpectedOut = .{ + .stderr = "error(archive_main): An operation must be provided.\n", + }; + + try invokeZar(allocator, argv.items, test_dir_info, expected_out); + } + + { + try argv.resize(1); + try argv.append("j"); + const expected_out: ExpectedOut = .{ + .stderr = "error(archive_main): 'j' is not a valid operation.\n", + }; + + try invokeZar(allocator, argv.items, test_dir_info, expected_out); + } + + { + try argv.resize(1); + try argv.append("rj"); + const expected_out: ExpectedOut = .{ + .stderr = "error(archive_main): 'j' is not a valid modifier.\n", + }; + + try invokeZar(allocator, argv.items, test_dir_info, expected_out); + } +} + +fn initialiseTestData(allocator: Allocator, file_names: [][]const u8, symbol_names: [][][]const u8, symbol_count: u32) !void { + for (file_names, 0..) |_, index| { + file_names[index] = try std.fmt.allocPrint(allocator, "index_{}.o", .{index}); + } + for (symbol_names, 0..) |_, file_index| { + symbol_names[file_index] = try allocator.alloc([]u8, symbol_count); + for (symbol_names[file_index], 0..) 
|_, symbol_index| { + symbol_names[file_index][symbol_index] = try std.fmt.allocPrint(allocator, "symbol_{}_file_{}", .{ symbol_index, file_index }); + } + } + return; +} + +const targets = result: { + const os_fields = std.meta.fields(OperatingSystem); + const arch_fields = std.meta.fields(Architecture); + comptime var aggregator: [os_fields.len * arch_fields.len]Target = undefined; + comptime var target_index = 0; + inline for (os_fields) |os_field| { + inline for (arch_fields) |arch_field| { + aggregator[target_index] = .{ + .architecture = @as(Architecture, @enumFromInt(arch_field.value)), + .operating_system = @as(OperatingSystem, @enumFromInt(os_field.value)), + }; + target_index += 1; + } + } + break :result aggregator; +}; + +const Target = struct { + architecture: Architecture, + operating_system: OperatingSystem, + + fn targetToArgument(comptime target: Target) []const u8 { + return @tagName(target.architecture) ++ "-" ++ @tagName(target.operating_system); + } +}; + +const OperatingSystem = enum { + linux, + macos, + freebsd, + // windows, + + fn toDefaultLlvmFormat(operating_system: OperatingSystem) LlvmFormat { + return switch (operating_system) { + .linux => .gnu, + .macos => .darwin, + .freebsd => .gnu, + }; + } +}; + +const Architecture = enum { + aarch64, + x86_64, +}; + +const llvm_formats = result: { + const fields = std.meta.fields(LlvmFormat); + comptime var aggregator: [fields.len]LlvmFormat = undefined; + inline for (fields, 0..) |field, field_index| { + aggregator[field_index] = @as(LlvmFormat, @enumFromInt(field.value)); + } + break :result aggregator; +}; + +const LlvmFormat = enum { + gnu, + bsd, + darwin, + implicit, + + fn llvmFormatToArgument(comptime format: LlvmFormat) []const u8 { + switch (format) { + .gnu => return "--format=gnu", + .bsd => return "--format=bsd", + .darwin => return "--format=darwin", + .implicit => return "", + } + } +}; + +const TestDirInfo = struct { + tmp_dir: std.testing.TmpDir, + cwd: []const u8, + + pub fn getInfo() !TestDirInfo { + var result: TestDirInfo = .{ + .tmp_dir = std.testing.tmpDir(.{}), + .cwd = undefined, + }; + result.cwd = try std.fs.path.join(std.testing.allocator, &[_][]const u8{ + "zig-cache", "tmp", &result.tmp_dir.sub_path, + }); + return result; + } + + pub fn cleanup(self: *TestDirInfo) void { + self.tmp_dir.cleanup(); + std.testing.allocator.free(self.cwd); + } +}; + +pub fn doStandardTests(framework_allocator: Allocator, comptime test_dir_path: []const u8, file_names: []const []const u8, symbol_names: []const []const []const u8) !void { + const tracy = trace(@src()); + defer tracy.end(); + const operation = "rc"; + + inline for (targets) |target| { + var test_dir_info = try TestDirInfo.getInfo(); + // if a test is going to fail anyway, this is a useful way to debug it for now.. + var cancel_cleanup = false; + defer if (!cancel_cleanup) test_dir_info.cleanup(); + errdefer { + cancel_cleanup = true; + logger.err("Failed to do archiving operation with files for target: {s}", .{target.targetToArgument()}); + } + + // Create an archive with llvm ar & zar and confirm that the outputs match + // byte-for-byte. 
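+        // Sketch of the flow below (an illustrative summary): copy the test assets
+        // into a fresh temp dir, build any object files needed for this target,
+        // archive them with llvm-ar using the "rc" operation, check zar can parse
+        // the result, then create the same archive with zar and compare the two
+        // outputs byte-for-byte (plus the symbol-stripping/ranlib round trip).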
+ try copyAssetsToTestDirectory(test_dir_path, file_names, test_dir_info); + const llvm_format = comptime target.operating_system.toDefaultLlvmFormat(); + try generateCompiledFilesWithSymbols(framework_allocator, target, file_names, symbol_names, test_dir_info); + { + errdefer { + logger.err("Tests failed with explicitly provided archive format: {}", .{llvm_format}); + } + // Create an archive explicitly with the format for the target operating system + try doLlvmArchiveOperation(llvm_format, operation, file_names, test_dir_info); + try testParsingOfLlvmGeneratedArchive(target, framework_allocator, llvm_format, file_names, symbol_names, test_dir_info); + try testArchiveCreation(llvm_format, file_names, test_dir_info); + try testSymbolStrippingAndRanlib(test_dir_info); + try test_dir_info.tmp_dir.dir.deleteFile(zig_ar_archive_name); + try test_dir_info.tmp_dir.dir.deleteFile(llvm_ar_archive_name); + } + + { + errdefer { + logger.err("Tests failed with implicit archive format", .{}); + } + // Create an archive implicitly with the format for the target operating system + try doLlvmArchiveOperation(.implicit, operation, file_names, test_dir_info); + try testParsingOfLlvmGeneratedArchive(target, framework_allocator, .implicit, file_names, symbol_names, test_dir_info); + try testArchiveCreation(.implicit, file_names, test_dir_info); + try testSymbolStrippingAndRanlib(test_dir_info); + try test_dir_info.tmp_dir.dir.deleteFile(zig_ar_archive_name); + try test_dir_info.tmp_dir.dir.deleteFile(llvm_ar_archive_name); + } + } +} + +fn testSymbolStrippingAndRanlib(test_dir_info: TestDirInfo) !void { + const tracy = trace(@src()); + defer tracy.end(); + { + errdefer { + logger.err("Failed symbol stripping", .{}); + } + const operation = "rS"; + try doZarArchiveOperation(.implicit, operation, &no_files, test_dir_info); + try doLlvmArchiveOperation(.implicit, operation, &no_files, test_dir_info); + + try compareGeneratedArchives(test_dir_info); + } + + { + errdefer { + logger.err("Failed acting as ranlib", .{}); + } + const operation = "s"; + try doZarArchiveOperation(.implicit, operation, &no_files, test_dir_info); + try doLlvmArchiveOperation(.implicit, operation, &no_files, test_dir_info); + + try compareGeneratedArchives(test_dir_info); + } +} + +fn testArchiveCreation(comptime format: LlvmFormat, file_names: []const []const u8, test_dir_info: TestDirInfo) !void { + const tracy = trace(@src()); + defer tracy.end(); + + errdefer { + logger.err("Failed to create an archive with zar that matches llvm with target format {}", .{format}); + } + const operation = "rc"; + try doZarArchiveOperation(format, operation, file_names, test_dir_info); + try compareGeneratedArchives(test_dir_info); +} + +fn testParsingOfLlvmGeneratedArchive(target: Target, framework_allocator: Allocator, comptime format: LlvmFormat, file_names: []const []const u8, symbol_names: []const []const []const u8, test_dir_info: TestDirInfo) !void { + errdefer { + logger.err("Failed to get zar to parse the file generated with format {}", .{format}); + } + + try testArchiveParsing(target, framework_allocator, test_dir_info, file_names, symbol_names); +} + +fn compareGeneratedArchives(test_dir_info: TestDirInfo) !void { + const tracy = trace(@src()); + defer tracy.end(); + const allocator = std.testing.allocator; + const llvm_ar_file_handle = try test_dir_info.tmp_dir.dir.openFile(llvm_ar_archive_name, .{ .mode = .read_only }); + defer llvm_ar_file_handle.close(); + const zig_ar_file_handle = try test_dir_info.tmp_dir.dir.openFile(zig_ar_archive_name, 
.{ .mode = .read_only }); + defer zig_ar_file_handle.close(); + + const llvm_ar_stat = try llvm_ar_file_handle.stat(); + const zig_ar_stat = try zig_ar_file_handle.stat(); + + try testing.expectEqual(llvm_ar_stat.size, zig_ar_stat.size); + + const llvm_ar_buffer = try allocator.alloc(u8, llvm_ar_stat.size); + const zig_ar_buffer = try allocator.alloc(u8, zig_ar_stat.size); + defer allocator.free(llvm_ar_buffer); + defer allocator.free(zig_ar_buffer); + + { + const llvm_ar_read = try llvm_ar_file_handle.preadAll(llvm_ar_buffer, 0); + try testing.expectEqual(llvm_ar_read, llvm_ar_stat.size); + } + + { + const zig_ar_read = try zig_ar_file_handle.preadAll(zig_ar_buffer, 0); + try testing.expectEqual(zig_ar_read, zig_ar_stat.size); + } + + for (llvm_ar_buffer, 0..) |llvm_ar_byte, index| { + const zig_ar_byte = zig_ar_buffer[index]; + try testing.expectEqual(llvm_ar_byte, zig_ar_byte); + } +} + +fn testArchiveParsing(target: Target, framework_allocator: Allocator, test_dir_info: TestDirInfo, file_names: []const []const u8, symbol_names: []const []const []const u8) !void { + const tracy = trace(@src()); + defer tracy.end(); + const test_dir = test_dir_info.tmp_dir.dir; + + const archive_file = try test_dir.openFile(llvm_ar_archive_name, .{ .mode = .read_only }); + defer archive_file.close(); + + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var testing_allocator = arena.allocator(); + + var archive = try Archive.create(test_dir, archive_file, llvm_ar_archive_name, Archive.ArchiveType.ambiguous, .{}, false); + try archive.parse(testing_allocator); + + var memory_buffer = try framework_allocator.alloc(u8, 1024 * 1024); + defer framework_allocator.free(memory_buffer); + for (file_names, 0..) |file_name, index| { + try testing.expectEqualStrings(file_name, archive.files.items[index].name); + const file = try test_dir.openFile(file_name, .{}); + defer file.close(); + + const reader = file.reader(); + + var current_start_pos: u64 = 0; + while (true) { + const num_read = try reader.read(memory_buffer); + if (num_read == 0) { + break; + } + try testing.expectEqualStrings(archive.files.items[index].contents.bytes[current_start_pos .. current_start_pos + num_read], memory_buffer[0..num_read]); + current_start_pos = current_start_pos + num_read; + } + } + + if (target.operating_system == .macos) { + // TODO: darwin files are sorted by default, we need to make sure our + // test can account for this! + return; + } + + var current_index = @as(u32, 0); + for (symbol_names, 0..) 
|symbol_names_in_file, file_index| { + for (symbol_names_in_file) |symbol_name| { + const parsed_symbol = archive.symbols.items[current_index]; + var parsed_symbol_name = parsed_symbol.name; + // darwin targets will prepend symbol names with underscores + if (target.operating_system == .macos) { + try testing.expectEqual(parsed_symbol_name[0], '_'); + parsed_symbol_name = parsed_symbol_name[1..parsed_symbol_name.len]; + } + try testing.expectEqualStrings(parsed_symbol_name, symbol_name); + try testing.expectEqualStrings(archive.files.items[parsed_symbol.file_index].name, file_names[file_index]); + current_index = current_index + 1; + } + } +} + +fn copyAssetsToTestDirectory(comptime test_src_dir_path: []const u8, file_names: []const []const u8, test_dir_info: TestDirInfo) !void { + const tracy = trace(@src()); + defer tracy.end(); + var test_src_dir = fs.cwd().openDir(test_src_dir_path, .{}) catch |err| switch (err) { + error.FileNotFound => return, + else => return err, + }; + defer test_src_dir.close(); + + for (file_names) |test_file| { + std.fs.Dir.copyFile(test_src_dir, test_file, test_dir_info.tmp_dir.dir, test_file, .{}) catch |err| switch (err) { + error.FileNotFound => continue, + else => return err, + }; + } +} + +const ExpectedOut = struct { + stdout: ?[]const u8 = null, + stderr: ?[]const u8 = null, +}; + +fn invokeZar(allocator: mem.Allocator, arguments: []const []const u8, test_dir_info: TestDirInfo, expected_out: ExpectedOut) !void { + // arguments[0] must be path_to_zar + var invoke_as_child_process = always_invoke_zar_as_child_process; + // At the moment it's easiest to verify the output of stdout/stderr by launching + // zar as a child process, so just doing it like this for now. + invoke_as_child_process = invoke_as_child_process or expected_out.stderr != null; + invoke_as_child_process = invoke_as_child_process or expected_out.stdout != null; + if (invoke_as_child_process) { + const result = try std.ChildProcess.exec(.{ + .allocator = allocator, + .argv = arguments, + .cwd = test_dir_info.cwd, + }); + + defer { + allocator.free(result.stdout); + allocator.free(result.stderr); + } + + if (expected_out.stdout) |expected_stdout| { + try testing.expectEqualStrings(expected_stdout, result.stdout); + } + if (expected_out.stderr) |expected_stderr| { + try testing.expectEqualStrings(expected_stderr, result.stderr); + } + } else { + // TODO: don't deinit testing allocator here so that we can confirm + // the archiver does everything by the books? 
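+ // Otherwise run the archiver in-process with an arena; errors are swallowed here and show up later when the generated archives are compared.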
+ var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + main.archiveMain(test_dir_info.tmp_dir.dir, arena.allocator(), arguments) catch {}; + } +} + +fn doZarArchiveOperation(comptime format: LlvmFormat, comptime operation: []const u8, file_names: []const []const u8, test_dir_info: TestDirInfo) !void { + const tracy = trace(@src()); + defer tracy.end(); + const allocator = std.testing.allocator; + + var argv = std.ArrayList([]const u8).init(allocator); + defer argv.deinit(); + + try argv.append(path_to_zar); + try argv.append(format.llvmFormatToArgument()); + + try argv.append(operation); + try argv.append(zig_ar_archive_name); + try argv.appendSlice(file_names); + + try invokeZar(allocator, argv.items, test_dir_info, .{}); +} + +fn doLlvmArchiveOperation(comptime format: LlvmFormat, comptime operation: []const u8, file_names: []const []const u8, test_dir_info: TestDirInfo) !void { + errdefer { + logger.err("Failed to run llvm ar operation {s} with the provided format: {}", .{ operation, format }); + } + const tracy = trace(@src()); + defer tracy.end(); + const allocator = std.testing.allocator; + var argv = std.ArrayList([]const u8).init(allocator); + defer argv.deinit(); + + try argv.append("zig"); + try argv.append("ar"); + try argv.append(format.llvmFormatToArgument()); + + try argv.append(operation); + try argv.append(llvm_ar_archive_name); + try argv.appendSlice(file_names); + + const result = try std.ChildProcess.exec(.{ + .allocator = allocator, + .argv = argv.items, + .cwd = test_dir_info.cwd, + }); + + defer { + allocator.free(result.stdout); + allocator.free(result.stderr); + } +} + +fn generateCompiledFilesWithSymbols(framework_allocator: Allocator, comptime target: Target, file_names: []const []const u8, symbol_names: []const []const []const u8, test_dir_info: TestDirInfo) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const worker_count = @max(1, std.Thread.getCpuCount() catch 1); + const child_processes = try framework_allocator.alloc(std.ChildProcess, worker_count); + + var argv = std.ArrayList([]const u8).init(framework_allocator); + defer argv.deinit(); + try argv.append("zig"); + try argv.append("cc"); + try argv.append("-c"); + try argv.append("-o"); + const file_name_arg = argv.items.len; + try argv.append(""); + const source_name_arg = argv.items.len; + try argv.append(""); + try argv.append("-target"); + // TODO: Test other target triples with appropriate corresponding archive format! + try argv.append(target.targetToArgument()); + + for (symbol_names, 0..) |file_symbols, index| { + const process_index = @mod(index, child_processes.len); + if (index >= child_processes.len) { + // TODO: read results etc. 
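+ // All worker slots are occupied at this point, so block on the compile that currently owns this slot before reusing it.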
+ _ = try child_processes[process_index].wait(); + } + + const file_name = file_names[index]; + const source_file_name = try std.fmt.allocPrint(framework_allocator, "{s}.c", .{file_name}); + defer framework_allocator.free(source_file_name); + { + const source_file = try test_dir_info.tmp_dir.dir.createFile(source_file_name, .{}); + defer source_file.close(); + + const writer = source_file.writer(); + for (file_symbols) |symbol| { + try writer.print("extern int {s}(int a) {{ return a; }}\n", .{symbol}); + } + } + + argv.items[file_name_arg] = file_name; + argv.items[source_name_arg] = source_file_name; + + child_processes[process_index] = std.ChildProcess.init(argv.items, framework_allocator); + child_processes[process_index].cwd = test_dir_info.cwd; + try child_processes[process_index].spawn(); + } + + { + var process_index: u32 = 0; + while (process_index < symbol_names.len and process_index < child_processes.len) { + // TODO: read results etc. + _ = try child_processes[process_index].wait(); + process_index += 1; + } + } +} diff --git a/src/archive/tracking_buffered_writer.zig b/src/archive/tracking_buffered_writer.zig new file mode 100644 index 000000000000..75e2dedaf4a5 --- /dev/null +++ b/src/archive/tracking_buffered_writer.zig @@ -0,0 +1,32 @@ +const std = @import("std"); +const assert = std.debug.assert; +const mem = std.mem; + +// This exists to allow us to track the current file position through a buffered +// writer as we need to keep track of this information for padding reasons when +// archiving. +pub fn TrackingBufferedWriter( + comptime BufferedWriter: type, +) type { + return struct { + pub const Error = BufferedWriter.Error; + buffered_writer: BufferedWriter, + file_pos: usize = 0, + const Self = @This(); + const Writer = std.io.Writer(*Self, Error, write); + + pub fn write(self: *Self, bytes: []const u8) BufferedWriter.Error!usize { + const file_pos_change = try self.buffered_writer.write(bytes); + self.file_pos += file_pos_change; + return file_pos_change; + } + + pub fn writer(self: *Self) Writer { + return .{ .context = self }; + } + + pub fn flush(self: *Self) !void { + try self.buffered_writer.flush(); + } + }; +} diff --git a/src/archive/tracy.zig b/src/archive/tracy.zig new file mode 100644 index 000000000000..6fdc1ffb92b7 --- /dev/null +++ b/src/archive/tracy.zig @@ -0,0 +1,308 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const build_options = @import("build_options"); + +pub const enable = build_options.enable_tracy; +pub const enable_allocation = enable and build_options.enable_tracy_allocation; +pub const enable_callstack = enable and build_options.enable_tracy_callstack; + +// TODO: make this configurable +const callstack_depth = 10; + +const ___tracy_c_zone_context = extern struct { + id: u32, + active: c_int, + + pub inline fn end(self: @This()) void { + ___tracy_emit_zone_end(self); + } + + pub inline fn addText(self: @This(), text: []const u8) void { + ___tracy_emit_zone_text(self, text.ptr, text.len); + } + + pub inline fn setName(self: @This(), name: []const u8) void { + ___tracy_emit_zone_name(self, name.ptr, name.len); + } + + pub inline fn setColor(self: @This(), color: u32) void { + ___tracy_emit_zone_color(self, color); + } + + pub inline fn setValue(self: @This(), value: u64) void { + ___tracy_emit_zone_value(self, value); + } +}; + +pub const Ctx = if (enable) ___tracy_c_zone_context else struct { + pub inline fn end(self: @This()) void { + _ = self; + } + + pub inline fn addText(self: @This(), text: []const u8) void { + 
_ = self; + _ = text; + } + + pub inline fn setName(self: @This(), name: []const u8) void { + _ = self; + _ = name; + } + + pub inline fn setColor(self: @This(), color: u32) void { + _ = self; + _ = color; + } + + pub inline fn setValue(self: @This(), value: u64) void { + _ = self; + _ = value; + } +}; + +pub inline fn trace(comptime src: std.builtin.SourceLocation) Ctx { + if (!enable) return .{}; + + if (enable_callstack) { + return ___tracy_emit_zone_begin_callstack(&.{ + .name = null, + .function = src.fn_name.ptr, + .file = src.file.ptr, + .line = src.line, + .color = 0, + }, callstack_depth, 1); + } else { + return ___tracy_emit_zone_begin(&.{ + .name = null, + .function = src.fn_name.ptr, + .file = src.file.ptr, + .line = src.line, + .color = 0, + }, 1); + } +} + +pub inline fn traceNamed(comptime src: std.builtin.SourceLocation, comptime name: [:0]const u8) Ctx { + if (!enable) return .{}; + + if (enable_callstack) { + return ___tracy_emit_zone_begin_callstack(&.{ + .name = name.ptr, + .function = src.fn_name.ptr, + .file = src.file.ptr, + .line = src.line, + .color = 0, + }, callstack_depth, 1); + } else { + return ___tracy_emit_zone_begin(&.{ + .name = name.ptr, + .function = src.fn_name.ptr, + .file = src.file.ptr, + .line = src.line, + .color = 0, + }, 1); + } +} + +pub fn tracyAllocator(allocator: std.mem.Allocator) TracyAllocator(null) { + return TracyAllocator(null).init(allocator); +} + +pub fn TracyAllocator(comptime name: ?[:0]const u8) type { + return struct { + parent_allocator: std.mem.Allocator, + + const Self = @This(); + + pub fn init(parent_allocator: std.mem.Allocator) Self { + return .{ + .parent_allocator = parent_allocator, + }; + } + + pub fn allocator(self: *Self) std.mem.Allocator { + return std.mem.Allocator.init(self, allocFn, resizeFn, freeFn); + } + + fn allocFn(self: *Self, len: usize, ptr_align: u29, len_align: u29, ret_addr: usize) std.mem.Allocator.Error![]u8 { + const result = self.parent_allocator.rawAlloc(len, ptr_align, len_align, ret_addr); + if (result) |data| { + if (data.len != 0) { + if (name) |n| { + allocNamed(data.ptr, data.len, n); + } else { + alloc(data.ptr, data.len); + } + } + } else |_| { + messageColor("allocation failed", 0xFF0000); + } + return result; + } + + fn resizeFn(self: *Self, buf: []u8, buf_align: u29, new_len: usize, len_align: u29, ret_addr: usize) ?usize { + if (self.parent_allocator.rawResize(buf, buf_align, new_len, len_align, ret_addr)) |resized_len| { + if (name) |n| { + freeNamed(buf.ptr, n); + allocNamed(buf.ptr, resized_len, n); + } else { + free(buf.ptr); + alloc(buf.ptr, resized_len); + } + + return resized_len; + } + + // during normal operation the compiler hits this case thousands of times due to this + // emitting messages for it is both slow and causes clutter + return null; + } + + fn freeFn(self: *Self, buf: []u8, buf_align: u29, ret_addr: usize) void { + self.parent_allocator.rawFree(buf, buf_align, ret_addr); + // this condition is to handle free being called on an empty slice that was never even allocated + // example case: `std.process.getSelfExeSharedLibPaths` can return `&[_][:0]u8{}` + if (buf.len != 0) { + if (name) |n| { + freeNamed(buf.ptr, n); + } else { + free(buf.ptr); + } + } + } + }; +} + +// This function only accepts comptime known strings, see `messageCopy` for runtime strings +pub inline fn message(comptime msg: [:0]const u8) void { + if (!enable) return; + ___tracy_emit_messageL(msg.ptr, if (enable_callstack) callstack_depth else 0); +} + +// This function only accepts comptime 
known strings, see `messageColorCopy` for runtime strings +pub inline fn messageColor(comptime msg: [:0]const u8, color: u32) void { + if (!enable) return; + ___tracy_emit_messageLC(msg.ptr, color, if (enable_callstack) callstack_depth else 0); +} + +pub inline fn messageCopy(msg: []const u8) void { + if (!enable) return; + ___tracy_emit_message(msg.ptr, msg.len, if (enable_callstack) callstack_depth else 0); +} + +pub inline fn messageColorCopy(msg: [:0]const u8, color: u32) void { + if (!enable) return; + ___tracy_emit_messageC(msg.ptr, msg.len, color, if (enable_callstack) callstack_depth else 0); +} + +pub inline fn frameMark() void { + if (!enable) return; + ___tracy_emit_frame_mark(null); +} + +pub inline fn frameMarkNamed(comptime name: [:0]const u8) void { + if (!enable) return; + ___tracy_emit_frame_mark(name.ptr); +} + +pub inline fn namedFrame(comptime name: [:0]const u8) Frame(name) { + frameMarkStart(name); + return .{}; +} + +pub fn Frame(comptime name: [:0]const u8) type { + return struct { + pub fn end(_: @This()) void { + frameMarkEnd(name); + } + }; +} + +inline fn frameMarkStart(comptime name: [:0]const u8) void { + if (!enable) return; + ___tracy_emit_frame_mark_start(name.ptr); +} + +inline fn frameMarkEnd(comptime name: [:0]const u8) void { + if (!enable) return; + ___tracy_emit_frame_mark_end(name.ptr); +} + +extern fn ___tracy_emit_frame_mark_start(name: [*:0]const u8) void; +extern fn ___tracy_emit_frame_mark_end(name: [*:0]const u8) void; + +inline fn alloc(ptr: [*]u8, len: usize) void { + if (!enable) return; + + if (enable_callstack) { + ___tracy_emit_memory_alloc_callstack(ptr, len, callstack_depth, 0); + } else { + ___tracy_emit_memory_alloc(ptr, len, 0); + } +} + +inline fn allocNamed(ptr: [*]u8, len: usize, comptime name: [:0]const u8) void { + if (!enable) return; + + if (enable_callstack) { + ___tracy_emit_memory_alloc_callstack_named(ptr, len, callstack_depth, 0, name.ptr); + } else { + ___tracy_emit_memory_alloc_named(ptr, len, 0, name.ptr); + } +} + +inline fn free(ptr: [*]u8) void { + if (!enable) return; + + if (enable_callstack) { + ___tracy_emit_memory_free_callstack(ptr, callstack_depth, 0); + } else { + ___tracy_emit_memory_free(ptr, 0); + } +} + +inline fn freeNamed(ptr: [*]u8, comptime name: [:0]const u8) void { + if (!enable) return; + + if (enable_callstack) { + ___tracy_emit_memory_free_callstack_named(ptr, callstack_depth, 0, name.ptr); + } else { + ___tracy_emit_memory_free_named(ptr, 0, name.ptr); + } +} + +extern fn ___tracy_emit_zone_begin( + srcloc: *const ___tracy_source_location_data, + active: c_int, +) ___tracy_c_zone_context; +extern fn ___tracy_emit_zone_begin_callstack( + srcloc: *const ___tracy_source_location_data, + depth: c_int, + active: c_int, +) ___tracy_c_zone_context; +extern fn ___tracy_emit_zone_text(ctx: ___tracy_c_zone_context, txt: [*]const u8, size: usize) void; +extern fn ___tracy_emit_zone_name(ctx: ___tracy_c_zone_context, txt: [*]const u8, size: usize) void; +extern fn ___tracy_emit_zone_color(ctx: ___tracy_c_zone_context, color: u32) void; +extern fn ___tracy_emit_zone_value(ctx: ___tracy_c_zone_context, value: u64) void; +extern fn ___tracy_emit_zone_end(ctx: ___tracy_c_zone_context) void; +extern fn ___tracy_emit_memory_alloc(ptr: *const anyopaque, size: usize, secure: c_int) void; +extern fn ___tracy_emit_memory_alloc_callstack(ptr: *const anyopaque, size: usize, depth: c_int, secure: c_int) void; +extern fn ___tracy_emit_memory_free(ptr: *const anyopaque, secure: c_int) void; +extern fn 
___tracy_emit_memory_free_callstack(ptr: *const anyopaque, depth: c_int, secure: c_int) void; +extern fn ___tracy_emit_memory_alloc_named(ptr: *const anyopaque, size: usize, secure: c_int, name: [*:0]const u8) void; +extern fn ___tracy_emit_memory_alloc_callstack_named(ptr: *const anyopaque, size: usize, depth: c_int, secure: c_int, name: [*:0]const u8) void; +extern fn ___tracy_emit_memory_free_named(ptr: *const anyopaque, secure: c_int, name: [*:0]const u8) void; +extern fn ___tracy_emit_memory_free_callstack_named(ptr: *const anyopaque, depth: c_int, secure: c_int, name: [*:0]const u8) void; +extern fn ___tracy_emit_message(txt: [*]const u8, size: usize, callstack: c_int) void; +extern fn ___tracy_emit_messageL(txt: [*:0]const u8, callstack: c_int) void; +extern fn ___tracy_emit_messageC(txt: [*]const u8, size: usize, color: u32, callstack: c_int) void; +extern fn ___tracy_emit_messageLC(txt: [*:0]const u8, color: u32, callstack: c_int) void; +extern fn ___tracy_emit_frame_mark(name: ?[*:0]const u8) void; + +const ___tracy_source_location_data = extern struct { + name: ?[*:0]const u8, + function: [*:0]const u8, + file: [*:0]const u8, + line: u32, + color: u32, +}; diff --git a/src/link.zig b/src/link.zig index 6e5c809f6216..a88e5b440c31 100644 --- a/src/link.zig +++ b/src/link.zig @@ -19,6 +19,8 @@ const Module = @import("Module.zig"); const InternPool = @import("InternPool.zig"); const Type = @import("type.zig").Type; const TypedValue = @import("TypedValue.zig"); +const archive = @import("archive/main.zig"); +const Archive = archive.Archive; /// When adding a new field, remember to update `hashAddSystemLibs`. /// These are *always* dynamically linked. Static libraries will be @@ -1154,8 +1156,17 @@ pub const File = struct { const llvm = @import("codegen/llvm.zig"); Builder.initializeLLVMTarget(base.options.target.cpu.arch); const os_tag = llvm.targetOs(base.options.target.os.tag); - const bad = llvm_bindings.WriteArchive(full_out_path_z, object_files.items.ptr, object_files.items.len, os_tag); - if (bad) return error.UnableToWriteArchive; + if (false) { + const bad = llvm_bindings.WriteArchive(full_out_path_z, object_files.items.ptr, object_files.items.len, os_tag); + if (bad) return error.UnableToWriteArchive; + } else { + const archive_type: Archive.ArchiveType = switch (os_tag) { + .MacOSX, .Darwin => .darwin, + .Linux => .gnu, + else => .gnu, + }; + archive.linkAsArchive(full_out_path, object_files.items, archive_type) catch @panic("OOM"); + } if (!base.options.disable_lld_caching) { Cache.writeSmallFile(directory.handle, id_symlink_basename, &digest) catch |err| { From f007ce78434bfd5ec027eaa7d583e244a0dd0a4f Mon Sep 17 00:00:00 2001 From: Tom Read Cutting Date: Fri, 20 Oct 2023 13:37:53 +0100 Subject: [PATCH 2/2] Various fixes to archive copied across, use gpa as thing wrapped by the arena --- src/archive/archive/Archive.zig | 18 ++++++++++-------- src/archive/main.zig | 4 ++-- src/link.zig | 7 ++++++- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/archive/archive/Archive.zig b/src/archive/archive/Archive.zig index 23e290600c73..26110bacc077 100644 --- a/src/archive/archive/Archive.zig +++ b/src/archive/archive/Archive.zig @@ -416,6 +416,7 @@ const TrackingBufferedWriter = tracking_buffered_writer.TrackingBufferedWriter(s // used for parsing. (use same error handling workflow etc.) /// Use same naming scheme for objects (as found elsewhere in the file). 
pub fn finalize(self: *Archive, allocator: Allocator) (FinalizeError || HandledIoError || CriticalError)!void { + // std.debug.print("I'm being called!!!\n", .{}); const tracy = trace(@src()); defer tracy.end(); if (self.output_archive_type == .ambiguous) { @@ -900,14 +901,15 @@ pub fn addToSymbolTable(self: *Archive, allocator: Allocator, archived_file: *co if (macho_file.in_symtab) |in_symtab| { for (in_symtab, 0..) |_, sym_index| { - const sym = macho_file.getSourceSymbol(@as(u32, @intCast(sym_index))); - if (sym != null and sym.?.ext() and sym.?.sect()) { - const symbol = Symbol{ - .name = try allocator.dupe(u8, macho_file.getSymbolName(@as(u32, @intCast(sym_index)))), - .file_index = file_index, - }; - - try self.symbols.append(allocator, symbol); + if (macho_file.getSourceSymbol(@as(u32, @intCast(sym_index)))) |sym| { + if (sym.ext() and (sym.sect() or sym.tentative())) { + const symbol = Symbol{ + .name = try allocator.dupe(u8, macho_file.getSymbolName(@as(u32, @intCast(sym_index)))), + .file_index = file_index, + }; + + try self.symbols.append(allocator, symbol); + } } } } diff --git a/src/archive/main.zig b/src/archive/main.zig index 3da9aa052299..f70b89618542 100644 --- a/src/archive/main.zig +++ b/src/archive/main.zig @@ -298,12 +298,12 @@ fn processModifier(modifier_char: u8, modifiers: *Archive.Modifiers) bool { // }; // } -pub fn linkAsArchive(archive_path: []const u8, file_names_ptr: []const [*:0]const u8, archive_type: Archive.ArchiveType) !void { +pub fn linkAsArchive(gpa: std.mem.Allocator, archive_path: []const u8, file_names_ptr: []const [*:0]const u8, archive_type: Archive.ArchiveType) !void { var modifiers: Archive.Modifiers = .{}; modifiers.build_symbol_table = true; modifiers.create = true; - var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); + var arena = std.heap.ArenaAllocator.init(gpa); defer arena.deinit(); var allocator = arena.allocator(); diff --git a/src/link.zig b/src/link.zig index a88e5b440c31..20c338183fe3 100644 --- a/src/link.zig +++ b/src/link.zig @@ -1165,7 +1165,12 @@ pub const File = struct { .Linux => .gnu, else => .gnu, }; - archive.linkAsArchive(full_out_path, object_files.items, archive_type) catch @panic("OOM"); + archive.linkAsArchive( + comp.gpa, + full_out_path, + object_files.items, + archive_type, + ) catch @panic("OOM"); } if (!base.options.disable_lld_caching) {
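
For reference, a minimal usage sketch of the TrackingBufferedWriter introduced in src/archive/tracking_buffered_writer.zig, not part of either patch: the writeArchiveStart helper is hypothetical, the import path assumes a caller living under src/, and the padding check only illustrates the kind of alignment bookkeeping that file_pos exists to support.

const std = @import("std");
const TrackingBufferedWriter = @import("archive/tracking_buffered_writer.zig").TrackingBufferedWriter;

// Hypothetical helper: write the global ar magic through a tracking buffered
// writer and keep the next member offset even, which is what file_pos is for.
fn writeArchiveStart(file: std.fs.File) !void {
    const BufferedWriter = std.io.BufferedWriter(4096, std.fs.File.Writer);
    var tracked = TrackingBufferedWriter(BufferedWriter){
        .buffered_writer = .{ .unbuffered_writer = file.writer() },
    };
    try tracked.writer().writeAll("!<arch>\n");
    // file_pos has advanced by the number of bytes pushed through the writer.
    if (tracked.file_pos % 2 != 0) {
        try tracked.writer().writeAll("\n");
    }
    try tracked.flush();
}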