From fc69529dab66254d353975844034dab545a7335b Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Mon, 4 Sep 2023 01:33:53 -0700 Subject: [PATCH 01/11] ErrorBundle.addOtherSourceLocation: Fix source_line ending up as garbage when source_line is 0 The zero value needs special handling since it means 'no source line' and should be preserved through the copy. --- lib/std/zig/ErrorBundle.zig | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/std/zig/ErrorBundle.zig b/lib/std/zig/ErrorBundle.zig index 24d304543680..31a706b8e434 100644 --- a/lib/std/zig/ErrorBundle.zig +++ b/lib/std/zig/ErrorBundle.zig @@ -474,7 +474,10 @@ pub const Wip = struct { .span_start = other_sl.span_start, .span_main = other_sl.span_main, .span_end = other_sl.span_end, - .source_line = try wip.addString(other.nullTerminatedString(other_sl.source_line)), + .source_line = if (other_sl.source_line != 0) + try wip.addString(other.nullTerminatedString(other_sl.source_line)) + else + 0, .reference_trace_len = other_sl.reference_trace_len, }); From 8e35be0640998e1070cf892eaadc35fc71d64ead Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Mon, 4 Sep 2023 01:36:22 -0700 Subject: [PATCH 02/11] ErrorBundle: rename addBundle to addBundleAsNotes, add addBundleAsRoots --- lib/std/zig/ErrorBundle.zig | 17 ++++++++++++++++- src/Compilation.zig | 2 +- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/lib/std/zig/ErrorBundle.zig b/lib/std/zig/ErrorBundle.zig index 31a706b8e434..141cdb119531 100644 --- a/lib/std/zig/ErrorBundle.zig +++ b/lib/std/zig/ErrorBundle.zig @@ -421,7 +421,7 @@ pub const Wip = struct { _ = try addExtra(wip, rt); } - pub fn addBundle(wip: *Wip, other: ErrorBundle) !void { + pub fn addBundleAsNotes(wip: *Wip, other: ErrorBundle) !void { const gpa = wip.gpa; try wip.string_bytes.ensureUnusedCapacity(gpa, other.string_bytes.len); @@ -436,6 +436,21 @@ pub const Wip = struct { } } + pub fn addBundleAsRoots(wip: *Wip, other: ErrorBundle) !void { + const gpa = wip.gpa; + + try wip.string_bytes.ensureUnusedCapacity(gpa, other.string_bytes.len); + try wip.extra.ensureUnusedCapacity(gpa, other.extra.len); + + const other_list = other.getMessages(); + + try wip.root_list.ensureUnusedCapacity(gpa, other_list.len); + for (other_list) |other_msg| { + // The ensureUnusedCapacity calls above guarantees this. 
+ wip.root_list.appendAssumeCapacity(wip.addOtherMessage(other, other_msg) catch unreachable); + } + } + pub fn reserveNotes(wip: *Wip, notes_len: u32) !u32 { try wip.extra.ensureUnusedCapacity(wip.gpa, notes_len + notes_len * @typeInfo(ErrorBundle.ErrorMessage).Struct.fields.len); diff --git a/src/Compilation.zig b/src/Compilation.zig index 0150d615e37f..14247978d8d4 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -2683,7 +2683,7 @@ pub fn getAllErrorsAlloc(self: *Compilation) !ErrorBundle { .msg = try bundle.addString(value.msg), .notes_len = if (value.children) |b| b.errorMessageCount() else 0, }); - if (value.children) |b| try bundle.addBundle(b); + if (value.children) |b| try bundle.addBundleAsNotes(b); } if (self.alloc_failure_occurred) { try bundle.addRootErrorMessage(.{ From 2a56fe11756fc402a15f159d1c951000487ff3ad Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Fri, 30 Jun 2023 15:02:32 -0700 Subject: [PATCH 03/11] Add a .rc -> .res compiler to the Zig compiler --- lib/std/Build/Step/Compile.zig | 48 + src/Compilation.zig | 677 +++++- src/link.zig | 8 +- src/link/Coff/lld.zig | 7 + src/main.zig | 23 + src/resinator.zig | 18 + src/resinator/ani.zig | 58 + src/resinator/ast.zig | 1084 ++++++++++ src/resinator/bmp.zig | 268 +++ src/resinator/cli.zig | 1433 +++++++++++++ src/resinator/code_pages.zig | 487 +++++ src/resinator/comments.zig | 340 +++ src/resinator/compile.zig | 3356 ++++++++++++++++++++++++++++++ src/resinator/errors.zig | 1033 +++++++++ src/resinator/ico.zig | 310 +++ src/resinator/lang.zig | 877 ++++++++ src/resinator/lex.zig | 1104 ++++++++++ src/resinator/literals.zig | 904 ++++++++ src/resinator/parse.zig | 1880 +++++++++++++++++ src/resinator/rc.zig | 407 ++++ src/resinator/res.zig | 1108 ++++++++++ src/resinator/source_mapping.zig | 684 ++++++ src/resinator/utils.zig | 83 + src/resinator/windows1252.zig | 588 ++++++ 24 files changed, 16778 insertions(+), 7 deletions(-) create mode 100644 src/resinator.zig create mode 100644 src/resinator/ani.zig create mode 100644 src/resinator/ast.zig create mode 100644 src/resinator/bmp.zig create mode 100644 src/resinator/cli.zig create mode 100644 src/resinator/code_pages.zig create mode 100644 src/resinator/comments.zig create mode 100644 src/resinator/compile.zig create mode 100644 src/resinator/errors.zig create mode 100644 src/resinator/ico.zig create mode 100644 src/resinator/lang.zig create mode 100644 src/resinator/lex.zig create mode 100644 src/resinator/literals.zig create mode 100644 src/resinator/parse.zig create mode 100644 src/resinator/rc.zig create mode 100644 src/resinator/res.zig create mode 100644 src/resinator/source_mapping.zig create mode 100644 src/resinator/utils.zig create mode 100644 src/resinator/windows1252.zig diff --git a/lib/std/Build/Step/Compile.zig b/lib/std/Build/Step/Compile.zig index a3a6b61b611d..f721b52c8283 100644 --- a/lib/std/Build/Step/Compile.zig +++ b/lib/std/Build/Step/Compile.zig @@ -221,6 +221,26 @@ pub const CSourceFile = struct { } }; +pub const RcSourceFile = struct { + file: LazyPath, + /// Any option that rc.exe accepts will work here, with the exception of: + /// - `/fo`: The output filename is set by the build system + /// - Any MUI-related option + /// https://learn.microsoft.com/en-us/windows/win32/menurc/using-rc-the-rc-command-line- + /// + /// Implicitly defined options: + /// /x (ignore the INCLUDE environment variable) + /// /D_DEBUG or /DNDEBUG depending on the optimization mode + flags: []const []const u8 = &.{}, + + pub fn dupe(self: RcSourceFile, 
b: *std.Build) RcSourceFile { + return .{ + .file = self.file.dupe(b), + .flags = b.dupeStrings(self.flags), + }; + } +}; + pub const LinkObject = union(enum) { static_path: LazyPath, other_step: *Compile, @@ -228,6 +248,7 @@ pub const LinkObject = union(enum) { assembly_file: LazyPath, c_source_file: *CSourceFile, c_source_files: *CSourceFiles, + win32_resource_file: *RcSourceFile, }; pub const SystemLib = struct { @@ -910,6 +931,14 @@ pub fn addCSourceFile(self: *Compile, source: CSourceFile) void { source.file.addStepDependencies(&self.step); } +pub fn addWin32ResourceFile(self: *Compile, source: RcSourceFile) void { + const b = self.step.owner; + const rc_source_file = b.allocator.create(RcSourceFile) catch @panic("OOM"); + rc_source_file.* = source.dupe(b); + self.link_objects.append(.{ .win32_resource_file = rc_source_file }) catch @panic("OOM"); + source.file.addStepDependencies(&self.step); +} + pub fn setVerboseLink(self: *Compile, value: bool) void { self.verbose_link = value; } @@ -1358,6 +1387,7 @@ fn make(step: *Step, prog_node: *std.Progress.Node) !void { try transitive_deps.add(self.link_objects.items); var prev_has_cflags = false; + var prev_has_rcflags = false; var prev_search_strategy: SystemLib.SearchStrategy = .paths_first; var prev_preferred_link_mode: std.builtin.LinkMode = .Dynamic; @@ -1500,6 +1530,24 @@ fn make(step: *Step, prog_node: *std.Progress.Node) !void { try zig_args.append(b.pathFromRoot(file)); } }, + + .win32_resource_file => |rc_source_file| { + if (rc_source_file.flags.len == 0) { + if (prev_has_rcflags) { + try zig_args.append("-rcflags"); + try zig_args.append("--"); + prev_has_rcflags = false; + } + } else { + try zig_args.append("-rcflags"); + for (rc_source_file.flags) |arg| { + try zig_args.append(arg); + } + try zig_args.append("--"); + prev_has_rcflags = true; + } + try zig_args.append(rc_source_file.file.getPath(b)); + }, } } diff --git a/src/Compilation.zig b/src/Compilation.zig index 14247978d8d4..2135ab824580 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -39,6 +39,7 @@ const libtsan = @import("libtsan.zig"); const Zir = @import("Zir.zig"); const Autodoc = @import("Autodoc.zig"); const Color = @import("main.zig").Color; +const resinator = @import("resinator.zig"); /// General-purpose allocator. Used for both temporary and long-term storage. gpa: Allocator, @@ -46,6 +47,7 @@ gpa: Allocator, arena_state: std.heap.ArenaAllocator.State, bin_file: *link.File, c_object_table: std.AutoArrayHashMapUnmanaged(*CObject, void) = .{}, +win32_resource_table: std.AutoArrayHashMapUnmanaged(*Win32Resource, void) = .{}, /// This is a pointer to a local variable inside `update()`. whole_cache_manifest: ?*Cache.Manifest = null, whole_cache_manifest_mutex: std.Thread.Mutex = .{}, @@ -60,6 +62,10 @@ anon_work_queue: std.fifo.LinearFifo(Job, .Dynamic), /// gets linked with the Compilation. c_object_work_queue: std.fifo.LinearFifo(*CObject, .Dynamic), +/// These jobs are to invoke the RC compiler to create a compiled resource file (.res), which +/// gets linked with the Compilation. +win32_resource_work_queue: std.fifo.LinearFifo(*Win32Resource, .Dynamic), + /// These jobs are to tokenize, parse, and astgen files, which may be outdated /// since the last compilation, as well as scan for `@import` and queue up /// additional jobs corresponding to those new files. @@ -73,6 +79,10 @@ embed_file_work_queue: std.fifo.LinearFifo(*Module.EmbedFile, .Dynamic), /// This data is accessed by multiple threads and is protected by `mutex`. 
failed_c_objects: std.AutoArrayHashMapUnmanaged(*CObject, *CObject.ErrorMsg) = .{}, +/// The ErrorBundle memory is owned by the `Win32Resource`, using Compilation's general purpose allocator. +/// This data is accessed by multiple threads and is protected by `mutex`. +failed_win32_resources: std.AutoArrayHashMapUnmanaged(*Win32Resource, ErrorBundle) = .{}, + /// Miscellaneous things that can fail. misc_failures: std.AutoArrayHashMapUnmanaged(MiscTask, MiscError) = .{}, @@ -109,6 +119,7 @@ last_update_was_cache_hit: bool = false, c_source_files: []const CSourceFile, clang_argv: []const []const u8, +rc_source_files: []const RcSourceFile, cache_parent: *Cache, /// Path to own executable for invoking `zig clang`. self_exe_path: ?[]const u8, @@ -125,6 +136,7 @@ local_cache_directory: Directory, global_cache_directory: Directory, libc_include_dir_list: []const []const u8, libc_framework_dir_list: []const []const u8, +rc_include_dir_list: []const []const u8, thread_pool: *ThreadPool, /// Populated when we build the libc++ static library. A Job to build this is placed in the queue @@ -225,6 +237,12 @@ pub const CSourceFile = struct { ext: ?FileExt = null, }; +/// For passing to resinator. +pub const RcSourceFile = struct { + src_path: []const u8, + extra_flags: []const []const u8 = &.{}, +}; + const Job = union(enum) { /// Write the constant value for a Decl to the output file. codegen_decl: Module.Decl.Index, @@ -326,6 +344,50 @@ pub const CObject = struct { } }; +pub const Win32Resource = struct { + /// Relative to cwd. Owned by arena. + src: RcSourceFile, + status: union(enum) { + new, + success: struct { + /// The outputted result. Owned by gpa. + res_path: []u8, + /// This is a file system lock on the cache hash manifest representing this + /// object. It prevents other invocations of the Zig compiler from interfering + /// with this object until released. + lock: Cache.Lock, + }, + /// There will be a corresponding ErrorMsg in Compilation.failed_win32_resources. + failure, + /// A transient failure happened when trying to compile the resource file; it may + /// succeed if we try again. There may be a corresponding ErrorMsg in + /// Compilation.failed_win32_resources. If there is not, the failure is out of memory. + failure_retryable, + }, + + /// Returns true if there was failure. 
+ pub fn clearStatus(self: *Win32Resource, gpa: Allocator) bool { + switch (self.status) { + .new => return false, + .failure, .failure_retryable => { + self.status = .new; + return true; + }, + .success => |*success| { + gpa.free(success.res_path); + success.lock.release(); + self.status = .new; + return false; + }, + } + } + + pub fn destroy(self: *Win32Resource, gpa: Allocator) void { + _ = self.clearStatus(gpa); + gpa.destroy(self); + } +}; + pub const MiscTask = enum { write_builtin_zig, glibc_crt_file, @@ -505,6 +567,7 @@ pub const InitOptions = struct { rpath_list: []const []const u8 = &[0][]const u8{}, symbol_wrap_set: std.StringArrayHashMapUnmanaged(void) = .{}, c_source_files: []const CSourceFile = &[0]CSourceFile{}, + rc_source_files: []const RcSourceFile = &[0]RcSourceFile{}, link_objects: []LinkObject = &[0]LinkObject{}, framework_dirs: []const []const u8 = &[0][]const u8{}, frameworks: []const Framework = &.{}, @@ -938,6 +1001,16 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { options.libc_installation, ); + // .rc preprocessor needs to know the libc dirs even if we are not linking libc + const rc_dirs = try detectLibCIncludeDirs( + arena, + options.zig_lib_directory.path.?, + options.target, + options.is_native_abi, + true, + options.libc_installation, + ); + const sysroot = options.sysroot orelse libc_dirs.sysroot; const must_pie = target_util.requiresPIE(options.target); @@ -1591,16 +1664,19 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { .work_queue = std.fifo.LinearFifo(Job, .Dynamic).init(gpa), .anon_work_queue = std.fifo.LinearFifo(Job, .Dynamic).init(gpa), .c_object_work_queue = std.fifo.LinearFifo(*CObject, .Dynamic).init(gpa), + .win32_resource_work_queue = std.fifo.LinearFifo(*Win32Resource, .Dynamic).init(gpa), .astgen_work_queue = std.fifo.LinearFifo(*Module.File, .Dynamic).init(gpa), .embed_file_work_queue = std.fifo.LinearFifo(*Module.EmbedFile, .Dynamic).init(gpa), .keep_source_files_loaded = options.keep_source_files_loaded, .use_clang = use_clang, .clang_argv = options.clang_argv, .c_source_files = options.c_source_files, + .rc_source_files = options.rc_source_files, .cache_parent = cache, .self_exe_path = options.self_exe_path, .libc_include_dir_list = libc_dirs.libc_include_dir_list, .libc_framework_dir_list = libc_dirs.libc_framework_dir_list, + .rc_include_dir_list = rc_dirs.libc_include_dir_list, .sanitize_c = sanitize_c, .thread_pool = options.thread_pool, .clang_passthrough_mode = options.clang_passthrough_mode, @@ -1647,6 +1723,19 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { comp.c_object_table.putAssumeCapacityNoClobber(c_object, {}); } + // Add a `Win32Resource` for each `rc_source_files`. 
+ try comp.win32_resource_table.ensureTotalCapacity(gpa, options.rc_source_files.len); + for (options.rc_source_files) |rc_source_file| { + const win32_resource = try gpa.create(Win32Resource); + errdefer gpa.destroy(win32_resource); + + win32_resource.* = .{ + .status = .{ .new = {} }, + .src = rc_source_file, + }; + comp.win32_resource_table.putAssumeCapacityNoClobber(win32_resource, {}); + } + const have_bin_emit = comp.bin_file.options.emit != null or comp.whole_bin_sub_path != null; if (have_bin_emit and !comp.bin_file.options.skip_linker_dependencies and target.ofmt != .c) { @@ -1804,6 +1893,7 @@ pub fn destroy(self: *Compilation) void { self.work_queue.deinit(); self.anon_work_queue.deinit(); self.c_object_work_queue.deinit(); + self.win32_resource_work_queue.deinit(); self.astgen_work_queue.deinit(); self.embed_file_work_queue.deinit(); @@ -1852,6 +1942,16 @@ pub fn destroy(self: *Compilation) void { } self.failed_c_objects.deinit(gpa); + for (self.win32_resource_table.keys()) |key| { + key.destroy(gpa); + } + self.win32_resource_table.deinit(gpa); + + for (self.failed_win32_resources.values()) |*value| { + value.deinit(gpa); + } + self.failed_win32_resources.deinit(gpa); + for (self.lld_errors.items) |*lld_error| { lld_error.deinit(gpa); } @@ -2014,6 +2114,13 @@ pub fn update(comp: *Compilation, main_progress_node: *std.Progress.Node) !void comp.c_object_work_queue.writeItemAssumeCapacity(key); } + // For compiling Win32 resources, we rely on the cache hash system to avoid duplicating work. + // Add a Job for each Win32 resource file. + try comp.win32_resource_work_queue.ensureUnusedCapacity(comp.win32_resource_table.count()); + for (comp.win32_resource_table.keys()) |key| { + comp.win32_resource_work_queue.writeItemAssumeCapacity(key); + } + if (comp.bin_file.options.module) |module| { module.compile_log_text.shrinkAndFree(module.gpa, 0); module.generation += 1; @@ -2336,6 +2443,11 @@ fn addNonIncrementalStuffToCacheManifest(comp: *Compilation, man: *Cache.Manifes man.hash.addListOfBytes(key.src.extra_flags); } + for (comp.win32_resource_table.keys()) |key| { + _ = try man.addFile(key.src.src_path, null); + man.hash.addListOfBytes(key.src.extra_flags); + } + cache_helpers.addOptionalEmitLoc(&man.hash, comp.emit_asm); cache_helpers.addOptionalEmitLoc(&man.hash, comp.emit_llvm_ir); cache_helpers.addOptionalEmitLoc(&man.hash, comp.emit_llvm_bc); @@ -2571,8 +2683,14 @@ pub fn makeBinFileWritable(self: *Compilation) !void { /// This function is temporally single-threaded. 
pub fn totalErrorCount(self: *Compilation) u32 { - var total: usize = self.failed_c_objects.count() + self.misc_failures.count() + - @intFromBool(self.alloc_failure_occurred) + self.lld_errors.items.len; + var total: usize = self.failed_c_objects.count() + + self.misc_failures.count() + + @intFromBool(self.alloc_failure_occurred) + + self.lld_errors.items.len; + + for (self.failed_win32_resources.values()) |errs| { + total += errs.errorMessageCount(); + } if (self.bin_file.options.module) |module| { total += module.failed_exports.count(); @@ -2664,6 +2782,13 @@ pub fn getAllErrorsAlloc(self: *Compilation) !ErrorBundle { } } + { + var it = self.failed_win32_resources.iterator(); + while (it.next()) |entry| { + try bundle.addBundleAsRoots(entry.value_ptr.*); + } + } + for (self.lld_errors.items) |lld_error| { const notes_len = @as(u32, @intCast(lld_error.context_lines.len)); @@ -3082,6 +3207,9 @@ pub fn performAllTheWork( var c_obj_prog_node = main_progress_node.start("Compile C Objects", comp.c_source_files.len); defer c_obj_prog_node.end(); + var win32_resource_prog_node = main_progress_node.start("Compile Win32 Resources", comp.rc_source_files.len); + defer win32_resource_prog_node.end(); + var embed_file_prog_node = main_progress_node.start("Detect @embedFile updates", comp.embed_file_work_queue.count); defer embed_file_prog_node.end(); @@ -3130,6 +3258,13 @@ pub fn performAllTheWork( comp, c_object, &c_obj_prog_node, &comp.work_queue_wait_group, }); } + + while (comp.win32_resource_work_queue.readItem()) |win32_resource| { + comp.work_queue_wait_group.start(); + try comp.thread_pool.spawn(workerUpdateWin32Resource, .{ + comp, win32_resource, &win32_resource_prog_node, &comp.work_queue_wait_group, + }); + } } if (comp.bin_file.options.module) |mod| { @@ -3659,6 +3794,14 @@ pub fn obtainCObjectCacheManifest(comp: *const Compilation) Cache.Manifest { return man; } +pub fn obtainWin32ResourceCacheManifest(comp: *const Compilation) Cache.Manifest { + var man = comp.cache_parent.obtain(); + + man.hash.addListOfBytes(comp.rc_include_dir_list); + + return man; +} + test "cImport" { _ = cImport; } @@ -3832,6 +3975,26 @@ fn workerUpdateCObject( }; } +fn workerUpdateWin32Resource( + comp: *Compilation, + win32_resource: *Win32Resource, + progress_node: *std.Progress.Node, + wg: *WaitGroup, +) void { + defer wg.finish(); + + comp.updateWin32Resource(win32_resource, progress_node) catch |err| switch (err) { + error.AnalysisFail => return, + else => { + comp.reportRetryableWin32ResourceError(win32_resource, err) catch |oom| switch (oom) { + // Swallowing this error is OK because it's implied to be OOM when + // there is a missing failed_win32_resources error message. 
+ error.OutOfMemory => {}, + }; + }, + }; +} + fn buildCompilerRtOneShot( comp: *Compilation, output_mode: std.builtin.OutputMode, @@ -3877,6 +4040,18 @@ fn reportRetryableCObjectError( } } +fn reportRetryableWin32ResourceError( + comp: *Compilation, + win32_resource: *Win32Resource, + err: anyerror, +) error{OutOfMemory}!void { + win32_resource.status = .failure_retryable; + + // TODO: something + _ = comp; + _ = @errorName(err); +} + fn reportRetryableAstGenError( comp: *Compilation, src: AstGenSrc, @@ -4233,6 +4408,298 @@ fn updateCObject(comp: *Compilation, c_object: *CObject, c_obj_prog_node: *std.P }; } +fn updateWin32Resource(comp: *Compilation, win32_resource: *Win32Resource, win32_resource_prog_node: *std.Progress.Node) !void { + if (!build_options.have_llvm) { + return comp.failWin32Resource(win32_resource, "clang not available: compiler built without LLVM extensions", .{}); + } + const self_exe_path = comp.self_exe_path orelse + return comp.failWin32Resource(win32_resource, "clang compilation disabled", .{}); + + const tracy_trace = trace(@src()); + defer tracy_trace.end(); + + log.debug("updating win32 resource: {s}", .{win32_resource.src.src_path}); + + if (win32_resource.clearStatus(comp.gpa)) { + // There was previous failure. + comp.mutex.lock(); + defer comp.mutex.unlock(); + // If the failure was OOM, there will not be an entry here, so we do + // not assert discard. + _ = comp.failed_win32_resources.swapRemove(win32_resource); + } + + var man = comp.obtainWin32ResourceCacheManifest(); + defer man.deinit(); + + _ = try man.addFile(win32_resource.src.src_path, null); + man.hash.addListOfBytes(win32_resource.src.extra_flags); + + var arena_allocator = std.heap.ArenaAllocator.init(comp.gpa); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + const rc_basename = std.fs.path.basename(win32_resource.src.src_path); + + win32_resource_prog_node.activate(); + var child_progress_node = win32_resource_prog_node.start(rc_basename, 0); + child_progress_node.activate(); + defer child_progress_node.end(); + + const rc_basename_noext = rc_basename[0 .. rc_basename.len - std.fs.path.extension(rc_basename).len]; + + const digest = if (try man.hit()) man.final() else blk: { + const rcpp_filename = try std.fmt.allocPrint(arena, "{s}.rcpp", .{rc_basename_noext}); + + const out_rcpp_path = try comp.tmpFilePath(arena, rcpp_filename); + var zig_cache_tmp_dir = try comp.local_cache_directory.handle.makeOpenPath("tmp", .{}); + defer zig_cache_tmp_dir.close(); + + const res_filename = try std.fmt.allocPrint(arena, "{s}.res", .{rc_basename_noext}); + + // We can't know the digest until we do the compilation, + // so we need a temporary filename. 
+ const out_res_path = try comp.tmpFilePath(arena, res_filename); + + var options = options: { + var resinator_args = try std.ArrayListUnmanaged([]const u8).initCapacity(comp.gpa, win32_resource.src.extra_flags.len + 4); + defer resinator_args.deinit(comp.gpa); + + resinator_args.appendAssumeCapacity(""); // dummy 'process name' arg + resinator_args.appendSliceAssumeCapacity(win32_resource.src.extra_flags); + resinator_args.appendSliceAssumeCapacity(&.{ "--", out_rcpp_path, out_res_path }); + + var cli_diagnostics = resinator.cli.Diagnostics.init(comp.gpa); + defer cli_diagnostics.deinit(); + var options = resinator.cli.parse(comp.gpa, resinator_args.items, &cli_diagnostics) catch |err| switch (err) { + error.ParseError => { + return comp.failWin32ResourceCli(win32_resource, &cli_diagnostics); + }, + else => |e| return e, + }; + break :options options; + }; + defer options.deinit(); + + var argv = std.ArrayList([]const u8).init(comp.gpa); + defer argv.deinit(); + var temp_strings = std.ArrayList([]const u8).init(comp.gpa); + defer { + for (temp_strings.items) |temp_string| { + comp.gpa.free(temp_string); + } + temp_strings.deinit(); + } + + // TODO: support options.preprocess == .no and .only + // alternatively, error if those options are used + try argv.appendSlice(&[_][]const u8{ + self_exe_path, + "clang", + "-E", // preprocessor only + "--comments", + "-fuse-line-directives", // #line instead of # + "-xc", // output c + "-Werror=null-character", // error on null characters instead of converting them to spaces + "-fms-compatibility", // Allow things like "header.h" to be resolved relative to the 'root' .rc file, among other things + "-DRC_INVOKED", // https://learn.microsoft.com/en-us/windows/win32/menurc/predefined-macros + }); + for (options.extra_include_paths.items) |extra_include_path| { + try argv.append("--include-directory"); + try argv.append(extra_include_path); + } + var symbol_it = options.symbols.iterator(); + while (symbol_it.next()) |entry| { + switch (entry.value_ptr.*) { + .define => |value| { + try argv.append("-D"); + const define_arg = arg: { + const arg = try std.fmt.allocPrint(comp.gpa, "{s}={s}", .{ entry.key_ptr.*, value }); + errdefer comp.gpa.free(arg); + try temp_strings.append(arg); + break :arg arg; + }; + try argv.append(define_arg); + }, + .undefine => { + try argv.append("-U"); + try argv.append(entry.key_ptr.*); + }, + } + } + try argv.append(win32_resource.src.src_path); + try argv.appendSlice(&[_][]const u8{ + "-o", + out_rcpp_path, + }); + + const out_dep_path = try std.fmt.allocPrint(arena, "{s}.d", .{out_rcpp_path}); + // Note: addCCArgs will implicitly add _DEBUG/NDEBUG depending on the optimization + // mode. While these defines are not normally present when calling rc.exe directly, + // them being defined matches the behavior of how MSVC calls rc.exe which is the more + // relevant behavior in this case. 
+ try comp.addCCArgs(arena, &argv, .rc, out_dep_path); + + if (comp.verbose_cc) { + dump_argv(argv.items); + } + + if (std.process.can_spawn) { + var child = std.ChildProcess.init(argv.items, arena); + child.stdin_behavior = .Ignore; + child.stdout_behavior = .Ignore; + child.stderr_behavior = .Pipe; + + try child.spawn(); + + const stderr_reader = child.stderr.?.reader(); + + const stderr = try stderr_reader.readAllAlloc(arena, 10 * 1024 * 1024); + + const term = child.wait() catch |err| { + return comp.failWin32Resource(win32_resource, "unable to spawn {s}: {s}", .{ argv.items[0], @errorName(err) }); + }; + + switch (term) { + .Exited => |code| { + if (code != 0) { + // TODO parse clang stderr and turn it into an error message + // and then call failCObjWithOwnedErrorMsg + log.err("clang preprocessor failed with stderr:\n{s}", .{stderr}); + return comp.failWin32Resource(win32_resource, "clang preprocessor exited with code {d}", .{code}); + } + }, + else => { + log.err("clang preprocessor terminated with stderr:\n{s}", .{stderr}); + return comp.failWin32Resource(win32_resource, "clang preprocessor terminated unexpectedly", .{}); + }, + } + } else { + const exit_code = try clangMain(arena, argv.items); + if (exit_code != 0) { + return comp.failWin32Resource(win32_resource, "clang preprocessor exited with code {d}", .{exit_code}); + } + } + + const dep_basename = std.fs.path.basename(out_dep_path); + // Add the files depended on to the cache system. + try man.addDepFilePost(zig_cache_tmp_dir, dep_basename); + if (comp.whole_cache_manifest) |whole_cache_manifest| { + comp.whole_cache_manifest_mutex.lock(); + defer comp.whole_cache_manifest_mutex.unlock(); + try whole_cache_manifest.addDepFilePost(zig_cache_tmp_dir, dep_basename); + } + // Just to save disk space, we delete the file because it is never needed again. 
+ zig_cache_tmp_dir.deleteFile(dep_basename) catch |err| { + log.warn("failed to delete '{s}': {s}", .{ out_dep_path, @errorName(err) }); + }; + + var full_input = std.fs.cwd().readFileAlloc(arena, out_rcpp_path, std.math.maxInt(usize)) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + else => |e| { + return comp.failWin32Resource(win32_resource, "failed to read preprocessed file '{s}': {s}", .{ out_rcpp_path, @errorName(e) }); + }, + }; + + var mapping_results = try resinator.source_mapping.parseAndRemoveLineCommands(arena, full_input, full_input, .{ .initial_filename = win32_resource.src.src_path }); + defer mapping_results.mappings.deinit(arena); + + var final_input = resinator.comments.removeComments(mapping_results.result, mapping_results.result, &mapping_results.mappings); + + var output_file = zig_cache_tmp_dir.createFile(out_res_path, .{}) catch |err| { + return comp.failWin32Resource(win32_resource, "failed to create output file '{s}': {s}", .{ out_res_path, @errorName(err) }); + }; + var output_file_closed = false; + defer if (!output_file_closed) output_file.close(); + + var diagnostics = resinator.errors.Diagnostics.init(arena); + defer diagnostics.deinit(); + + var dependencies_list = std.ArrayList([]const u8).init(comp.gpa); + defer { + for (dependencies_list.items) |item| { + comp.gpa.free(item); + } + dependencies_list.deinit(); + } + + var output_buffered_stream = std.io.bufferedWriter(output_file.writer()); + + resinator.compile.compile(arena, final_input, output_buffered_stream.writer(), .{ + .cwd = std.fs.cwd(), + .diagnostics = &diagnostics, + .source_mappings = &mapping_results.mappings, + .dependencies_list = &dependencies_list, + .system_include_paths = comp.rc_include_dir_list, + .ignore_include_env_var = true, + // options + .extra_include_paths = options.extra_include_paths.items, + .default_language_id = options.default_language_id, + .default_code_page = options.default_code_page orelse .windows1252, + .verbose = options.verbose, + .null_terminate_string_table_strings = options.null_terminate_string_table_strings, + .max_string_literal_codepoints = options.max_string_literal_codepoints, + .silent_duplicate_control_ids = options.silent_duplicate_control_ids, + .warn_instead_of_error_on_invalid_code_page = options.warn_instead_of_error_on_invalid_code_page, + }) catch |err| switch (err) { + error.ParseError, error.CompileError => { + // Delete the output file on error + output_file.close(); + output_file_closed = true; + // Failing to delete is not really a big deal, so swallow any errors + zig_cache_tmp_dir.deleteFile(out_res_path) catch { + log.warn("failed to delete '{s}': {s}", .{ out_res_path, @errorName(err) }); + }; + return comp.failWin32ResourceCompile(win32_resource, final_input, &diagnostics, mapping_results.mappings); + }, + else => |e| return e, + }; + + try output_buffered_stream.flush(); + + for (dependencies_list.items) |dep_file_path| { + try man.addFilePost(dep_file_path); + if (comp.whole_cache_manifest) |whole_cache_manifest| { + comp.whole_cache_manifest_mutex.lock(); + defer comp.whole_cache_manifest_mutex.unlock(); + try whole_cache_manifest.addFilePost(dep_file_path); + } + } + + // Rename into place. 
+ const digest = man.final(); + const o_sub_path = try std.fs.path.join(arena, &[_][]const u8{ "o", &digest }); + var o_dir = try comp.local_cache_directory.handle.makeOpenPath(o_sub_path, .{}); + defer o_dir.close(); + const tmp_basename = std.fs.path.basename(out_res_path); + try std.fs.rename(zig_cache_tmp_dir, tmp_basename, o_dir, res_filename); + const tmp_rcpp_basename = std.fs.path.basename(out_rcpp_path); + try std.fs.rename(zig_cache_tmp_dir, tmp_rcpp_basename, o_dir, rcpp_filename); + break :blk digest; + }; + + if (man.have_exclusive_lock) { + // Write the updated manifest. This is a no-op if the manifest is not dirty. Note that it is + // possible we had a hit and the manifest is dirty, for example if the file mtime changed but + // the contents were the same, we hit the cache but the manifest is dirty and we need to update + // it to prevent doing a full file content comparison the next time around. + man.writeManifest() catch |err| { + log.warn("failed to write cache manifest when compiling '{s}': {s}", .{ win32_resource.src.src_path, @errorName(err) }); + }; + } + + const res_basename = try std.fmt.allocPrint(arena, "{s}.res", .{rc_basename_noext}); + + win32_resource.status = .{ + .success = .{ + .res_path = try comp.local_cache_directory.join(comp.gpa, &[_][]const u8{ + "o", &digest, res_basename, + }), + .lock = man.toOwnedLock(), + }, + }; +} + pub fn tmpFilePath(comp: *Compilation, ally: Allocator, suffix: []const u8) error{OutOfMemory}![]const u8 { const s = std.fs.path.sep_str; const rand_int = std.crypto.random.int(u64); @@ -4347,7 +4814,7 @@ pub fn addCCArgs( try argv.appendSlice(&[_][]const u8{ "-target", llvm_triple }); switch (ext) { - .c, .cpp, .m, .mm, .h, .cu => { + .c, .cpp, .m, .mm, .h, .cu, .rc => { try argv.appendSlice(&[_][]const u8{ "-nostdinc", "-fno-spell-checking", @@ -4375,9 +4842,16 @@ pub fn addCCArgs( try argv.append("-isystem"); try argv.append(c_headers_dir); - for (comp.libc_include_dir_list) |include_dir| { - try argv.append("-isystem"); - try argv.append(include_dir); + if (ext == .rc) { + for (comp.rc_include_dir_list) |include_dir| { + try argv.append("-isystem"); + try argv.append(include_dir); + } + } else { + for (comp.libc_include_dir_list) |include_dir| { + try argv.append("-isystem"); + try argv.append(include_dir); + } } if (target.cpu.model.llvm_name) |llvm_name| { @@ -4680,6 +5154,192 @@ fn failCObjWithOwnedErrorMsg( return error.AnalysisFail; } +fn failWin32Resource(comp: *Compilation, win32_resource: *Win32Resource, comptime format: []const u8, args: anytype) SemaError { + @setCold(true); + var bundle: ErrorBundle.Wip = undefined; + try bundle.init(comp.gpa); + errdefer bundle.deinit(); + try bundle.addRootErrorMessage(.{ + .msg = try bundle.printString(format, args), + .src_loc = try bundle.addSourceLocation(.{ + .src_path = try bundle.addString(win32_resource.src.src_path), + .line = 0, + .column = 0, + .span_start = 0, + .span_main = 0, + .span_end = 0, + }), + }); + const finished_bundle = try bundle.toOwnedBundle(""); + return comp.failWin32ResourceWithOwnedBundle(win32_resource, finished_bundle); +} + +fn failWin32ResourceWithOwnedBundle( + comp: *Compilation, + win32_resource: *Win32Resource, + err_bundle: ErrorBundle, +) SemaError { + @setCold(true); + { + comp.mutex.lock(); + defer comp.mutex.unlock(); + try comp.failed_win32_resources.putNoClobber(comp.gpa, win32_resource, err_bundle); + } + win32_resource.status = .failure; + return error.AnalysisFail; +} + +fn failWin32ResourceCli( + comp: *Compilation, + 
win32_resource: *Win32Resource, + diagnostics: *resinator.cli.Diagnostics, +) SemaError { + @setCold(true); + + var bundle: ErrorBundle.Wip = undefined; + try bundle.init(comp.gpa); + errdefer bundle.deinit(); + + try bundle.addRootErrorMessage(.{ + .msg = try bundle.addString("invalid command line option(s)"), + .src_loc = try bundle.addSourceLocation(.{ + .src_path = try bundle.addString(win32_resource.src.src_path), + .line = 0, + .column = 0, + .span_start = 0, + .span_main = 0, + .span_end = 0, + }), + }); + + var cur_err: ?ErrorBundle.ErrorMessage = null; + var cur_notes: std.ArrayListUnmanaged(ErrorBundle.ErrorMessage) = .{}; + defer cur_notes.deinit(comp.gpa); + for (diagnostics.errors.items) |err_details| { + switch (err_details.type) { + .err => { + if (cur_err) |err| { + try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items); + } + cur_err = .{ + .msg = try bundle.addString(err_details.msg.items), + }; + cur_notes.clearRetainingCapacity(); + }, + .warning => cur_err = null, + .note => { + if (cur_err == null) continue; + cur_err.?.notes_len += 1; + try cur_notes.append(comp.gpa, .{ + .msg = try bundle.addString(err_details.msg.items), + }); + }, + } + } + if (cur_err) |err| { + try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items); + } + + const finished_bundle = try bundle.toOwnedBundle(""); + return comp.failWin32ResourceWithOwnedBundle(win32_resource, finished_bundle); +} + +fn failWin32ResourceCompile( + comp: *Compilation, + win32_resource: *Win32Resource, + source: []const u8, + diagnostics: *resinator.errors.Diagnostics, + mappings: resinator.source_mapping.SourceMappings, +) SemaError { + @setCold(true); + + var bundle: ErrorBundle.Wip = undefined; + try bundle.init(comp.gpa); + errdefer bundle.deinit(); + + var msg_buf: std.ArrayListUnmanaged(u8) = .{}; + defer msg_buf.deinit(comp.gpa); + var cur_err: ?ErrorBundle.ErrorMessage = null; + var cur_notes: std.ArrayListUnmanaged(ErrorBundle.ErrorMessage) = .{}; + defer cur_notes.deinit(comp.gpa); + for (diagnostics.errors.items) |err_details| { + switch (err_details.type) { + .hint => continue, + // Clear the current error so that notes don't bleed into unassociated errors + .warning => { + cur_err = null; + continue; + }, + .note => if (cur_err == null) continue, + .err => {}, + } + const corresponding_span = mappings.get(err_details.token.line_number); + const corresponding_file = mappings.files.get(corresponding_span.filename_offset); + + const source_line_start = err_details.token.getLineStart(source); + const column = err_details.token.calculateColumn(source, 1, source_line_start); + const err_line = corresponding_span.start_line; + + msg_buf.clearRetainingCapacity(); + try err_details.render(msg_buf.writer(comp.gpa), source, diagnostics.strings.items); + + const src_loc = src_loc: { + var src_loc: ErrorBundle.SourceLocation = .{ + .src_path = try bundle.addString(corresponding_file), + .line = @intCast(err_line - 1), // 1-based -> 0-based + .column = @intCast(column), + .span_start = 0, + .span_main = 0, + .span_end = 0, + }; + if (err_details.print_source_line) { + const source_line = err_details.token.getLine(source, source_line_start); + const visual_info = err_details.visualTokenInfo(source_line_start, source_line_start + source_line.len); + src_loc.span_start = @intCast(visual_info.point_offset - visual_info.before_len); + src_loc.span_main = @intCast(visual_info.point_offset); + src_loc.span_end = @intCast(visual_info.point_offset + 1 + visual_info.after_len); + src_loc.source_line = try 
bundle.addString(source_line); + } + break :src_loc try bundle.addSourceLocation(src_loc); + }; + + switch (err_details.type) { + .err => { + if (cur_err) |err| { + try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items); + } + cur_err = .{ + .msg = try bundle.addString(msg_buf.items), + .src_loc = src_loc, + }; + cur_notes.clearRetainingCapacity(); + }, + .note => { + cur_err.?.notes_len += 1; + try cur_notes.append(comp.gpa, .{ + .msg = try bundle.addString(msg_buf.items), + .src_loc = src_loc, + }); + }, + .warning, .hint => unreachable, + } + } + if (cur_err) |err| { + try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items); + } + + const finished_bundle = try bundle.toOwnedBundle(""); + return comp.failWin32ResourceWithOwnedBundle(win32_resource, finished_bundle); +} + +fn win32ResourceFlushErrorMessage(wip: *ErrorBundle.Wip, msg: ErrorBundle.ErrorMessage, notes: []const ErrorBundle.ErrorMessage) !void { + try wip.addRootErrorMessage(msg); + const notes_start = try wip.reserveNotes(@intCast(notes.len)); + for (notes_start.., notes) |i, note| { + wip.extra.items[i] = @intFromEnum(wip.addErrorMessageAssumeCapacity(note)); + } +} + pub const FileExt = enum { c, cpp, @@ -4696,6 +5356,7 @@ pub const FileExt = enum { static_library, zig, def, + rc, res, unknown, @@ -4712,6 +5373,7 @@ pub const FileExt = enum { .static_library, .zig, .def, + .rc, .res, .unknown, => false, @@ -4735,6 +5397,7 @@ pub const FileExt = enum { .static_library => target.staticLibSuffix(), .zig => ".zig", .def => ".def", + .rc => ".rc", .res => ".res", .unknown => "", }; @@ -4827,6 +5490,8 @@ pub fn classifyFileExt(filename: []const u8) FileExt { return .cu; } else if (mem.endsWith(u8, filename, ".def")) { return .def; + } else if (mem.endsWith(u8, filename, ".rc")) { + return .rc; } else if (mem.endsWith(u8, filename, ".res")) { return .res; } else { diff --git a/src/link.zig b/src/link.zig index 4db946658a8d..a1c816550c2d 100644 --- a/src/link.zig +++ b/src/link.zig @@ -1027,6 +1027,9 @@ pub const File = struct { for (comp.c_object_table.keys()) |key| { _ = try man.addFile(key.status.success.object_path, null); } + for (comp.win32_resource_table.keys()) |key| { + _ = try man.addFile(key.status.success.res_path, null); + } try man.addOptionalFile(module_obj_path); try man.addOptionalFile(compiler_rt_path); @@ -1056,7 +1059,7 @@ pub const File = struct { }; } - const num_object_files = base.options.objects.len + comp.c_object_table.count() + 2; + const num_object_files = base.options.objects.len + comp.c_object_table.count() + comp.win32_resource_table.count() + 2; var object_files = try std.ArrayList([*:0]const u8).initCapacity(base.allocator, num_object_files); defer object_files.deinit(); @@ -1066,6 +1069,9 @@ pub const File = struct { for (comp.c_object_table.keys()) |key| { object_files.appendAssumeCapacity(try arena.dupeZ(u8, key.status.success.object_path)); } + for (comp.win32_resource_table.keys()) |key| { + object_files.appendAssumeCapacity(try arena.dupeZ(u8, key.status.success.res_path)); + } if (module_obj_path) |p| { object_files.appendAssumeCapacity(try arena.dupeZ(u8, p)); } diff --git a/src/link/Coff/lld.zig b/src/link/Coff/lld.zig index c0f88704e5c5..38385ceedf35 100644 --- a/src/link/Coff/lld.zig +++ b/src/link/Coff/lld.zig @@ -72,6 +72,9 @@ pub fn linkWithLLD(self: *Coff, comp: *Compilation, prog_node: *std.Progress.Nod for (comp.c_object_table.keys()) |key| { _ = try man.addFile(key.status.success.object_path, null); } + for (comp.win32_resource_table.keys()) |key| { + _ = try 
man.addFile(key.status.success.res_path, null); + } try man.addOptionalFile(module_obj_path); man.hash.addOptionalBytes(self.base.options.entry); man.hash.addOptional(self.base.options.stack_size_override); @@ -268,6 +271,10 @@ pub fn linkWithLLD(self: *Coff, comp: *Compilation, prog_node: *std.Progress.Nod try argv.append(key.status.success.object_path); } + for (comp.win32_resource_table.keys()) |key| { + try argv.append(key.status.success.res_path); + } + if (module_obj_path) |p| { try argv.append(p); } diff --git a/src/main.zig b/src/main.zig index 6e7b330c407b..0f90e110e17e 100644 --- a/src/main.zig +++ b/src/main.zig @@ -472,6 +472,7 @@ const usage_build_generic = \\ -D[macro]=[value] Define C [macro] to [value] (1 if [value] omitted) \\ --libc [file] Provide a file which specifies libc paths \\ -cflags [flags] -- Set extra flags for the next positional C source files + \\ -rcflags [flags] -- Set extra flags for the next positional .rc source files \\ \\Link Options: \\ -l[lib], --library [lib] Link against system library (only if actually used) @@ -919,11 +920,13 @@ fn buildOutputType( var wasi_emulated_libs = std.ArrayList(wasi_libc.CRTFile).init(arena); var clang_argv = std.ArrayList([]const u8).init(arena); var extra_cflags = std.ArrayList([]const u8).init(arena); + var extra_rcflags = std.ArrayList([]const u8).init(arena); // These are before resolving sysroot. var lib_dir_args = std.ArrayList([]const u8).init(arena); var rpath_list = std.ArrayList([]const u8).init(arena); var symbol_wrap_set: std.StringArrayHashMapUnmanaged(void) = .{}; var c_source_files = std.ArrayList(Compilation.CSourceFile).init(arena); + var rc_source_files = std.ArrayList(Compilation.RcSourceFile).init(arena); var link_objects = std.ArrayList(Compilation.LinkObject).init(arena); var framework_dirs = std.ArrayList([]const u8).init(arena); var frameworks: std.StringArrayHashMapUnmanaged(Framework) = .{}; @@ -1042,6 +1045,15 @@ fn buildOutputType( if (mem.eql(u8, next_arg, "--")) break; try extra_cflags.append(next_arg); } + } else if (mem.eql(u8, arg, "-rcflags")) { + extra_rcflags.shrinkRetainingCapacity(0); + while (true) { + const next_arg = args_iter.next() orelse { + fatal("expected -- after -rcflags", .{}); + }; + if (mem.eql(u8, next_arg, "--")) break; + try extra_rcflags.append(next_arg); + } } else if (mem.eql(u8, arg, "--color")) { const next_arg = args_iter.next() orelse { fatal("expected [auto|on|off] after --color", .{}); @@ -1599,6 +1611,12 @@ fn buildOutputType( .ext = file_ext, }); }, + .rc => { + try rc_source_files.append(.{ + .src_path = arg, + .extra_flags = try arena.dupe([]const u8, extra_rcflags.items), + }); + }, .zig => { if (root_src_file) |other| { fatal("found another zig file '{s}' after root source file '{s}'", .{ arg, other }); @@ -1691,6 +1709,9 @@ fn buildOutputType( .def => { linker_module_definition_file = it.only_arg; }, + .rc => { + try rc_source_files.append(.{ .src_path = it.only_arg }); + }, .zig => { if (root_src_file) |other| { fatal("found another zig file '{s}' after root source file '{s}'", .{ it.only_arg, other }); @@ -2933,6 +2954,7 @@ fn buildOutputType( if (output_mode == .Obj and (object_format == .coff or object_format == .macho)) { const total_obj_count = c_source_files.items.len + @intFromBool(root_src_file != null) + + rc_source_files.items.len + link_objects.items.len; if (total_obj_count > 1) { fatal("{s} does not support linking multiple objects into one", .{@tagName(object_format)}); @@ -3319,6 +3341,7 @@ fn buildOutputType( .rpath_list = 
rpath_list.items, .symbol_wrap_set = symbol_wrap_set, .c_source_files = c_source_files.items, + .rc_source_files = rc_source_files.items, .link_objects = link_objects.items, .framework_dirs = framework_dirs.items, .frameworks = resolved_frameworks.items, diff --git a/src/resinator.zig b/src/resinator.zig new file mode 100644 index 000000000000..3287641a7de0 --- /dev/null +++ b/src/resinator.zig @@ -0,0 +1,18 @@ +pub const ani = @import("resinator/ani.zig"); +pub const ast = @import("resinator/ast.zig"); +pub const bmp = @import("resinator/bmp.zig"); +pub const cli = @import("resinator/cli.zig"); +pub const code_pages = @import("resinator/code_pages.zig"); +pub const comments = @import("resinator/comments.zig"); +pub const compile = @import("resinator/compile.zig"); +pub const errors = @import("resinator/errors.zig"); +pub const ico = @import("resinator/ico.zig"); +pub const lang = @import("resinator/lang.zig"); +pub const lex = @import("resinator/lex.zig"); +pub const literals = @import("resinator/literals.zig"); +pub const parse = @import("resinator/parse.zig"); +pub const rc = @import("resinator/rc.zig"); +pub const res = @import("resinator/res.zig"); +pub const source_mapping = @import("resinator/source_mapping.zig"); +pub const utils = @import("resinator/utils.zig"); +pub const windows1252 = @import("resinator/windows1252.zig"); diff --git a/src/resinator/ani.zig b/src/resinator/ani.zig new file mode 100644 index 000000000000..7b8b05564fba --- /dev/null +++ b/src/resinator/ani.zig @@ -0,0 +1,58 @@ +//! https://en.wikipedia.org/wiki/Resource_Interchange_File_Format +//! https://www.moon-soft.com/program/format/windows/ani.htm +//! https://www.gdgsoft.com/anituner/help/aniformat.htm +//! https://www.lomont.org/software/aniexploit/ExploitANI.pdf +//! +//! RIFF( 'ACON' +//! [LIST( 'INFO' )] +//! [] +//! anih( ) +//! [rate( )] +//! ['seq '( )] +//! LIST( 'fram' icon( ) ... ) +//! 
) + +const std = @import("std"); + +const AF_ICON: u32 = 1; + +pub fn isAnimatedIcon(reader: anytype) bool { + const flags = getAniheaderFlags(reader) catch return false; + return flags & AF_ICON == AF_ICON; +} + +fn getAniheaderFlags(reader: anytype) !u32 { + const riff_header = try reader.readBytesNoEof(4); + if (!std.mem.eql(u8, &riff_header, "RIFF")) return error.InvalidFormat; + + _ = try reader.readIntLittle(u32); // size of RIFF chunk + + const form_type = try reader.readBytesNoEof(4); + if (!std.mem.eql(u8, &form_type, "ACON")) return error.InvalidFormat; + + while (true) { + const chunk_id = try reader.readBytesNoEof(4); + const chunk_len = try reader.readIntLittle(u32); + if (!std.mem.eql(u8, &chunk_id, "anih")) { + // TODO: Move file cursor instead of skipBytes + try reader.skipBytes(chunk_len, .{}); + continue; + } + + const aniheader = try reader.readStruct(ANIHEADER); + return std.mem.nativeToLittle(u32, aniheader.flags); + } +} + +/// From Microsoft Multimedia Data Standards Update April 15, 1994 +const ANIHEADER = extern struct { + cbSizeof: u32, + cFrames: u32, + cSteps: u32, + cx: u32, + cy: u32, + cBitCount: u32, + cPlanes: u32, + jifRate: u32, + flags: u32, +}; diff --git a/src/resinator/ast.zig b/src/resinator/ast.zig new file mode 100644 index 000000000000..e6f6c030c08c --- /dev/null +++ b/src/resinator/ast.zig @@ -0,0 +1,1084 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const Token = @import("lex.zig").Token; +const CodePage = @import("code_pages.zig").CodePage; + +pub const Tree = struct { + node: *Node, + input_code_pages: CodePageLookup, + output_code_pages: CodePageLookup, + + /// not owned by the tree + source: []const u8, + + arena: std.heap.ArenaAllocator.State, + allocator: Allocator, + + pub fn deinit(self: *Tree) void { + self.arena.promote(self.allocator).deinit(); + } + + pub fn root(self: *Tree) *Node.Root { + return @fieldParentPtr(Node.Root, "base", self.node); + } + + pub fn dump(self: *Tree, writer: anytype) @TypeOf(writer).Error!void { + try self.node.dump(self, writer, 0); + } +}; + +pub const CodePageLookup = struct { + lookup: std.ArrayListUnmanaged(CodePage) = .{}, + allocator: Allocator, + default_code_page: CodePage, + + pub fn init(allocator: Allocator, default_code_page: CodePage) CodePageLookup { + return .{ + .allocator = allocator, + .default_code_page = default_code_page, + }; + } + + pub fn deinit(self: *CodePageLookup) void { + self.lookup.deinit(self.allocator); + } + + /// line_num is 1-indexed + pub fn setForLineNum(self: *CodePageLookup, line_num: usize, code_page: CodePage) !void { + const index = line_num - 1; + if (index >= self.lookup.items.len) { + const new_size = line_num; + const missing_lines_start_index = self.lookup.items.len; + try self.lookup.resize(self.allocator, new_size); + + // If there are any gaps created, we need to fill them in with the value of the + // last line before the gap. This can happen for e.g. string literals that + // span multiple lines, or if the start of a file has multiple empty lines. 
+ const fill_value = if (missing_lines_start_index > 0) + self.lookup.items[missing_lines_start_index - 1] + else + self.default_code_page; + var i: usize = missing_lines_start_index; + while (i < new_size - 1) : (i += 1) { + self.lookup.items[i] = fill_value; + } + } + self.lookup.items[index] = code_page; + } + + pub fn setForToken(self: *CodePageLookup, token: Token, code_page: CodePage) !void { + return self.setForLineNum(token.line_number, code_page); + } + + /// line_num is 1-indexed + pub fn getForLineNum(self: CodePageLookup, line_num: usize) CodePage { + return self.lookup.items[line_num - 1]; + } + + pub fn getForToken(self: CodePageLookup, token: Token) CodePage { + return self.getForLineNum(token.line_number); + } +}; + +test "CodePageLookup" { + var lookup = CodePageLookup.init(std.testing.allocator, .windows1252); + defer lookup.deinit(); + + try lookup.setForLineNum(5, .utf8); + try std.testing.expectEqual(CodePage.windows1252, lookup.getForLineNum(1)); + try std.testing.expectEqual(CodePage.windows1252, lookup.getForLineNum(2)); + try std.testing.expectEqual(CodePage.windows1252, lookup.getForLineNum(3)); + try std.testing.expectEqual(CodePage.windows1252, lookup.getForLineNum(4)); + try std.testing.expectEqual(CodePage.utf8, lookup.getForLineNum(5)); + try std.testing.expectEqual(@as(usize, 5), lookup.lookup.items.len); + + try lookup.setForLineNum(7, .windows1252); + try std.testing.expectEqual(CodePage.windows1252, lookup.getForLineNum(1)); + try std.testing.expectEqual(CodePage.windows1252, lookup.getForLineNum(2)); + try std.testing.expectEqual(CodePage.windows1252, lookup.getForLineNum(3)); + try std.testing.expectEqual(CodePage.windows1252, lookup.getForLineNum(4)); + try std.testing.expectEqual(CodePage.utf8, lookup.getForLineNum(5)); + try std.testing.expectEqual(CodePage.utf8, lookup.getForLineNum(6)); + try std.testing.expectEqual(CodePage.windows1252, lookup.getForLineNum(7)); + try std.testing.expectEqual(@as(usize, 7), lookup.lookup.items.len); +} + +pub const Node = struct { + id: Id, + + pub const Id = enum { + root, + resource_external, + resource_raw_data, + literal, + binary_expression, + grouped_expression, + not_expression, + accelerators, + accelerator, + dialog, + control_statement, + toolbar, + menu, + menu_item, + menu_item_separator, + menu_item_ex, + popup, + popup_ex, + version_info, + version_statement, + block, + block_value, + block_value_value, + string_table, + string_table_string, + language_statement, + font_statement, + simple_statement, + invalid, + + pub fn Type(comptime id: Id) type { + return switch (id) { + .root => Root, + .resource_external => ResourceExternal, + .resource_raw_data => ResourceRawData, + .literal => Literal, + .binary_expression => BinaryExpression, + .grouped_expression => GroupedExpression, + .not_expression => NotExpression, + .accelerators => Accelerators, + .accelerator => Accelerator, + .dialog => Dialog, + .control_statement => ControlStatement, + .toolbar => Toolbar, + .menu => Menu, + .menu_item => MenuItem, + .menu_item_separator => MenuItemSeparator, + .menu_item_ex => MenuItemEx, + .popup => Popup, + .popup_ex => PopupEx, + .version_info => VersionInfo, + .version_statement => VersionStatement, + .block => Block, + .block_value => BlockValue, + .block_value_value => BlockValueValue, + .string_table => StringTable, + .string_table_string => StringTableString, + .language_statement => LanguageStatement, + .font_statement => FontStatement, + .simple_statement => SimpleStatement, + .invalid => Invalid, + }; 
+ } + }; + + pub fn cast(base: *Node, comptime id: Id) ?*id.Type() { + if (base.id == id) { + return @fieldParentPtr(id.Type(), "base", base); + } + return null; + } + + pub const Root = struct { + base: Node = .{ .id = .root }, + body: []*Node, + }; + + pub const ResourceExternal = struct { + base: Node = .{ .id = .resource_external }, + id: Token, + type: Token, + common_resource_attributes: []Token, + filename: *Node, + }; + + pub const ResourceRawData = struct { + base: Node = .{ .id = .resource_raw_data }, + id: Token, + type: Token, + common_resource_attributes: []Token, + begin_token: Token, + raw_data: []*Node, + end_token: Token, + }; + + pub const Literal = struct { + base: Node = .{ .id = .literal }, + token: Token, + }; + + pub const BinaryExpression = struct { + base: Node = .{ .id = .binary_expression }, + operator: Token, + left: *Node, + right: *Node, + }; + + pub const GroupedExpression = struct { + base: Node = .{ .id = .grouped_expression }, + open_token: Token, + expression: *Node, + close_token: Token, + }; + + pub const NotExpression = struct { + base: Node = .{ .id = .not_expression }, + not_token: Token, + number_token: Token, + }; + + pub const Accelerators = struct { + base: Node = .{ .id = .accelerators }, + id: Token, + type: Token, + common_resource_attributes: []Token, + optional_statements: []*Node, + begin_token: Token, + accelerators: []*Node, + end_token: Token, + }; + + pub const Accelerator = struct { + base: Node = .{ .id = .accelerator }, + event: *Node, + idvalue: *Node, + type_and_options: []Token, + }; + + pub const Dialog = struct { + base: Node = .{ .id = .dialog }, + id: Token, + type: Token, + common_resource_attributes: []Token, + x: *Node, + y: *Node, + width: *Node, + height: *Node, + help_id: ?*Node, + optional_statements: []*Node, + begin_token: Token, + controls: []*Node, + end_token: Token, + }; + + pub const ControlStatement = struct { + base: Node = .{ .id = .control_statement }, + type: Token, + text: ?Token, + /// Only relevant for the user-defined CONTROL control + class: ?*Node, + id: *Node, + x: *Node, + y: *Node, + width: *Node, + height: *Node, + style: ?*Node, + exstyle: ?*Node, + help_id: ?*Node, + extra_data_begin: ?Token, + extra_data: []*Node, + extra_data_end: ?Token, + + /// Returns true if this node describes a user-defined CONTROL control + /// https://learn.microsoft.com/en-us/windows/win32/menurc/control-control + pub fn isUserDefined(self: *const ControlStatement) bool { + return self.class != null; + } + }; + + pub const Toolbar = struct { + base: Node = .{ .id = .toolbar }, + id: Token, + type: Token, + common_resource_attributes: []Token, + button_width: *Node, + button_height: *Node, + begin_token: Token, + /// Will contain Literal and SimpleStatement nodes + buttons: []*Node, + end_token: Token, + }; + + pub const Menu = struct { + base: Node = .{ .id = .menu }, + id: Token, + type: Token, + common_resource_attributes: []Token, + optional_statements: []*Node, + /// `help_id` will never be non-null if `type` is MENU + help_id: ?*Node, + begin_token: Token, + items: []*Node, + end_token: Token, + }; + + pub const MenuItem = struct { + base: Node = .{ .id = .menu_item }, + menuitem: Token, + text: Token, + result: *Node, + option_list: []Token, + }; + + pub const MenuItemSeparator = struct { + base: Node = .{ .id = .menu_item_separator }, + menuitem: Token, + separator: Token, + }; + + pub const MenuItemEx = struct { + base: Node = .{ .id = .menu_item_ex }, + menuitem: Token, + text: Token, + id: ?*Node, + type: 
?*Node, + state: ?*Node, + }; + + pub const Popup = struct { + base: Node = .{ .id = .popup }, + popup: Token, + text: Token, + option_list: []Token, + begin_token: Token, + items: []*Node, + end_token: Token, + }; + + pub const PopupEx = struct { + base: Node = .{ .id = .popup_ex }, + popup: Token, + text: Token, + id: ?*Node, + type: ?*Node, + state: ?*Node, + help_id: ?*Node, + begin_token: Token, + items: []*Node, + end_token: Token, + }; + + pub const VersionInfo = struct { + base: Node = .{ .id = .version_info }, + id: Token, + versioninfo: Token, + common_resource_attributes: []Token, + /// Will contain VersionStatement and/or SimpleStatement nodes + fixed_info: []*Node, + begin_token: Token, + block_statements: []*Node, + end_token: Token, + }; + + /// Used for FILEVERSION and PRODUCTVERSION statements + pub const VersionStatement = struct { + base: Node = .{ .id = .version_statement }, + type: Token, + /// Between 1-4 parts + parts: []*Node, + }; + + pub const Block = struct { + base: Node = .{ .id = .block }, + /// The BLOCK token itself + identifier: Token, + key: Token, + /// This is undocumented but BLOCK statements support values after + /// the key just like VALUE statements. + values: []*Node, + begin_token: Token, + children: []*Node, + end_token: Token, + }; + + pub const BlockValue = struct { + base: Node = .{ .id = .block_value }, + /// The VALUE token itself + identifier: Token, + key: Token, + /// These will be BlockValueValue nodes + values: []*Node, + }; + + pub const BlockValueValue = struct { + base: Node = .{ .id = .block_value_value }, + expression: *Node, + /// Whether or not the value has a trailing comma is relevant + trailing_comma: bool, + }; + + pub const StringTable = struct { + base: Node = .{ .id = .string_table }, + type: Token, + common_resource_attributes: []Token, + optional_statements: []*Node, + begin_token: Token, + strings: []*Node, + end_token: Token, + }; + + pub const StringTableString = struct { + base: Node = .{ .id = .string_table_string }, + id: *Node, + maybe_comma: ?Token, + string: Token, + }; + + pub const LanguageStatement = struct { + base: Node = .{ .id = .language_statement }, + /// The LANGUAGE token itself + language_token: Token, + primary_language_id: *Node, + sublanguage_id: *Node, + }; + + pub const FontStatement = struct { + base: Node = .{ .id = .font_statement }, + /// The FONT token itself + identifier: Token, + point_size: *Node, + typeface: Token, + weight: ?*Node, + italic: ?*Node, + char_set: ?*Node, + }; + + /// A statement with one value associated with it. 
+ /// Used for CAPTION, CHARACTERISTICS, CLASS, EXSTYLE, MENU, STYLE, VERSION, + /// as well as VERSIONINFO-specific statements FILEFLAGSMASK, FILEFLAGS, FILEOS, + /// FILETYPE, FILESUBTYPE + pub const SimpleStatement = struct { + base: Node = .{ .id = .simple_statement }, + identifier: Token, + value: *Node, + }; + + pub const Invalid = struct { + base: Node = .{ .id = .invalid }, + context: []Token, + }; + + pub fn isNumberExpression(node: *const Node) bool { + switch (node.id) { + .literal => { + const literal = @fieldParentPtr(Node.Literal, "base", node); + return switch (literal.token.id) { + .number => true, + else => false, + }; + }, + .binary_expression, .grouped_expression, .not_expression => return true, + else => return false, + } + } + + pub fn isStringLiteral(node: *const Node) bool { + switch (node.id) { + .literal => { + const literal = @fieldParentPtr(Node.Literal, "base", node); + return switch (literal.token.id) { + .quoted_ascii_string, .quoted_wide_string => true, + else => false, + }; + }, + else => return false, + } + } + + pub fn getFirstToken(node: *const Node) Token { + switch (node.id) { + .root => unreachable, + .resource_external => { + const casted = @fieldParentPtr(Node.ResourceExternal, "base", node); + return casted.id; + }, + .resource_raw_data => { + const casted = @fieldParentPtr(Node.ResourceRawData, "base", node); + return casted.id; + }, + .literal => { + const casted = @fieldParentPtr(Node.Literal, "base", node); + return casted.token; + }, + .binary_expression => { + const casted = @fieldParentPtr(Node.BinaryExpression, "base", node); + return casted.left.getFirstToken(); + }, + .grouped_expression => { + const casted = @fieldParentPtr(Node.GroupedExpression, "base", node); + return casted.open_token; + }, + .not_expression => { + const casted = @fieldParentPtr(Node.NotExpression, "base", node); + return casted.not_token; + }, + .accelerators => { + const casted = @fieldParentPtr(Node.Accelerators, "base", node); + return casted.id; + }, + .accelerator => { + const casted = @fieldParentPtr(Node.Accelerator, "base", node); + return casted.event.getFirstToken(); + }, + .dialog => { + const casted = @fieldParentPtr(Node.Dialog, "base", node); + return casted.id; + }, + .control_statement => { + const casted = @fieldParentPtr(Node.ControlStatement, "base", node); + return casted.type; + }, + .toolbar => { + const casted = @fieldParentPtr(Node.Toolbar, "base", node); + return casted.id; + }, + .menu => { + const casted = @fieldParentPtr(Node.Menu, "base", node); + return casted.id; + }, + inline .menu_item, .menu_item_separator, .menu_item_ex => |menu_item_type| { + const node_type = menu_item_type.Type(); + const casted = @fieldParentPtr(node_type, "base", node); + return casted.menuitem; + }, + inline .popup, .popup_ex => |popup_type| { + const node_type = popup_type.Type(); + const casted = @fieldParentPtr(node_type, "base", node); + return casted.popup; + }, + .version_info => { + const casted = @fieldParentPtr(Node.VersionInfo, "base", node); + return casted.id; + }, + .version_statement => { + const casted = @fieldParentPtr(Node.VersionStatement, "base", node); + return casted.type; + }, + .block => { + const casted = @fieldParentPtr(Node.Block, "base", node); + return casted.identifier; + }, + .block_value => { + const casted = @fieldParentPtr(Node.BlockValue, "base", node); + return casted.identifier; + }, + .block_value_value => { + const casted = @fieldParentPtr(Node.BlockValueValue, "base", node); + return casted.expression.getFirstToken(); + 
}, + .string_table => { + const casted = @fieldParentPtr(Node.StringTable, "base", node); + return casted.type; + }, + .string_table_string => { + const casted = @fieldParentPtr(Node.StringTableString, "base", node); + return casted.id.getFirstToken(); + }, + .language_statement => { + const casted = @fieldParentPtr(Node.LanguageStatement, "base", node); + return casted.language_token; + }, + .font_statement => { + const casted = @fieldParentPtr(Node.FontStatement, "base", node); + return casted.identifier; + }, + .simple_statement => { + const casted = @fieldParentPtr(Node.SimpleStatement, "base", node); + return casted.identifier; + }, + .invalid => { + const casted = @fieldParentPtr(Node.Invalid, "base", node); + return casted.context[0]; + }, + } + } + + pub fn getLastToken(node: *const Node) Token { + switch (node.id) { + .root => unreachable, + .resource_external => { + const casted = @fieldParentPtr(Node.ResourceExternal, "base", node); + return casted.filename.getLastToken(); + }, + .resource_raw_data => { + const casted = @fieldParentPtr(Node.ResourceRawData, "base", node); + return casted.end_token; + }, + .literal => { + const casted = @fieldParentPtr(Node.Literal, "base", node); + return casted.token; + }, + .binary_expression => { + const casted = @fieldParentPtr(Node.BinaryExpression, "base", node); + return casted.right.getLastToken(); + }, + .grouped_expression => { + const casted = @fieldParentPtr(Node.GroupedExpression, "base", node); + return casted.close_token; + }, + .not_expression => { + const casted = @fieldParentPtr(Node.NotExpression, "base", node); + return casted.number_token; + }, + .accelerators => { + const casted = @fieldParentPtr(Node.Accelerators, "base", node); + return casted.end_token; + }, + .accelerator => { + const casted = @fieldParentPtr(Node.Accelerator, "base", node); + if (casted.type_and_options.len > 0) return casted.type_and_options[casted.type_and_options.len - 1]; + return casted.idvalue.getLastToken(); + }, + .dialog => { + const casted = @fieldParentPtr(Node.Dialog, "base", node); + return casted.end_token; + }, + .control_statement => { + const casted = @fieldParentPtr(Node.ControlStatement, "base", node); + if (casted.extra_data_end) |token| return token; + if (casted.help_id) |help_id_node| return help_id_node.getLastToken(); + if (casted.exstyle) |exstyle_node| return exstyle_node.getLastToken(); + // For user-defined CONTROL controls, the style comes before 'x', but + // otherwise it comes after 'height' so it could be the last token if + // it's present. 
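+                // As a rough sketch of the two forms (the tokens shown are illustrative):
+                //   CONTROL "text", id, BUTTON, style, x, y, w, h   (user-defined: style before x)
+                //   PUSHBUTTON "text", id, x, y, w, h, style        (predefined: optional style last)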
+ if (!casted.isUserDefined()) { + if (casted.style) |style_node| return style_node.getLastToken(); + } + return casted.height.getLastToken(); + }, + .toolbar => { + const casted = @fieldParentPtr(Node.Toolbar, "base", node); + return casted.end_token; + }, + .menu => { + const casted = @fieldParentPtr(Node.Menu, "base", node); + return casted.end_token; + }, + .menu_item => { + const casted = @fieldParentPtr(Node.MenuItem, "base", node); + if (casted.option_list.len > 0) return casted.option_list[casted.option_list.len - 1]; + return casted.result.getLastToken(); + }, + .menu_item_separator => { + const casted = @fieldParentPtr(Node.MenuItemSeparator, "base", node); + return casted.separator; + }, + .menu_item_ex => { + const casted = @fieldParentPtr(Node.MenuItemEx, "base", node); + if (casted.state) |state_node| return state_node.getLastToken(); + if (casted.type) |type_node| return type_node.getLastToken(); + if (casted.id) |id_node| return id_node.getLastToken(); + return casted.text; + }, + inline .popup, .popup_ex => |popup_type| { + const node_type = popup_type.Type(); + const casted = @fieldParentPtr(node_type, "base", node); + return casted.end_token; + }, + .version_info => { + const casted = @fieldParentPtr(Node.VersionInfo, "base", node); + return casted.end_token; + }, + .version_statement => { + const casted = @fieldParentPtr(Node.VersionStatement, "base", node); + return casted.parts[casted.parts.len - 1].getLastToken(); + }, + .block => { + const casted = @fieldParentPtr(Node.Block, "base", node); + return casted.end_token; + }, + .block_value => { + const casted = @fieldParentPtr(Node.BlockValue, "base", node); + if (casted.values.len > 0) return casted.values[casted.values.len - 1].getLastToken(); + return casted.key; + }, + .block_value_value => { + const casted = @fieldParentPtr(Node.BlockValueValue, "base", node); + return casted.expression.getLastToken(); + }, + .string_table => { + const casted = @fieldParentPtr(Node.StringTable, "base", node); + return casted.end_token; + }, + .string_table_string => { + const casted = @fieldParentPtr(Node.StringTableString, "base", node); + return casted.string; + }, + .language_statement => { + const casted = @fieldParentPtr(Node.LanguageStatement, "base", node); + return casted.sublanguage_id.getLastToken(); + }, + .font_statement => { + const casted = @fieldParentPtr(Node.FontStatement, "base", node); + if (casted.char_set) |char_set_node| return char_set_node.getLastToken(); + if (casted.italic) |italic_node| return italic_node.getLastToken(); + if (casted.weight) |weight_node| return weight_node.getLastToken(); + return casted.typeface; + }, + .simple_statement => { + const casted = @fieldParentPtr(Node.SimpleStatement, "base", node); + return casted.value.getLastToken(); + }, + .invalid => { + const casted = @fieldParentPtr(Node.Invalid, "base", node); + return casted.context[casted.context.len - 1]; + }, + } + } + + pub fn dump( + node: *const Node, + tree: *const Tree, + writer: anytype, + indent: usize, + ) @TypeOf(writer).Error!void { + try writer.writeByteNTimes(' ', indent); + try writer.writeAll(@tagName(node.id)); + switch (node.id) { + .root => { + try writer.writeAll("\n"); + const root = @fieldParentPtr(Node.Root, "base", node); + for (root.body) |body_node| { + try body_node.dump(tree, writer, indent + 1); + } + }, + .resource_external => { + const resource = @fieldParentPtr(Node.ResourceExternal, "base", node); + try writer.print(" {s} {s} [{d} common_resource_attributes]\n", .{ resource.id.slice(tree.source), 
resource.type.slice(tree.source), resource.common_resource_attributes.len }); + try resource.filename.dump(tree, writer, indent + 1); + }, + .resource_raw_data => { + const resource = @fieldParentPtr(Node.ResourceRawData, "base", node); + try writer.print(" {s} {s} [{d} common_resource_attributes] raw data: {}\n", .{ resource.id.slice(tree.source), resource.type.slice(tree.source), resource.common_resource_attributes.len, resource.raw_data.len }); + for (resource.raw_data) |data_expression| { + try data_expression.dump(tree, writer, indent + 1); + } + }, + .literal => { + const literal = @fieldParentPtr(Node.Literal, "base", node); + try writer.writeAll(" "); + try writer.writeAll(literal.token.slice(tree.source)); + try writer.writeAll("\n"); + }, + .binary_expression => { + const binary = @fieldParentPtr(Node.BinaryExpression, "base", node); + try writer.writeAll(" "); + try writer.writeAll(binary.operator.slice(tree.source)); + try writer.writeAll("\n"); + try binary.left.dump(tree, writer, indent + 1); + try binary.right.dump(tree, writer, indent + 1); + }, + .grouped_expression => { + const grouped = @fieldParentPtr(Node.GroupedExpression, "base", node); + try writer.writeAll("\n"); + try writer.writeByteNTimes(' ', indent); + try writer.writeAll(grouped.open_token.slice(tree.source)); + try writer.writeAll("\n"); + try grouped.expression.dump(tree, writer, indent + 1); + try writer.writeByteNTimes(' ', indent); + try writer.writeAll(grouped.close_token.slice(tree.source)); + try writer.writeAll("\n"); + }, + .not_expression => { + const not = @fieldParentPtr(Node.NotExpression, "base", node); + try writer.writeAll(" "); + try writer.writeAll(not.not_token.slice(tree.source)); + try writer.writeAll(" "); + try writer.writeAll(not.number_token.slice(tree.source)); + try writer.writeAll("\n"); + }, + .accelerators => { + const accelerators = @fieldParentPtr(Node.Accelerators, "base", node); + try writer.print(" {s} {s} [{d} common_resource_attributes]\n", .{ accelerators.id.slice(tree.source), accelerators.type.slice(tree.source), accelerators.common_resource_attributes.len }); + for (accelerators.optional_statements) |statement| { + try statement.dump(tree, writer, indent + 1); + } + try writer.writeByteNTimes(' ', indent); + try writer.writeAll(accelerators.begin_token.slice(tree.source)); + try writer.writeAll("\n"); + for (accelerators.accelerators) |accelerator| { + try accelerator.dump(tree, writer, indent + 1); + } + try writer.writeByteNTimes(' ', indent); + try writer.writeAll(accelerators.end_token.slice(tree.source)); + try writer.writeAll("\n"); + }, + .accelerator => { + const accelerator = @fieldParentPtr(Node.Accelerator, "base", node); + for (accelerator.type_and_options, 0..) 
|option, i| { + if (i != 0) try writer.writeAll(","); + try writer.writeByte(' '); + try writer.writeAll(option.slice(tree.source)); + } + try writer.writeAll("\n"); + try accelerator.event.dump(tree, writer, indent + 1); + try accelerator.idvalue.dump(tree, writer, indent + 1); + }, + .dialog => { + const dialog = @fieldParentPtr(Node.Dialog, "base", node); + try writer.print(" {s} {s} [{d} common_resource_attributes]\n", .{ dialog.id.slice(tree.source), dialog.type.slice(tree.source), dialog.common_resource_attributes.len }); + inline for (.{ "x", "y", "width", "height" }) |arg| { + try writer.writeByteNTimes(' ', indent + 1); + try writer.writeAll(arg ++ ":\n"); + try @field(dialog, arg).dump(tree, writer, indent + 2); + } + if (dialog.help_id) |help_id| { + try writer.writeByteNTimes(' ', indent + 1); + try writer.writeAll("help_id:\n"); + try help_id.dump(tree, writer, indent + 2); + } + for (dialog.optional_statements) |statement| { + try statement.dump(tree, writer, indent + 1); + } + try writer.writeByteNTimes(' ', indent); + try writer.writeAll(dialog.begin_token.slice(tree.source)); + try writer.writeAll("\n"); + for (dialog.controls) |control| { + try control.dump(tree, writer, indent + 1); + } + try writer.writeByteNTimes(' ', indent); + try writer.writeAll(dialog.end_token.slice(tree.source)); + try writer.writeAll("\n"); + }, + .control_statement => { + const control = @fieldParentPtr(Node.ControlStatement, "base", node); + try writer.print(" {s}", .{control.type.slice(tree.source)}); + if (control.text) |text| { + try writer.print(" text: {s}", .{text.slice(tree.source)}); + } + try writer.writeByte('\n'); + if (control.class) |class| { + try writer.writeByteNTimes(' ', indent + 1); + try writer.writeAll("class:\n"); + try class.dump(tree, writer, indent + 2); + } + inline for (.{ "id", "x", "y", "width", "height" }) |arg| { + try writer.writeByteNTimes(' ', indent + 1); + try writer.writeAll(arg ++ ":\n"); + try @field(control, arg).dump(tree, writer, indent + 2); + } + inline for (.{ "style", "exstyle", "help_id" }) |arg| { + if (@field(control, arg)) |val_node| { + try writer.writeByteNTimes(' ', indent + 1); + try writer.writeAll(arg ++ ":\n"); + try val_node.dump(tree, writer, indent + 2); + } + } + if (control.extra_data_begin != null) { + try writer.writeByteNTimes(' ', indent); + try writer.writeAll(control.extra_data_begin.?.slice(tree.source)); + try writer.writeAll("\n"); + for (control.extra_data) |data_node| { + try data_node.dump(tree, writer, indent + 1); + } + try writer.writeByteNTimes(' ', indent); + try writer.writeAll(control.extra_data_end.?.slice(tree.source)); + try writer.writeAll("\n"); + } + }, + .toolbar => { + const toolbar = @fieldParentPtr(Node.Toolbar, "base", node); + try writer.print(" {s} {s} [{d} common_resource_attributes]\n", .{ toolbar.id.slice(tree.source), toolbar.type.slice(tree.source), toolbar.common_resource_attributes.len }); + inline for (.{ "button_width", "button_height" }) |arg| { + try writer.writeByteNTimes(' ', indent + 1); + try writer.writeAll(arg ++ ":\n"); + try @field(toolbar, arg).dump(tree, writer, indent + 2); + } + try writer.writeByteNTimes(' ', indent); + try writer.writeAll(toolbar.begin_token.slice(tree.source)); + try writer.writeAll("\n"); + for (toolbar.buttons) |button_or_sep| { + try button_or_sep.dump(tree, writer, indent + 1); + } + try writer.writeByteNTimes(' ', indent); + try writer.writeAll(toolbar.end_token.slice(tree.source)); + try writer.writeAll("\n"); + }, + .menu => { + const menu = 
@fieldParentPtr(Node.Menu, "base", node); + try writer.print(" {s} {s} [{d} common_resource_attributes]\n", .{ menu.id.slice(tree.source), menu.type.slice(tree.source), menu.common_resource_attributes.len }); + for (menu.optional_statements) |statement| { + try statement.dump(tree, writer, indent + 1); + } + if (menu.help_id) |help_id| { + try writer.writeByteNTimes(' ', indent + 1); + try writer.writeAll("help_id:\n"); + try help_id.dump(tree, writer, indent + 2); + } + try writer.writeByteNTimes(' ', indent); + try writer.writeAll(menu.begin_token.slice(tree.source)); + try writer.writeAll("\n"); + for (menu.items) |item| { + try item.dump(tree, writer, indent + 1); + } + try writer.writeByteNTimes(' ', indent); + try writer.writeAll(menu.end_token.slice(tree.source)); + try writer.writeAll("\n"); + }, + .menu_item => { + const menu_item = @fieldParentPtr(Node.MenuItem, "base", node); + try writer.print(" {s} {s} [{d} options]\n", .{ menu_item.menuitem.slice(tree.source), menu_item.text.slice(tree.source), menu_item.option_list.len }); + try menu_item.result.dump(tree, writer, indent + 1); + }, + .menu_item_separator => { + const menu_item = @fieldParentPtr(Node.MenuItemSeparator, "base", node); + try writer.print(" {s} {s}\n", .{ menu_item.menuitem.slice(tree.source), menu_item.separator.slice(tree.source) }); + }, + .menu_item_ex => { + const menu_item = @fieldParentPtr(Node.MenuItemEx, "base", node); + try writer.print(" {s} {s}\n", .{ menu_item.menuitem.slice(tree.source), menu_item.text.slice(tree.source) }); + inline for (.{ "id", "type", "state" }) |arg| { + if (@field(menu_item, arg)) |val_node| { + try writer.writeByteNTimes(' ', indent + 1); + try writer.writeAll(arg ++ ":\n"); + try val_node.dump(tree, writer, indent + 2); + } + } + }, + .popup => { + const popup = @fieldParentPtr(Node.Popup, "base", node); + try writer.print(" {s} {s} [{d} options]\n", .{ popup.popup.slice(tree.source), popup.text.slice(tree.source), popup.option_list.len }); + try writer.writeByteNTimes(' ', indent); + try writer.writeAll(popup.begin_token.slice(tree.source)); + try writer.writeAll("\n"); + for (popup.items) |item| { + try item.dump(tree, writer, indent + 1); + } + try writer.writeByteNTimes(' ', indent); + try writer.writeAll(popup.end_token.slice(tree.source)); + try writer.writeAll("\n"); + }, + .popup_ex => { + const popup = @fieldParentPtr(Node.PopupEx, "base", node); + try writer.print(" {s} {s}\n", .{ popup.popup.slice(tree.source), popup.text.slice(tree.source) }); + inline for (.{ "id", "type", "state", "help_id" }) |arg| { + if (@field(popup, arg)) |val_node| { + try writer.writeByteNTimes(' ', indent + 1); + try writer.writeAll(arg ++ ":\n"); + try val_node.dump(tree, writer, indent + 2); + } + } + try writer.writeByteNTimes(' ', indent); + try writer.writeAll(popup.begin_token.slice(tree.source)); + try writer.writeAll("\n"); + for (popup.items) |item| { + try item.dump(tree, writer, indent + 1); + } + try writer.writeByteNTimes(' ', indent); + try writer.writeAll(popup.end_token.slice(tree.source)); + try writer.writeAll("\n"); + }, + .version_info => { + const version_info = @fieldParentPtr(Node.VersionInfo, "base", node); + try writer.print(" {s} {s} [{d} common_resource_attributes]\n", .{ version_info.id.slice(tree.source), version_info.versioninfo.slice(tree.source), version_info.common_resource_attributes.len }); + for (version_info.fixed_info) |fixed_info| { + try fixed_info.dump(tree, writer, indent + 1); + } + try writer.writeByteNTimes(' ', indent); + try 
writer.writeAll(version_info.begin_token.slice(tree.source)); + try writer.writeAll("\n"); + for (version_info.block_statements) |block| { + try block.dump(tree, writer, indent + 1); + } + try writer.writeByteNTimes(' ', indent); + try writer.writeAll(version_info.end_token.slice(tree.source)); + try writer.writeAll("\n"); + }, + .version_statement => { + const version_statement = @fieldParentPtr(Node.VersionStatement, "base", node); + try writer.print(" {s}\n", .{version_statement.type.slice(tree.source)}); + for (version_statement.parts) |part| { + try part.dump(tree, writer, indent + 1); + } + }, + .block => { + const block = @fieldParentPtr(Node.Block, "base", node); + try writer.print(" {s} {s}\n", .{ block.identifier.slice(tree.source), block.key.slice(tree.source) }); + for (block.values) |value| { + try value.dump(tree, writer, indent + 1); + } + try writer.writeByteNTimes(' ', indent); + try writer.writeAll(block.begin_token.slice(tree.source)); + try writer.writeAll("\n"); + for (block.children) |child| { + try child.dump(tree, writer, indent + 1); + } + try writer.writeByteNTimes(' ', indent); + try writer.writeAll(block.end_token.slice(tree.source)); + try writer.writeAll("\n"); + }, + .block_value => { + const block_value = @fieldParentPtr(Node.BlockValue, "base", node); + try writer.print(" {s} {s}\n", .{ block_value.identifier.slice(tree.source), block_value.key.slice(tree.source) }); + for (block_value.values) |value| { + try value.dump(tree, writer, indent + 1); + } + }, + .block_value_value => { + const block_value = @fieldParentPtr(Node.BlockValueValue, "base", node); + if (block_value.trailing_comma) { + try writer.writeAll(" ,"); + } + try writer.writeAll("\n"); + try block_value.expression.dump(tree, writer, indent + 1); + }, + .string_table => { + const string_table = @fieldParentPtr(Node.StringTable, "base", node); + try writer.print(" {s} [{d} common_resource_attributes]\n", .{ string_table.type.slice(tree.source), string_table.common_resource_attributes.len }); + for (string_table.optional_statements) |statement| { + try statement.dump(tree, writer, indent + 1); + } + try writer.writeByteNTimes(' ', indent); + try writer.writeAll(string_table.begin_token.slice(tree.source)); + try writer.writeAll("\n"); + for (string_table.strings) |string| { + try string.dump(tree, writer, indent + 1); + } + try writer.writeByteNTimes(' ', indent); + try writer.writeAll(string_table.end_token.slice(tree.source)); + try writer.writeAll("\n"); + }, + .string_table_string => { + try writer.writeAll("\n"); + const string = @fieldParentPtr(Node.StringTableString, "base", node); + try string.id.dump(tree, writer, indent + 1); + try writer.writeByteNTimes(' ', indent + 1); + try writer.print("{s}\n", .{string.string.slice(tree.source)}); + }, + .language_statement => { + const language = @fieldParentPtr(Node.LanguageStatement, "base", node); + try writer.print(" {s}\n", .{language.language_token.slice(tree.source)}); + try language.primary_language_id.dump(tree, writer, indent + 1); + try language.sublanguage_id.dump(tree, writer, indent + 1); + }, + .font_statement => { + const font = @fieldParentPtr(Node.FontStatement, "base", node); + try writer.print(" {s} typeface: {s}\n", .{ font.identifier.slice(tree.source), font.typeface.slice(tree.source) }); + try writer.writeByteNTimes(' ', indent + 1); + try writer.writeAll("point_size:\n"); + try font.point_size.dump(tree, writer, indent + 2); + inline for (.{ "weight", "italic", "char_set" }) |arg| { + if (@field(font, arg)) |arg_node| { 
+ try writer.writeByteNTimes(' ', indent + 1); + try writer.writeAll(arg ++ ":\n"); + try arg_node.dump(tree, writer, indent + 2); + } + } + }, + .simple_statement => { + const statement = @fieldParentPtr(Node.SimpleStatement, "base", node); + try writer.print(" {s}\n", .{statement.identifier.slice(tree.source)}); + try statement.value.dump(tree, writer, indent + 1); + }, + .invalid => { + const invalid = @fieldParentPtr(Node.Invalid, "base", node); + try writer.print(" context.len: {}\n", .{invalid.context.len}); + for (invalid.context) |context_token| { + try writer.writeByteNTimes(' ', indent + 1); + try writer.print("{s}:{s}", .{ @tagName(context_token.id), context_token.slice(tree.source) }); + try writer.writeByte('\n'); + } + }, + } + } +}; diff --git a/src/resinator/bmp.zig b/src/resinator/bmp.zig new file mode 100644 index 000000000000..f6fdb9f28041 --- /dev/null +++ b/src/resinator/bmp.zig @@ -0,0 +1,268 @@ +//! https://learn.microsoft.com/en-us/windows/win32/api/wingdi/ns-wingdi-bitmapinfoheader +//! https://learn.microsoft.com/en-us/previous-versions//dd183376(v=vs.85) +//! https://learn.microsoft.com/en-us/windows/win32/api/wingdi/ns-wingdi-bitmapinfo +//! https://learn.microsoft.com/en-us/windows/win32/api/wingdi/ns-wingdi-bitmapcoreheader +//! https://archive.org/details/mac_Graphics_File_Formats_Second_Edition_1996/page/n607/mode/2up +//! https://learn.microsoft.com/en-us/windows/win32/api/wingdi/ns-wingdi-bitmapv5header +//! +//! Notes: +//! - The Microsoft documentation is incredibly unclear about the color table when the +//! bit depth is >= 16. +//! + For bit depth 24 it says "the bmiColors member of BITMAPINFO is NULL" but also +//! says "the bmiColors color table is used for optimizing colors used on palette-based +//! devices, and must contain the number of entries specified by the bV5ClrUsed member" +//! + For bit depth 16 and 32, it seems to imply that if the compression is BI_BITFIELDS +//! or BI_ALPHABITFIELDS, then the color table *only* consists of the bit masks, but +//! doesn't really say this outright and the Wikipedia article seems to disagree +//! For the purposes of this implementation, color tables can always be present for any +//! bit depth and compression, and the color table follows the header + any optional +//! bit mask fields dictated by the specified compression. + +const std = @import("std"); +const BitmapHeader = @import("ico.zig").BitmapHeader; + +pub const windows_format_id = std.mem.readIntNative(u16, "BM"); +pub const file_header_len = 14; + +pub const ReadError = error{ + UnexpectedEOF, + InvalidFileHeader, + ImpossiblePixelDataOffset, + UnknownBitmapVersion, + InvalidBitsPerPixel, + TooManyColorsInPalette, + MissingBitfieldMasks, +}; + +pub const BitmapInfo = struct { + dib_header_size: u32, + /// Contains the interpreted number of colors in the palette (e.g. + /// if the field's value is zero and the bit depth is <= 8, this + /// will contain the maximum number of colors for the bit depth + /// rather than the field's value directly). 
+ colors_in_palette: u32, + bytes_per_color_palette_element: u8, + pixel_data_offset: u32, + compression: Compression, + + pub fn getExpectedPaletteByteLen(self: *const BitmapInfo) u64 { + return @as(u64, self.colors_in_palette) * self.bytes_per_color_palette_element; + } + + pub fn getActualPaletteByteLen(self: *const BitmapInfo) u64 { + return self.getByteLenBetweenHeadersAndPixels() - self.getBitmasksByteLen(); + } + + pub fn getByteLenBetweenHeadersAndPixels(self: *const BitmapInfo) u64 { + return @as(u64, self.pixel_data_offset) - self.dib_header_size - file_header_len; + } + + pub fn getBitmasksByteLen(self: *const BitmapInfo) u8 { + return switch (self.compression) { + .BI_BITFIELDS => 12, + .BI_ALPHABITFIELDS => 16, + else => 0, + }; + } + + pub fn getMissingPaletteByteLen(self: *const BitmapInfo) u64 { + if (self.getActualPaletteByteLen() >= self.getExpectedPaletteByteLen()) return 0; + return self.getExpectedPaletteByteLen() - self.getActualPaletteByteLen(); + } + + /// Returns the full byte len of the DIB header + optional bitmasks + color palette + pub fn getExpectedByteLenBeforePixelData(self: *const BitmapInfo) u64 { + return @as(u64, self.dib_header_size) + self.getBitmasksByteLen() + self.getExpectedPaletteByteLen(); + } + + /// Returns the full expected byte len + pub fn getExpectedByteLen(self: *const BitmapInfo, file_size: u64) u64 { + return self.getExpectedByteLenBeforePixelData() + self.getPixelDataLen(file_size); + } + + pub fn getPixelDataLen(self: *const BitmapInfo, file_size: u64) u64 { + return file_size - self.pixel_data_offset; + } +}; + +pub fn read(reader: anytype, max_size: u64) ReadError!BitmapInfo { + var bitmap_info: BitmapInfo = undefined; + const file_header = reader.readBytesNoEof(file_header_len) catch return error.UnexpectedEOF; + + const id = std.mem.readIntNative(u16, file_header[0..2]); + if (id != windows_format_id) return error.InvalidFileHeader; + + bitmap_info.pixel_data_offset = std.mem.readIntNative(u32, file_header[10..14]); + if (bitmap_info.pixel_data_offset > max_size) return error.ImpossiblePixelDataOffset; + + bitmap_info.dib_header_size = reader.readIntLittle(u32) catch return error.UnexpectedEOF; + if (bitmap_info.pixel_data_offset < file_header_len + bitmap_info.dib_header_size) return error.ImpossiblePixelDataOffset; + const dib_version = BitmapHeader.Version.get(bitmap_info.dib_header_size); + switch (dib_version) { + .@"nt3.1", .@"nt4.0", .@"nt5.0" => { + var dib_header_buf: [@sizeOf(BITMAPINFOHEADER)]u8 align(@alignOf(BITMAPINFOHEADER)) = undefined; + std.mem.writeIntLittle(u32, dib_header_buf[0..4], bitmap_info.dib_header_size); + reader.readNoEof(dib_header_buf[4..]) catch return error.UnexpectedEOF; + var dib_header: *BITMAPINFOHEADER = @ptrCast(&dib_header_buf); + structFieldsLittleToNative(BITMAPINFOHEADER, dib_header); + + bitmap_info.colors_in_palette = try dib_header.numColorsInTable(); + bitmap_info.bytes_per_color_palette_element = 4; + bitmap_info.compression = @enumFromInt(dib_header.biCompression); + + if (bitmap_info.getByteLenBetweenHeadersAndPixels() < bitmap_info.getBitmasksByteLen()) { + return error.MissingBitfieldMasks; + } + }, + .@"win2.0" => { + var dib_header_buf: [@sizeOf(BITMAPCOREHEADER)]u8 align(@alignOf(BITMAPCOREHEADER)) = undefined; + std.mem.writeIntLittle(u32, dib_header_buf[0..4], bitmap_info.dib_header_size); + reader.readNoEof(dib_header_buf[4..]) catch return error.UnexpectedEOF; + var dib_header: *BITMAPCOREHEADER = @ptrCast(&dib_header_buf); + structFieldsLittleToNative(BITMAPCOREHEADER, 
dib_header); + + // > The size of the color palette is calculated from the BitsPerPixel value. + // > The color palette has 2, 16, 256, or 0 entries for a BitsPerPixel of + // > 1, 4, 8, and 24, respectively. + bitmap_info.colors_in_palette = switch (dib_header.bcBitCount) { + inline 1, 4, 8 => |bit_count| 1 << bit_count, + 24 => 0, + else => return error.InvalidBitsPerPixel, + }; + bitmap_info.bytes_per_color_palette_element = 3; + + bitmap_info.compression = .BI_RGB; + }, + .unknown => return error.UnknownBitmapVersion, + } + + return bitmap_info; +} + +/// https://learn.microsoft.com/en-us/windows/win32/api/wingdi/ns-wingdi-bitmapcoreheader +pub const BITMAPCOREHEADER = extern struct { + bcSize: u32, + bcWidth: u16, + bcHeight: u16, + bcPlanes: u16, + bcBitCount: u16, +}; + +/// https://learn.microsoft.com/en-us/windows/win32/api/wingdi/ns-wingdi-bitmapinfoheader +pub const BITMAPINFOHEADER = extern struct { + bcSize: u32, + biWidth: i32, + biHeight: i32, + biPlanes: u16, + biBitCount: u16, + biCompression: u32, + biSizeImage: u32, + biXPelsPerMeter: i32, + biYPelsPerMeter: i32, + biClrUsed: u32, + biClrImportant: u32, + + /// Returns error.TooManyColorsInPalette if the number of colors specified + /// exceeds the number of possible colors referenced in the pixel data (i.e. + /// if 1 bit is used per pixel, then the color table can't have more than 2 colors + /// since any more couldn't possibly be indexed in the pixel data) + /// + /// Returns error.InvalidBitsPerPixel if the bit depth is not 1, 4, 8, 16, 24, or 32. + pub fn numColorsInTable(self: BITMAPINFOHEADER) !u32 { + switch (self.biBitCount) { + inline 1, 4, 8 => |bit_count| switch (self.biClrUsed) { + // > If biClrUsed is zero, the array contains the maximum number of + // > colors for the given bitdepth; that is, 2^biBitCount colors + 0 => return 1 << bit_count, + // > If biClrUsed is nonzero and the biBitCount member is less than 16, + // > the biClrUsed member specifies the actual number of colors the + // > graphics engine or device driver accesses. + else => { + const max_colors = 1 << bit_count; + if (self.biClrUsed > max_colors) { + return error.TooManyColorsInPalette; + } + return self.biClrUsed; + }, + }, + // > If biBitCount is 16 or greater, the biClrUsed member specifies + // > the size of the color table used to optimize performance of the + // > system color palettes. + // + // Note: Bit depths >= 16 only use the color table 'for optimizing colors + // used on palette-based devices', but it still makes sense to limit their + // colors since the pixel data is still limited to this number of colors + // (i.e. even though the color table is not indexed by the pixel data, + // the color table having more colors than the pixel data can represent + // would never make sense and indicates a malformed bitmap). 
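+            // For example, a 16-bpp bitmap can represent at most 1 << 16 = 65536 distinct
+            // colors, so a biClrUsed value above that is rejected below with
+            // error.TooManyColorsInPalette.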
+ inline 16, 24, 32 => |bit_count| { + const max_colors = 1 << bit_count; + if (self.biClrUsed > max_colors) { + return error.TooManyColorsInPalette; + } + return self.biClrUsed; + }, + else => return error.InvalidBitsPerPixel, + } + } +}; + +pub const Compression = enum(u32) { + BI_RGB = 0, + BI_RLE8 = 1, + BI_RLE4 = 2, + BI_BITFIELDS = 3, + BI_JPEG = 4, + BI_PNG = 5, + BI_ALPHABITFIELDS = 6, + BI_CMYK = 11, + BI_CMYKRLE8 = 12, + BI_CMYKRLE4 = 13, + _, +}; + +fn structFieldsLittleToNative(comptime T: type, x: *T) void { + inline for (@typeInfo(T).Struct.fields) |field| { + @field(x, field.name) = std.mem.littleToNative(field.type, @field(x, field.name)); + } +} + +test "read" { + var bmp_data = "BM<\x00\x00\x00\x00\x00\x00\x006\x00\x00\x00(\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x01\x00\x10\x00\x00\x00\x00\x00\x06\x00\x00\x00\x12\x0b\x00\x00\x12\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x7f\x00\x00\x00\x00".*; + var fbs = std.io.fixedBufferStream(&bmp_data); + + { + const bitmap = try read(fbs.reader(), bmp_data.len); + try std.testing.expectEqual(@as(u32, BitmapHeader.Version.@"nt3.1".len()), bitmap.dib_header_size); + } + + { + fbs.reset(); + bmp_data[file_header_len] = 11; + try std.testing.expectError(error.UnknownBitmapVersion, read(fbs.reader(), bmp_data.len)); + + // restore + bmp_data[file_header_len] = BitmapHeader.Version.@"nt3.1".len(); + } + + { + fbs.reset(); + bmp_data[0] = 'b'; + try std.testing.expectError(error.InvalidFileHeader, read(fbs.reader(), bmp_data.len)); + + // restore + bmp_data[0] = 'B'; + } + + { + const cutoff_len = file_header_len + BitmapHeader.Version.@"nt3.1".len() - 1; + var dib_cutoff_fbs = std.io.fixedBufferStream(bmp_data[0..cutoff_len]); + try std.testing.expectError(error.UnexpectedEOF, read(dib_cutoff_fbs.reader(), bmp_data.len)); + } + + { + const cutoff_len = file_header_len - 1; + var bmp_cutoff_fbs = std.io.fixedBufferStream(bmp_data[0..cutoff_len]); + try std.testing.expectError(error.UnexpectedEOF, read(bmp_cutoff_fbs.reader(), bmp_data.len)); + } +} diff --git a/src/resinator/cli.zig b/src/resinator/cli.zig new file mode 100644 index 000000000000..2e244b878e48 --- /dev/null +++ b/src/resinator/cli.zig @@ -0,0 +1,1433 @@ +const std = @import("std"); +const CodePage = @import("code_pages.zig").CodePage; +const lang = @import("lang.zig"); +const res = @import("res.zig"); +const Allocator = std.mem.Allocator; +const lex = @import("lex.zig"); + +/// This is what /SL 100 will set the maximum string literal length to +pub const max_string_literal_length_100_percent = 8192; + +pub const usage_string = + \\Usage: resinator [options] [--] [] + \\ + \\The sequence -- can be used to signify when to stop parsing options. + \\This is necessary when the input path begins with a forward slash. + \\ + \\Supported Win32 RC Options: + \\ /?, /h Print this help and exit. + \\ /v Verbose (print progress messages). + \\ /d [=] Define a symbol (during preprocessing). + \\ /u Undefine a symbol (during preprocessing). + \\ /fo Specify output file path. + \\ /l Set default language using hexadecimal id (ex: 409). + \\ /ln Set default language using language name (ex: en-us). + \\ /i Add an include path. + \\ /x Ignore INCLUDE environment variable. + \\ /c Set default code page (ex: 65001). + \\ /w Warn on invalid code page in .rc (instead of error). + \\ /y Suppress warnings for duplicate control IDs. + \\ /n Null-terminate all strings in string tables. 
+ \\ /sl Specify string literal length limit in percentage (1-100) + \\ where 100 corresponds to a limit of 8192. If the /sl + \\ option is not specified, the default limit is 4097. + \\ /p Only run the preprocessor and output a .rcpp file. + \\ + \\No-op Win32 RC Options: + \\ /nologo, /a, /r Options that are recognized but do nothing. + \\ + \\Unsupported Win32 RC Options: + \\ /fm, /q, /g, /gn, /g1, /g2 Unsupported MUI-related options. + \\ /?c, /hc, /t, /tp:, Unsupported LCX/LCE-related options. + \\ /tn, /tm, /tc, /tw, /te, + \\ /ti, /ta + \\ /z Unsupported font-substitution-related option. + \\ /s Unsupported HWB-related option. + \\ + \\Custom Options (resinator-specific): + \\ /:no-preprocess Do not run the preprocessor. + \\ /:debug Output the preprocessed .rc file and the parsed AST. + \\ /:auto-includes Set the automatic include path detection behavior. + \\ any (default) Use MSVC if available, fall back to MinGW + \\ msvc Use MSVC include paths (must be present on the system) + \\ gnu Use MinGW include paths (requires Zig as the preprocessor) + \\ none Do not use any autodetected include paths + \\ + \\Note: For compatibility reasons, all custom options start with : + \\ +; + +pub const Diagnostics = struct { + errors: std.ArrayListUnmanaged(ErrorDetails) = .{}, + allocator: Allocator, + + pub const ErrorDetails = struct { + arg_index: usize, + arg_span: ArgSpan = .{}, + msg: std.ArrayListUnmanaged(u8) = .{}, + type: Type = .err, + print_args: bool = true, + + pub const Type = enum { err, warning, note }; + pub const ArgSpan = struct { + point_at_next_arg: bool = false, + name_offset: usize = 0, + prefix_len: usize = 0, + value_offset: usize = 0, + name_len: usize = 0, + }; + }; + + pub fn init(allocator: Allocator) Diagnostics { + return .{ + .allocator = allocator, + }; + } + + pub fn deinit(self: *Diagnostics) void { + for (self.errors.items) |*details| { + details.msg.deinit(self.allocator); + } + self.errors.deinit(self.allocator); + } + + pub fn append(self: *Diagnostics, error_details: ErrorDetails) !void { + try self.errors.append(self.allocator, error_details); + } + + pub fn renderToStdErr(self: *Diagnostics, args: []const []const u8, config: std.io.tty.Config) void { + std.debug.getStderrMutex().lock(); + defer std.debug.getStderrMutex().unlock(); + const stderr = std.io.getStdErr().writer(); + self.renderToWriter(args, stderr, config) catch return; + } + + pub fn renderToWriter(self: *Diagnostics, args: []const []const u8, writer: anytype, config: std.io.tty.Config) !void { + for (self.errors.items) |err_details| { + try renderErrorMessage(writer, config, err_details, args); + } + } + + pub fn hasError(self: *const Diagnostics) bool { + for (self.errors.items) |err| { + if (err.type == .err) return true; + } + return false; + } +}; + +pub const Options = struct { + allocator: Allocator, + input_filename: []const u8 = &[_]u8{}, + output_filename: []const u8 = &[_]u8{}, + extra_include_paths: std.ArrayListUnmanaged([]const u8) = .{}, + ignore_include_env_var: bool = false, + preprocess: Preprocess = .yes, + default_language_id: ?u16 = null, + default_code_page: ?CodePage = null, + verbose: bool = false, + symbols: std.StringArrayHashMapUnmanaged(SymbolValue) = .{}, + null_terminate_string_table_strings: bool = false, + max_string_literal_codepoints: u15 = lex.default_max_string_literal_codepoints, + silent_duplicate_control_ids: bool = false, + warn_instead_of_error_on_invalid_code_page: bool = false, + debug: bool = false, + print_help_and_exit: bool = false, + 
auto_includes: AutoIncludes = .any, + + pub const AutoIncludes = enum { any, msvc, gnu, none }; + pub const Preprocess = enum { no, yes, only }; + pub const SymbolAction = enum { define, undefine }; + pub const SymbolValue = union(SymbolAction) { + define: []const u8, + undefine: void, + + pub fn deinit(self: SymbolValue, allocator: Allocator) void { + switch (self) { + .define => |value| allocator.free(value), + .undefine => {}, + } + } + }; + + /// Does not check that identifier contains only valid characters + pub fn define(self: *Options, identifier: []const u8, value: []const u8) !void { + if (self.symbols.getPtr(identifier)) |val_ptr| { + // If the symbol is undefined, then that always takes precedence so + // we shouldn't change anything. + if (val_ptr.* == .undefine) return; + // Otherwise, the new value takes precedence. + var duped_value = try self.allocator.dupe(u8, value); + errdefer self.allocator.free(duped_value); + val_ptr.deinit(self.allocator); + val_ptr.* = .{ .define = duped_value }; + return; + } + var duped_key = try self.allocator.dupe(u8, identifier); + errdefer self.allocator.free(duped_key); + var duped_value = try self.allocator.dupe(u8, value); + errdefer self.allocator.free(duped_value); + try self.symbols.put(self.allocator, duped_key, .{ .define = duped_value }); + } + + /// Does not check that identifier contains only valid characters + pub fn undefine(self: *Options, identifier: []const u8) !void { + if (self.symbols.getPtr(identifier)) |action| { + action.deinit(self.allocator); + action.* = .{ .undefine = {} }; + return; + } + var duped_key = try self.allocator.dupe(u8, identifier); + errdefer self.allocator.free(duped_key); + try self.symbols.put(self.allocator, duped_key, .{ .undefine = {} }); + } + + /// If the current input filename both: + /// - does not have an extension, and + /// - does not exist in the cwd + /// then this function will append `.rc` to the input filename + /// + /// Note: This behavior is different from the Win32 compiler. + /// It always appends .RC if the filename does not have + /// a `.` in it and it does not even try the verbatim name + /// in that scenario. + /// + /// The approach taken here is meant to give us a 'best of both + /// worlds' situation where we'll be compatible with most use-cases + /// of the .rc extension being omitted from the CLI args, but still + /// work fine if the file itself does not have an extension. 
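+    ///
+    /// For example (file names are hypothetical): an input of `foo` stays `foo` if a
+    /// file named `foo` exists in the cwd, and becomes `foo.rc` otherwise; an input of
+    /// `foo.txt` is never modified since it already has an extension.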
+    pub fn maybeAppendRC(options: *Options, cwd: std.fs.Dir) !void {
+        if (std.fs.path.extension(options.input_filename).len == 0) {
+            cwd.access(options.input_filename, .{}) catch |err| switch (err) {
+                error.FileNotFound => {
+                    var filename_bytes = try options.allocator.alloc(u8, options.input_filename.len + 3);
+                    std.mem.copy(u8, filename_bytes, options.input_filename);
+                    std.mem.copy(u8, filename_bytes[filename_bytes.len - 3 ..], ".rc");
+                    options.allocator.free(options.input_filename);
+                    options.input_filename = filename_bytes;
+                },
+                else => {},
+            };
+        }
+    }
+
+    pub fn deinit(self: *Options) void {
+        for (self.extra_include_paths.items) |extra_include_path| {
+            self.allocator.free(extra_include_path);
+        }
+        self.extra_include_paths.deinit(self.allocator);
+        self.allocator.free(self.input_filename);
+        self.allocator.free(self.output_filename);
+        var symbol_it = self.symbols.iterator();
+        while (symbol_it.next()) |entry| {
+            self.allocator.free(entry.key_ptr.*);
+            entry.value_ptr.deinit(self.allocator);
+        }
+        self.symbols.deinit(self.allocator);
+    }
+
+    pub fn dumpVerbose(self: *const Options, writer: anytype) !void {
+        try writer.print("Input filename: {s}\n", .{self.input_filename});
+        try writer.print("Output filename: {s}\n", .{self.output_filename});
+        if (self.extra_include_paths.items.len > 0) {
+            try writer.writeAll(" Extra include paths:\n");
+            for (self.extra_include_paths.items) |extra_include_path| {
+                try writer.print(" \"{s}\"\n", .{extra_include_path});
+            }
+        }
+        if (self.ignore_include_env_var) {
+            try writer.writeAll(" The INCLUDE environment variable will be ignored\n");
+        }
+        if (self.preprocess == .no) {
+            try writer.writeAll(" The preprocessor will not be invoked\n");
+        } else if (self.preprocess == .only) {
+            try writer.writeAll(" Only the preprocessor will be invoked\n");
+        }
+        if (self.symbols.count() > 0) {
+            try writer.writeAll(" Symbols:\n");
+            var it = self.symbols.iterator();
+            while (it.next()) |symbol| {
+                try writer.print(" {s} {s}", .{ switch (symbol.value_ptr.*) {
+                    .define => "#define",
+                    .undefine => "#undef",
+                }, symbol.key_ptr.* });
+                if (symbol.value_ptr.* == .define) {
+                    try writer.print(" {s}", .{symbol.value_ptr.define});
+                }
+                try writer.writeAll("\n");
+            }
+        }
+        if (self.null_terminate_string_table_strings) {
+            try writer.writeAll(" Strings in string tables will be null-terminated\n");
+        }
+        if (self.max_string_literal_codepoints != lex.default_max_string_literal_codepoints) {
+            try writer.print(" Max string literal length: {}\n", .{self.max_string_literal_codepoints});
+        }
+        if (self.silent_duplicate_control_ids) {
+            try writer.writeAll(" Duplicate control IDs will not emit warnings\n");
+        }
+        if (self.warn_instead_of_error_on_invalid_code_page) {
+            try writer.writeAll(" Invalid code page in .rc will produce a warning (instead of an error)\n");
+        }
+
+        const language_id = self.default_language_id orelse res.Language.default;
+        const language_name = language_name: {
+            if (std.meta.intToEnum(lang.LanguageId, language_id)) |lang_enum_val| {
+                break :language_name @tagName(lang_enum_val);
+            } else |_| {}
+            if (language_id == lang.LOCALE_CUSTOM_UNSPECIFIED) {
+                break :language_name "LOCALE_CUSTOM_UNSPECIFIED";
+            }
+            break :language_name "";
+        };
+        try writer.print("Default language: {s} (id=0x{x})\n", .{ language_name, language_id });
+
+        const code_page = self.default_code_page orelse .windows1252;
+        try writer.print("Default codepage: {s} (id={})\n", .{ @tagName(code_page), @intFromEnum(code_page) });
+    }
+};
+
+pub const Arg = struct {
+    prefix: enum { long,
short, slash }, + name_offset: usize, + full: []const u8, + + pub fn fromString(str: []const u8) ?@This() { + if (std.mem.startsWith(u8, str, "--")) { + return .{ .prefix = .long, .name_offset = 2, .full = str }; + } else if (std.mem.startsWith(u8, str, "-")) { + return .{ .prefix = .short, .name_offset = 1, .full = str }; + } else if (std.mem.startsWith(u8, str, "/")) { + return .{ .prefix = .slash, .name_offset = 1, .full = str }; + } + return null; + } + + pub fn prefixSlice(self: Arg) []const u8 { + return self.full[0..(if (self.prefix == .long) 2 else 1)]; + } + + pub fn name(self: Arg) []const u8 { + return self.full[self.name_offset..]; + } + + pub fn optionWithoutPrefix(self: Arg, option_len: usize) []const u8 { + return self.name()[0..option_len]; + } + + pub fn missingSpan(self: Arg) Diagnostics.ErrorDetails.ArgSpan { + return .{ + .point_at_next_arg = true, + .value_offset = 0, + .name_offset = self.name_offset, + .prefix_len = self.prefixSlice().len, + }; + } + + pub fn optionAndAfterSpan(self: Arg) Diagnostics.ErrorDetails.ArgSpan { + return self.optionSpan(0); + } + + pub fn optionSpan(self: Arg, option_len: usize) Diagnostics.ErrorDetails.ArgSpan { + return .{ + .name_offset = self.name_offset, + .prefix_len = self.prefixSlice().len, + .name_len = option_len, + }; + } + + pub const Value = struct { + slice: []const u8, + index_increment: u2 = 1, + + pub fn argSpan(self: Value, arg: Arg) Diagnostics.ErrorDetails.ArgSpan { + const prefix_len = arg.prefixSlice().len; + switch (self.index_increment) { + 1 => return .{ + .value_offset = @intFromPtr(self.slice.ptr) - @intFromPtr(arg.full.ptr), + .prefix_len = prefix_len, + .name_offset = arg.name_offset, + }, + 2 => return .{ + .point_at_next_arg = true, + .prefix_len = prefix_len, + .name_offset = arg.name_offset, + }, + else => unreachable, + } + } + + pub fn index(self: Value, arg_index: usize) usize { + if (self.index_increment == 2) return arg_index + 1; + return arg_index; + } + }; + + pub fn value(self: Arg, option_len: usize, index: usize, args: []const []const u8) error{MissingValue}!Value { + const rest = self.full[self.name_offset + option_len ..]; + if (rest.len > 0) return .{ .slice = rest }; + if (index + 1 >= args.len) return error.MissingValue; + return .{ .slice = args[index + 1], .index_increment = 2 }; + } + + pub const Context = struct { + index: usize, + arg: Arg, + value: Value, + }; +}; + +pub const ParseError = error{ParseError} || Allocator.Error; + +/// Note: Does not run `Options.maybeAppendRC` automatically. If that behavior is desired, +/// it must be called separately. 
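+///
+/// A minimal (hypothetical) call site, where `args` is an argv-style slice that
+/// includes the exe name at index 0, might look like:
+///
+///   var diagnostics = Diagnostics.init(allocator);
+///   defer diagnostics.deinit();
+///   var options = try parse(allocator, args, &diagnostics);
+///   defer options.deinit();
+///   try options.maybeAppendRC(std.fs.cwd());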
+pub fn parse(allocator: Allocator, args: []const []const u8, diagnostics: *Diagnostics) ParseError!Options { + var options = Options{ .allocator = allocator }; + errdefer options.deinit(); + + var output_filename: ?[]const u8 = null; + var output_filename_context: Arg.Context = undefined; + + var arg_i: usize = 1; // start at 1 to skip past the exe name + next_arg: while (arg_i < args.len) { + var arg = Arg.fromString(args[arg_i]) orelse break; + if (arg.name().len == 0) { + switch (arg.prefix) { + // -- on its own ends arg parsing + .long => { + arg_i += 1; + break; + }, + // - or / on its own is an error + else => { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("invalid option: {s}", .{arg.prefixSlice()}); + try diagnostics.append(err_details); + arg_i += 1; + continue :next_arg; + }, + } + } + + while (arg.name().len > 0) { + const arg_name = arg.name(); + // Note: These cases should be in order from longest to shortest, since + // shorter options that are a substring of a longer one could make + // the longer option's branch unreachable. + if (std.ascii.startsWithIgnoreCase(arg_name, ":no-preprocess")) { + options.preprocess = .no; + arg.name_offset += ":no-preprocess".len; + } else if (std.ascii.startsWithIgnoreCase(arg_name, ":auto-includes")) { + const value = arg.value(":auto-includes".len, arg_i, args) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(":auto-includes".len) }); + try diagnostics.append(err_details); + arg_i += 1; + break :next_arg; + }; + options.auto_includes = std.meta.stringToEnum(Options.AutoIncludes, value.slice) orelse blk: { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("invalid auto includes setting: {s} ", .{value.slice}); + try diagnostics.append(err_details); + break :blk options.auto_includes; + }; + arg_i += value.index_increment; + continue :next_arg; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "nologo")) { + // No-op, we don't display any 'logo' to suppress + arg.name_offset += "nologo".len; + } else if (std.ascii.startsWithIgnoreCase(arg_name, ":debug")) { + options.debug = true; + arg.name_offset += ":debug".len; + } + // Unsupported LCX/LCE options that need a value (within the same arg only) + else if (std.ascii.startsWithIgnoreCase(arg_name, "tp:")) { + const rest = arg.full[arg.name_offset + 3 ..]; + if (rest.len == 0) { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = .{ + .name_offset = arg.name_offset, + .prefix_len = arg.prefixSlice().len, + .value_offset = arg.name_offset + 3, + } }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing value for {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(3) }); + try diagnostics.append(err_details); + } + var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(3) }); + try diagnostics.append(err_details); + arg_i += 1; + 
continue :next_arg; + } + // Unsupported LCX/LCE options that need a value + else if (std.ascii.startsWithIgnoreCase(arg_name, "tn")) { + const value = arg.value(2, arg_i, args) catch no_value: { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); + try diagnostics.append(err_details); + // dummy zero-length slice starting where the value would have been + const value_start = arg.name_offset + 2; + break :no_value Arg.Value{ .slice = arg.full[value_start..value_start] }; + }; + var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); + try diagnostics.append(err_details); + arg_i += value.index_increment; + continue :next_arg; + } + // Unsupported MUI options that need a value + else if (std.ascii.startsWithIgnoreCase(arg_name, "fm") or + std.ascii.startsWithIgnoreCase(arg_name, "gn") or + std.ascii.startsWithIgnoreCase(arg_name, "g2")) + { + const value = arg.value(2, arg_i, args) catch no_value: { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); + try diagnostics.append(err_details); + // dummy zero-length slice starting where the value would have been + const value_start = arg.name_offset + 2; + break :no_value Arg.Value{ .slice = arg.full[value_start..value_start] }; + }; + var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); + try diagnostics.append(err_details); + arg_i += value.index_increment; + continue :next_arg; + } + // Unsupported MUI options that do not need a value + else if (std.ascii.startsWithIgnoreCase(arg_name, "g1")) { + var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionSpan(2) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); + try diagnostics.append(err_details); + arg.name_offset += 2; + } + // Unsupported LCX/LCE options that do not need a value + else if (std.ascii.startsWithIgnoreCase(arg_name, "tm") or + std.ascii.startsWithIgnoreCase(arg_name, "tc") or + std.ascii.startsWithIgnoreCase(arg_name, "tw") or + std.ascii.startsWithIgnoreCase(arg_name, "te") or + std.ascii.startsWithIgnoreCase(arg_name, "ti") or + std.ascii.startsWithIgnoreCase(arg_name, "ta")) + { + var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionSpan(2) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); + try diagnostics.append(err_details); + arg.name_offset += 2; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "fo")) { + const value = arg.value(2, arg_i, args) catch { + var err_details = 
Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
+                    var msg_writer = err_details.msg.writer(allocator);
+                    try msg_writer.print("missing output path after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) });
+                    try diagnostics.append(err_details);
+                    arg_i += 1;
+                    break :next_arg;
+                };
+                output_filename_context = .{ .index = arg_i, .arg = arg, .value = value };
+                output_filename = value.slice;
+                arg_i += value.index_increment;
+                continue :next_arg;
+            } else if (std.ascii.startsWithIgnoreCase(arg_name, "sl")) {
+                const value = arg.value(2, arg_i, args) catch {
+                    var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
+                    var msg_writer = err_details.msg.writer(allocator);
+                    try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) });
+                    try diagnostics.append(err_details);
+                    arg_i += 1;
+                    break :next_arg;
+                };
+                const percent_str = value.slice;
+                const percent: u32 = parsePercent(percent_str) catch {
+                    var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) };
+                    var msg_writer = err_details.msg.writer(allocator);
+                    try msg_writer.print("invalid percent format '{s}'", .{percent_str});
+                    try diagnostics.append(err_details);
+                    var note_details = Diagnostics.ErrorDetails{ .type = .note, .print_args = false, .arg_index = arg_i };
+                    var note_writer = note_details.msg.writer(allocator);
+                    try note_writer.writeAll("string length percent must be an integer between 1 and 100 (inclusive)");
+                    try diagnostics.append(note_details);
+                    arg_i += value.index_increment;
+                    continue :next_arg;
+                };
+                if (percent == 0 or percent > 100) {
+                    var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) };
+                    var msg_writer = err_details.msg.writer(allocator);
+                    try msg_writer.print("percent out of range: {} (parsed from '{s}')", .{ percent, percent_str });
+                    try diagnostics.append(err_details);
+                    var note_details = Diagnostics.ErrorDetails{ .type = .note, .print_args = false, .arg_index = arg_i };
+                    var note_writer = note_details.msg.writer(allocator);
+                    try note_writer.writeAll("string length percent must be an integer between 1 and 100 (inclusive)");
+                    try diagnostics.append(note_details);
+                    arg_i += value.index_increment;
+                    continue :next_arg;
+                }
+                const percent_float = @as(f32, @floatFromInt(percent)) / 100;
+                options.max_string_literal_codepoints = @intFromFloat(percent_float * max_string_literal_length_100_percent);
+                arg_i += value.index_increment;
+                continue :next_arg;
+            } else if (std.ascii.startsWithIgnoreCase(arg_name, "ln")) {
+                const value = arg.value(2, arg_i, args) catch {
+                    var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
+                    var msg_writer = err_details.msg.writer(allocator);
+                    try msg_writer.print("missing language tag after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) });
+                    try diagnostics.append(err_details);
+                    arg_i += 1;
+                    break :next_arg;
+                };
+                const tag = value.slice;
+                options.default_language_id = lang.tagToInt(tag) catch {
+                    var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) };
+                    var msg_writer = err_details.msg.writer(allocator);
+                    try msg_writer.print("invalid language tag: {s}", .{tag});
+                    try diagnostics.append(err_details);
+                    arg_i += value.index_increment;
+                    continue :next_arg;
+                };
+                if (options.default_language_id.?
== lang.LOCALE_CUSTOM_UNSPECIFIED) { + var err_details = Diagnostics.ErrorDetails{ .type = .warning, .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("language tag '{s}' does not have an assigned ID so it will be resolved to LOCALE_CUSTOM_UNSPECIFIED (id=0x{x})", .{ tag, lang.LOCALE_CUSTOM_UNSPECIFIED }); + try diagnostics.append(err_details); + } + arg_i += value.index_increment; + continue :next_arg; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "l")) { + const value = arg.value(1, arg_i, args) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing language ID after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + arg_i += 1; + break :next_arg; + }; + const num_str = value.slice; + options.default_language_id = lang.parseInt(num_str) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("invalid language ID: {s}", .{num_str}); + try diagnostics.append(err_details); + arg_i += value.index_increment; + continue :next_arg; + }; + arg_i += value.index_increment; + continue :next_arg; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "h") or std.mem.startsWith(u8, arg_name, "?")) { + options.print_help_and_exit = true; + // If there's been an error to this point, then we still want to fail + if (diagnostics.hasError()) return error.ParseError; + return options; + } + // 1 char unsupported MUI options that need a value + else if (std.ascii.startsWithIgnoreCase(arg_name, "q") or + std.ascii.startsWithIgnoreCase(arg_name, "g")) + { + const value = arg.value(1, arg_i, args) catch no_value: { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + // dummy zero-length slice starting where the value would have been + const value_start = arg.name_offset + 1; + break :no_value Arg.Value{ .slice = arg.full[value_start..value_start] }; + }; + var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + arg_i += value.index_increment; + continue :next_arg; + } + // Undocumented (and unsupported) options that need a value + // /z has to do something with font substitution + // /s has something to do with HWB resources being inserted into the .res + else if (std.ascii.startsWithIgnoreCase(arg_name, "z") or + std.ascii.startsWithIgnoreCase(arg_name, "s")) + { + const value = arg.value(1, arg_i, args) catch no_value: { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + // dummy zero-length slice starting where the value would have been 
+ const value_start = arg.name_offset + 1; + break :no_value Arg.Value{ .slice = arg.full[value_start..value_start] }; + }; + var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + arg_i += value.index_increment; + continue :next_arg; + } + // 1 char unsupported LCX/LCE options that do not need a value + else if (std.ascii.startsWithIgnoreCase(arg_name, "t")) { + var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionSpan(1) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + arg.name_offset += 1; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "c")) { + const value = arg.value(1, arg_i, args) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing code page ID after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + arg_i += 1; + break :next_arg; + }; + const num_str = value.slice; + const code_page_id = std.fmt.parseUnsigned(u16, num_str, 10) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("invalid code page ID: {s}", .{num_str}); + try diagnostics.append(err_details); + arg_i += value.index_increment; + continue :next_arg; + }; + options.default_code_page = CodePage.getByIdentifierEnsureSupported(code_page_id) catch |err| switch (err) { + error.InvalidCodePage => { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("invalid or unknown code page ID: {}", .{code_page_id}); + try diagnostics.append(err_details); + arg_i += value.index_increment; + continue :next_arg; + }, + error.UnsupportedCodePage => { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("unsupported code page: {s} (id={})", .{ + @tagName(CodePage.getByIdentifier(code_page_id) catch unreachable), + code_page_id, + }); + try diagnostics.append(err_details); + arg_i += value.index_increment; + continue :next_arg; + }, + }; + arg_i += value.index_increment; + continue :next_arg; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "v")) { + options.verbose = true; + arg.name_offset += 1; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "x")) { + options.ignore_include_env_var = true; + arg.name_offset += 1; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "p")) { + options.preprocess = .only; + arg.name_offset += 1; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "i")) { + const value = arg.value(1, arg_i, args) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing include path after {s}{s} option", .{ arg.prefixSlice(), 
arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + arg_i += 1; + break :next_arg; + }; + const path = value.slice; + const duped = try allocator.dupe(u8, path); + errdefer allocator.free(duped); + try options.extra_include_paths.append(options.allocator, duped); + arg_i += value.index_increment; + continue :next_arg; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "r")) { + // From https://learn.microsoft.com/en-us/windows/win32/menurc/using-rc-the-rc-command-line- + // "Ignored. Provided for compatibility with existing makefiles." + arg.name_offset += 1; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "n")) { + options.null_terminate_string_table_strings = true; + arg.name_offset += 1; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "y")) { + options.silent_duplicate_control_ids = true; + arg.name_offset += 1; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "w")) { + options.warn_instead_of_error_on_invalid_code_page = true; + arg.name_offset += 1; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "a")) { + // Undocumented option with unknown function + // TODO: More investigation to figure out what it does (if anything) + var err_details = Diagnostics.ErrorDetails{ .type = .warning, .arg_index = arg_i, .arg_span = arg.optionSpan(1) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("option {s}{s} has no effect (it is undocumented and its function is unknown in the Win32 RC compiler)", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + arg.name_offset += 1; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "d")) { + const value = arg.value(1, arg_i, args) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing symbol to define after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + arg_i += 1; + break :next_arg; + }; + var tokenizer = std.mem.tokenize(u8, value.slice, "="); + // guaranteed to exist since an empty value.slice would invoke + // the 'missing symbol to define' branch above + const symbol = tokenizer.next().?; + const symbol_value = tokenizer.next() orelse "1"; + + if (isValidIdentifier(symbol)) { + try options.define(symbol, symbol_value); + } else { + var err_details = Diagnostics.ErrorDetails{ .type = .warning, .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("symbol \"{s}\" is not a valid identifier and therefore cannot be defined", .{symbol}); + try diagnostics.append(err_details); + } + arg_i += value.index_increment; + continue :next_arg; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "u")) { + const value = arg.value(1, arg_i, args) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing symbol to undefine after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + arg_i += 1; + break :next_arg; + }; + const symbol = value.slice; + if (isValidIdentifier(symbol)) { + try options.undefine(symbol); + } else { + var err_details = Diagnostics.ErrorDetails{ .type = .warning, .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = err_details.msg.writer(allocator); + 
try msg_writer.print("symbol \"{s}\" is not a valid identifier and therefore cannot be undefined", .{symbol});
+ try diagnostics.append(err_details);
+ }
+ arg_i += value.index_increment;
+ continue :next_arg;
+ } else {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("invalid option: {s}{s}", .{ arg.prefixSlice(), arg.name() });
+ try diagnostics.append(err_details);
+ arg_i += 1;
+ continue :next_arg;
+ }
+ } else {
+ // The while loop exited via its conditional, meaning we are done with
+ // the current arg and can move on to the next
+ arg_i += 1;
+ continue;
+ }
+ }
+
+ var positionals = args[arg_i..];
+
+ if (positionals.len < 1) {
+ var err_details = Diagnostics.ErrorDetails{ .print_args = false, .arg_index = arg_i };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.writeAll("missing input filename");
+ try diagnostics.append(err_details);
+
+ const last_arg = args[args.len - 1];
+ if (arg_i > 1 and last_arg.len > 0 and last_arg[0] == '/' and std.ascii.endsWithIgnoreCase(last_arg, ".rc")) {
+ var note_details = Diagnostics.ErrorDetails{ .type = .note, .print_args = true, .arg_index = arg_i - 1 };
+ var note_writer = note_details.msg.writer(allocator);
+ try note_writer.writeAll("if this argument was intended to be the input filename, then -- should be specified in front of it to exclude it from option parsing");
+ try diagnostics.append(note_details);
+ }
+
+ // This is a fatal enough problem to justify an early return, since
+ // things after this rely on the value of the input filename.
+ return error.ParseError;
+ }
+ options.input_filename = try allocator.dupe(u8, positionals[0]);
+
+ if (positionals.len > 1) {
+ if (output_filename != null) {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i + 1 };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.writeAll("output filename already specified");
+ try diagnostics.append(err_details);
+ var note_details = Diagnostics.ErrorDetails{
+ .type = .note,
+ .arg_index = output_filename_context.value.index(output_filename_context.index),
+ .arg_span = output_filename_context.value.argSpan(output_filename_context.arg),
+ };
+ var note_writer = note_details.msg.writer(allocator);
+ try note_writer.writeAll("output filename previously specified here");
+ try diagnostics.append(note_details);
+ } else {
+ output_filename = positionals[1];
+ }
+ }
+ if (output_filename == null) {
+ var buf = std.ArrayList(u8).init(allocator);
+ errdefer buf.deinit();
+
+ if (std.fs.path.dirname(options.input_filename)) |dirname| {
+ var end_pos = dirname.len;
+ // We want to ensure that we write a path separator at the end, so if the dirname
+ // doesn't end with a path sep then include the char after the dirname
+ // which must be a path sep.
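+ // As an illustrative (hypothetical) example: for an input filename of "subdir/foo.rc",
+ // dirname is "subdir", so end_pos is bumped to 7 in order to copy "subdir/", and the
+ // default output filename constructed below ends up as "subdir/foo.res".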
+ if (!std.fs.path.isSep(dirname[dirname.len - 1])) end_pos += 1; + try buf.appendSlice(options.input_filename[0..end_pos]); + } + try buf.appendSlice(std.fs.path.stem(options.input_filename)); + if (options.preprocess == .only) { + try buf.appendSlice(".rcpp"); + } else { + try buf.appendSlice(".res"); + } + + options.output_filename = try buf.toOwnedSlice(); + } else { + options.output_filename = try allocator.dupe(u8, output_filename.?); + } + + if (diagnostics.hasError()) { + return error.ParseError; + } + + return options; +} + +/// Returns true if the str is a valid C identifier for use in a #define/#undef macro +pub fn isValidIdentifier(str: []const u8) bool { + for (str, 0..) |c, i| switch (c) { + '0'...'9' => if (i == 0) return false, + 'a'...'z', 'A'...'Z', '_' => {}, + else => return false, + }; + return true; +} + +/// This function is specific to how the Win32 RC command line interprets +/// max string literal length percent. +/// - Wraps on overflow of u32 +/// - Stops parsing on any invalid hexadecimal digits +/// - Errors if a digit is not the first char +/// - `-` (negative) prefix is allowed +pub fn parsePercent(str: []const u8) error{InvalidFormat}!u32 { + var result: u32 = 0; + const radix: u8 = 10; + var buf = str; + + const Prefix = enum { none, minus }; + var prefix: Prefix = .none; + switch (buf[0]) { + '-' => { + prefix = .minus; + buf = buf[1..]; + }, + else => {}, + } + + for (buf, 0..) |c, i| { + const digit = switch (c) { + // On invalid digit for the radix, just stop parsing but don't fail + '0'...'9' => std.fmt.charToDigit(c, radix) catch break, + else => { + // First digit must be valid + if (i == 0) { + return error.InvalidFormat; + } + break; + }, + }; + + if (result != 0) { + result *%= radix; + } + result +%= digit; + } + + switch (prefix) { + .none => {}, + .minus => result = 0 -% result, + } + + return result; +} + +test parsePercent { + try std.testing.expectEqual(@as(u32, 16), try parsePercent("16")); + try std.testing.expectEqual(@as(u32, 0), try parsePercent("0x1A")); + try std.testing.expectEqual(@as(u32, 0x1), try parsePercent("1zzzz")); + try std.testing.expectEqual(@as(u32, 0xffffffff), try parsePercent("-1")); + try std.testing.expectEqual(@as(u32, 0xfffffff0), try parsePercent("-16")); + try std.testing.expectEqual(@as(u32, 1), try parsePercent("4294967297")); + try std.testing.expectError(error.InvalidFormat, parsePercent("--1")); + try std.testing.expectError(error.InvalidFormat, parsePercent("ha")); + try std.testing.expectError(error.InvalidFormat, parsePercent("¹")); + try std.testing.expectError(error.InvalidFormat, parsePercent("~1")); +} + +pub fn renderErrorMessage(writer: anytype, config: std.io.tty.Config, err_details: Diagnostics.ErrorDetails, args: []const []const u8) !void { + try config.setColor(writer, .dim); + try writer.writeAll(""); + try config.setColor(writer, .reset); + try config.setColor(writer, .bold); + try writer.writeAll(": "); + switch (err_details.type) { + .err => { + try config.setColor(writer, .red); + try writer.writeAll("error: "); + }, + .warning => { + try config.setColor(writer, .yellow); + try writer.writeAll("warning: "); + }, + .note => { + try config.setColor(writer, .cyan); + try writer.writeAll("note: "); + }, + } + try config.setColor(writer, .reset); + try config.setColor(writer, .bold); + try writer.writeAll(err_details.msg.items); + try writer.writeByte('\n'); + try config.setColor(writer, .reset); + + if (!err_details.print_args) { + try writer.writeByte('\n'); + return; + } + + try 
config.setColor(writer, .dim); + const prefix = " ... "; + try writer.writeAll(prefix); + try config.setColor(writer, .reset); + + const arg_with_name = args[err_details.arg_index]; + const prefix_slice = arg_with_name[0..err_details.arg_span.prefix_len]; + const before_name_slice = arg_with_name[err_details.arg_span.prefix_len..err_details.arg_span.name_offset]; + var name_slice = arg_with_name[err_details.arg_span.name_offset..]; + if (err_details.arg_span.name_len > 0) name_slice.len = err_details.arg_span.name_len; + const after_name_slice = arg_with_name[err_details.arg_span.name_offset + name_slice.len ..]; + + try writer.writeAll(prefix_slice); + if (before_name_slice.len > 0) { + try config.setColor(writer, .dim); + try writer.writeAll(before_name_slice); + try config.setColor(writer, .reset); + } + try writer.writeAll(name_slice); + if (after_name_slice.len > 0) { + try config.setColor(writer, .dim); + try writer.writeAll(after_name_slice); + try config.setColor(writer, .reset); + } + + var next_arg_len: usize = 0; + if (err_details.arg_span.point_at_next_arg and err_details.arg_index + 1 < args.len) { + const next_arg = args[err_details.arg_index + 1]; + try writer.writeByte(' '); + try writer.writeAll(next_arg); + next_arg_len = next_arg.len; + } + + const last_shown_arg_index = if (err_details.arg_span.point_at_next_arg) err_details.arg_index + 1 else err_details.arg_index; + if (last_shown_arg_index + 1 < args.len) { + // special case for when pointing to a missing value within the same arg + // as the name + if (err_details.arg_span.value_offset >= arg_with_name.len) { + try writer.writeByte(' '); + } + try config.setColor(writer, .dim); + try writer.writeAll(" ..."); + try config.setColor(writer, .reset); + } + try writer.writeByte('\n'); + + try config.setColor(writer, .green); + try writer.writeByteNTimes(' ', prefix.len); + // Special case for when the option is *only* a prefix (e.g. 
invalid option: -) + if (err_details.arg_span.prefix_len == arg_with_name.len) { + try writer.writeByteNTimes('^', err_details.arg_span.prefix_len); + } else { + try writer.writeByteNTimes('~', err_details.arg_span.prefix_len); + try writer.writeByteNTimes(' ', err_details.arg_span.name_offset - err_details.arg_span.prefix_len); + if (!err_details.arg_span.point_at_next_arg and err_details.arg_span.value_offset == 0) { + try writer.writeByte('^'); + try writer.writeByteNTimes('~', name_slice.len - 1); + } else if (err_details.arg_span.value_offset > 0) { + try writer.writeByteNTimes('~', err_details.arg_span.value_offset - err_details.arg_span.name_offset); + try writer.writeByte('^'); + if (err_details.arg_span.value_offset < arg_with_name.len) { + try writer.writeByteNTimes('~', arg_with_name.len - err_details.arg_span.value_offset - 1); + } + } else if (err_details.arg_span.point_at_next_arg) { + try writer.writeByteNTimes('~', arg_with_name.len - err_details.arg_span.name_offset + 1); + try writer.writeByte('^'); + if (next_arg_len > 0) { + try writer.writeByteNTimes('~', next_arg_len - 1); + } + } + } + try writer.writeByte('\n'); + try config.setColor(writer, .reset); +} + +fn testParse(args: []const []const u8) !Options { + return (try testParseOutput(args, "")).?; +} + +fn testParseWarning(args: []const []const u8, expected_output: []const u8) !Options { + return (try testParseOutput(args, expected_output)).?; +} + +fn testParseError(args: []const []const u8, expected_output: []const u8) !void { + var maybe_options = try testParseOutput(args, expected_output); + if (maybe_options != null) { + std.debug.print("expected error, got options: {}\n", .{maybe_options.?}); + maybe_options.?.deinit(); + return error.TestExpectedError; + } +} + +fn testParseOutput(args: []const []const u8, expected_output: []const u8) !?Options { + var diagnostics = Diagnostics.init(std.testing.allocator); + defer diagnostics.deinit(); + + var output = std.ArrayList(u8).init(std.testing.allocator); + defer output.deinit(); + + var options = parse(std.testing.allocator, args, &diagnostics) catch |err| switch (err) { + error.ParseError => { + try diagnostics.renderToWriter(args, output.writer(), .no_color); + try std.testing.expectEqualStrings(expected_output, output.items); + return null; + }, + else => |e| return e, + }; + errdefer options.deinit(); + + try diagnostics.renderToWriter(args, output.writer(), .no_color); + try std.testing.expectEqualStrings(expected_output, output.items); + return options; +} + +test "parse errors: basic" { + try testParseError(&.{ "foo.exe", "/" }, + \\: error: invalid option: / + \\ ... / + \\ ^ + \\: error: missing input filename + \\ + \\ + ); + try testParseError(&.{ "foo.exe", "/ln" }, + \\: error: missing language tag after /ln option + \\ ... /ln + \\ ~~~~^ + \\: error: missing input filename + \\ + \\ + ); + try testParseError(&.{ "foo.exe", "-vln" }, + \\: error: missing language tag after -ln option + \\ ... -vln + \\ ~ ~~~^ + \\: error: missing input filename + \\ + \\ + ); + try testParseError(&.{ "foo.exe", "/_not-an-option" }, + \\: error: invalid option: /_not-an-option + \\ ... /_not-an-option + \\ ~^~~~~~~~~~~~~~ + \\: error: missing input filename + \\ + \\ + ); + try testParseError(&.{ "foo.exe", "-_not-an-option" }, + \\: error: invalid option: -_not-an-option + \\ ... 
-_not-an-option + \\ ~^~~~~~~~~~~~~~ + \\: error: missing input filename + \\ + \\ + ); + try testParseError(&.{ "foo.exe", "--_not-an-option" }, + \\: error: invalid option: --_not-an-option + \\ ... --_not-an-option + \\ ~~^~~~~~~~~~~~~~ + \\: error: missing input filename + \\ + \\ + ); + try testParseError(&.{ "foo.exe", "/v_not-an-option" }, + \\: error: invalid option: /_not-an-option + \\ ... /v_not-an-option + \\ ~ ^~~~~~~~~~~~~~ + \\: error: missing input filename + \\ + \\ + ); + try testParseError(&.{ "foo.exe", "-v_not-an-option" }, + \\: error: invalid option: -_not-an-option + \\ ... -v_not-an-option + \\ ~ ^~~~~~~~~~~~~~ + \\: error: missing input filename + \\ + \\ + ); + try testParseError(&.{ "foo.exe", "--v_not-an-option" }, + \\: error: invalid option: --_not-an-option + \\ ... --v_not-an-option + \\ ~~ ^~~~~~~~~~~~~~ + \\: error: missing input filename + \\ + \\ + ); + try testParseError(&.{ "foo.exe", "/some/absolute/path/parsed/as/an/option.rc" }, + \\: error: the /s option is unsupported + \\ ... /some/absolute/path/parsed/as/an/option.rc + \\ ~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + \\: error: missing input filename + \\ + \\: note: if this argument was intended to be the input filename, then -- should be specified in front of it to exclude it from option parsing + \\ ... /some/absolute/path/parsed/as/an/option.rc + \\ ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + \\ + ); +} + +test "parse errors: /ln" { + try testParseError(&.{ "foo.exe", "/ln", "invalid", "foo.rc" }, + \\: error: invalid language tag: invalid + \\ ... /ln invalid ... + \\ ~~~~^~~~~~~ + \\ + ); + try testParseError(&.{ "foo.exe", "/lninvalid", "foo.rc" }, + \\: error: invalid language tag: invalid + \\ ... /lninvalid ... + \\ ~~~^~~~~~~ + \\ + ); +} + +test "parse: options" { + { + var options = try testParse(&.{ "foo.exe", "/v", "foo.rc" }); + defer options.deinit(); + + try std.testing.expectEqual(true, options.verbose); + try std.testing.expectEqualStrings("foo.rc", options.input_filename); + try std.testing.expectEqualStrings("foo.res", options.output_filename); + } + { + var options = try testParse(&.{ "foo.exe", "/vx", "foo.rc" }); + defer options.deinit(); + + try std.testing.expectEqual(true, options.verbose); + try std.testing.expectEqual(true, options.ignore_include_env_var); + try std.testing.expectEqualStrings("foo.rc", options.input_filename); + try std.testing.expectEqualStrings("foo.res", options.output_filename); + } + { + var options = try testParse(&.{ "foo.exe", "/xv", "foo.rc" }); + defer options.deinit(); + + try std.testing.expectEqual(true, options.verbose); + try std.testing.expectEqual(true, options.ignore_include_env_var); + try std.testing.expectEqualStrings("foo.rc", options.input_filename); + try std.testing.expectEqualStrings("foo.res", options.output_filename); + } + { + var options = try testParse(&.{ "foo.exe", "/xvFObar.res", "foo.rc" }); + defer options.deinit(); + + try std.testing.expectEqual(true, options.verbose); + try std.testing.expectEqual(true, options.ignore_include_env_var); + try std.testing.expectEqualStrings("foo.rc", options.input_filename); + try std.testing.expectEqualStrings("bar.res", options.output_filename); + } +} + +test "parse: define and undefine" { + { + var options = try testParse(&.{ "foo.exe", "/dfoo", "foo.rc" }); + defer options.deinit(); + + const action = options.symbols.get("foo").?; + try std.testing.expectEqual(Options.SymbolAction.define, action); + try std.testing.expectEqualStrings("1", action.define); + } + { + var 
options = try testParse(&.{ "foo.exe", "/dfoo=bar", "/dfoo=baz", "foo.rc" }); + defer options.deinit(); + + const action = options.symbols.get("foo").?; + try std.testing.expectEqual(Options.SymbolAction.define, action); + try std.testing.expectEqualStrings("baz", action.define); + } + { + var options = try testParse(&.{ "foo.exe", "/ufoo", "foo.rc" }); + defer options.deinit(); + + const action = options.symbols.get("foo").?; + try std.testing.expectEqual(Options.SymbolAction.undefine, action); + } + { + // Once undefined, future defines are ignored + var options = try testParse(&.{ "foo.exe", "/ufoo", "/dfoo", "foo.rc" }); + defer options.deinit(); + + const action = options.symbols.get("foo").?; + try std.testing.expectEqual(Options.SymbolAction.undefine, action); + } + { + // Undefined always takes precedence + var options = try testParse(&.{ "foo.exe", "/dfoo", "/ufoo", "/dfoo", "foo.rc" }); + defer options.deinit(); + + const action = options.symbols.get("foo").?; + try std.testing.expectEqual(Options.SymbolAction.undefine, action); + } + { + // Warn + ignore invalid identifiers + var options = try testParseWarning( + &.{ "foo.exe", "/dfoo bar", "/u", "0leadingdigit", "foo.rc" }, + \\: warning: symbol "foo bar" is not a valid identifier and therefore cannot be defined + \\ ... /dfoo bar ... + \\ ~~^~~~~~~ + \\: warning: symbol "0leadingdigit" is not a valid identifier and therefore cannot be undefined + \\ ... /u 0leadingdigit ... + \\ ~~~^~~~~~~~~~~~~ + \\ + , + ); + defer options.deinit(); + + try std.testing.expectEqual(@as(usize, 0), options.symbols.count()); + } +} + +test "parse: /sl" { + try testParseError(&.{ "foo.exe", "/sl", "0", "foo.rc" }, + \\: error: percent out of range: 0 (parsed from '0') + \\ ... /sl 0 ... + \\ ~~~~^ + \\: note: string length percent must be an integer between 1 and 100 (inclusive) + \\ + \\ + ); + try testParseError(&.{ "foo.exe", "/sl", "abcd", "foo.rc" }, + \\: error: invalid percent format 'abcd' + \\ ... /sl abcd ... + \\ ~~~~^~~~ + \\: note: string length percent must be an integer between 1 and 100 (inclusive) + \\ + \\ + ); + { + var options = try testParse(&.{ "foo.exe", "foo.rc" }); + defer options.deinit(); + + try std.testing.expectEqual(@as(u15, lex.default_max_string_literal_codepoints), options.max_string_literal_codepoints); + } + { + var options = try testParse(&.{ "foo.exe", "/sl100", "foo.rc" }); + defer options.deinit(); + + try std.testing.expectEqual(@as(u15, max_string_literal_length_100_percent), options.max_string_literal_codepoints); + } + { + var options = try testParse(&.{ "foo.exe", "-SL33", "foo.rc" }); + defer options.deinit(); + + try std.testing.expectEqual(@as(u15, 2703), options.max_string_literal_codepoints); + } + { + var options = try testParse(&.{ "foo.exe", "/sl15", "foo.rc" }); + defer options.deinit(); + + try std.testing.expectEqual(@as(u15, 1228), options.max_string_literal_codepoints); + } +} + +test "parse: unsupported MUI-related options" { + try testParseError(&.{ "foo.exe", "/q", "blah", "/g1", "-G2", "blah", "/fm", "blah", "/g", "blah", "foo.rc" }, + \\: error: the /q option is unsupported + \\ ... /q ... + \\ ~^ + \\: error: the /g1 option is unsupported + \\ ... /g1 ... + \\ ~^~ + \\: error: the -G2 option is unsupported + \\ ... -G2 ... + \\ ~^~ + \\: error: the /fm option is unsupported + \\ ... /fm ... + \\ ~^~ + \\: error: the /g option is unsupported + \\ ... /g ... 
+ \\ ~^ + \\ + ); +} + +test "parse: unsupported LCX/LCE-related options" { + try testParseError(&.{ "foo.exe", "/t", "/tp:", "/tp:blah", "/tm", "/tc", "/tw", "-TEti", "/ta", "/tn", "blah", "foo.rc" }, + \\: error: the /t option is unsupported + \\ ... /t ... + \\ ~^ + \\: error: missing value for /tp: option + \\ ... /tp: ... + \\ ~~~~^ + \\: error: the /tp: option is unsupported + \\ ... /tp: ... + \\ ~^~~ + \\: error: the /tp: option is unsupported + \\ ... /tp:blah ... + \\ ~^~~~~~~ + \\: error: the /tm option is unsupported + \\ ... /tm ... + \\ ~^~ + \\: error: the /tc option is unsupported + \\ ... /tc ... + \\ ~^~ + \\: error: the /tw option is unsupported + \\ ... /tw ... + \\ ~^~ + \\: error: the -TE option is unsupported + \\ ... -TEti ... + \\ ~^~ + \\: error: the -ti option is unsupported + \\ ... -TEti ... + \\ ~ ^~ + \\: error: the /ta option is unsupported + \\ ... /ta ... + \\ ~^~ + \\: error: the /tn option is unsupported + \\ ... /tn ... + \\ ~^~ + \\ + ); +} + +test "maybeAppendRC" { + var tmp = std.testing.tmpDir(.{}); + defer tmp.cleanup(); + + var options = try testParse(&.{ "foo.exe", "foo" }); + defer options.deinit(); + try std.testing.expectEqualStrings("foo", options.input_filename); + + // Create the file so that it's found. In this scenario, .rc should not get + // appended. + var file = try tmp.dir.createFile("foo", .{}); + file.close(); + try options.maybeAppendRC(tmp.dir); + try std.testing.expectEqualStrings("foo", options.input_filename); + + // Now delete the file and try again. Since the verbatim name is no longer found + // and the input filename does not have an extension, .rc should get appended. + try tmp.dir.deleteFile("foo"); + try options.maybeAppendRC(tmp.dir); + try std.testing.expectEqualStrings("foo.rc", options.input_filename); +} diff --git a/src/resinator/code_pages.zig b/src/resinator/code_pages.zig new file mode 100644 index 000000000000..4b9a87ce7a55 --- /dev/null +++ b/src/resinator/code_pages.zig @@ -0,0 +1,487 @@ +const std = @import("std"); +const windows1252 = @import("windows1252.zig"); + +// TODO: Parts of this comment block may be more relevant to string/NameOrOrdinal parsing +// than it is to the stuff in this file. 
+//
+// ‰ representations for context:
+//  Win-1252   89
+//  UTF-8      E2 80 B0
+//  UTF-16     20 30
+//
+// With code page 65001:
+//  ‰ RCDATA { "‰" L"‰" }
+//   File encoded as Windows-1252:
+//    ‰ => 0xFFFD as u16
+//    "‰" => 0x3F ('?')
+//    L"‰" => 0xFFFD as u16
+//   File encoded as UTF-8:
+//    ‰ => 0x2030 as u16
+//    "‰" => 0x89 ('‰' encoded as Windows-1252)
+//    L"‰" => 0x2030 as u16
+//
+// With code page 1252:
+//  ‰ RCDATA { "‰" L"‰" }
+//   File encoded as Windows-1252:
+//    ‰ => 0x2030 as u16
+//    "‰" => 0x89 ('‰' encoded as Windows-1252)
+//    L"‰" => 0x2030 as u16
+//   File encoded as UTF-8:
+//    ‰ => 0xE2 as u16, 0x20AC as u16, 0xB0 as u16
+//      ^ first byte of UTF-8 representation
+//      ^ second byte of UTF-8 representation (0x80), but interpreted as
+//        Windows-1252 ('€') and then converted to UTF-16 (0x20AC)
+//      ^ third byte of UTF-8 representation
+//    "‰" => 0xE2, 0x80, 0xB0 (the bytes of the UTF-8 representation)
+//    L"‰" => 0xE2 as u16, 0x20AC as u16, 0xB0 as u16 (see '‰ =>' explanation)
+//
+// With code page 1252:
+//  <0x90> RCDATA { "<0x90>" L"<0x90>" }
+//   File encoded as Windows-1252:
+//    <0x90> => 0x90 as u16
+//    "<0x90>" => 0x90
+//    L"<0x90>" => 0x90 as u16
+//   File encoded as UTF-8:
+//    <0x90> => 0xC2 as u16, 0x90 as u16
+//    "<0x90>" => 0xC2, 0x90 (the bytes of the UTF-8 representation of <0x90>)
+//    L"<0x90>" => 0xC2 as u16, 0x90 as u16
+//
+// Within a raw data block, file encoded as Windows-1252 (Â is <0xC2>):
+//  "Âa" L"Âa" "\xC2ad" L"\xC2AD"
+//  With code page 1252:
+//   C2 61 C2 00 61 00 C2 61 64 AD C2
+//   Â^ a^ Â~~~^ a~~~^ .^ a^ d^ ^~~~~\xC2AD
+//       \xC2~`
+//  With code page 65001:
+//   3F 61 FD FF 61 00 C2 61 64 AD C2
+//   ^. a^ ^~~~. a~~~^ ^. a^ d^ ^~~~~\xC2AD
+//   `. `.  `~\xC2
+//   `. `.~<0xC2>a is not well-formed UTF-8 (0xC2 expects a continuation byte after it).
+//   `.   Because 'a' is a valid first byte of a UTF-8 sequence, it is not included in the
+//   `.   invalid sequence so only the <0xC2> gets converted to 0xFFFD.
+//   `~Same as ^ but converted to '?' instead.
+//
+// Within a raw data block, file encoded as Windows-1252 (ð is <0xF0>, € is <0x80>):
+//  "ð€a" L"ð€a"
+//  With code page 1252:
+//   F0 80 61 F0 00 AC 20 61 00
+//   ð^ €^ a^ ð~~~^ €~~~^ a~~~^
+//  With code page 65001:
+//   3F 61 FD FF 61 00
+//   ^. a^ ^~~~. a~~~^
+//   `. `.
+//   `. `.~<0xF0><0x80> is not well-formed UTF-8, and <0x80> is not a valid first byte, so
+//   `.   both bytes are considered an invalid sequence and get converted to 0xFFFD
+//   `~Same as ^ but converted to '?' instead.
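+//
+// A minimal illustrative sketch of the behavior described above, assuming the
+// `CodePage` and `Codepoint` declarations defined later in this file: the same
+// 0x89 byte decodes differently depending on the code page.
+test "0x89 interpretation depends on code page" {
+    const permille_windows1252 = "\x89"; // '‰' encoded as Windows-1252
+    // Interpreted as Windows-1252, 0x89 maps to U+2030 ('‰').
+    try std.testing.expectEqual(
+        Codepoint{ .value = 0x2030, .byte_len = 1 },
+        CodePage.windows1252.codepointAt(0, permille_windows1252).?,
+    );
+    // Interpreted as UTF-8, a lone 0x89 is an ill-formed sequence.
+    try std.testing.expectEqual(
+        Codepoint{ .value = Codepoint.invalid, .byte_len = 1 },
+        CodePage.utf8.codepointAt(0, permille_windows1252).?,
+    );
+}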
+ +/// https://learn.microsoft.com/en-us/windows/win32/intl/code-page-identifiers +pub const CodePage = enum(u16) { + // supported + windows1252 = 1252, // windows-1252 ANSI Latin 1; Western European (Windows) + utf8 = 65001, // utf-8 Unicode (UTF-8) + + // unsupported but valid + ibm037 = 37, // IBM037 IBM EBCDIC US-Canada + ibm437 = 437, // IBM437 OEM United States + ibm500 = 500, // IBM500 IBM EBCDIC International + asmo708 = 708, // ASMO-708 Arabic (ASMO 708) + asmo449plus = 709, // Arabic (ASMO-449+, BCON V4) + transparent_arabic = 710, // Arabic - Transparent Arabic + dos720 = 720, // DOS-720 Arabic (Transparent ASMO); Arabic (DOS) + ibm737 = 737, // ibm737 OEM Greek (formerly 437G); Greek (DOS) + ibm775 = 775, // ibm775 OEM Baltic; Baltic (DOS) + ibm850 = 850, // ibm850 OEM Multilingual Latin 1; Western European (DOS) + ibm852 = 852, // ibm852 OEM Latin 2; Central European (DOS) + ibm855 = 855, // IBM855 OEM Cyrillic (primarily Russian) + ibm857 = 857, // ibm857 OEM Turkish; Turkish (DOS) + ibm00858 = 858, // IBM00858 OEM Multilingual Latin 1 + Euro symbol + ibm860 = 860, // IBM860 OEM Portuguese; Portuguese (DOS) + ibm861 = 861, // ibm861 OEM Icelandic; Icelandic (DOS) + dos862 = 862, // DOS-862 OEM Hebrew; Hebrew (DOS) + ibm863 = 863, // IBM863 OEM French Canadian; French Canadian (DOS) + ibm864 = 864, // IBM864 OEM Arabic; Arabic (864) + ibm865 = 865, // IBM865 OEM Nordic; Nordic (DOS) + cp866 = 866, // cp866 OEM Russian; Cyrillic (DOS) + ibm869 = 869, // ibm869 OEM Modern Greek; Greek, Modern (DOS) + ibm870 = 870, // IBM870 IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2 + windows874 = 874, // windows-874 Thai (Windows) + cp875 = 875, // cp875 IBM EBCDIC Greek Modern + shift_jis = 932, // shift_jis ANSI/OEM Japanese; Japanese (Shift-JIS) + gb2312 = 936, // gb2312 ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312) + ks_c_5601_1987 = 949, // ks_c_5601-1987 ANSI/OEM Korean (Unified Hangul Code) + big5 = 950, // big5 ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5) + ibm1026 = 1026, // IBM1026 IBM EBCDIC Turkish (Latin 5) + ibm01047 = 1047, // IBM01047 IBM EBCDIC Latin 1/Open System + ibm01140 = 1140, // IBM01140 IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro) + ibm01141 = 1141, // IBM01141 IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro) + ibm01142 = 1142, // IBM01142 IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro) + ibm01143 = 1143, // IBM01143 IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro) + ibm01144 = 1144, // IBM01144 IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro) + ibm01145 = 1145, // IBM01145 IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro) + ibm01146 = 1146, // IBM01146 IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro) + ibm01147 = 1147, // IBM01147 IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro) + ibm01148 = 1148, // IBM01148 IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro) + ibm01149 = 1149, // IBM01149 IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro) + utf16 = 1200, // utf-16 Unicode UTF-16, little endian byte order (BMP of ISO 10646); available only to managed applications + utf16_fffe = 1201, // unicodeFFFE Unicode UTF-16, big endian byte order; available only to managed applications + windows1250 = 1250, // windows-1250 ANSI Central European; 
Central European (Windows) + windows1251 = 1251, // windows-1251 ANSI Cyrillic; Cyrillic (Windows) + windows1253 = 1253, // windows-1253 ANSI Greek; Greek (Windows) + windows1254 = 1254, // windows-1254 ANSI Turkish; Turkish (Windows) + windows1255 = 1255, // windows-1255 ANSI Hebrew; Hebrew (Windows) + windows1256 = 1256, // windows-1256 ANSI Arabic; Arabic (Windows) + windows1257 = 1257, // windows-1257 ANSI Baltic; Baltic (Windows) + windows1258 = 1258, // windows-1258 ANSI/OEM Vietnamese; Vietnamese (Windows) + johab = 1361, // Johab Korean (Johab) + macintosh = 10000, // macintosh MAC Roman; Western European (Mac) + x_mac_japanese = 10001, // x-mac-japanese Japanese (Mac) + x_mac_chinesetrad = 10002, // x-mac-chinesetrad MAC Traditional Chinese (Big5); Chinese Traditional (Mac) + x_mac_korean = 10003, // x-mac-korean Korean (Mac) + x_mac_arabic = 10004, // x-mac-arabic Arabic (Mac) + x_mac_hebrew = 10005, // x-mac-hebrew Hebrew (Mac) + x_mac_greek = 10006, // x-mac-greek Greek (Mac) + x_mac_cyrillic = 10007, // x-mac-cyrillic Cyrillic (Mac) + x_mac_chinesesimp = 10008, // x-mac-chinesesimp MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac) + x_mac_romanian = 10010, // x-mac-romanian Romanian (Mac) + x_mac_ukranian = 10017, // x-mac-ukrainian Ukrainian (Mac) + x_mac_thai = 10021, // x-mac-thai Thai (Mac) + x_mac_ce = 10029, // x-mac-ce MAC Latin 2; Central European (Mac) + x_mac_icelandic = 10079, // x-mac-icelandic Icelandic (Mac) + x_mac_turkish = 10081, // x-mac-turkish Turkish (Mac) + x_mac_croatian = 10082, // x-mac-croatian Croatian (Mac) + utf32 = 12000, // utf-32 Unicode UTF-32, little endian byte order; available only to managed applications + utf32_be = 12001, // utf-32BE Unicode UTF-32, big endian byte order; available only to managed applications + x_chinese_cns = 20000, // x-Chinese_CNS CNS Taiwan; Chinese Traditional (CNS) + x_cp20001 = 20001, // x-cp20001 TCA Taiwan + x_chinese_eten = 20002, // x_Chinese-Eten Eten Taiwan; Chinese Traditional (Eten) + x_cp20003 = 20003, // x-cp20003 IBM5550 Taiwan + x_cp20004 = 20004, // x-cp20004 TeleText Taiwan + x_cp20005 = 20005, // x-cp20005 Wang Taiwan + x_ia5 = 20105, // x-IA5 IA5 (IRV International Alphabet No. 
5, 7-bit); Western European (IA5) + x_ia5_german = 20106, // x-IA5-German IA5 German (7-bit) + x_ia5_swedish = 20107, // x-IA5-Swedish IA5 Swedish (7-bit) + x_ia5_norwegian = 20108, // x-IA5-Norwegian IA5 Norwegian (7-bit) + us_ascii = 20127, // us-ascii US-ASCII (7-bit) + x_cp20261 = 20261, // x-cp20261 T.61 + x_cp20269 = 20269, // x-cp20269 ISO 6937 Non-Spacing Accent + ibm273 = 20273, // IBM273 IBM EBCDIC Germany + ibm277 = 20277, // IBM277 IBM EBCDIC Denmark-Norway + ibm278 = 20278, // IBM278 IBM EBCDIC Finland-Sweden + ibm280 = 20280, // IBM280 IBM EBCDIC Italy + ibm284 = 20284, // IBM284 IBM EBCDIC Latin America-Spain + ibm285 = 20285, // IBM285 IBM EBCDIC United Kingdom + ibm290 = 20290, // IBM290 IBM EBCDIC Japanese Katakana Extended + ibm297 = 20297, // IBM297 IBM EBCDIC France + ibm420 = 20420, // IBM420 IBM EBCDIC Arabic + ibm423 = 20423, // IBM423 IBM EBCDIC Greek + ibm424 = 20424, // IBM424 IBM EBCDIC Hebrew + x_ebcdic_korean_extended = 20833, // x-EBCDIC-KoreanExtended IBM EBCDIC Korean Extended + ibm_thai = 20838, // IBM-Thai IBM EBCDIC Thai + koi8_r = 20866, // koi8-r Russian (KOI8-R); Cyrillic (KOI8-R) + ibm871 = 20871, // IBM871 IBM EBCDIC Icelandic + ibm880 = 20880, // IBM880 IBM EBCDIC Cyrillic Russian + ibm905 = 20905, // IBM905 IBM EBCDIC Turkish + ibm00924 = 20924, // IBM00924 IBM EBCDIC Latin 1/Open System (1047 + Euro symbol) + euc_jp_jis = 20932, // EUC-JP Japanese (JIS 0208-1990 and 0212-1990) + x_cp20936 = 20936, // x-cp20936 Simplified Chinese (GB2312); Chinese Simplified (GB2312-80) + x_cp20949 = 20949, // x-cp20949 Korean Wansung + cp1025 = 21025, // cp1025 IBM EBCDIC Cyrillic Serbian-Bulgarian + // = 21027, // (deprecated) + koi8_u = 21866, // koi8-u Ukrainian (KOI8-U); Cyrillic (KOI8-U) + iso8859_1 = 28591, // iso-8859-1 ISO 8859-1 Latin 1; Western European (ISO) + iso8859_2 = 28592, // iso-8859-2 ISO 8859-2 Central European; Central European (ISO) + iso8859_3 = 28593, // iso-8859-3 ISO 8859-3 Latin 3 + iso8859_4 = 28594, // iso-8859-4 ISO 8859-4 Baltic + iso8859_5 = 28595, // iso-8859-5 ISO 8859-5 Cyrillic + iso8859_6 = 28596, // iso-8859-6 ISO 8859-6 Arabic + iso8859_7 = 28597, // iso-8859-7 ISO 8859-7 Greek + iso8859_8 = 28598, // iso-8859-8 ISO 8859-8 Hebrew; Hebrew (ISO-Visual) + iso8859_9 = 28599, // iso-8859-9 ISO 8859-9 Turkish + iso8859_13 = 28603, // iso-8859-13 ISO 8859-13 Estonian + iso8859_15 = 28605, // iso-8859-15 ISO 8859-15 Latin 9 + x_europa = 29001, // x-Europa Europa 3 + is8859_8_i = 38598, // iso-8859-8-i ISO 8859-8 Hebrew; Hebrew (ISO-Logical) + iso2022_jp = 50220, // iso-2022-jp ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS) + cs_iso2022_jp = 50221, // csISO2022JP ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana) + iso2022_jp_jis_x = 50222, // iso-2022-jp ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI) + iso2022_kr = 50225, // iso-2022-kr ISO 2022 Korean + x_cp50227 = 50227, // x-cp50227 ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022) + iso2022_chinesetrad = 50229, // ISO 2022 Traditional Chinese + ebcdic_jp_katakana_extended = 50930, // EBCDIC Japanese (Katakana) Extended + ebcdic_us_ca_jp = 50931, // EBCDIC US-Canada and Japanese + ebcdic_kr_extended = 50933, // EBCDIC Korean Extended and Korean + ebcdic_chinesesimp_extended = 50935, // EBCDIC Simplified Chinese Extended and Simplified Chinese + ebcdic_chinesesimp = 50936, // EBCDIC Simplified Chinese + ebcdic_us_ca_chinesetrad = 50937, // EBCDIC US-Canada and Traditional Chinese + 
ebcdic_jp_latin_extended = 50939, // EBCDIC Japanese (Latin) Extended and Japanese + euc_jp = 51932, // euc-jp EUC Japanese + euc_cn = 51936, // EUC-CN EUC Simplified Chinese; Chinese Simplified (EUC) + euc_kr = 51949, // euc-kr EUC Korean + euc_chinesetrad = 51950, // EUC Traditional Chinese + hz_gb2312 = 52936, // hz-gb-2312 HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ) + gb18030 = 54936, // GB18030 Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030) + x_iscii_de = 57002, // x-iscii-de ISCII Devanagari + x_iscii_be = 57003, // x-iscii-be ISCII Bangla + x_iscii_ta = 57004, // x-iscii-ta ISCII Tamil + x_iscii_te = 57005, // x-iscii-te ISCII Telugu + x_iscii_as = 57006, // x-iscii-as ISCII Assamese + x_iscii_or = 57007, // x-iscii-or ISCII Odia + x_iscii_ka = 57008, // x-iscii-ka ISCII Kannada + x_iscii_ma = 57009, // x-iscii-ma ISCII Malayalam + x_iscii_gu = 57010, // x-iscii-gu ISCII Gujarati + x_iscii_pa = 57011, // x-iscii-pa ISCII Punjabi + utf7 = 65000, // utf-7 Unicode (UTF-7) + + pub fn codepointAt(code_page: CodePage, index: usize, bytes: []const u8) ?Codepoint { + if (index >= bytes.len) return null; + switch (code_page) { + .windows1252 => { + // All byte values have a representation, so just convert the byte + return Codepoint{ + .value = windows1252.toCodepoint(bytes[index]), + .byte_len = 1, + }; + }, + .utf8 => { + return Utf8.WellFormedDecoder.decode(bytes[index..]); + }, + else => unreachable, + } + } + + pub fn isSupported(code_page: CodePage) bool { + return switch (code_page) { + .windows1252, .utf8 => true, + else => false, + }; + } + + pub fn getByIdentifier(identifier: u16) !CodePage { + // There's probably a more efficient way to do this (e.g. ComptimeHashMap?) but + // this should be fine, especially since this function likely won't be called much. + inline for (@typeInfo(CodePage).Enum.fields) |enumField| { + if (identifier == enumField.value) { + return @field(CodePage, enumField.name); + } + } + return error.InvalidCodePage; + } + + pub fn getByIdentifierEnsureSupported(identifier: u16) !CodePage { + const code_page = try getByIdentifier(identifier); + switch (isSupported(code_page)) { + true => return code_page, + false => return error.UnsupportedCodePage, + } + } +}; + +pub const Utf8 = struct { + /// Implements decoding with rejection of ill-formed UTF-8 sequences based on section + /// D92 of Chapter 3 of the Unicode standard (Table 3-7 specifically). + pub const WellFormedDecoder = struct { + /// Like std.unicode.utf8ByteSequenceLength, but: + /// - Rejects non-well-formed first bytes, i.e. 
C0-C1, F5-FF
+ /// - Returns an optional value instead of an error union
+ pub fn sequenceLength(first_byte: u8) ?u3 {
+ return switch (first_byte) {
+ 0x00...0x7F => 1,
+ 0xC2...0xDF => 2,
+ 0xE0...0xEF => 3,
+ 0xF0...0xF4 => 4,
+ else => null,
+ };
+ }
+
+ fn isContinuationByte(byte: u8) bool {
+ return switch (byte) {
+ 0x80...0xBF => true,
+ else => false,
+ };
+ }
+
+ pub fn decode(bytes: []const u8) Codepoint {
+ std.debug.assert(bytes.len > 0);
+ var first_byte = bytes[0];
+ var expected_len = sequenceLength(first_byte) orelse {
+ return .{ .value = Codepoint.invalid, .byte_len = 1 };
+ };
+ if (expected_len == 1) return .{ .value = first_byte, .byte_len = 1 };
+
+ // The number of payload bits in the first byte depends on the sequence length
+ // (5 bits for 2-byte, 4 bits for 3-byte, 3 bits for 4-byte sequences); masking
+ // with a single 5-bit mask would keep a stray bit for 4-byte sequences and
+ // overflow the u21 once the continuation bytes are shifted in.
+ var value: u21 = switch (expected_len) {
+ 2 => first_byte & 0b00011111,
+ 3 => first_byte & 0b00001111,
+ 4 => first_byte & 0b00000111,
+ else => unreachable,
+ };
+ var byte_index: u8 = 1;
+ while (byte_index < @min(bytes.len, expected_len)) : (byte_index += 1) {
+ const byte = bytes[byte_index];
+ // See Table 3-7 of D92 in Chapter 3 of the Unicode Standard
+ const valid: bool = switch (byte_index) {
+ 1 => switch (first_byte) {
+ 0xE0 => switch (byte) {
+ 0xA0...0xBF => true,
+ else => false,
+ },
+ 0xED => switch (byte) {
+ 0x80...0x9F => true,
+ else => false,
+ },
+ 0xF0 => switch (byte) {
+ 0x90...0xBF => true,
+ else => false,
+ },
+ 0xF4 => switch (byte) {
+ 0x80...0x8F => true,
+ else => false,
+ },
+ else => switch (byte) {
+ 0x80...0xBF => true,
+ else => false,
+ },
+ },
+ else => switch (byte) {
+ 0x80...0xBF => true,
+ else => false,
+ },
+ };
+
+ if (!valid) {
+ var len = byte_index;
+ // Only include the byte in the invalid sequence if it's in the range
+ // of a continuation byte. All other values should not be included in the
+ // invalid sequence.
+ //
+ // Note: This is how the Windows RC compiler handles this, this may not
+ // be the correct-as-according-to-the-Unicode-standard way to do it.
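+ //
+ // For example: in the sequence <0xE1><0xA0><0xC0>, the <0xC0> is not a continuation
+ // byte, so the invalid sequence is just <0xE1><0xA0> (byte_len 2) and <0xC0> starts a
+ // new (also invalid) sequence, whereas in <0xF0><0x80> the <0x80> is a continuation
+ // byte and is included, giving byte_len 2 as well (see the tests below).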
+ if (isContinuationByte(byte)) len += 1; + return .{ .value = Codepoint.invalid, .byte_len = len }; + } + + value <<= 6; + value |= byte & 0b00111111; + } + if (byte_index != expected_len) { + return .{ .value = Codepoint.invalid, .byte_len = byte_index }; + } + return .{ .value = value, .byte_len = expected_len }; + } + }; +}; + +test "Utf8.WellFormedDecoder" { + const invalid_utf8 = "\xF0\x80"; + var decoded = Utf8.WellFormedDecoder.decode(invalid_utf8); + try std.testing.expectEqual(Codepoint.invalid, decoded.value); + try std.testing.expectEqual(@as(usize, 2), decoded.byte_len); +} + +test "codepointAt invalid utf8" { + { + const invalid_utf8 = "\xf0\xf0\x80\x80\x80"; + try std.testing.expectEqual(Codepoint{ + .value = Codepoint.invalid, + .byte_len = 1, + }, CodePage.utf8.codepointAt(0, invalid_utf8).?); + try std.testing.expectEqual(Codepoint{ + .value = Codepoint.invalid, + .byte_len = 2, + }, CodePage.utf8.codepointAt(1, invalid_utf8).?); + try std.testing.expectEqual(Codepoint{ + .value = Codepoint.invalid, + .byte_len = 1, + }, CodePage.utf8.codepointAt(3, invalid_utf8).?); + try std.testing.expectEqual(Codepoint{ + .value = Codepoint.invalid, + .byte_len = 1, + }, CodePage.utf8.codepointAt(4, invalid_utf8).?); + try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(5, invalid_utf8)); + } + + { + const invalid_utf8 = "\xE1\xA0\xC0"; + try std.testing.expectEqual(Codepoint{ + .value = Codepoint.invalid, + .byte_len = 2, + }, CodePage.utf8.codepointAt(0, invalid_utf8).?); + try std.testing.expectEqual(Codepoint{ + .value = Codepoint.invalid, + .byte_len = 1, + }, CodePage.utf8.codepointAt(2, invalid_utf8).?); + try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(3, invalid_utf8)); + } + + { + const invalid_utf8 = "\xD2"; + try std.testing.expectEqual(Codepoint{ + .value = Codepoint.invalid, + .byte_len = 1, + }, CodePage.utf8.codepointAt(0, invalid_utf8).?); + try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(1, invalid_utf8)); + } + + { + const invalid_utf8 = "\xE1\xA0"; + try std.testing.expectEqual(Codepoint{ + .value = Codepoint.invalid, + .byte_len = 2, + }, CodePage.utf8.codepointAt(0, invalid_utf8).?); + try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(2, invalid_utf8)); + } + + { + const invalid_utf8 = "\xC5\xFF"; + try std.testing.expectEqual(Codepoint{ + .value = Codepoint.invalid, + .byte_len = 1, + }, CodePage.utf8.codepointAt(0, invalid_utf8).?); + try std.testing.expectEqual(Codepoint{ + .value = Codepoint.invalid, + .byte_len = 1, + }, CodePage.utf8.codepointAt(1, invalid_utf8).?); + try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(2, invalid_utf8)); + } +} + +test "codepointAt utf8 encoded" { + const utf8_encoded = "²"; + + // with code page utf8 + try std.testing.expectEqual(Codepoint{ + .value = '²', + .byte_len = 2, + }, CodePage.utf8.codepointAt(0, utf8_encoded).?); + try std.testing.expectEqual(@as(?Codepoint, null), CodePage.utf8.codepointAt(2, utf8_encoded)); + + // with code page windows1252 + try std.testing.expectEqual(Codepoint{ + .value = '\xC2', + .byte_len = 1, + }, CodePage.windows1252.codepointAt(0, utf8_encoded).?); + try std.testing.expectEqual(Codepoint{ + .value = '\xB2', + .byte_len = 1, + }, CodePage.windows1252.codepointAt(1, utf8_encoded).?); + try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(2, utf8_encoded)); +} + +test "codepointAt 
windows1252 encoded" { + const windows1252_encoded = "\xB2"; + + // with code page utf8 + try std.testing.expectEqual(Codepoint{ + .value = Codepoint.invalid, + .byte_len = 1, + }, CodePage.utf8.codepointAt(0, windows1252_encoded).?); + try std.testing.expectEqual(@as(?Codepoint, null), CodePage.utf8.codepointAt(2, windows1252_encoded)); + + // with code page windows1252 + try std.testing.expectEqual(Codepoint{ + .value = '\xB2', + .byte_len = 1, + }, CodePage.windows1252.codepointAt(0, windows1252_encoded).?); + try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(1, windows1252_encoded)); +} + +pub const Codepoint = struct { + value: u21, + byte_len: usize, + + pub const invalid: u21 = std.math.maxInt(u21); +}; diff --git a/src/resinator/comments.zig b/src/resinator/comments.zig new file mode 100644 index 000000000000..cfb27ae34174 --- /dev/null +++ b/src/resinator/comments.zig @@ -0,0 +1,340 @@ +//! Expects to run after a C preprocessor step that preserves comments. +//! +//! `rc` has a peculiar quirk where something like `blah/**/blah` will be +//! transformed into `blahblah` during parsing. However, `clang -E` will +//! transform it into `blah blah`, so in order to match `rc`, we need +//! to remove comments ourselves after the preprocessor runs. +//! Note: Multiline comments that actually span more than one line do +//! get translated to a space character by `rc`. +//! +//! Removing comments before lexing also allows the lexer to not have to +//! deal with comments which would complicate its implementation (this is something +//! of a tradeoff, as removing comments in a separate pass means that we'll +//! need to iterate the source twice instead of once, but having to deal with +//! comments when lexing would be a pain). + +const std = @import("std"); +const Allocator = std.mem.Allocator; +const UncheckedSliceWriter = @import("utils.zig").UncheckedSliceWriter; +const SourceMappings = @import("source_mapping.zig").SourceMappings; +const LineHandler = @import("lex.zig").LineHandler; +const formsLineEndingPair = @import("source_mapping.zig").formsLineEndingPair; + +/// `buf` must be at least as long as `source` +/// In-place transformation is supported (i.e. `source` and `buf` can be the same slice) +pub fn removeComments(source: []const u8, buf: []u8, source_mappings: ?*SourceMappings) []u8 { + std.debug.assert(buf.len >= source.len); + var result = UncheckedSliceWriter{ .slice = buf }; + const State = enum { + start, + forward_slash, + line_comment, + multiline_comment, + multiline_comment_end, + single_quoted, + single_quoted_escape, + double_quoted, + double_quoted_escape, + }; + var state: State = .start; + var index: usize = 0; + var pending_start: ?usize = null; + var line_handler = LineHandler{ .buffer = source }; + while (index < source.len) : (index += 1) { + const c = source[index]; + // TODO: Disallow \x1A, \x00, \x7F in comments. At least \x1A and \x00 can definitely + // cause errors or parsing weirdness in the Win32 RC compiler. These are disallowed + // in the lexer, but comments are stripped before getting to the lexer. 
+ switch (state) { + .start => switch (c) { + '/' => { + state = .forward_slash; + pending_start = index; + }, + '\r', '\n' => { + _ = line_handler.incrementLineNumber(index); + result.write(c); + }, + else => { + switch (c) { + '"' => state = .double_quoted, + '\'' => state = .single_quoted, + else => {}, + } + result.write(c); + }, + }, + .forward_slash => switch (c) { + '/' => state = .line_comment, + '*' => { + state = .multiline_comment; + }, + else => { + _ = line_handler.maybeIncrementLineNumber(index); + result.writeSlice(source[pending_start.? .. index + 1]); + pending_start = null; + state = .start; + }, + }, + .line_comment => switch (c) { + '\r', '\n' => { + _ = line_handler.incrementLineNumber(index); + result.write(c); + state = .start; + }, + else => {}, + }, + .multiline_comment => switch (c) { + '\r' => handleMultilineCarriageReturn(source, &line_handler, index, &result, source_mappings), + '\n' => { + _ = line_handler.incrementLineNumber(index); + result.write(c); + }, + '*' => state = .multiline_comment_end, + else => {}, + }, + .multiline_comment_end => switch (c) { + '\r' => { + handleMultilineCarriageReturn(source, &line_handler, index, &result, source_mappings); + // We only want to treat this as a newline if it's part of a CRLF pair. If it's + // not, then we still want to stay in .multiline_comment_end, so that e.g. `*<\r>/` still + // functions as a `*/` comment ending. Kinda crazy, but that's how the Win32 implementation works. + if (formsLineEndingPair(source, '\r', index + 1)) { + state = .multiline_comment; + } + }, + '\n' => { + _ = line_handler.incrementLineNumber(index); + result.write(c); + state = .multiline_comment; + }, + '/' => { + state = .start; + }, + else => { + state = .multiline_comment; + }, + }, + .single_quoted => switch (c) { + '\r', '\n' => { + _ = line_handler.incrementLineNumber(index); + state = .start; + result.write(c); + }, + '\\' => { + state = .single_quoted_escape; + result.write(c); + }, + '\'' => { + state = .start; + result.write(c); + }, + else => { + result.write(c); + }, + }, + .single_quoted_escape => switch (c) { + '\r', '\n' => { + _ = line_handler.incrementLineNumber(index); + state = .start; + result.write(c); + }, + else => { + state = .single_quoted; + result.write(c); + }, + }, + .double_quoted => switch (c) { + '\r', '\n' => { + _ = line_handler.incrementLineNumber(index); + state = .start; + result.write(c); + }, + '\\' => { + state = .double_quoted_escape; + result.write(c); + }, + '"' => { + state = .start; + result.write(c); + }, + else => { + result.write(c); + }, + }, + .double_quoted_escape => switch (c) { + '\r', '\n' => { + _ = line_handler.incrementLineNumber(index); + state = .start; + result.write(c); + }, + else => { + state = .double_quoted; + result.write(c); + }, + }, + } + } + return result.getWritten(); +} + +inline fn handleMultilineCarriageReturn( + source: []const u8, + line_handler: *LineHandler, + index: usize, + result: *UncheckedSliceWriter, + source_mappings: ?*SourceMappings, +) void { + // Note: Bare \r within a multiline comment should *not* be treated as a line ending for the + // purposes of removing comments, but *should* be treated as a line ending for the + // purposes of line counting/source mapping + _ = line_handler.incrementLineNumber(index); + // So only write the \r if it's part of a CRLF pair + if (formsLineEndingPair(source, '\r', index + 1)) { + result.write('\r'); + } + // And otherwise, we want to collapse the source mapping so that we can still know which + // line came 
from where. + else { + // Because the line gets collapsed, we need to decrement line number so that + // the next collapse acts on the first of the collapsed line numbers + line_handler.line_number -= 1; + if (source_mappings) |mappings| { + mappings.collapse(line_handler.line_number, 1); + } + } +} + +pub fn removeCommentsAlloc(allocator: Allocator, source: []const u8, source_mappings: ?*SourceMappings) ![]u8 { + var buf = try allocator.alloc(u8, source.len); + errdefer allocator.free(buf); + var result = removeComments(source, buf, source_mappings); + return allocator.realloc(buf, result.len); +} + +fn testRemoveComments(expected: []const u8, source: []const u8) !void { + const result = try removeCommentsAlloc(std.testing.allocator, source, null); + defer std.testing.allocator.free(result); + + try std.testing.expectEqualStrings(expected, result); +} + +test "basic" { + try testRemoveComments("", "// comment"); + try testRemoveComments("", "/* comment */"); +} + +test "mixed" { + try testRemoveComments("hello", "hello// comment"); + try testRemoveComments("hello", "hel/* comment */lo"); +} + +test "within a string" { + // escaped " is \" + try testRemoveComments( + \\blah"//som\"/*ething*/"BLAH + , + \\blah"//som\"/*ething*/"BLAH + ); +} + +test "line comments retain newlines" { + try testRemoveComments( + \\ + \\ + \\ + , + \\// comment + \\// comment + \\// comment + ); + + try testRemoveComments("\r\n", "//comment\r\n"); +} + +test "crazy" { + try testRemoveComments( + \\blah"/*som*/\""BLAH + , + \\blah"/*som*/\""/*ething*/BLAH + ); + + try testRemoveComments( + \\blah"/*som*/"BLAH RCDATA "BEGIN END + \\ + \\ + \\hello + \\" + , + \\blah"/*som*/"/*ething*/BLAH RCDATA "BEGIN END + \\// comment + \\//"blah blah" RCDATA {} + \\hello + \\" + ); +} + +test "multiline comment with newlines" { + // bare \r is not treated as a newline + try testRemoveComments("blahblah", "blah/*some\rthing*/blah"); + + try testRemoveComments( + \\blah + \\blah + , + \\blah/*some + \\thing*/blah + ); + try testRemoveComments( + "blah\r\nblah", + "blah/*some\r\nthing*/blah", + ); + + // handle * correctly + try testRemoveComments( + \\blah + \\ + \\ + , + \\blah/*some + \\thing* + \\/bl*ah*/ + ); +} + +test "comments appended to a line" { + try testRemoveComments( + \\blah + \\blah + , + \\blah // line comment + \\blah + ); + try testRemoveComments( + "blah \r\nblah", + "blah // line comment\r\nblah", + ); +} + +test "remove comments with mappings" { + const allocator = std.testing.allocator; + var mut_source = "blah/*\rcommented line*\r/blah".*; + var mappings = SourceMappings{}; + _ = try mappings.files.put(allocator, "test.rc"); + try mappings.set(allocator, 1, .{ .start_line = 1, .end_line = 1, .filename_offset = 0 }); + try mappings.set(allocator, 2, .{ .start_line = 2, .end_line = 2, .filename_offset = 0 }); + try mappings.set(allocator, 3, .{ .start_line = 3, .end_line = 3, .filename_offset = 0 }); + defer mappings.deinit(allocator); + + var result = removeComments(&mut_source, &mut_source, &mappings); + + try std.testing.expectEqualStrings("blahblah", result); + try std.testing.expectEqual(@as(usize, 1), mappings.mapping.items.len); + try std.testing.expectEqual(@as(usize, 3), mappings.mapping.items[0].end_line); +} + +test "in place" { + var mut_source = "blah /* comment */ blah".*; + var result = removeComments(&mut_source, &mut_source, null); + try std.testing.expectEqualStrings("blah blah", result); +} diff --git a/src/resinator/compile.zig b/src/resinator/compile.zig new file mode 100644 index 
000000000000..c35a882ecabe --- /dev/null +++ b/src/resinator/compile.zig @@ -0,0 +1,3356 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const Allocator = std.mem.Allocator; +const Node = @import("ast.zig").Node; +const lex = @import("lex.zig"); +const Parser = @import("parse.zig").Parser; +const Resource = @import("rc.zig").Resource; +const Token = @import("lex.zig").Token; +const literals = @import("literals.zig"); +const Number = literals.Number; +const SourceBytes = literals.SourceBytes; +const Diagnostics = @import("errors.zig").Diagnostics; +const ErrorDetails = @import("errors.zig").ErrorDetails; +const MemoryFlags = @import("res.zig").MemoryFlags; +const rc = @import("rc.zig"); +const res = @import("res.zig"); +const ico = @import("ico.zig"); +const ani = @import("ani.zig"); +const bmp = @import("bmp.zig"); +const WORD = std.os.windows.WORD; +const DWORD = std.os.windows.DWORD; +const utils = @import("utils.zig"); +const NameOrOrdinal = res.NameOrOrdinal; +const CodePage = @import("code_pages.zig").CodePage; +const CodePageLookup = @import("ast.zig").CodePageLookup; +const SourceMappings = @import("source_mapping.zig").SourceMappings; +const windows1252 = @import("windows1252.zig"); +const lang = @import("lang.zig"); +const code_pages = @import("code_pages.zig"); +const errors = @import("errors.zig"); + +pub const CompileOptions = struct { + cwd: std.fs.Dir, + diagnostics: *Diagnostics, + source_mappings: ?*SourceMappings = null, + /// List of paths (absolute or relative to `cwd`) for every file that the resources within the .rc file depend on. + /// Items within the list will be allocated using the allocator of the ArrayList and must be + /// freed by the caller. + /// TODO: Maybe a dedicated struct for this purpose so that it's a bit nicer to work with. + dependencies_list: ?*std.ArrayList([]const u8) = null, + default_code_page: CodePage = .windows1252, + ignore_include_env_var: bool = false, + extra_include_paths: []const []const u8 = &.{}, + /// This is just an API convenience to allow separately passing 'system' (i.e. those + /// that would normally be gotten from the INCLUDE env var) include paths. This is mostly + /// intended for use when setting `ignore_include_env_var = true`. When `ignore_include_env_var` + /// is false, `system_include_paths` will be searched before the paths in the INCLUDE env var. + system_include_paths: []const []const u8 = &.{}, + default_language_id: ?u16 = null, + // TODO: Implement verbose output + verbose: bool = false, + null_terminate_string_table_strings: bool = false, + /// Note: This is a u15 to ensure that the maximum number of UTF-16 code units + /// plus a null-terminator can always fit into a u16. 
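+    /// (The largest u15 value is 32767, and 32767 code units + 1 null-terminator = 32768,
+    /// which still fits within a u16 whose maximum value is 65535.)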
+ max_string_literal_codepoints: u15 = lex.default_max_string_literal_codepoints, + silent_duplicate_control_ids: bool = false, + warn_instead_of_error_on_invalid_code_page: bool = false, +}; + +pub fn compile(allocator: Allocator, source: []const u8, writer: anytype, options: CompileOptions) !void { + var lexer = lex.Lexer.init(source, .{ + .default_code_page = options.default_code_page, + .source_mappings = options.source_mappings, + .max_string_literal_codepoints = options.max_string_literal_codepoints, + }); + var parser = Parser.init(&lexer, .{ + .warn_instead_of_error_on_invalid_code_page = options.warn_instead_of_error_on_invalid_code_page, + }); + var tree = try parser.parse(allocator, options.diagnostics); + defer tree.deinit(); + + var search_dirs = std.ArrayList(SearchDir).init(allocator); + defer { + for (search_dirs.items) |*search_dir| { + search_dir.deinit(allocator); + } + search_dirs.deinit(); + } + + if (options.source_mappings) |source_mappings| { + const root_path = source_mappings.files.get(source_mappings.root_filename_offset); + // If dirname returns null, then the root path will be the same as + // the cwd so we don't need to add it as a distinct search path. + if (std.fs.path.dirname(root_path)) |root_dir_path| { + var root_dir = try options.cwd.openDir(root_dir_path, .{}); + errdefer root_dir.close(); + try search_dirs.append(.{ .dir = root_dir, .path = try allocator.dupe(u8, root_dir_path) }); + } + } + // Re-open the passed in cwd since we want to be able to close it (std.fs.cwd() shouldn't be closed) + // `catch unreachable` since `options.cwd` is expected to be a valid dir handle, so opening + // a new handle to it should be fine as well. + // TODO: Maybe catch and return an error instead + const cwd_dir = options.cwd.openDir(".", .{}) catch unreachable; + try search_dirs.append(.{ .dir = cwd_dir, .path = null }); + for (options.extra_include_paths) |extra_include_path| { + var dir = openSearchPathDir(options.cwd, extra_include_path) catch { + // TODO: maybe a warning that the search path is skipped? + continue; + }; + errdefer dir.close(); + try search_dirs.append(.{ .dir = dir, .path = try allocator.dupe(u8, extra_include_path) }); + } + for (options.system_include_paths) |system_include_path| { + var dir = openSearchPathDir(options.cwd, system_include_path) catch { + // TODO: maybe a warning that the search path is skipped? + continue; + }; + errdefer dir.close(); + try search_dirs.append(.{ .dir = dir, .path = try allocator.dupe(u8, system_include_path) }); + } + if (!options.ignore_include_env_var) { + const INCLUDE = std.process.getEnvVarOwned(allocator, "INCLUDE") catch ""; + defer allocator.free(INCLUDE); + + // TODO: Should this be platform-specific? How does windres/llvm-rc handle this (if at all)? 
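+        // (';' is the conventional separator for INCLUDE/PATH-style environment variables
+        // on Windows; POSIX-style environments typically separate path lists with ':'.)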
+ var it = std.mem.tokenize(u8, INCLUDE, ";"); + while (it.next()) |search_path| { + var dir = openSearchPathDir(options.cwd, search_path) catch continue; + errdefer dir.close(); + try search_dirs.append(.{ .dir = dir, .path = try allocator.dupe(u8, search_path) }); + } + } + + var arena_allocator = std.heap.ArenaAllocator.init(allocator); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + var compiler = Compiler{ + .source = source, + .arena = arena, + .allocator = allocator, + .cwd = options.cwd, + .diagnostics = options.diagnostics, + .dependencies_list = options.dependencies_list, + .input_code_pages = &tree.input_code_pages, + .output_code_pages = &tree.output_code_pages, + // This is only safe because we know search_dirs won't be modified past this point + .search_dirs = search_dirs.items, + .null_terminate_string_table_strings = options.null_terminate_string_table_strings, + .silent_duplicate_control_ids = options.silent_duplicate_control_ids, + }; + if (options.default_language_id) |default_language_id| { + compiler.state.language = res.Language.fromInt(default_language_id); + } + + try compiler.writeRoot(tree.root(), writer); +} + +pub const Compiler = struct { + source: []const u8, + arena: Allocator, + allocator: Allocator, + cwd: std.fs.Dir, + state: State = .{}, + diagnostics: *Diagnostics, + dependencies_list: ?*std.ArrayList([]const u8), + input_code_pages: *const CodePageLookup, + output_code_pages: *const CodePageLookup, + search_dirs: []SearchDir, + null_terminate_string_table_strings: bool, + silent_duplicate_control_ids: bool, + + pub const State = struct { + icon_id: u16 = 1, + string_tables: StringTablesByLanguage = .{}, + language: res.Language = .{}, + font_dir: FontDir = .{}, + version: u32 = 0, + characteristics: u32 = 0, + }; + + pub fn writeRoot(self: *Compiler, root: *Node.Root, writer: anytype) !void { + try writeEmptyResource(writer); + for (root.body) |node| { + try self.writeNode(node, writer); + } + + // now write the FONTDIR (if it has anything in it) + try self.state.font_dir.writeResData(self, writer); + if (self.state.font_dir.fonts.items.len != 0) { + // The Win32 RC compiler may write a different FONTDIR resource than us, + // due to it sometimes writing a non-zero-length device name/face name + // whereas we *always* write them both as zero-length. + // + // In practical terms, this doesn't matter, since for various reasons the format + // of the FONTDIR cannot be relied on and is seemingly not actually used by anything + // anymore. We still want to emit some sort of diagnostic for the purposes of being able + // to know that our .RES is intentionally not meant to be byte-for-byte identical with + // the rc.exe output. + // + // By using the hint type here, we allow this diagnostic to be detected in code, + // but it will not be printed since the end-user doesn't need to care. 
+ try self.addErrorDetails(.{ + .err = .result_contains_fontdir, + .type = .hint, + .token = undefined, + }); + } + // once we've written every else out, we can write out the finalized STRINGTABLE resources + var string_tables_it = self.state.string_tables.tables.iterator(); + while (string_tables_it.next()) |string_table_entry| { + var string_table_it = string_table_entry.value_ptr.blocks.iterator(); + while (string_table_it.next()) |entry| { + try entry.value_ptr.writeResData(self, string_table_entry.key_ptr.*, entry.key_ptr.*, writer); + } + } + } + + pub fn writeNode(self: *Compiler, node: *Node, writer: anytype) !void { + switch (node.id) { + .root => unreachable, // writeRoot should be called directly instead + .resource_external => try self.writeResourceExternal(@fieldParentPtr(Node.ResourceExternal, "base", node), writer), + .resource_raw_data => try self.writeResourceRawData(@fieldParentPtr(Node.ResourceRawData, "base", node), writer), + .literal => unreachable, // this is context dependent and should be handled by its parent + .binary_expression => unreachable, + .grouped_expression => unreachable, + .not_expression => unreachable, + .invalid => {}, // no-op, currently only used for dangling literals at EOF + .accelerators => try self.writeAccelerators(@fieldParentPtr(Node.Accelerators, "base", node), writer), + .accelerator => unreachable, // handled by writeAccelerators + .dialog => try self.writeDialog(@fieldParentPtr(Node.Dialog, "base", node), writer), + .control_statement => unreachable, + .toolbar => try self.writeToolbar(@fieldParentPtr(Node.Toolbar, "base", node), writer), + .menu => try self.writeMenu(@fieldParentPtr(Node.Menu, "base", node), writer), + .menu_item => unreachable, + .menu_item_separator => unreachable, + .menu_item_ex => unreachable, + .popup => unreachable, + .popup_ex => unreachable, + .version_info => try self.writeVersionInfo(@fieldParentPtr(Node.VersionInfo, "base", node), writer), + .version_statement => unreachable, + .block => unreachable, + .block_value => unreachable, + .block_value_value => unreachable, + .string_table => try self.writeStringTable(@fieldParentPtr(Node.StringTable, "base", node)), + .string_table_string => unreachable, // handled by writeStringTable + .language_statement => self.writeLanguageStatement(@fieldParentPtr(Node.LanguageStatement, "base", node)), + .font_statement => unreachable, + .simple_statement => self.writeTopLevelSimpleStatement(@fieldParentPtr(Node.SimpleStatement, "base", node)), + } + } + + /// Returns the filename encoded as UTF-8 (allocated by self.allocator) + pub fn evaluateFilenameExpression(self: *Compiler, expression_node: *Node) ![]u8 { + switch (expression_node.id) { + .literal => { + const literal_node = expression_node.cast(.literal).?; + switch (literal_node.token.id) { + .literal, .number => { + const slice = literal_node.token.slice(self.source); + const code_page = self.input_code_pages.getForToken(literal_node.token); + var buf = try std.ArrayList(u8).initCapacity(self.allocator, slice.len); + errdefer buf.deinit(); + + var index: usize = 0; + while (code_page.codepointAt(index, slice)) |codepoint| : (index += codepoint.byte_len) { + const c = codepoint.value; + if (c == code_pages.Codepoint.invalid) { + try buf.appendSlice("�"); + } else { + // Anything that is not returned as an invalid codepoint must be encodable as UTF-8. 
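+                                // (hence the `catch unreachable` on the two encode calls below)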
+ const utf8_len = std.unicode.utf8CodepointSequenceLength(c) catch unreachable; + try buf.ensureUnusedCapacity(utf8_len); + _ = std.unicode.utf8Encode(c, buf.unusedCapacitySlice()) catch unreachable; + buf.items.len += utf8_len; + } + } + + return buf.toOwnedSlice(); + }, + .quoted_ascii_string, .quoted_wide_string => { + const slice = literal_node.token.slice(self.source); + const column = literal_node.token.calculateColumn(self.source, 8, null); + const bytes = SourceBytes{ .slice = slice, .code_page = self.input_code_pages.getForToken(literal_node.token) }; + + var buf = std.ArrayList(u8).init(self.allocator); + errdefer buf.deinit(); + + // Filenames are sort-of parsed as if they were wide strings, but the max escape width of + // hex/octal escapes is still determined by the L prefix. Since we want to end up with + // UTF-8, we can parse either string type directly to UTF-8. + var parser = literals.IterativeStringParser.init(bytes, .{ + .start_column = column, + .diagnostics = .{ .diagnostics = self.diagnostics, .token = literal_node.token }, + }); + + while (try parser.nextUnchecked()) |parsed| { + const c = parsed.codepoint; + if (c == code_pages.Codepoint.invalid) { + try buf.appendSlice("�"); + } else { + var codepoint_buf: [4]u8 = undefined; + // If the codepoint cannot be encoded, we fall back to � + if (std.unicode.utf8Encode(c, &codepoint_buf)) |len| { + try buf.appendSlice(codepoint_buf[0..len]); + } else |_| { + try buf.appendSlice("�"); + } + } + } + + return buf.toOwnedSlice(); + }, + else => { + std.debug.print("unexpected filename token type: {}\n", .{literal_node.token}); + unreachable; // no other token types should be in a filename literal node + }, + } + }, + .binary_expression => { + const binary_expression_node = expression_node.cast(.binary_expression).?; + return self.evaluateFilenameExpression(binary_expression_node.right); + }, + .grouped_expression => { + const grouped_expression_node = expression_node.cast(.grouped_expression).?; + return self.evaluateFilenameExpression(grouped_expression_node.expression); + }, + else => unreachable, + } + } + + /// https://learn.microsoft.com/en-us/windows/win32/menurc/searching-for-files + /// + /// Searches, in this order: + /// Directory of the 'root' .rc file (if different from CWD) + /// CWD + /// extra_include_paths (resolved relative to CWD) + /// system_include_paths (resolve relative to CWD) + /// INCLUDE environment var paths (only if ignore_include_env_var is false; resolved relative to CWD) + /// + /// Note: The CWD being searched *in addition to* the directory of the 'root' .rc file + /// is also how the Win32 RC compiler preprocessor searches for includes, but that + /// differs from how the clang preprocessor searches for includes. + /// + /// Note: This will always return the first matching file that can be opened. + /// This matches the Win32 RC compiler, which will fail with an error if the first + /// matching file is invalid. That is, it does not do the `cmd` PATH searching + /// thing of continuing to look for matching files until it finds a valid + /// one if a matching file is invalid. + fn searchForFile(self: *Compiler, path: []const u8) !std.fs.File { + // If the path is absolute, then it is not resolved relative to any search + // paths, so there's no point in checking them. 
+        //
+        // This behavior was determined/confirmed with the following test:
+        // - A `test.rc` file with the contents `1 RCDATA "/test.bin"`
+        // - A `test.bin` file at `C:\test.bin`
+        // - A `test.bin` file at `inc\test.bin` relative to the .rc file
+        // - Invoking `rc` with `rc /i inc test.rc`
+        //
+        // This results in a .res file with the contents of `C:\test.bin`, not
+        // the contents of `inc\test.bin`. Further, if `C:\test.bin` is deleted,
+        // then it starts failing to find `/test.bin`, meaning that it does not resolve
+        // `/test.bin` relative to include paths and instead only treats it as
+        // an absolute path.
+        if (std.fs.path.isAbsolute(path)) {
+            const file = try utils.openFileNotDir(std.fs.cwd(), path, .{});
+            errdefer file.close();
+
+            if (self.dependencies_list) |dependencies_list| {
+                const duped_path = try dependencies_list.allocator.dupe(u8, path);
+                errdefer dependencies_list.allocator.free(duped_path);
+                try dependencies_list.append(duped_path);
+            }
+            return file;
+        }
+
+        var first_error: ?std.fs.File.OpenError = null;
+        for (self.search_dirs) |search_dir| {
+            if (utils.openFileNotDir(search_dir.dir, path, .{})) |file| {
+                errdefer file.close();
+
+                if (self.dependencies_list) |dependencies_list| {
+                    const searched_file_path = try std.fs.path.join(dependencies_list.allocator, &.{
+                        search_dir.path orelse "", path,
+                    });
+                    errdefer dependencies_list.allocator.free(searched_file_path);
+                    try dependencies_list.append(searched_file_path);
+                }
+
+                return file;
+            } else |err| if (first_error == null) {
+                first_error = err;
+            }
+        }
+        return first_error orelse error.FileNotFound;
+    }
+
+    pub fn writeResourceExternal(self: *Compiler, node: *Node.ResourceExternal, writer: anytype) !void {
+        // Init header with data size zero for now, will need to fill it in later
+        var header = try self.resourceHeader(node.id, node.type, .{});
+        defer header.deinit(self.allocator);
+
+        const maybe_predefined_type = header.predefinedResourceType();
+
+        // DLGINCLUDE has special handling that doesn't actually need the file to exist
+        if (maybe_predefined_type != null and maybe_predefined_type.? == .DLGINCLUDE) {
+            const filename_token = node.filename.cast(.literal).?.token;
+            const parsed_filename = try self.parseQuotedStringAsAsciiString(filename_token);
+            defer self.allocator.free(parsed_filename);
+
+            header.applyMemoryFlags(node.common_resource_attributes, self.source);
+            header.data_size = @intCast(parsed_filename.len + 1);
+            try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id });
+            try writer.writeAll(parsed_filename);
+            try writer.writeByte(0);
+            try writeDataPadding(writer, header.data_size);
+            return;
+        }
+
+        const filename_utf8 = try self.evaluateFilenameExpression(node.filename);
+        defer self.allocator.free(filename_utf8);
+
+        // TODO: More robust checking of the validity of the filename.
+        // This currently only checks for NUL bytes, but it should probably also check for
+        // platform-specific invalid characters like '*', '?', '"', '<', '>', '|' (Windows)
+        // Related: https://github.com/ziglang/zig/pull/14533#issuecomment-1416888193
+        if (std.mem.indexOfScalar(u8, filename_utf8, 0) != null) {
+            return self.addErrorDetailsAndFail(.{
+                .err = .invalid_filename,
+                .token = node.filename.getFirstToken(),
+                .token_span_end = node.filename.getLastToken(),
+                .extra = .{ .number = 0 },
+            });
+        }
+
+        // Allow plain number literals, but complex number expressions are evaluated strangely
+        // and almost certainly lead to things not intended by the user (e.g.
'(1+-1)' evaluates + // to the filename '-1'), so error if the filename node is a grouped/binary expression. + // Note: This is done here instead of during parsing so that we can easily include + // the evaluated filename as part of the error messages. + if (node.filename.id != .literal) { + const filename_string_index = try self.diagnostics.putString(filename_utf8); + try self.addErrorDetails(.{ + .err = .number_expression_as_filename, + .token = node.filename.getFirstToken(), + .token_span_end = node.filename.getLastToken(), + .extra = .{ .number = filename_string_index }, + }); + return self.addErrorDetailsAndFail(.{ + .err = .number_expression_as_filename, + .type = .note, + .token = node.filename.getFirstToken(), + .token_span_end = node.filename.getLastToken(), + .print_source_line = false, + .extra = .{ .number = filename_string_index }, + }); + } + // From here on out, we know that the filename must be comprised of a single token, + // so get it here to simplify future usage. + const filename_token = node.filename.getFirstToken(); + + const file = self.searchForFile(filename_utf8) catch |err| switch (err) { + error.OutOfMemory => |e| return e, + else => |e| { + const filename_string_index = try self.diagnostics.putString(filename_utf8); + return self.addErrorDetailsAndFail(.{ + .err = .file_open_error, + .token = filename_token, + .extra = .{ .file_open_error = .{ + .err = ErrorDetails.FileOpenError.enumFromError(e), + .filename_string_index = filename_string_index, + } }, + }); + }, + }; + defer file.close(); + + if (maybe_predefined_type) |predefined_type| { + switch (predefined_type) { + .GROUP_ICON, .GROUP_CURSOR => { + // Check for animated icon first + if (ani.isAnimatedIcon(file.reader())) { + // Animated icons are just put into the resource unmodified, + // and the resource type changes to ANIICON/ANICURSOR + + const new_predefined_type: res.RT = switch (predefined_type) { + .GROUP_ICON => .ANIICON, + .GROUP_CURSOR => .ANICURSOR, + else => unreachable, + }; + header.type_value.ordinal = @intFromEnum(new_predefined_type); + header.memory_flags = MemoryFlags.defaults(new_predefined_type); + header.applyMemoryFlags(node.common_resource_attributes, self.source); + header.data_size = @intCast(try file.getEndPos()); + + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + try file.seekTo(0); + try writeResourceData(writer, file.reader(), header.data_size); + return; + } + + // isAnimatedIcon moved the file cursor so reset to the start + try file.seekTo(0); + + const icon_dir = ico.read(self.allocator, file.reader(), try file.getEndPos()) catch |err| switch (err) { + error.OutOfMemory => |e| return e, + else => |e| { + return self.iconReadError( + e, + filename_utf8, + filename_token, + predefined_type, + ); + }, + }; + defer icon_dir.deinit(); + + // This limit is inherent to the ico format since number of entries is a u16 field. + std.debug.assert(icon_dir.entries.len <= std.math.maxInt(u16)); + + // Note: The Win32 RC compiler will compile the resource as whatever type is + // in the icon_dir regardless of the type of resource specified in the .rc. + // This leads to unusable .res files when the types mismatch, so + // we error instead. 
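+                    // For example, an ICON statement pointing at a file whose directory header
+                    // identifies it as a cursor (or vice versa) fails with the error below.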
+ const res_types_match = switch (predefined_type) { + .GROUP_ICON => icon_dir.image_type == .icon, + .GROUP_CURSOR => icon_dir.image_type == .cursor, + else => unreachable, + }; + if (!res_types_match) { + return self.addErrorDetailsAndFail(.{ + .err = .icon_dir_and_resource_type_mismatch, + .token = filename_token, + .extra = .{ .resource = switch (predefined_type) { + .GROUP_ICON => .icon, + .GROUP_CURSOR => .cursor, + else => unreachable, + } }, + }); + } + + // Memory flags affect the RT_ICON and the RT_GROUP_ICON differently + var icon_memory_flags = MemoryFlags.defaults(res.RT.ICON); + applyToMemoryFlags(&icon_memory_flags, node.common_resource_attributes, self.source); + applyToGroupMemoryFlags(&header.memory_flags, node.common_resource_attributes, self.source); + + const first_icon_id = self.state.icon_id; + const entry_type = if (predefined_type == .GROUP_ICON) @intFromEnum(res.RT.ICON) else @intFromEnum(res.RT.CURSOR); + for (icon_dir.entries, 0..) |*entry, entry_i_usize| { + // We know that the entry index must fit within a u16, so + // cast it here to simplify usage sites. + const entry_i: u16 = @intCast(entry_i_usize); + var full_data_size = entry.data_size_in_bytes; + if (icon_dir.image_type == .cursor) { + full_data_size = std.math.add(u32, full_data_size, 4) catch { + return self.addErrorDetailsAndFail(.{ + .err = .resource_data_size_exceeds_max, + .token = node.id, + }); + }; + } + + const image_header = ResourceHeader{ + .type_value = .{ .ordinal = entry_type }, + .name_value = .{ .ordinal = self.state.icon_id }, + .data_size = full_data_size, + .memory_flags = icon_memory_flags, + .language = self.state.language, + .version = self.state.version, + .characteristics = self.state.characteristics, + }; + try image_header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + + // From https://learn.microsoft.com/en-us/windows/win32/menurc/localheader: + // > The LOCALHEADER structure is the first data written to the RT_CURSOR + // > resource if a RESDIR structure contains information about a cursor. + // where LOCALHEADER is `struct { WORD xHotSpot; WORD yHotSpot; }` + if (icon_dir.image_type == .cursor) { + try writer.writeIntLittle(u16, entry.type_specific_data.cursor.hotspot_x); + try writer.writeIntLittle(u16, entry.type_specific_data.cursor.hotspot_y); + } + + try file.seekTo(entry.data_offset_from_start_of_file); + const header_bytes = file.reader().readBytesNoEof(16) catch { + return self.iconReadError( + error.UnexpectedEOF, + filename_utf8, + filename_token, + predefined_type, + ); + }; + + const image_format = ico.ImageFormat.detect(&header_bytes); + if (!image_format.validate(&header_bytes)) { + return self.iconReadError( + error.InvalidHeader, + filename_utf8, + filename_token, + predefined_type, + ); + } + switch (image_format) { + .riff => switch (icon_dir.image_type) { + .icon => { + // The Win32 RC compiler treats this as an error, but icon dirs + // with RIFF encoded icons within them work ~okay (they work + // in some places but not others, they may not animate, etc) if they are + // allowed to be compiled. 
+ try self.addErrorDetails(.{ + .err = .rc_would_error_on_icon_dir, + .type = .warning, + .token = filename_token, + .extra = .{ .icon_dir = .{ .icon_type = .icon, .icon_format = .riff, .index = entry_i } }, + }); + try self.addErrorDetails(.{ + .err = .rc_would_error_on_icon_dir, + .type = .note, + .print_source_line = false, + .token = filename_token, + .extra = .{ .icon_dir = .{ .icon_type = .icon, .icon_format = .riff, .index = entry_i } }, + }); + }, + .cursor => { + // The Win32 RC compiler errors in this case too, but we only error + // here because the cursor would fail to be loaded at runtime if we + // compiled it. + return self.addErrorDetailsAndFail(.{ + .err = .format_not_supported_in_icon_dir, + .token = filename_token, + .extra = .{ .icon_dir = .{ .icon_type = .cursor, .icon_format = .riff, .index = entry_i } }, + }); + }, + }, + .png => switch (icon_dir.image_type) { + .icon => { + // PNG always seems to have 1 for color planes no matter what + entry.type_specific_data.icon.color_planes = 1; + // These seem to be the only values of num_colors that + // get treated specially + entry.type_specific_data.icon.bits_per_pixel = switch (entry.num_colors) { + 2 => 1, + 8 => 3, + 16 => 4, + else => entry.type_specific_data.icon.bits_per_pixel, + }; + }, + .cursor => { + // The Win32 RC compiler treats this as an error, but cursor dirs + // with PNG encoded icons within them work fine if they are + // allowed to be compiled. + try self.addErrorDetails(.{ + .err = .rc_would_error_on_icon_dir, + .type = .warning, + .token = filename_token, + .extra = .{ .icon_dir = .{ .icon_type = .cursor, .icon_format = .png, .index = entry_i } }, + }); + }, + }, + .dib => { + const bitmap_header: *const ico.BitmapHeader = @ptrCast(@alignCast(&header_bytes)); + const bitmap_version = ico.BitmapHeader.Version.get(std.mem.littleToNative(u32, bitmap_header.bcSize)); + + // The Win32 RC compiler only allows headers with + // `bcSize == sizeof(BITMAPINFOHEADER)`, but it seems unlikely + // that there's a good reason for that outside of too-old + // bitmap headers. + // TODO: Need to test V4 and V5 bitmaps to check they actually work + if (bitmap_version == .@"win2.0") { + return self.addErrorDetailsAndFail(.{ + .err = .rc_would_error_on_bitmap_version, + .token = filename_token, + .extra = .{ .icon_dir = .{ + .icon_type = if (icon_dir.image_type == .icon) .icon else .cursor, + .icon_format = image_format, + .index = entry_i, + .bitmap_version = bitmap_version, + } }, + }); + } else if (bitmap_version != .@"nt3.1") { + try self.addErrorDetails(.{ + .err = .rc_would_error_on_bitmap_version, + .type = .warning, + .token = filename_token, + .extra = .{ .icon_dir = .{ + .icon_type = if (icon_dir.image_type == .icon) .icon else .cursor, + .icon_format = image_format, + .index = entry_i, + .bitmap_version = bitmap_version, + } }, + }); + } + + switch (icon_dir.image_type) { + .icon => { + // The values in the icon's BITMAPINFOHEADER always take precedence over + // the values in the IconDir, but not in the LOCALHEADER (see above). 
+ entry.type_specific_data.icon.color_planes = std.mem.littleToNative(u16, bitmap_header.bcPlanes); + entry.type_specific_data.icon.bits_per_pixel = std.mem.littleToNative(u16, bitmap_header.bcBitCount); + }, + .cursor => { + // Only cursors get the width/height from BITMAPINFOHEADER (icons don't) + entry.width = @intCast(bitmap_header.bcWidth); + entry.height = @intCast(bitmap_header.bcHeight); + entry.type_specific_data.cursor.hotspot_x = std.mem.littleToNative(u16, bitmap_header.bcPlanes); + entry.type_specific_data.cursor.hotspot_y = std.mem.littleToNative(u16, bitmap_header.bcBitCount); + }, + } + }, + } + + try file.seekTo(entry.data_offset_from_start_of_file); + try writeResourceDataNoPadding(writer, file.reader(), entry.data_size_in_bytes); + try writeDataPadding(writer, full_data_size); + + if (self.state.icon_id == std.math.maxInt(u16)) { + try self.addErrorDetails(.{ + .err = .max_icon_ids_exhausted, + .print_source_line = false, + .token = filename_token, + .extra = .{ .icon_dir = .{ + .icon_type = if (icon_dir.image_type == .icon) .icon else .cursor, + .icon_format = image_format, + .index = entry_i, + } }, + }); + return self.addErrorDetailsAndFail(.{ + .err = .max_icon_ids_exhausted, + .type = .note, + .token = filename_token, + .extra = .{ .icon_dir = .{ + .icon_type = if (icon_dir.image_type == .icon) .icon else .cursor, + .icon_format = image_format, + .index = entry_i, + } }, + }); + } + self.state.icon_id += 1; + } + + header.data_size = icon_dir.getResDataSize(); + + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + try icon_dir.writeResData(writer, first_icon_id); + try writeDataPadding(writer, header.data_size); + return; + }, + .RCDATA, .HTML, .MANIFEST, .MESSAGETABLE, .DLGINIT, .PLUGPLAY => { + header.applyMemoryFlags(node.common_resource_attributes, self.source); + }, + .BITMAP => { + header.applyMemoryFlags(node.common_resource_attributes, self.source); + const file_size = try file.getEndPos(); + + const bitmap_info = bmp.read(file.reader(), file_size) catch |err| { + const filename_string_index = try self.diagnostics.putString(filename_utf8); + return self.addErrorDetailsAndFail(.{ + .err = .bmp_read_error, + .token = filename_token, + .extra = .{ .bmp_read_error = .{ + .err = ErrorDetails.BitmapReadError.enumFromError(err), + .filename_string_index = filename_string_index, + } }, + }); + }; + + if (bitmap_info.getActualPaletteByteLen() > bitmap_info.getExpectedPaletteByteLen()) { + const num_ignored_bytes = bitmap_info.getActualPaletteByteLen() - bitmap_info.getExpectedPaletteByteLen(); + var number_as_bytes: [8]u8 = undefined; + std.mem.writeIntNative(u64, &number_as_bytes, num_ignored_bytes); + const value_string_index = try self.diagnostics.putString(&number_as_bytes); + try self.addErrorDetails(.{ + .err = .bmp_ignored_palette_bytes, + .type = .warning, + .token = filename_token, + .extra = .{ .number = value_string_index }, + }); + } else if (bitmap_info.getActualPaletteByteLen() < bitmap_info.getExpectedPaletteByteLen()) { + const num_padding_bytes = bitmap_info.getExpectedPaletteByteLen() - bitmap_info.getActualPaletteByteLen(); + + // TODO: Make this configurable (command line option) + const max_missing_bytes = 4096; + if (num_padding_bytes > max_missing_bytes) { + var numbers_as_bytes: [16]u8 = undefined; + std.mem.writeIntNative(u64, numbers_as_bytes[0..8], num_padding_bytes); + std.mem.writeIntNative(u64, numbers_as_bytes[8..16], max_missing_bytes); + const values_string_index = try 
self.diagnostics.putString(&numbers_as_bytes); + try self.addErrorDetails(.{ + .err = .bmp_too_many_missing_palette_bytes, + .token = filename_token, + .extra = .{ .number = values_string_index }, + }); + return self.addErrorDetailsAndFail(.{ + .err = .bmp_too_many_missing_palette_bytes, + .type = .note, + .print_source_line = false, + .token = filename_token, + }); + } + + var number_as_bytes: [8]u8 = undefined; + std.mem.writeIntNative(u64, &number_as_bytes, num_padding_bytes); + const value_string_index = try self.diagnostics.putString(&number_as_bytes); + try self.addErrorDetails(.{ + .err = .bmp_missing_palette_bytes, + .type = .warning, + .token = filename_token, + .extra = .{ .number = value_string_index }, + }); + const pixel_data_len = bitmap_info.getPixelDataLen(file_size); + if (pixel_data_len > 0) { + const miscompiled_bytes = @min(pixel_data_len, num_padding_bytes); + std.mem.writeIntNative(u64, &number_as_bytes, miscompiled_bytes); + const miscompiled_bytes_string_index = try self.diagnostics.putString(&number_as_bytes); + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_bmp_palette_padding, + .type = .warning, + .token = filename_token, + .extra = .{ .number = miscompiled_bytes_string_index }, + }); + } + } + + // TODO: It might be possible that the calculation done in this function + // could underflow if the underlying file is modified while reading + // it, but need to think about it more to determine if that's a + // real possibility + const bmp_bytes_to_write: u32 = @intCast(bitmap_info.getExpectedByteLen(file_size)); + + header.data_size = bmp_bytes_to_write; + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + try file.seekTo(bmp.file_header_len); + const file_reader = file.reader(); + try writeResourceDataNoPadding(writer, file_reader, bitmap_info.dib_header_size); + if (bitmap_info.getBitmasksByteLen() > 0) { + try writeResourceDataNoPadding(writer, file_reader, bitmap_info.getBitmasksByteLen()); + } + if (bitmap_info.getExpectedPaletteByteLen() > 0) { + try writeResourceDataNoPadding(writer, file_reader, @intCast(bitmap_info.getActualPaletteByteLen())); + const padding_bytes = bitmap_info.getMissingPaletteByteLen(); + if (padding_bytes > 0) { + try writer.writeByteNTimes(0, padding_bytes); + } + } + try file.seekTo(bitmap_info.pixel_data_offset); + const pixel_bytes: u32 = @intCast(file_size - bitmap_info.pixel_data_offset); + try writeResourceDataNoPadding(writer, file_reader, pixel_bytes); + try writeDataPadding(writer, bmp_bytes_to_write); + return; + }, + .FONT => { + if (self.state.font_dir.ids.get(header.name_value.ordinal) != null) { + // Add warning and skip this resource + // Note: The Win32 compiler prints this as an error but it doesn't fail the compilation + // and the duplicate resource is skipped. 
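+                        // For example, if two FONT resources reuse the same ordinal ID, the
+                        // second one only produces the warning/note pair below and is not
+                        // written to the .res.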
+ try self.addErrorDetails(ErrorDetails{ + .err = .font_id_already_defined, + .token = node.id, + .type = .warning, + .extra = .{ .number = header.name_value.ordinal }, + }); + try self.addErrorDetails(ErrorDetails{ + .err = .font_id_already_defined, + .token = self.state.font_dir.ids.get(header.name_value.ordinal).?, + .type = .note, + .extra = .{ .number = header.name_value.ordinal }, + }); + return; + } + header.applyMemoryFlags(node.common_resource_attributes, self.source); + const file_size = try file.getEndPos(); + if (file_size > std.math.maxInt(u32)) { + return self.addErrorDetailsAndFail(.{ + .err = .resource_data_size_exceeds_max, + .token = node.id, + }); + } + + // We now know that the data size will fit in a u32 + header.data_size = @intCast(file_size); + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + + var header_slurping_reader = headerSlurpingReader(148, file.reader()); + try writeResourceData(writer, header_slurping_reader.reader(), header.data_size); + + try self.state.font_dir.add(self.arena, FontDir.Font{ + .id = header.name_value.ordinal, + .header_bytes = header_slurping_reader.slurped_header, + }, node.id); + return; + }, + .ACCELERATOR, + .ANICURSOR, + .ANIICON, + .CURSOR, + .DIALOG, + .DLGINCLUDE, + .FONTDIR, + .ICON, + .MENU, + .STRING, + .TOOLBAR, + .VERSION, + .VXD, + => unreachable, + _ => unreachable, + } + } else { + header.applyMemoryFlags(node.common_resource_attributes, self.source); + } + + // Fallback to just writing out the entire contents of the file + const data_size = try file.getEndPos(); + if (data_size > std.math.maxInt(u32)) { + return self.addErrorDetailsAndFail(.{ + .err = .resource_data_size_exceeds_max, + .token = node.id, + }); + } + // We now know that the data size will fit in a u32 + header.data_size = @intCast(data_size); + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + try writeResourceData(writer, file.reader(), header.data_size); + } + + fn iconReadError( + self: *Compiler, + err: ico.ReadError, + filename: []const u8, + token: Token, + predefined_type: res.RT, + ) error{ CompileError, OutOfMemory } { + const filename_string_index = try self.diagnostics.putString(filename); + return self.addErrorDetailsAndFail(.{ + .err = .icon_read_error, + .token = token, + .extra = .{ .icon_read_error = .{ + .err = ErrorDetails.IconReadError.enumFromError(err), + .icon_type = switch (predefined_type) { + .GROUP_ICON => .icon, + .GROUP_CURSOR => .cursor, + else => unreachable, + }, + .filename_string_index = filename_string_index, + } }, + }); + } + + pub const DataType = enum { + number, + ascii_string, + wide_string, + }; + + pub const Data = union(DataType) { + number: Number, + ascii_string: []const u8, + wide_string: [:0]const u16, + + pub fn deinit(self: Data, allocator: Allocator) void { + switch (self) { + .wide_string => |wide_string| { + allocator.free(wide_string); + }, + .ascii_string => |ascii_string| { + allocator.free(ascii_string); + }, + else => {}, + } + } + + pub fn write(self: Data, writer: anytype) !void { + switch (self) { + .number => |number| switch (number.is_long) { + false => try writer.writeIntLittle(WORD, number.asWord()), + true => try writer.writeIntLittle(DWORD, number.value), + }, + .ascii_string => |ascii_string| { + try writer.writeAll(ascii_string); + }, + .wide_string => |wide_string| { + try writer.writeAll(std.mem.sliceAsBytes(wide_string)); + }, + } + } + }; + + /// Assumes that the node is a number or number expression + pub fn 
evaluateNumberExpression(expression_node: *Node, source: []const u8, code_page_lookup: *const CodePageLookup) Number { + switch (expression_node.id) { + .literal => { + const literal_node = expression_node.cast(.literal).?; + std.debug.assert(literal_node.token.id == .number); + const bytes = SourceBytes{ + .slice = literal_node.token.slice(source), + .code_page = code_page_lookup.getForToken(literal_node.token), + }; + return literals.parseNumberLiteral(bytes); + }, + .binary_expression => { + const binary_expression_node = expression_node.cast(.binary_expression).?; + const lhs = evaluateNumberExpression(binary_expression_node.left, source, code_page_lookup); + const rhs = evaluateNumberExpression(binary_expression_node.right, source, code_page_lookup); + const operator_char = binary_expression_node.operator.slice(source)[0]; + return lhs.evaluateOperator(operator_char, rhs); + }, + .grouped_expression => { + const grouped_expression_node = expression_node.cast(.grouped_expression).?; + return evaluateNumberExpression(grouped_expression_node.expression, source, code_page_lookup); + }, + else => unreachable, + } + } + + const FlagsNumber = struct { + value: u32, + not_mask: u32 = 0xFFFFFFFF, + + pub fn evaluateOperator(lhs: FlagsNumber, operator_char: u8, rhs: FlagsNumber) FlagsNumber { + const result = switch (operator_char) { + '-' => lhs.value -% rhs.value, + '+' => lhs.value +% rhs.value, + '|' => lhs.value | rhs.value, + '&' => lhs.value & rhs.value, + else => unreachable, // invalid operator, this would be a lexer/parser bug + }; + return .{ + .value = result, + .not_mask = lhs.not_mask & rhs.not_mask, + }; + } + + pub fn applyNotMask(self: FlagsNumber) u32 { + return self.value & self.not_mask; + } + }; + + pub fn evaluateFlagsExpressionWithDefault(default: u32, expression_node: *Node, source: []const u8, code_page_lookup: *const CodePageLookup) u32 { + var context = FlagsExpressionContext{ .initial_value = default }; + const number = evaluateFlagsExpression(expression_node, source, code_page_lookup, &context); + return number.value; + } + + pub const FlagsExpressionContext = struct { + initial_value: u32 = 0, + initial_value_used: bool = false, + }; + + /// Assumes that the node is a number expression (which can contain not_expressions) + pub fn evaluateFlagsExpression(expression_node: *Node, source: []const u8, code_page_lookup: *const CodePageLookup, context: *FlagsExpressionContext) FlagsNumber { + switch (expression_node.id) { + .literal => { + const literal_node = expression_node.cast(.literal).?; + std.debug.assert(literal_node.token.id == .number); + const bytes = SourceBytes{ + .slice = literal_node.token.slice(source), + .code_page = code_page_lookup.getForToken(literal_node.token), + }; + var value = literals.parseNumberLiteral(bytes).value; + if (!context.initial_value_used) { + context.initial_value_used = true; + value |= context.initial_value; + } + return .{ .value = value }; + }, + .binary_expression => { + const binary_expression_node = expression_node.cast(.binary_expression).?; + const lhs = evaluateFlagsExpression(binary_expression_node.left, source, code_page_lookup, context); + const rhs = evaluateFlagsExpression(binary_expression_node.right, source, code_page_lookup, context); + const operator_char = binary_expression_node.operator.slice(source)[0]; + const result = lhs.evaluateOperator(operator_char, rhs); + return .{ .value = result.applyNotMask() }; + }, + .grouped_expression => { + const grouped_expression_node = 
expression_node.cast(.grouped_expression).?; + return evaluateFlagsExpression(grouped_expression_node.expression, source, code_page_lookup, context); + }, + .not_expression => { + const not_expression = expression_node.cast(.not_expression).?; + const bytes = SourceBytes{ + .slice = not_expression.number_token.slice(source), + .code_page = code_page_lookup.getForToken(not_expression.number_token), + }; + const not_number = literals.parseNumberLiteral(bytes); + if (!context.initial_value_used) { + context.initial_value_used = true; + return .{ .value = context.initial_value & ~not_number.value }; + } + return .{ .value = 0, .not_mask = ~not_number.value }; + }, + else => unreachable, + } + } + + pub fn evaluateDataExpression(self: *Compiler, expression_node: *Node) !Data { + switch (expression_node.id) { + .literal => { + const literal_node = expression_node.cast(.literal).?; + switch (literal_node.token.id) { + .number => { + const number = evaluateNumberExpression(expression_node, self.source, self.input_code_pages); + return .{ .number = number }; + }, + .quoted_ascii_string => { + const column = literal_node.token.calculateColumn(self.source, 8, null); + const bytes = SourceBytes{ + .slice = literal_node.token.slice(self.source), + .code_page = self.input_code_pages.getForToken(literal_node.token), + }; + const parsed = try literals.parseQuotedAsciiString(self.allocator, bytes, .{ + .start_column = column, + .diagnostics = .{ .diagnostics = self.diagnostics, .token = literal_node.token }, + .output_code_page = self.output_code_pages.getForToken(literal_node.token), + }); + errdefer self.allocator.free(parsed); + return .{ .ascii_string = parsed }; + }, + .quoted_wide_string => { + const column = literal_node.token.calculateColumn(self.source, 8, null); + const bytes = SourceBytes{ + .slice = literal_node.token.slice(self.source), + .code_page = self.input_code_pages.getForToken(literal_node.token), + }; + const parsed_string = try literals.parseQuotedWideString(self.allocator, bytes, .{ + .start_column = column, + .diagnostics = .{ .diagnostics = self.diagnostics, .token = literal_node.token }, + }); + errdefer self.allocator.free(parsed_string); + return .{ .wide_string = parsed_string }; + }, + else => { + std.debug.print("unexpected token in literal node: {}\n", .{literal_node.token}); + unreachable; // no other token types should be in a data literal node + }, + } + }, + .binary_expression, .grouped_expression => { + const result = evaluateNumberExpression(expression_node, self.source, self.input_code_pages); + return .{ .number = result }; + }, + .not_expression => unreachable, + else => { + std.debug.print("{}\n", .{expression_node.id}); + @panic("TODO: evaluateDataExpression"); + }, + } + } + + pub fn writeResourceRawData(self: *Compiler, node: *Node.ResourceRawData, writer: anytype) !void { + var data_buffer = std.ArrayList(u8).init(self.allocator); + defer data_buffer.deinit(); + // The header's data length field is a u32 so limit the resource's data size so that + // we know we can always specify the real size. 
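+        // If the evaluated data would exceed that limit, the limited writer below returns
+        // error.NoSpaceLeft, which is reported as a resource_data_size_exceeds_max error.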
+ var limited_writer = limitedWriter(data_buffer.writer(), std.math.maxInt(u32)); + const data_writer = limited_writer.writer(); + + for (node.raw_data) |expression| { + const data = try self.evaluateDataExpression(expression); + defer data.deinit(self.allocator); + data.write(data_writer) catch |err| switch (err) { + error.NoSpaceLeft => { + return self.addErrorDetailsAndFail(.{ + .err = .resource_data_size_exceeds_max, + .token = node.id, + }); + }, + else => |e| return e, + }; + } + + // This intCast can't fail because the limitedWriter above guarantees that + // we will never write more than maxInt(u32) bytes. + const data_len: u32 = @intCast(data_buffer.items.len); + try self.writeResourceHeader(writer, node.id, node.type, data_len, node.common_resource_attributes, self.state.language); + + var data_fbs = std.io.fixedBufferStream(data_buffer.items); + try writeResourceData(writer, data_fbs.reader(), data_len); + } + + pub fn writeResourceHeader(self: *Compiler, writer: anytype, id_token: Token, type_token: Token, data_size: u32, common_resource_attributes: []Token, language: res.Language) !void { + var header = try self.resourceHeader(id_token, type_token, .{ + .language = language, + .data_size = data_size, + }); + defer header.deinit(self.allocator); + + header.applyMemoryFlags(common_resource_attributes, self.source); + + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = id_token }); + } + + pub fn writeResourceDataNoPadding(writer: anytype, data_reader: anytype, data_size: u32) !void { + var limited_reader = std.io.limitedReader(data_reader, data_size); + + const FifoBuffer = std.fifo.LinearFifo(u8, .{ .Static = 4096 }); + var fifo = FifoBuffer.init(); + try fifo.pump(limited_reader.reader(), writer); + } + + pub fn writeResourceData(writer: anytype, data_reader: anytype, data_size: u32) !void { + try writeResourceDataNoPadding(writer, data_reader, data_size); + try writeDataPadding(writer, data_size); + } + + pub fn writeDataPadding(writer: anytype, data_size: u32) !void { + try writer.writeByteNTimes(0, numPaddingBytesNeeded(data_size)); + } + + pub fn numPaddingBytesNeeded(data_size: u32) u2 { + // Result is guaranteed to be between 0 and 3. + return @intCast((4 -% data_size) % 4); + } + + pub fn evaluateAcceleratorKeyExpression(self: *Compiler, node: *Node, is_virt: bool) !u16 { + if (node.isNumberExpression()) { + return evaluateNumberExpression(node, self.source, self.input_code_pages).asWord(); + } else { + std.debug.assert(node.isStringLiteral()); + const literal = @fieldParentPtr(Node.Literal, "base", node); + const bytes = SourceBytes{ + .slice = literal.token.slice(self.source), + .code_page = self.input_code_pages.getForToken(literal.token), + }; + const column = literal.token.calculateColumn(self.source, 8, null); + return res.parseAcceleratorKeyString(bytes, is_virt, .{ + .start_column = column, + .diagnostics = .{ .diagnostics = self.diagnostics, .token = literal.token }, + }); + } + } + + pub fn writeAccelerators(self: *Compiler, node: *Node.Accelerators, writer: anytype) !void { + var data_buffer = std.ArrayList(u8).init(self.allocator); + defer data_buffer.deinit(); + + // The header's data length field is a u32 so limit the resource's data size so that + // we know we can always specify the real size. 
+ var limited_writer = limitedWriter(data_buffer.writer(), std.math.maxInt(u32)); + const data_writer = limited_writer.writer(); + + self.writeAcceleratorsData(node, data_writer) catch |err| switch (err) { + error.NoSpaceLeft => { + return self.addErrorDetailsAndFail(.{ + .err = .resource_data_size_exceeds_max, + .token = node.id, + }); + }, + else => |e| return e, + }; + + // This intCast can't fail because the limitedWriter above guarantees that + // we will never write more than maxInt(u32) bytes. + const data_size: u32 = @intCast(data_buffer.items.len); + var header = try self.resourceHeader(node.id, node.type, .{ + .data_size = data_size, + }); + defer header.deinit(self.allocator); + + header.applyMemoryFlags(node.common_resource_attributes, self.source); + header.applyOptionalStatements(node.optional_statements, self.source, self.input_code_pages); + + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + + var data_fbs = std.io.fixedBufferStream(data_buffer.items); + try writeResourceData(writer, data_fbs.reader(), data_size); + } + + /// Expects `data_writer` to be a LimitedWriter limited to u32, meaning all writes to + /// the writer within this function could return error.NoSpaceLeft + pub fn writeAcceleratorsData(self: *Compiler, node: *Node.Accelerators, data_writer: anytype) !void { + for (node.accelerators, 0..) |accel_node, i| { + const accelerator = @fieldParentPtr(Node.Accelerator, "base", accel_node); + var modifiers = res.AcceleratorModifiers{}; + for (accelerator.type_and_options) |type_or_option| { + const modifier = rc.AcceleratorTypeAndOptions.map.get(type_or_option.slice(self.source)).?; + modifiers.apply(modifier); + } + if (accelerator.event.isNumberExpression() and !modifiers.explicit_ascii_or_virtkey) { + return self.addErrorDetailsAndFail(.{ + .err = .accelerator_type_required, + .token = accelerator.event.getFirstToken(), + .token_span_end = accelerator.event.getLastToken(), + }); + } + const key = self.evaluateAcceleratorKeyExpression(accelerator.event, modifiers.isSet(.virtkey)) catch |err| switch (err) { + error.OutOfMemory => |e| return e, + else => |e| { + return self.addErrorDetailsAndFail(.{ + .err = .invalid_accelerator_key, + .token = accelerator.event.getFirstToken(), + .token_span_end = accelerator.event.getLastToken(), + .extra = .{ .accelerator_error = .{ + .err = ErrorDetails.AcceleratorError.enumFromError(e), + } }, + }); + }, + }; + const cmd_id = evaluateNumberExpression(accelerator.idvalue, self.source, self.input_code_pages); + + if (i == node.accelerators.len - 1) { + modifiers.markLast(); + } + + try data_writer.writeByte(modifiers.value); + try data_writer.writeByte(0); // padding + try data_writer.writeIntLittle(u16, key); + try data_writer.writeIntLittle(u16, cmd_id.asWord()); + try data_writer.writeIntLittle(u16, 0); // padding + } + } + + const DialogOptionalStatementValues = struct { + style: u32 = res.WS.SYSMENU | res.WS.BORDER | res.WS.POPUP, + exstyle: u32 = 0, + class: ?NameOrOrdinal = null, + menu: ?NameOrOrdinal = null, + font: ?FontStatementValues = null, + caption: ?Token = null, + }; + + pub fn writeDialog(self: *Compiler, node: *Node.Dialog, writer: anytype) !void { + var data_buffer = std.ArrayList(u8).init(self.allocator); + defer data_buffer.deinit(); + // The header's data length field is a u32 so limit the resource's data size so that + // we know we can always specify the real size. 
+ var limited_writer = limitedWriter(data_buffer.writer(), std.math.maxInt(u32)); + const data_writer = limited_writer.writer(); + + const resource = Resource.fromString(.{ + .slice = node.type.slice(self.source), + .code_page = self.input_code_pages.getForToken(node.type), + }); + std.debug.assert(resource == .dialog or resource == .dialogex); + + var optional_statement_values: DialogOptionalStatementValues = .{}; + defer { + if (optional_statement_values.class) |class| { + class.deinit(self.allocator); + } + if (optional_statement_values.menu) |menu| { + menu.deinit(self.allocator); + } + } + var skipped_menu_or_classes = std.ArrayList(*Node.SimpleStatement).init(self.allocator); + defer skipped_menu_or_classes.deinit(); + var last_menu: *Node.SimpleStatement = undefined; + var last_class: *Node.SimpleStatement = undefined; + var last_menu_would_be_forced_ordinal = false; + var last_menu_has_digit_as_first_char = false; + var last_menu_did_uppercase = false; + var last_class_would_be_forced_ordinal = false; + + for (node.optional_statements) |optional_statement| { + switch (optional_statement.id) { + .simple_statement => { + const simple_statement = @fieldParentPtr(Node.SimpleStatement, "base", optional_statement); + const statement_identifier = simple_statement.identifier; + const statement_type = rc.OptionalStatements.dialog_map.get(statement_identifier.slice(self.source)) orelse continue; + switch (statement_type) { + .style, .exstyle => { + const style = evaluateFlagsExpressionWithDefault(0, simple_statement.value, self.source, self.input_code_pages); + if (statement_type == .style) { + optional_statement_values.style = style; + } else { + optional_statement_values.exstyle = style; + } + }, + .caption => { + std.debug.assert(simple_statement.value.id == .literal); + const literal_node = @fieldParentPtr(Node.Literal, "base", simple_statement.value); + optional_statement_values.caption = literal_node.token; + }, + .class => { + const is_duplicate = optional_statement_values.class != null; + if (is_duplicate) { + try skipped_menu_or_classes.append(last_class); + } + const forced_ordinal = is_duplicate and optional_statement_values.class.? == .ordinal; + // In the Win32 RC compiler, if any CLASS values that are interpreted as + // an ordinal exist, it affects all future CLASS statements and forces + // them to be treated as an ordinal no matter what. + if (forced_ordinal) { + last_class_would_be_forced_ordinal = true; + } + // clear out the old one if it exists + if (optional_statement_values.class) |prev| { + prev.deinit(self.allocator); + optional_statement_values.class = null; + } + + if (simple_statement.value.isNumberExpression()) { + const class_ordinal = evaluateNumberExpression(simple_statement.value, self.source, self.input_code_pages); + optional_statement_values.class = NameOrOrdinal{ .ordinal = class_ordinal.asWord() }; + } else { + std.debug.assert(simple_statement.value.isStringLiteral()); + const literal_node = @fieldParentPtr(Node.Literal, "base", simple_statement.value); + const parsed = try self.parseQuotedStringAsWideString(literal_node.token); + optional_statement_values.class = NameOrOrdinal{ .name = parsed }; + } + + last_class = simple_statement; + }, + .menu => { + const is_duplicate = optional_statement_values.menu != null; + if (is_duplicate) { + try skipped_menu_or_classes.append(last_menu); + } + const forced_ordinal = is_duplicate and optional_statement_values.menu.? 
== .ordinal;
+                            // In the Win32 RC compiler, if any MENU values that are interpreted as
+                            // an ordinal exist, it affects all future MENU statements and forces
+                            // them to be treated as an ordinal no matter what.
+                            if (forced_ordinal) {
+                                last_menu_would_be_forced_ordinal = true;
+                            }
+                            // clear out the old one if it exists
+                            if (optional_statement_values.menu) |prev| {
+                                prev.deinit(self.allocator);
+                                optional_statement_values.menu = null;
+                            }
+
+                            std.debug.assert(simple_statement.value.id == .literal);
+                            const literal_node = @fieldParentPtr(Node.Literal, "base", simple_statement.value);
+
+                            const token_slice = literal_node.token.slice(self.source);
+                            const bytes = SourceBytes{
+                                .slice = token_slice,
+                                .code_page = self.input_code_pages.getForToken(literal_node.token),
+                            };
+                            optional_statement_values.menu = try NameOrOrdinal.fromString(self.allocator, bytes);
+
+                            if (optional_statement_values.menu.? == .name) {
+                                if (NameOrOrdinal.maybeNonAsciiOrdinalFromString(bytes)) |win32_rc_ordinal| {
+                                    try self.addErrorDetails(.{
+                                        .err = .invalid_digit_character_in_ordinal,
+                                        .type = .err,
+                                        .token = literal_node.token,
+                                    });
+                                    return self.addErrorDetailsAndFail(.{
+                                        .err = .win32_non_ascii_ordinal,
+                                        .type = .note,
+                                        .token = literal_node.token,
+                                        .print_source_line = false,
+                                        .extra = .{ .number = win32_rc_ordinal.ordinal },
+                                    });
+                                }
+                            }
+
+                            // Need to keep track of some properties of the value
+                            // in order to emit the appropriate warning(s) later on.
+                            // See where the warnings are emitted below (outside this loop)
+                            // for the full explanation.
+                            var did_uppercase = false;
+                            var codepoint_i: usize = 0;
+                            while (bytes.code_page.codepointAt(codepoint_i, bytes.slice)) |codepoint| : (codepoint_i += codepoint.byte_len) {
+                                const c = codepoint.value;
+                                switch (c) {
+                                    'a'...'z' => {
+                                        did_uppercase = true;
+                                        break;
+                                    },
+                                    else => {},
+                                }
+                            }
+                            last_menu_did_uppercase = did_uppercase;
+                            last_menu_has_digit_as_first_char = std.ascii.isDigit(token_slice[0]);
+                            last_menu = simple_statement;
+                        },
+                        else => {},
+                    }
+                },
+                .font_statement => {
+                    const font = @fieldParentPtr(Node.FontStatement, "base", optional_statement);
+                    if (optional_statement_values.font != null) {
+                        optional_statement_values.font.?.node = font;
+                    } else {
+                        optional_statement_values.font = FontStatementValues{ .node = font };
+                    }
+                    if (font.weight) |weight| {
+                        const value = evaluateNumberExpression(weight, self.source, self.input_code_pages);
+                        optional_statement_values.font.?.weight = value.asWord();
+                    }
+                    if (font.italic) |italic| {
+                        const value = evaluateNumberExpression(italic, self.source, self.input_code_pages);
+                        optional_statement_values.font.?.italic = value.asWord() != 0;
+                    }
+                },
+                else => {},
+            }
+        }
+
+        for (skipped_menu_or_classes.items) |simple_statement| {
+            const statement_identifier = simple_statement.identifier;
+            const statement_type = rc.OptionalStatements.dialog_map.get(statement_identifier.slice(self.source)) orelse continue;
+            try self.addErrorDetails(.{
+                .err = .duplicate_menu_or_class_skipped,
+                .type = .warning,
+                .token = simple_statement.identifier,
+                .token_span_start = simple_statement.base.getFirstToken(),
+                .token_span_end = simple_statement.base.getLastToken(),
+                .extra = .{ .menu_or_class = switch (statement_type) {
+                    .menu => .menu,
+                    .class => .class,
+                    else => unreachable,
+                } },
+            });
+        }
+        // The Win32 RC compiler miscompiles the value in the following scenario:
+        // Multiple CLASS parameters are specified and any of them are treated as a number, then
+        // the last CLASS is always treated as
a number no matter what + if (last_class_would_be_forced_ordinal and optional_statement_values.class.? == .name) { + const literal_node = @fieldParentPtr(Node.Literal, "base", last_class.value); + const ordinal_value = res.ForcedOrdinal.fromUtf16Le(optional_statement_values.class.?.name); + + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_dialog_class, + .type = .warning, + .token = literal_node.token, + .extra = .{ .number = ordinal_value }, + }); + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_dialog_class, + .type = .note, + .print_source_line = false, + .token = literal_node.token, + .extra = .{ .number = ordinal_value }, + }); + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_dialog_menu_or_class_id_forced_ordinal, + .type = .note, + .print_source_line = false, + .token = literal_node.token, + .extra = .{ .menu_or_class = .class }, + }); + } + // The Win32 RC compiler miscompiles the id in two different scenarios: + // 1. The first character of the ID is a digit, in which case it is always treated as a number + // no matter what (and therefore does not match how the MENU/MENUEX id is parsed) + // 2. Multiple MENU parameters are specified and any of them are treated as a number, then + // the last MENU is always treated as a number no matter what + if ((last_menu_would_be_forced_ordinal or last_menu_has_digit_as_first_char) and optional_statement_values.menu.? == .name) { + const literal_node = @fieldParentPtr(Node.Literal, "base", last_menu.value); + const token_slice = literal_node.token.slice(self.source); + const bytes = SourceBytes{ + .slice = token_slice, + .code_page = self.input_code_pages.getForToken(literal_node.token), + }; + const ordinal_value = res.ForcedOrdinal.fromBytes(bytes); + + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_dialog_menu_id, + .type = .warning, + .token = literal_node.token, + .extra = .{ .number = ordinal_value }, + }); + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_dialog_menu_id, + .type = .note, + .print_source_line = false, + .token = literal_node.token, + .extra = .{ .number = ordinal_value }, + }); + if (last_menu_would_be_forced_ordinal) { + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_dialog_menu_or_class_id_forced_ordinal, + .type = .note, + .print_source_line = false, + .token = literal_node.token, + .extra = .{ .menu_or_class = .menu }, + }); + } else { + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_dialog_menu_id_starts_with_digit, + .type = .note, + .print_source_line = false, + .token = literal_node.token, + }); + } + } + // The MENU id parsing uses the exact same logic as the MENU/MENUEX resource id parsing, + // which means that it will convert ASCII characters to uppercase during the 'name' parsing. + // This turns out not to matter (`LoadMenu` does a case-insensitive lookup anyway), + // but it still makes sense to share the uppercasing logic since the MENU parameter + // here is just a reference to a MENU/MENUEX id within the .exe. + // So, because this is an intentional but inconsequential-to-the-user difference + // between resinator and the Win32 RC compiler, we only emit a hint instead of + // a warning. 
+        if (last_menu_did_uppercase) {
+            const literal_node = @fieldParentPtr(Node.Literal, "base", last_menu.value);
+            try self.addErrorDetails(.{
+                .err = .dialog_menu_id_was_uppercased,
+                .type = .hint,
+                .token = literal_node.token,
+            });
+        }
+
+        const x = evaluateNumberExpression(node.x, self.source, self.input_code_pages);
+        const y = evaluateNumberExpression(node.y, self.source, self.input_code_pages);
+        const width = evaluateNumberExpression(node.width, self.source, self.input_code_pages);
+        const height = evaluateNumberExpression(node.height, self.source, self.input_code_pages);
+
+        // FONT statement requires DS_SETFONT, and if it's not present, DS_SETFONT must be unset
+        if (optional_statement_values.font) |_| {
+            optional_statement_values.style |= res.DS.SETFONT;
+        } else {
+            optional_statement_values.style &= ~res.DS.SETFONT;
+        }
+        // CAPTION statement implies WS_CAPTION
+        if (optional_statement_values.caption) |_| {
+            optional_statement_values.style |= res.WS.CAPTION;
+        }
+
+        self.writeDialogHeaderAndStrings(
+            node,
+            data_writer,
+            resource,
+            &optional_statement_values,
+            x,
+            y,
+            width,
+            height,
+        ) catch |err| switch (err) {
+            // Dialog header and menu/class/title strings can never exceed maxInt(u32) bytes
+            // on their own, so this error is unreachable.
+            error.NoSpaceLeft => unreachable,
+            else => |e| return e,
+        };
+
+        var controls_by_id = std.AutoHashMap(u32, *const Node.ControlStatement).init(self.allocator);
+        // The number of controls is guaranteed by the parser to be within maxInt(u16).
+        try controls_by_id.ensureTotalCapacity(@as(u16, @intCast(node.controls.len)));
+        defer controls_by_id.deinit();
+
+        for (node.controls) |control_node| {
+            const control = @fieldParentPtr(Node.ControlStatement, "base", control_node);
+
+            self.writeDialogControl(
+                control,
+                data_writer,
+                resource,
+                // We know the data_buffer len is limited to u32 max.
+ @intCast(data_buffer.items.len), + &controls_by_id, + ) catch |err| switch (err) { + error.NoSpaceLeft => { + try self.addErrorDetails(.{ + .err = .resource_data_size_exceeds_max, + .token = node.id, + }); + return self.addErrorDetailsAndFail(.{ + .err = .resource_data_size_exceeds_max, + .type = .note, + .token = control.type, + }); + }, + else => |e| return e, + }; + } + + const data_size: u32 = @intCast(data_buffer.items.len); + var header = try self.resourceHeader(node.id, node.type, .{ + .data_size = data_size, + }); + defer header.deinit(self.allocator); + + header.applyMemoryFlags(node.common_resource_attributes, self.source); + header.applyOptionalStatements(node.optional_statements, self.source, self.input_code_pages); + + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + + var data_fbs = std.io.fixedBufferStream(data_buffer.items); + try writeResourceData(writer, data_fbs.reader(), data_size); + } + + fn writeDialogHeaderAndStrings( + self: *Compiler, + node: *Node.Dialog, + data_writer: anytype, + resource: Resource, + optional_statement_values: *const DialogOptionalStatementValues, + x: Number, + y: Number, + width: Number, + height: Number, + ) !void { + // Header + if (resource == .dialogex) { + const help_id: u32 = help_id: { + if (node.help_id == null) break :help_id 0; + break :help_id evaluateNumberExpression(node.help_id.?, self.source, self.input_code_pages).value; + }; + try data_writer.writeIntLittle(u16, 1); // version number, always 1 + try data_writer.writeIntLittle(u16, 0xFFFF); // signature, always 0xFFFF + try data_writer.writeIntLittle(u32, help_id); + try data_writer.writeIntLittle(u32, optional_statement_values.exstyle); + try data_writer.writeIntLittle(u32, optional_statement_values.style); + } else { + try data_writer.writeIntLittle(u32, optional_statement_values.style); + try data_writer.writeIntLittle(u32, optional_statement_values.exstyle); + } + // This limit is enforced by the parser, so we know the number of controls + // is within the range of a u16. + try data_writer.writeIntLittle(u16, @as(u16, @intCast(node.controls.len))); + try data_writer.writeIntLittle(u16, x.asWord()); + try data_writer.writeIntLittle(u16, y.asWord()); + try data_writer.writeIntLittle(u16, width.asWord()); + try data_writer.writeIntLittle(u16, height.asWord()); + + // Menu + if (optional_statement_values.menu) |menu| { + try menu.write(data_writer); + } else { + try data_writer.writeIntLittle(u16, 0); + } + // Class + if (optional_statement_values.class) |class| { + try class.write(data_writer); + } else { + try data_writer.writeIntLittle(u16, 0); + } + // Caption + if (optional_statement_values.caption) |caption| { + const parsed = try self.parseQuotedStringAsWideString(caption); + defer self.allocator.free(parsed); + try data_writer.writeAll(std.mem.sliceAsBytes(parsed[0 .. parsed.len + 1])); + } else { + try data_writer.writeIntLittle(u16, 0); + } + // Font + if (optional_statement_values.font) |font| { + try self.writeDialogFont(resource, font, data_writer); + } + } + + fn writeDialogControl( + self: *Compiler, + control: *Node.ControlStatement, + data_writer: anytype, + resource: Resource, + bytes_written_so_far: u32, + controls_by_id: *std.AutoHashMap(u32, *const Node.ControlStatement), + ) !void { + const control_type = rc.Control.map.get(control.type.slice(self.source)).?; + + // Each control must be at a 4-byte boundary. However, the Windows RC + // compiler will miscompile controls if their extra data ends on an odd offset. 
+ // We will avoid the miscompilation and emit a warning. + const num_padding = numPaddingBytesNeeded(bytes_written_so_far); + if (num_padding == 1 or num_padding == 3) { + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_control_padding, + .type = .warning, + .token = control.type, + }); + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_control_padding, + .type = .note, + .print_source_line = false, + .token = control.type, + }); + } + try data_writer.writeByteNTimes(0, num_padding); + + var style = if (control.style) |style_expression| + // Certain styles are implied by the control type + evaluateFlagsExpressionWithDefault(res.ControlClass.getImpliedStyle(control_type), style_expression, self.source, self.input_code_pages) + else + res.ControlClass.getImpliedStyle(control_type); + + var exstyle = if (control.exstyle) |exstyle_expression| + evaluateFlagsExpressionWithDefault(0, exstyle_expression, self.source, self.input_code_pages) + else + 0; + + switch (resource) { + .dialog => { + // Note: Reverse order from DIALOGEX + try data_writer.writeIntLittle(u32, style); + try data_writer.writeIntLittle(u32, exstyle); + }, + .dialogex => { + const help_id: u32 = if (control.help_id) |help_id_expression| + evaluateNumberExpression(help_id_expression, self.source, self.input_code_pages).value + else + 0; + try data_writer.writeIntLittle(u32, help_id); + // Note: Reverse order from DIALOG + try data_writer.writeIntLittle(u32, exstyle); + try data_writer.writeIntLittle(u32, style); + }, + else => unreachable, + } + + const control_x = evaluateNumberExpression(control.x, self.source, self.input_code_pages); + const control_y = evaluateNumberExpression(control.y, self.source, self.input_code_pages); + const control_width = evaluateNumberExpression(control.width, self.source, self.input_code_pages); + const control_height = evaluateNumberExpression(control.height, self.source, self.input_code_pages); + + try data_writer.writeIntLittle(u16, control_x.asWord()); + try data_writer.writeIntLittle(u16, control_y.asWord()); + try data_writer.writeIntLittle(u16, control_width.asWord()); + try data_writer.writeIntLittle(u16, control_height.asWord()); + + const control_id = evaluateNumberExpression(control.id, self.source, self.input_code_pages); + switch (resource) { + .dialog => try data_writer.writeIntLittle(u16, control_id.asWord()), + .dialogex => try data_writer.writeIntLittle(u32, control_id.value), + else => unreachable, + } + + const control_id_for_map: u32 = switch (resource) { + .dialog => control_id.asWord(), + .dialogex => control_id.value, + else => unreachable, + }; + const result = controls_by_id.getOrPutAssumeCapacity(control_id_for_map); + if (result.found_existing) { + if (!self.silent_duplicate_control_ids) { + try self.addErrorDetails(.{ + .err = .control_id_already_defined, + .type = .warning, + .token = control.id.getFirstToken(), + .token_span_end = control.id.getLastToken(), + .extra = .{ .number = control_id_for_map }, + }); + try self.addErrorDetails(.{ + .err = .control_id_already_defined, + .type = .note, + .token = result.value_ptr.*.id.getFirstToken(), + .token_span_end = result.value_ptr.*.id.getLastToken(), + .extra = .{ .number = control_id_for_map }, + }); + } + } else { + result.value_ptr.* = control; + } + + if (res.ControlClass.fromControl(control_type)) |control_class| { + const ordinal = NameOrOrdinal{ .ordinal = @intFromEnum(control_class) }; + try ordinal.write(data_writer); + } else { + const class_node = control.class.?; + if 
(class_node.isNumberExpression()) {
+                const number = evaluateNumberExpression(class_node, self.source, self.input_code_pages);
+                const ordinal = NameOrOrdinal{ .ordinal = number.asWord() };
+                // This is different from how the Windows RC compiler compiles ordinals here,
+                // but I think that's a miscompilation/bug of the Windows implementation.
+                // The Windows behavior is (where LSB = least significant byte):
+                // - If the LSB is 0x00 => 0xFFFF0000
+                // - If the LSB is < 0x80 => 0x000000
+                // - If the LSB is >= 0x80 => 0x0000FF
+                //
+                // Because of this, we emit a warning about the potential miscompilation
+                try self.addErrorDetails(.{
+                    .err = .rc_would_miscompile_control_class_ordinal,
+                    .type = .warning,
+                    .token = class_node.getFirstToken(),
+                    .token_span_end = class_node.getLastToken(),
+                });
+                try self.addErrorDetails(.{
+                    .err = .rc_would_miscompile_control_class_ordinal,
+                    .type = .note,
+                    .print_source_line = false,
+                    .token = class_node.getFirstToken(),
+                    .token_span_end = class_node.getLastToken(),
+                });
+                // And then write out the ordinal using a proper NameOrOrdinal encoding.
+                try ordinal.write(data_writer);
+            } else if (class_node.isStringLiteral()) {
+                const literal_node = @fieldParentPtr(Node.Literal, "base", class_node);
+                const parsed = try self.parseQuotedStringAsWideString(literal_node.token);
+                defer self.allocator.free(parsed);
+                if (rc.ControlClass.fromWideString(parsed)) |control_class| {
+                    const ordinal = NameOrOrdinal{ .ordinal = @intFromEnum(control_class) };
+                    try ordinal.write(data_writer);
+                } else {
+                    // NUL acts as a terminator
+                    // TODO: Maybe warn when parsed_terminated.len != parsed.len, since
+                    // it seems unlikely that NUL-termination is something intentional
+                    const parsed_terminated = std.mem.sliceTo(parsed, 0);
+                    const name = NameOrOrdinal{ .name = parsed_terminated };
+                    try name.write(data_writer);
+                }
+            } else {
+                const literal_node = @fieldParentPtr(Node.Literal, "base", class_node);
+                const literal_slice = literal_node.token.slice(self.source);
+                // This succeeding is guaranteed by the parser
+                const control_class = rc.ControlClass.map.get(literal_slice) orelse unreachable;
+                const ordinal = NameOrOrdinal{ .ordinal = @intFromEnum(control_class) };
+                try ordinal.write(data_writer);
+            }
+        }
+
+        if (control.text) |text_token| {
+            const bytes = SourceBytes{
+                .slice = text_token.slice(self.source),
+                .code_page = self.input_code_pages.getForToken(text_token),
+            };
+            if (text_token.isStringLiteral()) {
+                const text = try self.parseQuotedStringAsWideString(text_token);
+                defer self.allocator.free(text);
+                const name = NameOrOrdinal{ .name = text };
+                try name.write(data_writer);
+            } else {
+                std.debug.assert(text_token.id == .number);
+                const number = literals.parseNumberLiteral(bytes);
+                const ordinal = NameOrOrdinal{ .ordinal = number.asWord() };
+                try ordinal.write(data_writer);
+            }
+        } else {
+            try NameOrOrdinal.writeEmpty(data_writer);
+        }
+
+        var extra_data_buf = std.ArrayList(u8).init(self.allocator);
+        defer extra_data_buf.deinit();
+        // The extra data byte length must be able to fit within a u16.
+ var limited_extra_data_writer = limitedWriter(extra_data_buf.writer(), std.math.maxInt(u16)); + const extra_data_writer = limited_extra_data_writer.writer(); + for (control.extra_data) |data_expression| { + const data = try self.evaluateDataExpression(data_expression); + defer data.deinit(self.allocator); + data.write(extra_data_writer) catch |err| switch (err) { + error.NoSpaceLeft => { + try self.addErrorDetails(.{ + .err = .control_extra_data_size_exceeds_max, + .token = control.type, + }); + return self.addErrorDetailsAndFail(.{ + .err = .control_extra_data_size_exceeds_max, + .type = .note, + .token = data_expression.getFirstToken(), + .token_span_end = data_expression.getLastToken(), + }); + }, + else => |e| return e, + }; + } + // We know the extra_data_buf size fits within a u16. + const extra_data_size: u16 = @intCast(extra_data_buf.items.len); + try data_writer.writeIntLittle(u16, extra_data_size); + try data_writer.writeAll(extra_data_buf.items); + } + + pub fn writeToolbar(self: *Compiler, node: *Node.Toolbar, writer: anytype) !void { + var data_buffer = std.ArrayList(u8).init(self.allocator); + defer data_buffer.deinit(); + const data_writer = data_buffer.writer(); + + const button_width = evaluateNumberExpression(node.button_width, self.source, self.input_code_pages); + const button_height = evaluateNumberExpression(node.button_height, self.source, self.input_code_pages); + + // I'm assuming this is some sort of version + // TODO: Try to find something mentioning this + try data_writer.writeIntLittle(u16, 1); + try data_writer.writeIntLittle(u16, button_width.asWord()); + try data_writer.writeIntLittle(u16, button_height.asWord()); + try data_writer.writeIntLittle(u16, @as(u16, @intCast(node.buttons.len))); + + for (node.buttons) |button_or_sep| { + switch (button_or_sep.id) { + .literal => { // This is always SEPARATOR + std.debug.assert(button_or_sep.cast(.literal).?.token.id == .literal); + try data_writer.writeIntLittle(u16, 0); + }, + .simple_statement => { + const value_node = button_or_sep.cast(.simple_statement).?.value; + const value = evaluateNumberExpression(value_node, self.source, self.input_code_pages); + try data_writer.writeIntLittle(u16, value.asWord()); + }, + else => unreachable, // This is a bug in the parser + } + } + + const data_size: u32 = @intCast(data_buffer.items.len); + var header = try self.resourceHeader(node.id, node.type, .{ + .data_size = data_size, + }); + defer header.deinit(self.allocator); + + header.applyMemoryFlags(node.common_resource_attributes, self.source); + + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + + var data_fbs = std.io.fixedBufferStream(data_buffer.items); + try writeResourceData(writer, data_fbs.reader(), data_size); + } + + /// Weight and italic carry over from previous FONT statements within a single resource, + /// so they need to be parsed ahead-of-time and stored + const FontStatementValues = struct { + weight: u16 = 0, + italic: bool = false, + node: *Node.FontStatement, + }; + + pub fn writeDialogFont(self: *Compiler, resource: Resource, values: FontStatementValues, writer: anytype) !void { + const node = values.node; + const point_size = evaluateNumberExpression(node.point_size, self.source, self.input_code_pages); + try writer.writeIntLittle(u16, point_size.asWord()); + + if (resource == .dialogex) { + try writer.writeIntLittle(u16, values.weight); + } + + if (resource == .dialogex) { + try writer.writeIntLittle(u8, @intFromBool(values.italic)); + } + + if (node.char_set) 
|char_set| { + const value = evaluateNumberExpression(char_set, self.source, self.input_code_pages); + try writer.writeIntLittle(u8, @as(u8, @truncate(value.value))); + } else if (resource == .dialogex) { + try writer.writeIntLittle(u8, 1); // DEFAULT_CHARSET + } + + const typeface = try self.parseQuotedStringAsWideString(node.typeface); + defer self.allocator.free(typeface); + try writer.writeAll(std.mem.sliceAsBytes(typeface[0 .. typeface.len + 1])); + } + + pub fn writeMenu(self: *Compiler, node: *Node.Menu, writer: anytype) !void { + var data_buffer = std.ArrayList(u8).init(self.allocator); + defer data_buffer.deinit(); + // The header's data length field is a u32 so limit the resource's data size so that + // we know we can always specify the real size. + var limited_writer = limitedWriter(data_buffer.writer(), std.math.maxInt(u32)); + const data_writer = limited_writer.writer(); + + const type_bytes = SourceBytes{ + .slice = node.type.slice(self.source), + .code_page = self.input_code_pages.getForToken(node.type), + }; + const resource = Resource.fromString(type_bytes); + std.debug.assert(resource == .menu or resource == .menuex); + + self.writeMenuData(node, data_writer, resource) catch |err| switch (err) { + error.NoSpaceLeft => { + return self.addErrorDetailsAndFail(.{ + .err = .resource_data_size_exceeds_max, + .token = node.id, + }); + }, + else => |e| return e, + }; + + // This intCast can't fail because the limitedWriter above guarantees that + // we will never write more than maxInt(u32) bytes. + const data_size: u32 = @intCast(data_buffer.items.len); + var header = try self.resourceHeader(node.id, node.type, .{ + .data_size = data_size, + }); + defer header.deinit(self.allocator); + + header.applyMemoryFlags(node.common_resource_attributes, self.source); + header.applyOptionalStatements(node.optional_statements, self.source, self.input_code_pages); + + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + + var data_fbs = std.io.fixedBufferStream(data_buffer.items); + try writeResourceData(writer, data_fbs.reader(), data_size); + } + + /// Expects `data_writer` to be a LimitedWriter limited to u32, meaning all writes to + /// the writer within this function could return error.NoSpaceLeft + pub fn writeMenuData(self: *Compiler, node: *Node.Menu, data_writer: anytype, resource: Resource) !void { + // menu header + const version: u16 = if (resource == .menu) 0 else 1; + try data_writer.writeIntLittle(u16, version); + const header_size: u16 = if (resource == .menu) 0 else 4; + try data_writer.writeIntLittle(u16, header_size); // cbHeaderSize + // Note: There can be extra bytes at the end of this header (`rgbExtra`), + // but they are always zero-length for us, so we don't write anything + // (the length of the rgbExtra field is inferred from the header_size). + // MENU => rgbExtra: [cbHeaderSize]u8 + // MENUEX => rgbExtra: [cbHeaderSize-4]u8 + + if (resource == .menuex) { + if (node.help_id) |help_id_node| { + const help_id = evaluateNumberExpression(help_id_node, self.source, self.input_code_pages); + try data_writer.writeIntLittle(u32, help_id.value); + } else { + try data_writer.writeIntLittle(u32, 0); + } + } + + for (node.items, 0..) 
|item, i| {
+            const is_last = i == node.items.len - 1;
+            try self.writeMenuItem(item, data_writer, is_last);
+        }
+    }
+
+    pub fn writeMenuItem(self: *Compiler, node: *Node, writer: anytype, is_last_of_parent: bool) !void {
+        switch (node.id) {
+            .menu_item_separator => {
+                // This is the 'alternate compatibility form' of the separator, see
+                // https://devblogs.microsoft.com/oldnewthing/20080710-00/?p=21673
+                //
+                // The 'correct' way is to set the MF_SEPARATOR flag, but the Win32 RC
+                // compiler still uses this alternate form, so that's what we use too.
+                var flags = res.MenuItemFlags{};
+                if (is_last_of_parent) flags.markLast();
+                try writer.writeIntLittle(u16, flags.value);
+                try writer.writeIntLittle(u16, 0); // id
+                try writer.writeIntLittle(u16, 0); // null-terminated UTF-16 text
+            },
+            .menu_item => {
+                const menu_item = @fieldParentPtr(Node.MenuItem, "base", node);
+                var flags = res.MenuItemFlags{};
+                for (menu_item.option_list) |option_token| {
+                    // This failing would be a bug in the parser
+                    const option = rc.MenuItem.Option.map.get(option_token.slice(self.source)) orelse unreachable;
+                    flags.apply(option);
+                }
+                if (is_last_of_parent) flags.markLast();
+                try writer.writeIntLittle(u16, flags.value);
+
+                var result = evaluateNumberExpression(menu_item.result, self.source, self.input_code_pages);
+                try writer.writeIntLittle(u16, result.asWord());
+
+                var text = try self.parseQuotedStringAsWideString(menu_item.text);
+                defer self.allocator.free(text);
+                try writer.writeAll(std.mem.sliceAsBytes(text[0 .. text.len + 1]));
+            },
+            .popup => {
+                const popup = @fieldParentPtr(Node.Popup, "base", node);
+                var flags = res.MenuItemFlags{ .value = res.MF.POPUP };
+                for (popup.option_list) |option_token| {
+                    // This failing would be a bug in the parser
+                    const option = rc.MenuItem.Option.map.get(option_token.slice(self.source)) orelse unreachable;
+                    flags.apply(option);
+                }
+                if (is_last_of_parent) flags.markLast();
+                try writer.writeIntLittle(u16, flags.value);
+
+                var text = try self.parseQuotedStringAsWideString(popup.text);
+                defer self.allocator.free(text);
+                try writer.writeAll(std.mem.sliceAsBytes(text[0 .. text.len + 1]));
+
+                for (popup.items, 0..) |item, i| {
+                    const is_last = i == popup.items.len - 1;
+                    try self.writeMenuItem(item, writer, is_last);
+                }
+            },
+            inline .menu_item_ex, .popup_ex => |node_type| {
+                const menu_item = @fieldParentPtr(node_type.Type(), "base", node);
+
+                if (menu_item.type) |flags| {
+                    const value = evaluateNumberExpression(flags, self.source, self.input_code_pages);
+                    try writer.writeIntLittle(u32, value.value);
+                } else {
+                    try writer.writeIntLittle(u32, 0);
+                }
+
+                if (menu_item.state) |state| {
+                    const value = evaluateNumberExpression(state, self.source, self.input_code_pages);
+                    try writer.writeIntLittle(u32, value.value);
+                } else {
+                    try writer.writeIntLittle(u32, 0);
+                }
+
+                if (menu_item.id) |id| {
+                    const value = evaluateNumberExpression(id, self.source, self.input_code_pages);
+                    try writer.writeIntLittle(u32, value.value);
+                } else {
+                    try writer.writeIntLittle(u32, 0);
+                }
+
+                var flags: u16 = 0;
+                if (is_last_of_parent) flags |= comptime @as(u16, @intCast(res.MF.END));
+                // This constant doesn't seem to have a named #define, it's different from MF_POPUP
+                if (node_type == .popup_ex) flags |= 0x01;
+                try writer.writeIntLittle(u16, flags);
+
+                var text = try self.parseQuotedStringAsWideString(menu_item.text);
+                defer self.allocator.free(text);
+                try writer.writeAll(std.mem.sliceAsBytes(text[0 ..
text.len + 1])); + + // Only the combination of the flags u16 and the text bytes can cause + // non-DWORD alignment, so we can just use the byte length of those + // two values to realign to DWORD alignment. + const relevant_bytes = 2 + (text.len + 1) * 2; + try writeDataPadding(writer, @intCast(relevant_bytes)); + + if (node_type == .popup_ex) { + if (menu_item.help_id) |help_id_node| { + const help_id = evaluateNumberExpression(help_id_node, self.source, self.input_code_pages); + try writer.writeIntLittle(u32, help_id.value); + } else { + try writer.writeIntLittle(u32, 0); + } + + for (menu_item.items, 0..) |item, i| { + const is_last = i == menu_item.items.len - 1; + try self.writeMenuItem(item, writer, is_last); + } + } + }, + else => unreachable, + } + } + + pub fn writeVersionInfo(self: *Compiler, node: *Node.VersionInfo, writer: anytype) !void { + var data_buffer = std.ArrayList(u8).init(self.allocator); + defer data_buffer.deinit(); + // The node's length field (which is inclusive of the length of all of its children) is a u16 + // so limit the node's data size so that we know we can always specify the real size. + var limited_writer = limitedWriter(data_buffer.writer(), std.math.maxInt(u16)); + const data_writer = limited_writer.writer(); + + try data_writer.writeIntLittle(u16, 0); // placeholder size + try data_writer.writeIntLittle(u16, res.FixedFileInfo.byte_len); + try data_writer.writeIntLittle(u16, res.VersionNode.type_binary); + const key_bytes = std.mem.sliceAsBytes(res.FixedFileInfo.key[0 .. res.FixedFileInfo.key.len + 1]); + try data_writer.writeAll(key_bytes); + // The number of bytes written up to this point is always the same, since the name + // of the node is a constant (FixedFileInfo.key). The total number of bytes + // written so far is 38, so we need 2 padding bytes to get back to DWORD alignment + try data_writer.writeIntLittle(u16, 0); + + var fixed_file_info = res.FixedFileInfo{}; + for (node.fixed_info) |fixed_info| { + switch (fixed_info.id) { + .version_statement => { + const version_statement = @fieldParentPtr(Node.VersionStatement, "base", fixed_info); + const version_type = rc.VersionInfo.map.get(version_statement.type.slice(self.source)).?; + + // Ensure that all parts are cleared for each version, to properly account for + // potential duplicate PRODUCTVERSION/FILEVERSION statements + switch (version_type) { + .file_version => @memset(&fixed_file_info.file_version.parts, 0), + .product_version => @memset(&fixed_file_info.product_version.parts, 0), + else => unreachable, + } + + for (version_statement.parts, 0..) 
|part, i| { + const part_value = evaluateNumberExpression(part, self.source, self.input_code_pages); + if (part_value.is_long) { + try self.addErrorDetails(.{ + .err = .rc_would_error_u16_with_l_suffix, + .type = .warning, + .token = part.getFirstToken(), + .token_span_end = part.getLastToken(), + .extra = .{ .statement_with_u16_param = switch (version_type) { + .file_version => .fileversion, + .product_version => .productversion, + else => unreachable, + } }, + }); + try self.addErrorDetails(.{ + .err = .rc_would_error_u16_with_l_suffix, + .print_source_line = false, + .type = .note, + .token = part.getFirstToken(), + .token_span_end = part.getLastToken(), + .extra = .{ .statement_with_u16_param = switch (version_type) { + .file_version => .fileversion, + .product_version => .productversion, + else => unreachable, + } }, + }); + } + switch (version_type) { + .file_version => { + fixed_file_info.file_version.parts[i] = part_value.asWord(); + }, + .product_version => { + fixed_file_info.product_version.parts[i] = part_value.asWord(); + }, + else => unreachable, + } + } + }, + .simple_statement => { + const statement = @fieldParentPtr(Node.SimpleStatement, "base", fixed_info); + const statement_type = rc.VersionInfo.map.get(statement.identifier.slice(self.source)).?; + const value = evaluateNumberExpression(statement.value, self.source, self.input_code_pages); + switch (statement_type) { + .file_flags_mask => fixed_file_info.file_flags_mask = value.value, + .file_flags => fixed_file_info.file_flags = value.value, + .file_os => fixed_file_info.file_os = value.value, + .file_type => fixed_file_info.file_type = value.value, + .file_subtype => fixed_file_info.file_subtype = value.value, + else => unreachable, + } + }, + else => unreachable, + } + } + try fixed_file_info.write(data_writer); + + for (node.block_statements) |statement| { + self.writeVersionNode(statement, data_writer, &data_buffer) catch |err| switch (err) { + error.NoSpaceLeft => { + try self.addErrorDetails(.{ + .err = .version_node_size_exceeds_max, + .token = node.id, + }); + return self.addErrorDetailsAndFail(.{ + .err = .version_node_size_exceeds_max, + .type = .note, + .token = statement.getFirstToken(), + .token_span_end = statement.getLastToken(), + }); + }, + else => |e| return e, + }; + } + + // We know that data_buffer.items.len is within the limits of a u16, since we + // limited the writer to maxInt(u16) + const data_size: u16 = @intCast(data_buffer.items.len); + // And now that we know the full size of this node (including its children), set its size + std.mem.writeIntLittle(u16, data_buffer.items[0..2], data_size); + + var header = try self.resourceHeader(node.id, node.versioninfo, .{ + .data_size = data_size, + }); + defer header.deinit(self.allocator); + + header.applyMemoryFlags(node.common_resource_attributes, self.source); + + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + + var data_fbs = std.io.fixedBufferStream(data_buffer.items); + try writeResourceData(writer, data_fbs.reader(), data_size); + } + + /// Expects writer to be a LimitedWriter limited to u16, meaning all writes to + /// the writer within this function could return error.NoSpaceLeft, and that buf.items.len + /// will never be able to exceed maxInt(u16). 
+ pub fn writeVersionNode(self: *Compiler, node: *Node, writer: anytype, buf: *std.ArrayList(u8)) !void { + // We can assume that buf.items.len will never be able to exceed the limits of a u16 + try writeDataPadding(writer, @as(u16, @intCast(buf.items.len))); + + const node_and_children_size_offset = buf.items.len; + try writer.writeIntLittle(u16, 0); // placeholder for size + const data_size_offset = buf.items.len; + try writer.writeIntLittle(u16, 0); // placeholder for data size + const data_type_offset = buf.items.len; + // Data type is string unless the node contains values that are numbers. + try writer.writeIntLittle(u16, res.VersionNode.type_string); + + switch (node.id) { + inline .block, .block_value => |node_type| { + const block_or_value = @fieldParentPtr(node_type.Type(), "base", node); + const parsed_key = try self.parseQuotedStringAsWideString(block_or_value.key); + defer self.allocator.free(parsed_key); + + const parsed_key_to_first_null = std.mem.sliceTo(parsed_key, 0); + try writer.writeAll(std.mem.sliceAsBytes(parsed_key_to_first_null[0 .. parsed_key_to_first_null.len + 1])); + + var has_number_value: bool = false; + for (block_or_value.values) |value_value_node_uncasted| { + const value_value_node = value_value_node_uncasted.cast(.block_value_value).?; + if (value_value_node.expression.isNumberExpression()) { + has_number_value = true; + break; + } + } + // The units used here are dependent on the type. If there are any numbers, then + // this is a byte count. If there are only strings, then this is a count of + // UTF-16 code units. + // + // The Win32 RC compiler miscompiles this count in the case of values that + // have a mix of numbers and strings. This is detected and a warning is emitted + // during parsing, so we can just do the correct thing here. + var values_size: usize = 0; + + try writeDataPadding(writer, @intCast(buf.items.len)); + + for (block_or_value.values, 0..) |value_value_node_uncasted, i| { + const value_value_node = value_value_node_uncasted.cast(.block_value_value).?; + const value_node = value_value_node.expression; + if (value_node.isNumberExpression()) { + const number = evaluateNumberExpression(value_node, self.source, self.input_code_pages); + // This is used to write u16 or u32 depending on the number's suffix + const data_wrapper = Data{ .number = number }; + try data_wrapper.write(writer); + // Numbers use byte count + values_size += if (number.is_long) 4 else 2; + } else { + std.debug.assert(value_node.isStringLiteral()); + const literal_node = value_node.cast(.literal).?; + const parsed_value = try self.parseQuotedStringAsWideString(literal_node.token); + defer self.allocator.free(parsed_value); + + const parsed_to_first_null = std.mem.sliceTo(parsed_value, 0); + try writer.writeAll(std.mem.sliceAsBytes(parsed_to_first_null)); + // Strings use UTF-16 code-unit count including the null-terminator, but + // only if there are no number values in the list. + var value_size = parsed_to_first_null.len; + if (has_number_value) value_size *= 2; // 2 bytes per UTF-16 code unit + values_size += value_size; + // The null-terminator is only included if there's a trailing comma + // or this is the last value. If the value evaluates to empty, then + // it never gets a null terminator. If there was an explicit null-terminator + // in the string, we still need to potentially add one since we already + // sliced to the terminator. 
+ const is_last = i == block_or_value.values.len - 1; + const is_empty = parsed_to_first_null.len == 0; + const is_only = block_or_value.values.len == 1; + if ((!is_empty or !is_only) and (is_last or value_value_node.trailing_comma)) { + try writer.writeIntLittle(u16, 0); + values_size += if (has_number_value) 2 else 1; + } + } + } + var data_size_slice = buf.items[data_size_offset..]; + std.mem.writeIntLittle(u16, data_size_slice[0..@sizeOf(u16)], @as(u16, @intCast(values_size))); + + if (has_number_value) { + const data_type_slice = buf.items[data_type_offset..]; + std.mem.writeIntLittle(u16, data_type_slice[0..@sizeOf(u16)], res.VersionNode.type_binary); + } + + if (node_type == .block) { + const block = block_or_value; + for (block.children) |child| { + try self.writeVersionNode(child, writer, buf); + } + } + }, + else => unreachable, + } + + const node_and_children_size = buf.items.len - node_and_children_size_offset; + const node_and_children_size_slice = buf.items[node_and_children_size_offset..]; + std.mem.writeIntLittle(u16, node_and_children_size_slice[0..@sizeOf(u16)], @as(u16, @intCast(node_and_children_size))); + } + + pub fn writeStringTable(self: *Compiler, node: *Node.StringTable) !void { + const language = getLanguageFromOptionalStatements(node.optional_statements, self.source, self.input_code_pages) orelse self.state.language; + + for (node.strings) |string_node| { + const string = @fieldParentPtr(Node.StringTableString, "base", string_node); + const string_id_data = try self.evaluateDataExpression(string.id); + const string_id = string_id_data.number.asWord(); + + self.state.string_tables.set( + self.arena, + language, + string_id, + string.string, + &node.base, + self.source, + self.input_code_pages, + self.state.version, + self.state.characteristics, + ) catch |err| switch (err) { + error.StringAlreadyDefined => { + // It might be nice to have these errors point to the ids rather than the + // string tokens, but that would mean storing the id token of each string + // which doesn't seem worth it just for slightly better error messages. 
+ try self.addErrorDetails(ErrorDetails{ + .err = .string_already_defined, + .token = string.string, + .extra = .{ .string_and_language = .{ .id = string_id, .language = language } }, + }); + const existing_def_table = self.state.string_tables.tables.getPtr(language).?; + const existing_definition = existing_def_table.get(string_id).?; + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .string_already_defined, + .type = .note, + .token = existing_definition, + .extra = .{ .string_and_language = .{ .id = string_id, .language = language } }, + }); + }, + error.OutOfMemory => |e| return e, + }; + } + } + + /// Expects this to be a top-level LANGUAGE statement + pub fn writeLanguageStatement(self: *Compiler, node: *Node.LanguageStatement) void { + const primary = Compiler.evaluateNumberExpression(node.primary_language_id, self.source, self.input_code_pages); + const sublanguage = Compiler.evaluateNumberExpression(node.sublanguage_id, self.source, self.input_code_pages); + self.state.language.primary_language_id = @truncate(primary.value); + self.state.language.sublanguage_id = @truncate(sublanguage.value); + } + + /// Expects this to be a top-level VERSION or CHARACTERISTICS statement + pub fn writeTopLevelSimpleStatement(self: *Compiler, node: *Node.SimpleStatement) void { + const value = Compiler.evaluateNumberExpression(node.value, self.source, self.input_code_pages); + const statement_type = rc.TopLevelKeywords.map.get(node.identifier.slice(self.source)).?; + switch (statement_type) { + .characteristics => self.state.characteristics = value.value, + .version => self.state.version = value.value, + else => unreachable, + } + } + + pub const ResourceHeaderOptions = struct { + language: ?res.Language = null, + data_size: DWORD = 0, + }; + + pub fn resourceHeader(self: *Compiler, id_token: Token, type_token: Token, options: ResourceHeaderOptions) !ResourceHeader { + const id_bytes = self.sourceBytesForToken(id_token); + const type_bytes = self.sourceBytesForToken(type_token); + return ResourceHeader.init( + self.allocator, + id_bytes, + type_bytes, + options.data_size, + options.language orelse self.state.language, + self.state.version, + self.state.characteristics, + ) catch |err| switch (err) { + error.OutOfMemory => |e| return e, + error.TypeNonAsciiOrdinal => { + const win32_rc_ordinal = NameOrOrdinal.maybeNonAsciiOrdinalFromString(type_bytes).?; + try self.addErrorDetails(.{ + .err = .invalid_digit_character_in_ordinal, + .type = .err, + .token = type_token, + }); + return self.addErrorDetailsAndFail(.{ + .err = .win32_non_ascii_ordinal, + .type = .note, + .token = type_token, + .print_source_line = false, + .extra = .{ .number = win32_rc_ordinal.ordinal }, + }); + }, + error.IdNonAsciiOrdinal => { + const win32_rc_ordinal = NameOrOrdinal.maybeNonAsciiOrdinalFromString(id_bytes).?; + try self.addErrorDetails(.{ + .err = .invalid_digit_character_in_ordinal, + .type = .err, + .token = id_token, + }); + return self.addErrorDetailsAndFail(.{ + .err = .win32_non_ascii_ordinal, + .type = .note, + .token = id_token, + .print_source_line = false, + .extra = .{ .number = win32_rc_ordinal.ordinal }, + }); + }, + }; + } + + pub const ResourceHeader = struct { + name_value: NameOrOrdinal, + type_value: NameOrOrdinal, + language: res.Language, + memory_flags: MemoryFlags, + data_size: DWORD, + version: DWORD, + characteristics: DWORD, + data_version: DWORD = 0, + + pub const InitError = error{ OutOfMemory, IdNonAsciiOrdinal, TypeNonAsciiOrdinal }; + + pub fn init(allocator: Allocator, 
id_bytes: SourceBytes, type_bytes: SourceBytes, data_size: DWORD, language: res.Language, version: DWORD, characteristics: DWORD) InitError!ResourceHeader { + const type_value = type: { + const resource_type = Resource.fromString(type_bytes); + if (res.RT.fromResource(resource_type)) |rt_constant| { + break :type NameOrOrdinal{ .ordinal = @intFromEnum(rt_constant) }; + } else { + break :type try NameOrOrdinal.fromString(allocator, type_bytes); + } + }; + errdefer type_value.deinit(allocator); + if (type_value == .name) { + if (NameOrOrdinal.maybeNonAsciiOrdinalFromString(type_bytes)) |_| { + return error.TypeNonAsciiOrdinal; + } + } + + const name_value = try NameOrOrdinal.fromString(allocator, id_bytes); + errdefer name_value.deinit(allocator); + if (name_value == .name) { + if (NameOrOrdinal.maybeNonAsciiOrdinalFromString(id_bytes)) |_| { + return error.IdNonAsciiOrdinal; + } + } + + const predefined_resource_type = type_value.predefinedResourceType(); + + return ResourceHeader{ + .name_value = name_value, + .type_value = type_value, + .data_size = data_size, + .memory_flags = MemoryFlags.defaults(predefined_resource_type), + .language = language, + .version = version, + .characteristics = characteristics, + }; + } + + pub fn deinit(self: ResourceHeader, allocator: Allocator) void { + self.name_value.deinit(allocator); + self.type_value.deinit(allocator); + } + + pub const SizeInfo = struct { + bytes: u32, + padding_after_name: u2, + }; + + fn calcSize(self: ResourceHeader) error{Overflow}!SizeInfo { + var header_size: u32 = 8; + header_size = try std.math.add( + u32, + header_size, + std.math.cast(u32, self.name_value.byteLen()) orelse return error.Overflow, + ); + header_size = try std.math.add( + u32, + header_size, + std.math.cast(u32, self.type_value.byteLen()) orelse return error.Overflow, + ); + const padding_after_name = numPaddingBytesNeeded(header_size); + header_size = try std.math.add(u32, header_size, padding_after_name); + header_size = try std.math.add(u32, header_size, 16); + return .{ .bytes = header_size, .padding_after_name = padding_after_name }; + } + + pub fn writeAssertNoOverflow(self: ResourceHeader, writer: anytype) !void { + return self.writeSizeInfo(writer, self.calcSize() catch unreachable); + } + + pub fn write(self: ResourceHeader, writer: anytype, err_ctx: errors.DiagnosticsContext) !void { + const size_info = self.calcSize() catch { + try err_ctx.diagnostics.append(.{ + .err = .resource_data_size_exceeds_max, + .token = err_ctx.token, + }); + return error.CompileError; + }; + return self.writeSizeInfo(writer, size_info); + } + + fn writeSizeInfo(self: ResourceHeader, writer: anytype, size_info: SizeInfo) !void { + try writer.writeIntLittle(DWORD, self.data_size); // DataSize + try writer.writeIntLittle(DWORD, size_info.bytes); // HeaderSize + try self.type_value.write(writer); // TYPE + try self.name_value.write(writer); // NAME + try writer.writeByteNTimes(0, size_info.padding_after_name); + + try writer.writeIntLittle(DWORD, self.data_version); // DataVersion + try writer.writeIntLittle(WORD, self.memory_flags.value); // MemoryFlags + try writer.writeIntLittle(WORD, self.language.asInt()); // LanguageId + try writer.writeIntLittle(DWORD, self.version); // Version + try writer.writeIntLittle(DWORD, self.characteristics); // Characteristics + } + + pub fn predefinedResourceType(self: ResourceHeader) ?res.RT { + return self.type_value.predefinedResourceType(); + } + + pub fn applyMemoryFlags(self: *ResourceHeader, tokens: []Token, source: []const u8) void { 
+ applyToMemoryFlags(&self.memory_flags, tokens, source); + } + + pub fn applyOptionalStatements(self: *ResourceHeader, statements: []*Node, source: []const u8, code_page_lookup: *const CodePageLookup) void { + applyToOptionalStatements(&self.language, &self.version, &self.characteristics, statements, source, code_page_lookup); + } + }; + + fn applyToMemoryFlags(flags: *MemoryFlags, tokens: []Token, source: []const u8) void { + for (tokens) |token| { + const attribute = rc.CommonResourceAttributes.map.get(token.slice(source)).?; + flags.set(attribute); + } + } + + /// RT_GROUP_ICON and RT_GROUP_CURSOR have their own special rules for memory flags + fn applyToGroupMemoryFlags(flags: *MemoryFlags, tokens: []Token, source: []const u8) void { + // There's probably a cleaner implementation of this, but this will result in the same + // flags as the Win32 RC compiler for all 986,410 K-permutations of memory flags + // for an ICON resource. + // + // This was arrived at by iterating over the permutations and creating a + // list where each line looks something like this: + // MOVEABLE PRELOAD -> 0x1050 (MOVEABLE|PRELOAD|DISCARDABLE) + // + // and then noticing a few things: + + // 1. Any permutation that does not have PRELOAD in it just uses the + // default flags. + const initial_flags = flags.*; + var flags_set = std.enums.EnumSet(rc.CommonResourceAttributes).initEmpty(); + for (tokens) |token| { + const attribute = rc.CommonResourceAttributes.map.get(token.slice(source)).?; + flags_set.insert(attribute); + } + if (!flags_set.contains(.preload)) return; + + // 2. Any permutation of flags where applying only the PRELOAD and LOADONCALL flags + // results in no actual change by the end will just use the default flags. + // For example, `PRELOAD LOADONCALL` will result in default flags, but + // `LOADONCALL PRELOAD` will have PRELOAD set after they are both applied in order. + for (tokens) |token| { + const attribute = rc.CommonResourceAttributes.map.get(token.slice(source)).?; + switch (attribute) { + .preload, .loadoncall => flags.set(attribute), + else => {}, + } + } + if (flags.value == initial_flags.value) return; + + // 3. If none of DISCARDABLE, SHARED, or PURE is specified, then PRELOAD + // implies `flags &= ~SHARED` and LOADONCALL implies `flags |= SHARED` + const shared_set = comptime blk: { + var set = std.enums.EnumSet(rc.CommonResourceAttributes).initEmpty(); + set.insert(.discardable); + set.insert(.shared); + set.insert(.pure); + break :blk set; + }; + const discardable_shared_or_pure_specified = flags_set.intersectWith(shared_set).count() != 0; + for (tokens) |token| { + const attribute = rc.CommonResourceAttributes.map.get(token.slice(source)).?; + flags.setGroup(attribute, !discardable_shared_or_pure_specified); + } + } + + /// Only handles the 'base' optional statements that are shared between resource types. 
+ fn applyToOptionalStatements(language: *res.Language, version: *u32, characteristics: *u32, statements: []*Node, source: []const u8, code_page_lookup: *const CodePageLookup) void { + for (statements) |node| switch (node.id) { + .language_statement => { + const language_statement = @fieldParentPtr(Node.LanguageStatement, "base", node); + language.* = languageFromLanguageStatement(language_statement, source, code_page_lookup); + }, + .simple_statement => { + const simple_statement = @fieldParentPtr(Node.SimpleStatement, "base", node); + const statement_type = rc.OptionalStatements.map.get(simple_statement.identifier.slice(source)) orelse continue; + const result = Compiler.evaluateNumberExpression(simple_statement.value, source, code_page_lookup); + switch (statement_type) { + .version => version.* = result.value, + .characteristics => characteristics.* = result.value, + else => unreachable, // only VERSION and CHARACTERISTICS should be in an optional statements list + } + }, + else => {}, + }; + } + + pub fn languageFromLanguageStatement(language_statement: *const Node.LanguageStatement, source: []const u8, code_page_lookup: *const CodePageLookup) res.Language { + const primary = Compiler.evaluateNumberExpression(language_statement.primary_language_id, source, code_page_lookup); + const sublanguage = Compiler.evaluateNumberExpression(language_statement.sublanguage_id, source, code_page_lookup); + return .{ + .primary_language_id = @truncate(primary.value), + .sublanguage_id = @truncate(sublanguage.value), + }; + } + + pub fn getLanguageFromOptionalStatements(statements: []*Node, source: []const u8, code_page_lookup: *const CodePageLookup) ?res.Language { + for (statements) |node| switch (node.id) { + .language_statement => { + const language_statement = @fieldParentPtr(Node.LanguageStatement, "base", node); + return languageFromLanguageStatement(language_statement, source, code_page_lookup); + }, + else => continue, + }; + return null; + } + + pub fn writeEmptyResource(writer: anytype) !void { + const header = ResourceHeader{ + .name_value = .{ .ordinal = 0 }, + .type_value = .{ .ordinal = 0 }, + .language = .{ + .primary_language_id = 0, + .sublanguage_id = 0, + }, + .memory_flags = .{ .value = 0 }, + .data_size = 0, + .version = 0, + .characteristics = 0, + }; + try header.writeAssertNoOverflow(writer); + } + + pub fn sourceBytesForToken(self: *Compiler, token: Token) SourceBytes { + return .{ + .slice = token.slice(self.source), + .code_page = self.input_code_pages.getForToken(token), + }; + } + + /// Helper that calls parseQuotedStringAsWideString with the relevant context + /// Resulting slice is allocated by `self.allocator`. + pub fn parseQuotedStringAsWideString(self: *Compiler, token: Token) ![:0]u16 { + return literals.parseQuotedStringAsWideString( + self.allocator, + self.sourceBytesForToken(token), + .{ + .start_column = token.calculateColumn(self.source, 8, null), + .diagnostics = .{ .diagnostics = self.diagnostics, .token = token }, + }, + ); + } + + /// Helper that calls parseQuotedStringAsAsciiString with the relevant context + /// Resulting slice is allocated by `self.allocator`. 
+ pub fn parseQuotedStringAsAsciiString(self: *Compiler, token: Token) ![]u8 { + return literals.parseQuotedStringAsAsciiString( + self.allocator, + self.sourceBytesForToken(token), + .{ + .start_column = token.calculateColumn(self.source, 8, null), + .diagnostics = .{ .diagnostics = self.diagnostics, .token = token }, + }, + ); + } + + fn addErrorDetails(self: *Compiler, details: ErrorDetails) Allocator.Error!void { + try self.diagnostics.append(details); + } + + fn addErrorDetailsAndFail(self: *Compiler, details: ErrorDetails) error{ CompileError, OutOfMemory } { + try self.addErrorDetails(details); + return error.CompileError; + } +}; + +pub const OpenSearchPathError = std.fs.Dir.OpenError; + +fn openSearchPathDir(dir: std.fs.Dir, path: []const u8) OpenSearchPathError!std.fs.Dir { + // Validate the search path to avoid possible unreachable on invalid paths, + // see https://github.com/ziglang/zig/issues/15607 for why this is currently necessary. + try validateSearchPath(path); + return dir.openDir(path, .{}); +} + +/// Very crude attempt at validating a path. This is imperfect +/// and AFAIK it is effectively impossible to implement perfect path +/// validation, since it ultimately depends on the underlying filesystem. +/// Note that this function won't be necessary if/when +/// https://github.com/ziglang/zig/issues/15607 +/// is accepted/implemented. +fn validateSearchPath(path: []const u8) error{BadPathName}!void { + switch (builtin.os.tag) { + .windows => { + // This will return error.BadPathName on non-Win32 namespaced paths + // (e.g. the NT \??\ prefix, the device \\.\ prefix, etc). + // Those path types are something of an unavoidable way to + // still hit unreachable during the openDir call. + var component_iterator = try std.fs.path.componentIterator(path); + while (component_iterator.next()) |component| { + // https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file + if (std.mem.indexOfAny(u8, component.name, "\x00<>:\"|?*") != null) return error.BadPathName; + } + }, + else => { + if (std.mem.indexOfScalar(u8, path, 0) != null) return error.BadPathName; + }, + } +} + +pub const SearchDir = struct { + dir: std.fs.Dir, + path: ?[]const u8, + + pub fn deinit(self: *SearchDir, allocator: Allocator) void { + self.dir.close(); + if (self.path) |path| { + allocator.free(path); + } + } +}; + +/// Slurps the first `size` bytes read into `slurped_header` +pub fn HeaderSlurpingReader(comptime size: usize, comptime ReaderType: anytype) type { + return struct { + child_reader: ReaderType, + bytes_read: u64 = 0, + slurped_header: [size]u8 = [_]u8{0x00} ** size, + + pub const Error = ReaderType.Error; + pub const Reader = std.io.Reader(*@This(), Error, read); + + pub fn read(self: *@This(), buf: []u8) Error!usize { + const amt = try self.child_reader.read(buf); + if (self.bytes_read < size) { + const bytes_to_add = @min(amt, size - self.bytes_read); + const end_index = self.bytes_read + bytes_to_add; + std.mem.copy(u8, self.slurped_header[self.bytes_read..end_index], buf[0..bytes_to_add]); + } + self.bytes_read += amt; + return amt; + } + + pub fn reader(self: *@This()) Reader { + return .{ .context = self }; + } + }; +} + +pub fn headerSlurpingReader(comptime size: usize, reader: anytype) HeaderSlurpingReader(size, @TypeOf(reader)) { + return .{ .child_reader = reader }; +} + +/// Sort of like std.io.LimitedReader, but a Writer. +/// Returns an error if writing the requested number of bytes +/// would ever exceed bytes_left, i.e. 
it does not always +/// write up to the limit and instead will error if the +/// limit would be breached if the entire slice was written. +pub fn LimitedWriter(comptime WriterType: type) type { + return struct { + inner_writer: WriterType, + bytes_left: u64, + + pub const Error = error{NoSpaceLeft} || WriterType.Error; + pub const Writer = std.io.Writer(*Self, Error, write); + + const Self = @This(); + + pub fn write(self: *Self, bytes: []const u8) Error!usize { + if (bytes.len > self.bytes_left) return error.NoSpaceLeft; + const amt = try self.inner_writer.write(bytes); + self.bytes_left -= amt; + return amt; + } + + pub fn writer(self: *Self) Writer { + return .{ .context = self }; + } + }; +} + +/// Returns an initialised `LimitedWriter` +/// `bytes_left` is a `u64` to be able to take 64 bit file offsets +pub fn limitedWriter(inner_writer: anytype, bytes_left: u64) LimitedWriter(@TypeOf(inner_writer)) { + return .{ .inner_writer = inner_writer, .bytes_left = bytes_left }; +} + +test "limitedWriter basic usage" { + var buf: [4]u8 = undefined; + var fbs = std.io.fixedBufferStream(&buf); + var limited_stream = limitedWriter(fbs.writer(), 4); + var writer = limited_stream.writer(); + + try std.testing.expectEqual(@as(usize, 3), try writer.write("123")); + try std.testing.expectEqualSlices(u8, "123", buf[0..3]); + try std.testing.expectError(error.NoSpaceLeft, writer.write("45")); + try std.testing.expectEqual(@as(usize, 1), try writer.write("4")); + try std.testing.expectEqualSlices(u8, "1234", buf[0..4]); + try std.testing.expectError(error.NoSpaceLeft, writer.write("5")); +} + +pub const FontDir = struct { + fonts: std.ArrayListUnmanaged(Font) = .{}, + /// To keep track of which ids are set and where they were set from + ids: std.AutoHashMapUnmanaged(u16, Token) = .{}, + + pub const Font = struct { + id: u16, + header_bytes: [148]u8, + }; + + pub fn deinit(self: *FontDir, allocator: Allocator) void { + self.fonts.deinit(allocator); + } + + pub fn add(self: *FontDir, allocator: Allocator, font: Font, id_token: Token) !void { + try self.ids.putNoClobber(allocator, font.id, id_token); + try self.fonts.append(allocator, font); + } + + pub fn writeResData(self: *FontDir, compiler: *Compiler, writer: anytype) !void { + if (self.fonts.items.len == 0) return; + + // We know the number of fonts is limited to maxInt(u16) because fonts + // must have a valid and unique u16 ordinal ID (trying to specify a FONT + // with e.g. id 65537 will wrap around to 1 and be ignored if there's already + // a font with that ID in the file). + const num_fonts: u16 = @intCast(self.fonts.items.len); + + // u16 count + [(u16 id + 150 bytes) for each font] + // Note: This works out to a maximum data_size of 9,961,322. + const data_size: u32 = 2 + (2 + 150) * num_fonts; + + var header = Compiler.ResourceHeader{ + .name_value = try NameOrOrdinal.nameFromString(compiler.allocator, .{ .slice = "FONTDIR", .code_page = .windows1252 }), + .type_value = NameOrOrdinal{ .ordinal = @intFromEnum(res.RT.FONTDIR) }, + .memory_flags = res.MemoryFlags.defaults(res.RT.FONTDIR), + .language = compiler.state.language, + .version = compiler.state.version, + .characteristics = compiler.state.characteristics, + .data_size = data_size, + }; + defer header.deinit(compiler.allocator); + + try header.writeAssertNoOverflow(writer); + try writer.writeIntLittle(u16, num_fonts); + for (self.fonts.items) |font| { + // The format of the FONTDIR is a strange beast. 
+ // Technically, each FONT is seemingly meant to be written as a + // FONTDIRENTRY with two trailing NUL-terminated strings corresponding to + // the 'device name' and 'face name' of the .FNT file, but: + // + // 1. When dealing with .FNT files, the Win32 implementation + // gets the device name and face name from the wrong locations, + // so it's basically never going to write the real device/face name + // strings. + // 2. When dealing with files 76-140 bytes long, the Win32 implementation + // can just crash (if there are no NUL bytes in the file). + // 3. The 32-bit Win32 rc.exe uses a 148 byte size for the portion of + // the FONTDIRENTRY before the NUL-terminated strings, which + // does not match the documented FONTDIRENTRY size that (presumably) + // this format is meant to be using, so anything iterating the + // FONTDIR according to the available documentation will get bogus results. + // 4. The FONT resource can be used for non-.FNT types like TTF and OTF, + // in which case emulating the Win32 behavior of unconditionally + // interpreting the bytes as a .FNT and trying to grab device/face names + // from random bytes in the TTF/OTF file can lead to weird behavior + // and errors in the Win32 implementation (for example, the device/face + // name fields are offsets into the file where the NUL-terminated + // string is located, but the Win32 implementation actually treats + // them as signed so if they are negative then the Win32 implementation + // will error; this happening for TTF fonts would just be a bug + // since the TTF could otherwise be valid) + // 5. The FONTDIR resource doesn't actually seem to be used at all by + // anything that I've found, and instead in Windows 3.0 and newer + // it seems like the FONT resources are always just iterated/accessed + // directly without ever looking at the FONTDIR. + // + // All of these combined means that we: + // - Do not need or want to emulate Win32 behavior here + // - For maximum simplicity and compatibility, we just write the first + // 148 bytes of the file without any interpretation (padded with + // zeroes to get up to 148 bytes if necessary), and then + // unconditionally write two NUL bytes, meaning that we always + // write 'device name' and 'face name' as if they were 0-length + // strings. + // + // This gives us byte-for-byte .RES compatibility in the common case while + // allowing us to avoid any erroneous errors caused by trying to read + // the face/device name from a bogus location. Note that the Win32 + // implementation never actually writes the real device/face name here + // anyway (except in the bizarre case that a .FNT file has the proper + // device/face name offsets within a reserved section of the .FNT file) + // so there's no feasible way that anything can actually think that the + // device name/face name in the FONTDIR is reliable. + + // First, the ID is written, though + try writer.writeIntLittle(u16, font.id); + try writer.writeAll(&font.header_bytes); + try writer.writeByteNTimes(0, 2); + } + try Compiler.writeDataPadding(writer, data_size); + } +}; + +pub const StringTablesByLanguage = struct { + /// String tables for each language are written to the .res file in order depending on + /// when the first STRINGTABLE for the language was defined, and all blocks for a given + /// language are written contiguously. + /// Using an ArrayHashMap here gives us this property for free. 
+ tables: std.AutoArrayHashMapUnmanaged(res.Language, StringTable) = .{}, + + pub fn deinit(self: *StringTablesByLanguage, allocator: Allocator) void { + self.tables.deinit(allocator); + } + + pub fn set( + self: *StringTablesByLanguage, + allocator: Allocator, + language: res.Language, + id: u16, + string_token: Token, + node: *Node, + source: []const u8, + code_page_lookup: *const CodePageLookup, + version: u32, + characteristics: u32, + ) StringTable.SetError!void { + var get_or_put_result = try self.tables.getOrPut(allocator, language); + if (!get_or_put_result.found_existing) { + get_or_put_result.value_ptr.* = StringTable{}; + } + return get_or_put_result.value_ptr.set(allocator, id, string_token, node, source, code_page_lookup, version, characteristics); + } +}; + +pub const StringTable = struct { + /// Blocks are written to the .res file in order depending on when the first string + /// was added to the block (i.e. `STRINGTABLE { 16 "b" 0 "a" }` would then get written + /// with block ID 2 (the one with "b") first and block ID 1 (the one with "a") second). + /// Using an ArrayHashMap here gives us this property for free. + blocks: std.AutoArrayHashMapUnmanaged(u16, Block) = .{}, + + pub const Block = struct { + strings: std.ArrayListUnmanaged(Token) = .{}, + set_indexes: std.bit_set.IntegerBitSet(16) = .{ .mask = 0 }, + memory_flags: MemoryFlags = MemoryFlags.defaults(res.RT.STRING), + characteristics: u32, + version: u32, + + /// Returns the index to insert the string into the `strings` list. + /// Returns null if the string should be appended. + fn getInsertionIndex(self: *Block, index: u8) ?u8 { + std.debug.assert(!self.set_indexes.isSet(index)); + + const first_set = self.set_indexes.findFirstSet() orelse return null; + if (first_set > index) return 0; + + const last_set = 15 - @clz(self.set_indexes.mask); + if (index > last_set) return null; + + var bit = first_set + 1; + var insertion_index: u8 = 1; + while (bit != index) : (bit += 1) { + if (self.set_indexes.isSet(bit)) insertion_index += 1; + } + return insertion_index; + } + + fn getTokenIndex(self: *Block, string_index: u8) ?u8 { + const count = self.strings.items.len; + if (count == 0) return null; + if (count == 1) return 0; + + const first_set = self.set_indexes.findFirstSet() orelse unreachable; + if (first_set == string_index) return 0; + const last_set = 15 - @clz(self.set_indexes.mask); + if (last_set == string_index) return @intCast(count - 1); + + if (first_set == last_set) return null; + + var bit = first_set + 1; + var token_index: u8 = 1; + while (bit < last_set) : (bit += 1) { + if (!self.set_indexes.isSet(bit)) continue; + if (bit == string_index) return token_index; + token_index += 1; + } + return null; + } + + fn dump(self: *Block) void { + var bit_it = self.set_indexes.iterator(.{}); + var string_index: usize = 0; + while (bit_it.next()) |bit_index| { + const token = self.strings.items[string_index]; + std.debug.print("{}: [{}] {any}\n", .{ bit_index, string_index, token }); + string_index += 1; + } + } + + pub fn applyAttributes(self: *Block, string_table: *Node.StringTable, source: []const u8, code_page_lookup: *const CodePageLookup) void { + Compiler.applyToMemoryFlags(&self.memory_flags, string_table.common_resource_attributes, source); + var dummy_language: res.Language = undefined; + Compiler.applyToOptionalStatements(&dummy_language, &self.version, &self.characteristics, string_table.optional_statements, source, code_page_lookup); + } + + fn trimToDoubleNUL(comptime T: type, str: []const T) 
[]const T { + var last_was_null = false; + for (str, 0..) |c, i| { + if (c == 0) { + if (last_was_null) return str[0 .. i - 1]; + last_was_null = true; + } else { + last_was_null = false; + } + } + return str; + } + + test "trimToDoubleNUL" { + try std.testing.expectEqualStrings("a\x00b", trimToDoubleNUL(u8, "a\x00b")); + try std.testing.expectEqualStrings("a", trimToDoubleNUL(u8, "a\x00\x00b")); + } + + pub fn writeResData(self: *Block, compiler: *Compiler, language: res.Language, block_id: u16, writer: anytype) !void { + var data_buffer = std.ArrayList(u8).init(compiler.allocator); + defer data_buffer.deinit(); + const data_writer = data_buffer.writer(); + + var i: u8 = 0; + var string_i: u8 = 0; + while (true) : (i += 1) { + if (!self.set_indexes.isSet(i)) { + try data_writer.writeIntLittle(u16, 0); + if (i == 15) break else continue; + } + + const string_token = self.strings.items[string_i]; + const slice = string_token.slice(compiler.source); + const column = string_token.calculateColumn(compiler.source, 8, null); + const code_page = compiler.input_code_pages.getForToken(string_token); + const bytes = SourceBytes{ .slice = slice, .code_page = code_page }; + const utf16_string = try literals.parseQuotedStringAsWideString(compiler.allocator, bytes, .{ + .start_column = column, + .diagnostics = .{ .diagnostics = compiler.diagnostics, .token = string_token }, + }); + defer compiler.allocator.free(utf16_string); + + const trimmed_string = trim: { + // Two NUL characters in a row act as a terminator + // Note: This is only the case for STRINGTABLE strings + var trimmed = trimToDoubleNUL(u16, utf16_string); + // We also want to trim any trailing NUL characters + break :trim std.mem.trimRight(u16, trimmed, &[_]u16{0}); + }; + + // String literals are limited to maxInt(u15) codepoints, so these UTF-16 encoded + // strings are limited to maxInt(u15) * 2 = 65,534 code units (since 2 is the + // maximum number of UTF-16 code units per codepoint). + // This leaves room for exactly one NUL terminator. + var string_len_in_utf16_code_units: u16 = @intCast(trimmed_string.len); + // If the option is set, then a NUL terminator is added unconditionally. + // We already trimmed any trailing NULs, so we know it will be a new addition to the string. + if (compiler.null_terminate_string_table_strings) string_len_in_utf16_code_units += 1; + try data_writer.writeIntLittle(u16, string_len_in_utf16_code_units); + for (trimmed_string) |wc| { + try data_writer.writeIntLittle(u16, wc); + } + if (compiler.null_terminate_string_table_strings) { + try data_writer.writeIntLittle(u16, 0); + } + + if (i == 15) break; + string_i += 1; + } + + // This intCast will never be able to fail due to the length constraints on string literals. + // + // - STRINGTABLE resource definitions can can only provide one string literal per index. + // - STRINGTABLE strings are limited to maxInt(u16) UTF-16 code units (see 'string_len_in_utf16_code_units' + // above), which means that the maximum number of bytes per string literal is + // 2 * maxInt(u16) = 131,070 (since there are 2 bytes per UTF-16 code unit). + // - Each Block/RT_STRING resource includes exactly 16 strings and each have a 2 byte + // length field, so the maximum number of total bytes in a RT_STRING resource's data is + // 16 * (131,070 + 2) = 2,097,152 which is well within the u32 max. + // + // Note: The string literal maximum length is enforced by the lexer. 
+ const data_size: u32 = @intCast(data_buffer.items.len); + + const header = Compiler.ResourceHeader{ + .name_value = .{ .ordinal = block_id }, + .type_value = .{ .ordinal = @intFromEnum(res.RT.STRING) }, + .memory_flags = self.memory_flags, + .language = language, + .version = self.version, + .characteristics = self.characteristics, + .data_size = data_size, + }; + // The only variable parts of the header are name and type, which in this case + // we fully control and know are numbers, so they have a fixed size. + try header.writeAssertNoOverflow(writer); + + var data_fbs = std.io.fixedBufferStream(data_buffer.items); + try Compiler.writeResourceData(writer, data_fbs.reader(), data_size); + } + }; + + pub fn deinit(self: *StringTable, allocator: Allocator) void { + var it = self.blocks.iterator(); + while (it.next()) |entry| { + entry.value_ptr.strings.deinit(allocator); + } + self.blocks.deinit(allocator); + } + + const SetError = error{StringAlreadyDefined} || Allocator.Error; + + pub fn set( + self: *StringTable, + allocator: Allocator, + id: u16, + string_token: Token, + node: *Node, + source: []const u8, + code_page_lookup: *const CodePageLookup, + version: u32, + characteristics: u32, + ) SetError!void { + const block_id = (id / 16) + 1; + const string_index: u8 = @intCast(id & 0xF); + + var get_or_put_result = try self.blocks.getOrPut(allocator, block_id); + if (!get_or_put_result.found_existing) { + get_or_put_result.value_ptr.* = Block{ .version = version, .characteristics = characteristics }; + get_or_put_result.value_ptr.applyAttributes(node.cast(.string_table).?, source, code_page_lookup); + } else { + if (get_or_put_result.value_ptr.set_indexes.isSet(string_index)) { + return error.StringAlreadyDefined; + } + } + + var block = get_or_put_result.value_ptr; + if (block.getInsertionIndex(string_index)) |insertion_index| { + try block.strings.insert(allocator, insertion_index, string_token); + } else { + try block.strings.append(allocator, string_token); + } + block.set_indexes.set(string_index); + } + + pub fn get(self: *StringTable, id: u16) ?Token { + const block_id = (id / 16) + 1; + const string_index: u8 = @intCast(id & 0xF); + + const block = self.blocks.getPtr(block_id) orelse return null; + const token_index = block.getTokenIndex(string_index) orelse return null; + return block.strings.items[token_index]; + } + + pub fn dump(self: *StringTable) !void { + var it = self.iterator(); + while (it.next()) |entry| { + std.debug.print("block: {}\n", .{entry.key_ptr.*}); + entry.value_ptr.dump(); + } + } +}; + +test "StringTable" { + const S = struct { + fn makeDummyToken(id: usize) Token { + return Token{ + .id = .invalid, + .start = id, + .end = id, + .line_number = id, + }; + } + }; + const allocator = std.testing.allocator; + var string_table = StringTable{}; + defer string_table.deinit(allocator); + + var code_page_lookup = CodePageLookup.init(allocator, .windows1252); + defer code_page_lookup.deinit(); + + var dummy_node = Node.StringTable{ + .type = S.makeDummyToken(0), + .common_resource_attributes = &.{}, + .optional_statements = &.{}, + .begin_token = S.makeDummyToken(0), + .strings = &.{}, + .end_token = S.makeDummyToken(0), + }; + + // randomize an array of ids 0-99 + var ids = ids: { + var buf: [100]u16 = undefined; + var i: u16 = 0; + while (i < buf.len) : (i += 1) { + buf[i] = i; + } + break :ids buf; + }; + var prng = std.rand.DefaultPrng.init(0); + var random = prng.random(); + random.shuffle(u16, &ids); + + // set each one in the randomized order + for (ids) 
|id| { + try string_table.set(allocator, id, S.makeDummyToken(id), &dummy_node.base, "", &code_page_lookup, 0, 0); + } + + // make sure each one exists and is the right value when gotten + var id: u16 = 0; + while (id < 100) : (id += 1) { + const dummy = S.makeDummyToken(id); + try std.testing.expectError(error.StringAlreadyDefined, string_table.set(allocator, id, dummy, &dummy_node.base, "", &code_page_lookup, 0, 0)); + try std.testing.expectEqual(dummy, string_table.get(id).?); + } + + // make sure non-existent string ids are not found + try std.testing.expectEqual(@as(?Token, null), string_table.get(100)); +} diff --git a/src/resinator/errors.zig b/src/resinator/errors.zig new file mode 100644 index 000000000000..33cb19682b32 --- /dev/null +++ b/src/resinator/errors.zig @@ -0,0 +1,1033 @@ +const std = @import("std"); +const Token = @import("lex.zig").Token; +const SourceMappings = @import("source_mapping.zig").SourceMappings; +const utils = @import("utils.zig"); +const rc = @import("rc.zig"); +const res = @import("res.zig"); +const ico = @import("ico.zig"); +const bmp = @import("bmp.zig"); +const parse = @import("parse.zig"); +const CodePage = @import("code_pages.zig").CodePage; + +pub const Diagnostics = struct { + errors: std.ArrayListUnmanaged(ErrorDetails) = .{}, + /// Append-only, cannot handle removing strings. + /// Expects to own all strings within the list. + strings: std.ArrayListUnmanaged([]const u8) = .{}, + allocator: std.mem.Allocator, + + pub fn init(allocator: std.mem.Allocator) Diagnostics { + return .{ + .allocator = allocator, + }; + } + + pub fn deinit(self: *Diagnostics) void { + self.errors.deinit(self.allocator); + for (self.strings.items) |str| { + self.allocator.free(str); + } + self.strings.deinit(self.allocator); + } + + pub fn append(self: *Diagnostics, error_details: ErrorDetails) !void { + try self.errors.append(self.allocator, error_details); + } + + const SmallestStringIndexType = std.meta.Int(.unsigned, @min( + @bitSizeOf(ErrorDetails.FileOpenError.FilenameStringIndex), + @min( + @bitSizeOf(ErrorDetails.IconReadError.FilenameStringIndex), + @bitSizeOf(ErrorDetails.BitmapReadError.FilenameStringIndex), + ), + )); + + /// Returns the index of the added string as the SmallestStringIndexType + /// in order to avoid needing to `@intCast` it at callsites of putString. + /// Instead, this function will error if the index would ever exceed the + /// smallest FilenameStringIndex of an ErrorDetails type. 
+ pub fn putString(self: *Diagnostics, str: []const u8) !SmallestStringIndexType { + if (self.strings.items.len >= std.math.maxInt(SmallestStringIndexType)) { + return error.OutOfMemory; // ran out of string indexes + } + const dupe = try self.allocator.dupe(u8, str); + const index = self.strings.items.len; + try self.strings.append(self.allocator, dupe); + return @intCast(index); + } + + pub fn renderToStdErr(self: *Diagnostics, cwd: std.fs.Dir, source: []const u8, tty_config: std.io.tty.Config, source_mappings: ?SourceMappings) void { + std.debug.getStderrMutex().lock(); + defer std.debug.getStderrMutex().unlock(); + const stderr = std.io.getStdErr().writer(); + for (self.errors.items) |err_details| { + renderErrorMessage(self.allocator, stderr, tty_config, cwd, err_details, source, self.strings.items, source_mappings) catch return; + } + } + + pub fn renderToStdErrDetectTTY(self: *Diagnostics, cwd: std.fs.Dir, source: []const u8, source_mappings: ?SourceMappings) void { + const tty_config = std.io.tty.detectConfig(std.io.getStdErr()); + return self.renderToStdErr(cwd, source, tty_config, source_mappings); + } + + pub fn contains(self: *const Diagnostics, err: ErrorDetails.Error) bool { + for (self.errors.items) |details| { + if (details.err == err) return true; + } + return false; + } + + pub fn containsAny(self: *const Diagnostics, errors: []const ErrorDetails.Error) bool { + for (self.errors.items) |details| { + for (errors) |err| { + if (details.err == err) return true; + } + } + return false; + } +}; + +/// Contains enough context to append errors/warnings/notes etc +pub const DiagnosticsContext = struct { + diagnostics: *Diagnostics, + token: Token, +}; + +pub const ErrorDetails = struct { + err: Error, + token: Token, + /// If non-null, should be before `token`. If null, `token` is assumed to be the start. + token_span_start: ?Token = null, + /// If non-null, should be after `token`. If null, `token` is assumed to be the end. + token_span_end: ?Token = null, + type: Type = .err, + print_source_line: bool = true, + extra: union { + none: void, + expected: Token.Id, + number: u32, + expected_types: ExpectedTypes, + resource: rc.Resource, + string_and_language: StringAndLanguage, + file_open_error: FileOpenError, + icon_read_error: IconReadError, + icon_dir: IconDirContext, + bmp_read_error: BitmapReadError, + accelerator_error: AcceleratorError, + statement_with_u16_param: StatementWithU16Param, + menu_or_class: enum { class, menu }, + } = .{ .none = {} }, + + pub const Type = enum { + /// Fatal error, stops compilation + err, + /// Warning that does not affect compilation result + warning, + /// A note that typically provides further context for a warning/error + note, + /// An invisible diagnostic that is not printed to stderr but can + /// provide information useful when comparing the behavior of different + /// implementations. For example, a hint is emitted when a FONTDIR resource + /// was included in the .RES file which is significant because rc.exe + /// does something different than us, but ultimately it's not important + /// enough to be a warning/note. 
+ hint, + }; + + comptime { + // all fields in the extra union should be 32 bits or less + for (std.meta.fields(std.meta.fieldInfo(ErrorDetails, .extra).type)) |field| { + std.debug.assert(@bitSizeOf(field.type) <= 32); + } + } + + pub const StatementWithU16Param = enum(u32) { + fileversion, + productversion, + language, + }; + + pub const StringAndLanguage = packed struct(u32) { + id: u16, + language: res.Language, + }; + + pub const FileOpenError = packed struct(u32) { + err: FileOpenErrorEnum, + filename_string_index: FilenameStringIndex, + + pub const FilenameStringIndex = std.meta.Int(.unsigned, 32 - @bitSizeOf(FileOpenErrorEnum)); + pub const FileOpenErrorEnum = std.meta.FieldEnum(std.fs.File.OpenError); + + pub fn enumFromError(err: std.fs.File.OpenError) FileOpenErrorEnum { + return switch (err) { + inline else => |e| @field(ErrorDetails.FileOpenError.FileOpenErrorEnum, @errorName(e)), + }; + } + }; + + pub const IconReadError = packed struct(u32) { + err: IconReadErrorEnum, + icon_type: enum(u1) { cursor, icon }, + filename_string_index: FilenameStringIndex, + + pub const FilenameStringIndex = std.meta.Int(.unsigned, 32 - @bitSizeOf(IconReadErrorEnum) - 1); + pub const IconReadErrorEnum = std.meta.FieldEnum(ico.ReadError); + + pub fn enumFromError(err: ico.ReadError) IconReadErrorEnum { + return switch (err) { + inline else => |e| @field(ErrorDetails.IconReadError.IconReadErrorEnum, @errorName(e)), + }; + } + }; + + pub const IconDirContext = packed struct(u32) { + icon_type: enum(u1) { cursor, icon }, + icon_format: ico.ImageFormat, + index: u16, + bitmap_version: ico.BitmapHeader.Version = .unknown, + _: Padding = 0, + + pub const Padding = std.meta.Int(.unsigned, 15 - @bitSizeOf(ico.BitmapHeader.Version) - @bitSizeOf(ico.ImageFormat)); + }; + + pub const BitmapReadError = packed struct(u32) { + err: BitmapReadErrorEnum, + filename_string_index: FilenameStringIndex, + + pub const FilenameStringIndex = std.meta.Int(.unsigned, 32 - @bitSizeOf(BitmapReadErrorEnum)); + pub const BitmapReadErrorEnum = std.meta.FieldEnum(bmp.ReadError); + + pub fn enumFromError(err: bmp.ReadError) BitmapReadErrorEnum { + return switch (err) { + inline else => |e| @field(ErrorDetails.BitmapReadError.BitmapReadErrorEnum, @errorName(e)), + }; + } + }; + + pub const BitmapUnsupportedDIB = packed struct(u32) { + dib_version: ico.BitmapHeader.Version, + filename_string_index: FilenameStringIndex, + + pub const FilenameStringIndex = std.meta.Int(.unsigned, 32 - @bitSizeOf(ico.BitmapHeader.Version)); + }; + + pub const AcceleratorError = packed struct(u32) { + err: AcceleratorErrorEnum, + _: Padding = 0, + + pub const Padding = std.meta.Int(.unsigned, 32 - @bitSizeOf(AcceleratorErrorEnum)); + pub const AcceleratorErrorEnum = std.meta.FieldEnum(res.ParseAcceleratorKeyStringError); + + pub fn enumFromError(err: res.ParseAcceleratorKeyStringError) AcceleratorErrorEnum { + return switch (err) { + inline else => |e| @field(ErrorDetails.AcceleratorError.AcceleratorErrorEnum, @errorName(e)), + }; + } + }; + + pub const ExpectedTypes = packed struct(u32) { + number: bool = false, + number_expression: bool = false, + string_literal: bool = false, + accelerator_type_or_option: bool = false, + control_class: bool = false, + literal: bool = false, + // Note: This being 0 instead of undefined is arbitrary and something of a workaround, + // see https://github.com/ziglang/zig/issues/15395 + _: u26 = 0, + + pub const strings = std.ComptimeStringMap([]const u8, .{ + .{ "number", "number" }, + .{ "number_expression", "number 
expression" }, + .{ "string_literal", "quoted string literal" }, + .{ "accelerator_type_or_option", "accelerator type or option [ASCII, VIRTKEY, etc]" }, + .{ "control_class", "control class [BUTTON, EDIT, etc]" }, + .{ "literal", "unquoted literal" }, + }); + + pub fn writeCommaSeparated(self: ExpectedTypes, writer: anytype) !void { + const struct_info = @typeInfo(ExpectedTypes).Struct; + const num_real_fields = struct_info.fields.len - 1; + const num_padding_bits = @bitSizeOf(ExpectedTypes) - num_real_fields; + const mask = std.math.maxInt(struct_info.backing_integer.?) >> num_padding_bits; + const relevant_bits_only = @as(struct_info.backing_integer.?, @bitCast(self)) & mask; + const num_set_bits = @popCount(relevant_bits_only); + + var i: usize = 0; + inline for (struct_info.fields) |field_info| { + if (field_info.type != bool) continue; + if (i == num_set_bits) return; + if (@field(self, field_info.name)) { + try writer.writeAll(strings.get(field_info.name).?); + i += 1; + if (num_set_bits > 2 and i != num_set_bits) { + try writer.writeAll(", "); + } else if (i != num_set_bits) { + try writer.writeByte(' '); + } + if (num_set_bits > 1 and i == num_set_bits - 1) { + try writer.writeAll("or "); + } + } + } + } + }; + + pub const Error = enum { + // Lexer + unfinished_string_literal, + string_literal_too_long, + invalid_number_with_exponent, + invalid_digit_character_in_number_literal, + illegal_byte, + illegal_byte_outside_string_literals, + illegal_codepoint_outside_string_literals, + illegal_byte_order_mark, + illegal_private_use_character, + found_c_style_escaped_quote, + code_page_pragma_missing_left_paren, + code_page_pragma_missing_right_paren, + code_page_pragma_invalid_code_page, + code_page_pragma_not_integer, + code_page_pragma_overflow, + code_page_pragma_unsupported_code_page, + + // Parser + unfinished_raw_data_block, + unfinished_string_table_block, + /// `expected` is populated. + expected_token, + /// `expected_types` is populated + expected_something_else, + /// `resource` is populated + resource_type_cant_use_raw_data, + /// `resource` is populated + id_must_be_ordinal, + /// `resource` is populated + name_or_id_not_allowed, + string_resource_as_numeric_type, + ascii_character_not_equivalent_to_virtual_key_code, + empty_menu_not_allowed, + rc_would_miscompile_version_value_padding, + rc_would_miscompile_version_value_byte_count, + code_page_pragma_in_included_file, + nested_resource_level_exceeds_max, + too_many_dialog_controls, + nested_expression_level_exceeds_max, + close_paren_expression, + unary_plus_expression, + rc_could_miscompile_control_params, + + // Compiler + /// `string_and_language` is populated + string_already_defined, + font_id_already_defined, + /// `file_open_error` is populated + file_open_error, + /// `accelerator_error` is populated + invalid_accelerator_key, + accelerator_type_required, + rc_would_miscompile_control_padding, + rc_would_miscompile_control_class_ordinal, + /// `icon_dir` is populated + rc_would_error_on_icon_dir, + /// `icon_dir` is populated + format_not_supported_in_icon_dir, + /// `resource` is populated and contains the expected type + icon_dir_and_resource_type_mismatch, + /// `icon_read_error` is populated + icon_read_error, + /// `icon_dir` is populated + rc_would_error_on_bitmap_version, + /// `icon_dir` is populated + max_icon_ids_exhausted, + /// `bmp_read_error` is populated + bmp_read_error, + /// `number` is populated and contains a string index for which the string contains + /// the bytes of a `u64` (native endian). 
The `u64` contains the number of ignored bytes. + bmp_ignored_palette_bytes, + /// `number` is populated and contains a string index for which the string contains + /// the bytes of a `u64` (native endian). The `u64` contains the number of missing bytes. + bmp_missing_palette_bytes, + /// `number` is populated and contains a string index for which the string contains + /// the bytes of a `u64` (native endian). The `u64` contains the number of miscompiled bytes. + rc_would_miscompile_bmp_palette_padding, + /// `number` is populated and contains a string index for which the string contains + /// the bytes of two `u64`s (native endian). The first contains the number of missing + /// palette bytes and the second contains the max number of missing palette bytes. + /// If type is `.note`, then `extra` is `none`. + bmp_too_many_missing_palette_bytes, + resource_header_size_exceeds_max, + resource_data_size_exceeds_max, + control_extra_data_size_exceeds_max, + version_node_size_exceeds_max, + fontdir_size_exceeds_max, + /// `number` is populated and contains a string index for the filename + number_expression_as_filename, + /// `number` is populated and contains the control ID that is a duplicate + control_id_already_defined, + /// `number` is populated and contains the disallowed codepoint + invalid_filename, + /// `statement_with_u16_param` is populated + rc_would_error_u16_with_l_suffix, + result_contains_fontdir, + /// `number` is populated and contains the ordinal value that the id would be miscompiled to + rc_would_miscompile_dialog_menu_id, + /// `number` is populated and contains the ordinal value that the value would be miscompiled to + rc_would_miscompile_dialog_class, + /// `menu_or_class` is populated and contains the type of the parameter statement + rc_would_miscompile_dialog_menu_or_class_id_forced_ordinal, + rc_would_miscompile_dialog_menu_id_starts_with_digit, + dialog_menu_id_was_uppercased, + /// `menu_or_class` is populated and contains the type of the parameter statement + duplicate_menu_or_class_skipped, + invalid_digit_character_in_ordinal, + + // Literals + /// `number` is populated + rc_would_miscompile_codepoint_byte_swap, + /// `number` is populated + rc_would_miscompile_codepoint_skip, + tab_converted_to_spaces, + + // General (used in various places) + /// `number` is populated and contains the value that the ordinal would have in the Win32 RC compiler implementation + win32_non_ascii_ordinal, + }; + + pub fn render(self: ErrorDetails, writer: anytype, source: []const u8, strings: []const []const u8) !void { + switch (self.err) { + .unfinished_string_literal => { + return writer.print("unfinished string literal at '{s}', expected closing '\"'", .{self.token.nameForErrorDisplay(source)}); + }, + .string_literal_too_long => { + return writer.print("string literal too long (max is currently {} characters)", .{self.extra.number}); + }, + .invalid_number_with_exponent => { + return writer.print("base 10 number literal with exponent is not allowed: {s}", .{self.token.slice(source)}); + }, + .invalid_digit_character_in_number_literal => switch (self.type) { + .err, .warning => return writer.writeAll("non-ASCII digit characters are not allowed in number literals"), + .note => return writer.writeAll("the Win32 RC compiler allows non-ASCII digit characters, but will miscompile them"), + .hint => return, + }, + .illegal_byte => { + return writer.print("character '{s}' is not allowed", .{std.fmt.fmtSliceEscapeUpper(self.token.slice(source))}); + }, + 
},
+            .illegal_byte_outside_string_literals => {
+                return writer.print("character '{s}' is not allowed outside of string literals", .{std.fmt.fmtSliceEscapeUpper(self.token.slice(source))});
+            },
+            .illegal_codepoint_outside_string_literals => {
+                // This is somewhat hacky, but we know that:
+                // - This error is only possible with codepoints outside of the Windows-1252 character range
+                // - So, the only supported code page that could generate this error is UTF-8
+                // Therefore, we just assume the token bytes are UTF-8 and decode them to get the illegal
+                // codepoint.
+                //
+                // FIXME: Support other code pages if they become relevant
+                const bytes = self.token.slice(source);
+                const codepoint = std.unicode.utf8Decode(bytes) catch unreachable;
+                return writer.print("codepoint <U+{X:0>4}> is not allowed outside of string literals", .{codepoint});
+            },
+            .illegal_byte_order_mark => {
+                return writer.writeAll("byte order mark is not allowed");
+            },
+            .illegal_private_use_character => {
+                return writer.writeAll("private use character is not allowed");
+            },
+            .found_c_style_escaped_quote => {
+                return writer.writeAll("escaping quotes with \\\" is not allowed (use \"\" instead)");
+            },
+            .code_page_pragma_missing_left_paren => {
+                return writer.writeAll("expected left parenthesis after 'code_page' in #pragma code_page");
+            },
+            .code_page_pragma_missing_right_paren => {
+                return writer.writeAll("expected right parenthesis after '' in #pragma code_page");
+            },
+            .code_page_pragma_invalid_code_page => {
+                return writer.writeAll("invalid or unknown code page in #pragma code_page");
+            },
+            .code_page_pragma_not_integer => {
+                return writer.writeAll("code page is not a valid integer in #pragma code_page");
+            },
+            .code_page_pragma_overflow => {
+                return writer.writeAll("code page too large in #pragma code_page");
+            },
+            .code_page_pragma_unsupported_code_page => {
+                // We know that the token slice is a well-formed #pragma code_page(N), so
+                // we can skip to the first ( and then get the number that follows
+                const token_slice = self.token.slice(source);
+                var number_start = std.mem.indexOfScalar(u8, token_slice, '(').? + 1;
+                while (std.ascii.isWhitespace(token_slice[number_start])) {
+                    number_start += 1;
+                }
+                var number_slice = token_slice[number_start..number_start];
+                while (std.ascii.isDigit(token_slice[number_start + number_slice.len])) {
+                    number_slice.len += 1;
+                }
+                const number = std.fmt.parseUnsigned(u16, number_slice, 10) catch unreachable;
+                const code_page = CodePage.getByIdentifier(number) catch unreachable;
+                // TODO: Improve or maybe add a note making it more clear that the code page
+                // is valid and that the code page is unsupported purely due to a limitation
+                // in this compiler.
+                return writer.print("unsupported code page '{s} (id={})' in #pragma code_page", .{ @tagName(code_page), number });
+            },
+            .unfinished_raw_data_block => {
+                return writer.print("unfinished raw data block at '{s}', expected closing '}}' or 'END'", .{self.token.nameForErrorDisplay(source)});
+            },
+            .unfinished_string_table_block => {
+                return writer.print("unfinished STRINGTABLE block at '{s}', expected closing '}}' or 'END'", .{self.token.nameForErrorDisplay(source)});
+            },
+            .expected_token => {
+                return writer.print("expected '{s}', got '{s}'", .{ self.extra.expected.nameForErrorDisplay(), self.token.nameForErrorDisplay(source) });
+            },
+            .expected_something_else => {
+                try writer.writeAll("expected ");
+                try self.extra.expected_types.writeCommaSeparated(writer);
+                return writer.print("; got '{s}'", .{self.token.nameForErrorDisplay(source)});
+            },
+            .resource_type_cant_use_raw_data => switch (self.type) {
+                .err, .warning => try writer.print("expected '<filename>', found '{s}' (resource type '{s}' can't use raw data)", .{ self.token.nameForErrorDisplay(source), self.extra.resource.nameForErrorDisplay() }),
+                .note => try writer.print("if '{s}' is intended to be a filename, it must be specified as a quoted string literal", .{self.token.nameForErrorDisplay(source)}),
+                .hint => return,
+            },
+            .id_must_be_ordinal => {
+                try writer.print("id of resource type '{s}' must be an ordinal (u16), got '{s}'", .{ self.extra.resource.nameForErrorDisplay(), self.token.nameForErrorDisplay(source) });
+            },
+            .name_or_id_not_allowed => {
+                try writer.print("name or id is not allowed for resource type '{s}'", .{self.extra.resource.nameForErrorDisplay()});
+            },
+            .string_resource_as_numeric_type => switch (self.type) {
+                .err, .warning => try writer.writeAll("the number 6 (RT_STRING) cannot be used as a resource type"),
+                .note => try writer.writeAll("using RT_STRING directly likely results in an invalid .res file, use a STRINGTABLE instead"),
+                .hint => return,
+            },
+            .ascii_character_not_equivalent_to_virtual_key_code => {
+                // TODO: Better wording? This is what the Win32 RC compiler emits.
+ // This occurs when VIRTKEY and a control code is specified ("^c", etc) + try writer.writeAll("ASCII character not equivalent to virtual key code"); + }, + .empty_menu_not_allowed => { + try writer.print("empty menu of type '{s}' not allowed", .{self.token.nameForErrorDisplay(source)}); + }, + .rc_would_miscompile_version_value_padding => switch (self.type) { + .err, .warning => return writer.print("the padding before this quoted string value would be miscompiled by the Win32 RC compiler", .{}), + .note => return writer.print("to avoid the potential miscompilation, consider adding a comma between the key and the quoted string", .{}), + .hint => return, + }, + .rc_would_miscompile_version_value_byte_count => switch (self.type) { + .err, .warning => return writer.print("the byte count of this value would be miscompiled by the Win32 RC compiler", .{}), + .note => return writer.print("to avoid the potential miscompilation, do not mix numbers and strings within a value", .{}), + .hint => return, + }, + .code_page_pragma_in_included_file => { + try writer.print("#pragma code_page is not supported in an included resource file", .{}); + }, + .nested_resource_level_exceeds_max => switch (self.type) { + .err, .warning => { + const max = switch (self.extra.resource) { + .versioninfo => parse.max_nested_version_level, + .menu, .menuex => parse.max_nested_menu_level, + else => unreachable, + }; + return writer.print("{s} contains too many nested children (max is {})", .{ self.extra.resource.nameForErrorDisplay(), max }); + }, + .note => return writer.print("max {s} nesting level exceeded here", .{self.extra.resource.nameForErrorDisplay()}), + .hint => return, + }, + .too_many_dialog_controls => switch (self.type) { + .err, .warning => return writer.print("{s} contains too many controls (max is {})", .{ self.extra.resource.nameForErrorDisplay(), std.math.maxInt(u16) }), + .note => return writer.writeAll("maximum number of controls exceeded here"), + .hint => return, + }, + .nested_expression_level_exceeds_max => switch (self.type) { + .err, .warning => return writer.print("expression contains too many syntax levels (max is {})", .{parse.max_nested_expression_level}), + .note => return writer.print("maximum expression level exceeded here", .{}), + .hint => return, + }, + .close_paren_expression => { + try writer.writeAll("the Win32 RC compiler would accept ')' as a valid expression, but it would be skipped over and potentially lead to unexpected outcomes"); + }, + .unary_plus_expression => { + try writer.writeAll("the Win32 RC compiler may accept '+' as a unary operator here, but it is not supported in this implementation; consider omitting the unary +"); + }, + .rc_could_miscompile_control_params => switch (self.type) { + .err, .warning => return writer.print("this token could be erroneously skipped over by the Win32 RC compiler", .{}), + .note => return writer.print("to avoid the potential miscompilation, consider adding a comma after the style parameter", .{}), + .hint => return, + }, + .string_already_defined => switch (self.type) { + // TODO: better printing of language, using constant names from WinNT.h + .err, .warning => return writer.print("string with id {d} (0x{X}) already defined for language {d},{d}", .{ self.extra.string_and_language.id, self.extra.string_and_language.id, self.extra.string_and_language.language.primary_language_id, self.extra.string_and_language.language.sublanguage_id }), + .note => return writer.print("previous definition of string with id {d} (0x{X}) here", .{ 
self.extra.string_and_language.id, self.extra.string_and_language.id }), + .hint => return, + }, + .font_id_already_defined => switch (self.type) { + .err => return writer.print("font with id {d} already defined", .{self.extra.number}), + .warning => return writer.print("skipped duplicate font with id {d}", .{self.extra.number}), + .note => return writer.print("previous definition of font with id {d} here", .{self.extra.number}), + .hint => return, + }, + .file_open_error => { + try writer.print("unable to open file '{s}': {s}", .{ strings[self.extra.file_open_error.filename_string_index], @tagName(self.extra.file_open_error.err) }); + }, + .invalid_accelerator_key => { + try writer.print("invalid accelerator key '{s}': {s}", .{ self.token.nameForErrorDisplay(source), @tagName(self.extra.accelerator_error.err) }); + }, + .accelerator_type_required => { + try writer.print("accelerator type [ASCII or VIRTKEY] required when key is an integer", .{}); + }, + .rc_would_miscompile_control_padding => switch (self.type) { + .err, .warning => return writer.print("the padding before this control would be miscompiled by the Win32 RC compiler (it would insert 2 extra bytes of padding)", .{}), + .note => return writer.print("to avoid the potential miscompilation, consider removing any 'control data' blocks from the controls in this dialog", .{}), + .hint => return, + }, + .rc_would_miscompile_control_class_ordinal => switch (self.type) { + .err, .warning => return writer.print("the control class of this CONTROL would be miscompiled by the Win32 RC compiler", .{}), + .note => return writer.print("to avoid the potential miscompilation, consider specifying the control class using a string (BUTTON, EDIT, etc) instead of a number", .{}), + .hint => return, + }, + .rc_would_error_on_icon_dir => switch (self.type) { + .err, .warning => return writer.print("the resource at index {} of this {s} has the format '{s}'; this would be an error in the Win32 RC compiler", .{ self.extra.icon_dir.index, @tagName(self.extra.icon_dir.icon_type), @tagName(self.extra.icon_dir.icon_format) }), + .note => { + // The only note supported is one specific to exactly this combination + if (!(self.extra.icon_dir.icon_type == .icon and self.extra.icon_dir.icon_format == .riff)) unreachable; + try writer.print("animated RIFF icons within resource groups may not be well supported, consider using an animated icon file (.ani) instead", .{}); + }, + .hint => return, + }, + .format_not_supported_in_icon_dir => { + try writer.print("resource with format '{s}' (at index {}) is not allowed in {s} resource groups", .{ @tagName(self.extra.icon_dir.icon_format), self.extra.icon_dir.index, @tagName(self.extra.icon_dir.icon_type) }); + }, + .icon_dir_and_resource_type_mismatch => { + const unexpected_type: rc.Resource = if (self.extra.resource == .icon) .cursor else .icon; + // TODO: Better wording + try writer.print("resource type '{s}' does not match type '{s}' specified in the file", .{ self.extra.resource.nameForErrorDisplay(), unexpected_type.nameForErrorDisplay() }); + }, + .icon_read_error => { + try writer.print("unable to read {s} file '{s}': {s}", .{ @tagName(self.extra.icon_read_error.icon_type), strings[self.extra.icon_read_error.filename_string_index], @tagName(self.extra.icon_read_error.err) }); + }, + .rc_would_error_on_bitmap_version => switch (self.type) { + .err => try writer.print("the DIB at index {} of this {s} is of version '{s}'; this version is no longer allowed and should be upgraded to '{s}'", .{ + 
self.extra.icon_dir.index, + @tagName(self.extra.icon_dir.icon_type), + self.extra.icon_dir.bitmap_version.nameForErrorDisplay(), + ico.BitmapHeader.Version.@"nt3.1".nameForErrorDisplay(), + }), + .warning => try writer.print("the DIB at index {} of this {s} is of version '{s}'; this would be an error in the Win32 RC compiler", .{ + self.extra.icon_dir.index, + @tagName(self.extra.icon_dir.icon_type), + self.extra.icon_dir.bitmap_version.nameForErrorDisplay(), + }), + .note => unreachable, + .hint => return, + }, + .max_icon_ids_exhausted => switch (self.type) { + .err, .warning => try writer.print("maximum global icon/cursor ids exhausted (max is {})", .{std.math.maxInt(u16) - 1}), + .note => try writer.print("maximum icon/cursor id exceeded at index {} of this {s}", .{ self.extra.icon_dir.index, @tagName(self.extra.icon_dir.icon_type) }), + .hint => return, + }, + .bmp_read_error => { + try writer.print("invalid bitmap file '{s}': {s}", .{ strings[self.extra.bmp_read_error.filename_string_index], @tagName(self.extra.bmp_read_error.err) }); + }, + .bmp_ignored_palette_bytes => { + const bytes = strings[self.extra.number]; + const ignored_bytes = std.mem.readIntNative(u64, bytes[0..8]); + try writer.print("bitmap has {d} extra bytes preceding the pixel data which will be ignored", .{ignored_bytes}); + }, + .bmp_missing_palette_bytes => { + const bytes = strings[self.extra.number]; + const missing_bytes = std.mem.readIntNative(u64, bytes[0..8]); + try writer.print("bitmap has {d} missing color palette bytes which will be padded with zeroes", .{missing_bytes}); + }, + .rc_would_miscompile_bmp_palette_padding => { + const bytes = strings[self.extra.number]; + const miscompiled_bytes = std.mem.readIntNative(u64, bytes[0..8]); + try writer.print("the missing color palette bytes would be miscompiled by the Win32 RC compiler (the added padding bytes would include {d} bytes of the pixel data)", .{miscompiled_bytes}); + }, + .bmp_too_many_missing_palette_bytes => switch (self.type) { + .err, .warning => { + const bytes = strings[self.extra.number]; + const missing_bytes = std.mem.readIntNative(u64, bytes[0..8]); + const max_missing_bytes = std.mem.readIntNative(u64, bytes[8..16]); + try writer.print("bitmap has {} missing color palette bytes which exceeds the maximum of {}", .{ missing_bytes, max_missing_bytes }); + }, + // TODO: command line option + .note => try writer.writeAll("the maximum number of missing color palette bytes is configurable via <>"), + .hint => return, + }, + .resource_header_size_exceeds_max => { + try writer.print("resource's header length exceeds maximum of {} bytes", .{std.math.maxInt(u32)}); + }, + .resource_data_size_exceeds_max => switch (self.type) { + .err, .warning => return writer.print("resource's data length exceeds maximum of {} bytes", .{std.math.maxInt(u32)}), + .note => return writer.print("maximum data length exceeded here", .{}), + .hint => return, + }, + .control_extra_data_size_exceeds_max => switch (self.type) { + .err, .warning => try writer.print("control data length exceeds maximum of {} bytes", .{std.math.maxInt(u16)}), + .note => return writer.print("maximum control data length exceeded here", .{}), + .hint => return, + }, + .version_node_size_exceeds_max => switch (self.type) { + .err, .warning => return writer.print("version node tree size exceeds maximum of {} bytes", .{std.math.maxInt(u16)}), + .note => return writer.print("maximum tree size exceeded while writing this child", .{}), + .hint => return, + }, + .fontdir_size_exceeds_max => switch 
(self.type) {
+                .err, .warning => return writer.print("FONTDIR data length exceeds maximum of {} bytes", .{std.math.maxInt(u32)}),
+                .note => return writer.writeAll("this is likely due to the size of the combined lengths of the device/face names of all FONT resources"),
+                .hint => return,
+            },
+            .number_expression_as_filename => switch (self.type) {
+                .err, .warning => return writer.writeAll("filename cannot be specified using a number expression, consider using a quoted string instead"),
+                .note => return writer.print("the Win32 RC compiler would evaluate this number expression as the filename '{s}'", .{strings[self.extra.number]}),
+                .hint => return,
+            },
+            .control_id_already_defined => switch (self.type) {
+                .err, .warning => return writer.print("control with id {d} already defined for this dialog", .{self.extra.number}),
+                .note => return writer.print("previous definition of control with id {d} here", .{self.extra.number}),
+                .hint => return,
+            },
+            .invalid_filename => {
+                const disallowed_codepoint = self.extra.number;
+                if (disallowed_codepoint < 128 and std.ascii.isPrint(@intCast(disallowed_codepoint))) {
+                    try writer.print("evaluated filename contains a disallowed character: '{c}'", .{@as(u8, @intCast(disallowed_codepoint))});
+                } else {
+                    try writer.print("evaluated filename contains a disallowed codepoint: <U+{X:0>4}>", .{disallowed_codepoint});
+                }
+            },
+            .rc_would_error_u16_with_l_suffix => switch (self.type) {
+                .err, .warning => return writer.print("this {s} parameter would be an error in the Win32 RC compiler", .{@tagName(self.extra.statement_with_u16_param)}),
+                .note => return writer.writeAll("to avoid the error, remove any L suffixes from numbers within the parameter"),
+                .hint => return,
+            },
+            .result_contains_fontdir => return,
+            .rc_would_miscompile_dialog_menu_id => switch (self.type) {
+                .err, .warning => return writer.print("the id of this menu would be miscompiled by the Win32 RC compiler", .{}),
+                .note => return writer.print("the Win32 RC compiler would evaluate the id as the ordinal/number value {d}", .{self.extra.number}),
+                .hint => return,
+            },
+            .rc_would_miscompile_dialog_class => switch (self.type) {
+                .err, .warning => return writer.print("this class would be miscompiled by the Win32 RC compiler", .{}),
+                .note => return writer.print("the Win32 RC compiler would evaluate it as the ordinal/number value {d}", .{self.extra.number}),
+                .hint => return,
+            },
+            .rc_would_miscompile_dialog_menu_or_class_id_forced_ordinal => switch (self.type) {
+                .err, .warning => return,
+                .note => return writer.print("to avoid the potential miscompilation, only specify one {s} per dialog resource", .{@tagName(self.extra.menu_or_class)}),
+                .hint => return,
+            },
+            .rc_would_miscompile_dialog_menu_id_starts_with_digit => switch (self.type) {
+                .err, .warning => return,
+                .note => return writer.writeAll("to avoid the potential miscompilation, the first character of the id should not be a digit"),
+                .hint => return,
+            },
+            .dialog_menu_id_was_uppercased => return,
+            .duplicate_menu_or_class_skipped => {
+                return writer.print("this {s} was ignored; when multiple {s} statements are specified, only the last takes precedence", .{
+                    @tagName(self.extra.menu_or_class),
+                    @tagName(self.extra.menu_or_class),
+                });
+            },
+            .invalid_digit_character_in_ordinal => {
+                return writer.writeAll("non-ASCII digit characters are not allowed in ordinal (number) values");
+            },
+            .rc_would_miscompile_codepoint_byte_swap => switch (self.type) {
+                .err, .warning => return writer.print("codepoint U+{X} within a 
string literal would be miscompiled by the Win32 RC compiler (the bytes of the UTF-16 code unit would be swapped)", .{self.extra.number}), + .note => return writer.print("to avoid the potential miscompilation, an integer escape sequence in a wide string literal could be used instead: L\"\\x{X}\"", .{self.extra.number}), + .hint => return, + }, + .rc_would_miscompile_codepoint_skip => switch (self.type) { + .err, .warning => return writer.print("codepoint U+{X} within a string literal would be miscompiled by the Win32 RC compiler (the codepoint would be missing from the compiled resource)", .{self.extra.number}), + .note => return writer.print("to avoid the potential miscompilation, an integer escape sequence in a wide string literal could be used instead: L\"\\x{X}\"", .{self.extra.number}), + .hint => return, + }, + .tab_converted_to_spaces => switch (self.type) { + .err, .warning => return writer.writeAll("the tab character(s) in this string will be converted into a variable number of spaces (determined by the column of the tab character in the .rc file)"), + .note => return writer.writeAll("to include the tab character itself in a string, the escape sequence \\t should be used"), + .hint => return, + }, + .win32_non_ascii_ordinal => switch (self.type) { + .err, .warning => unreachable, + .note => return writer.print("the Win32 RC compiler would accept this as an ordinal but its value would be {}", .{self.extra.number}), + .hint => return, + }, + } + } + + pub const VisualTokenInfo = struct { + before_len: usize, + point_offset: usize, + after_len: usize, + }; + + pub fn visualTokenInfo(self: ErrorDetails, source_line_start: usize, source_line_end: usize) VisualTokenInfo { + // Note: A perfect solution here would involve full grapheme cluster + // awareness, but oh well. This will give incorrect offsets + // if there are any multibyte codepoints within the relevant span, + // and even more inflated for grapheme clusters. + // + // We mitigate this slightly when we know we'll be pointing at + // something that displays as 1 character. + return switch (self.err) { + // These can technically be more than 1 byte depending on encoding, + // but they always refer to one visual character/grapheme. 
+ .illegal_byte, + .illegal_byte_outside_string_literals, + .illegal_codepoint_outside_string_literals, + .illegal_byte_order_mark, + .illegal_private_use_character, + => .{ + .before_len = 0, + .point_offset = self.token.start - source_line_start, + .after_len = 0, + }, + else => .{ + .before_len = before: { + const start = @max(source_line_start, if (self.token_span_start) |span_start| span_start.start else self.token.start); + break :before self.token.start - start; + }, + .point_offset = self.token.start - source_line_start, + .after_len = after: { + const end = @min(source_line_end, if (self.token_span_end) |span_end| span_end.end else self.token.end); + if (end == self.token.start) break :after 0; + break :after end - self.token.start - 1; + }, + }, + }; + } +}; + +pub fn renderErrorMessage(allocator: std.mem.Allocator, writer: anytype, tty_config: std.io.tty.Config, cwd: std.fs.Dir, err_details: ErrorDetails, source: []const u8, strings: []const []const u8, source_mappings: ?SourceMappings) !void { + if (err_details.type == .hint) return; + + const source_line_start = err_details.token.getLineStart(source); + const column = err_details.token.calculateColumn(source, 1, source_line_start); + + // var counting_writer_container = std.io.countingWriter(writer); + // const counting_writer = counting_writer_container.writer(); + + const corresponding_span: ?SourceMappings.SourceSpan = if (source_mappings) |mappings| mappings.get(err_details.token.line_number) else null; + const corresponding_file: ?[]const u8 = if (source_mappings) |mappings| mappings.files.get(corresponding_span.?.filename_offset) else null; + + const err_line = if (corresponding_span) |span| span.start_line else err_details.token.line_number; + + try tty_config.setColor(writer, .bold); + if (corresponding_file) |file| { + try writer.writeAll(file); + } else { + try tty_config.setColor(writer, .dim); + try writer.writeAll(""); + try tty_config.setColor(writer, .reset); + try tty_config.setColor(writer, .bold); + } + try writer.print(":{d}:{d}: ", .{ err_line, column }); + switch (err_details.type) { + .err => { + try tty_config.setColor(writer, .red); + try writer.writeAll("error: "); + }, + .warning => { + try tty_config.setColor(writer, .yellow); + try writer.writeAll("warning: "); + }, + .note => { + try tty_config.setColor(writer, .cyan); + try writer.writeAll("note: "); + }, + .hint => unreachable, + } + try tty_config.setColor(writer, .reset); + try tty_config.setColor(writer, .bold); + try err_details.render(writer, source, strings); + try writer.writeByte('\n'); + try tty_config.setColor(writer, .reset); + + if (!err_details.print_source_line) { + try writer.writeByte('\n'); + return; + } + + const source_line = err_details.token.getLine(source, source_line_start); + const visual_info = err_details.visualTokenInfo(source_line_start, source_line_start + source_line.len); + + // Need this to determine if the 'line originated from' note is worth printing + var source_line_for_display_buf = try std.ArrayList(u8).initCapacity(allocator, source_line.len); + defer source_line_for_display_buf.deinit(); + try writeSourceSlice(source_line_for_display_buf.writer(), source_line); + + // TODO: General handling of long lines, not tied to this specific error + if (err_details.err == .string_literal_too_long) { + const before_slice = source_line[0..@min(source_line.len, visual_info.point_offset + 16)]; + try writeSourceSlice(writer, before_slice); + try tty_config.setColor(writer, .dim); + try 
writer.writeAll("<...truncated...>"); + try tty_config.setColor(writer, .reset); + } else { + try writer.writeAll(source_line_for_display_buf.items); + } + try writer.writeByte('\n'); + + try tty_config.setColor(writer, .green); + const num_spaces = visual_info.point_offset - visual_info.before_len; + try writer.writeByteNTimes(' ', num_spaces); + try writer.writeByteNTimes('~', visual_info.before_len); + try writer.writeByte('^'); + if (visual_info.after_len > 0) { + var num_squiggles = visual_info.after_len; + if (err_details.err == .string_literal_too_long) { + num_squiggles = @min(num_squiggles, 15); + } + try writer.writeByteNTimes('~', num_squiggles); + } + try writer.writeByte('\n'); + try tty_config.setColor(writer, .reset); + + if (source_mappings) |_| { + var corresponding_lines = try CorrespondingLines.init(allocator, cwd, err_details, source_line_for_display_buf.items, corresponding_span.?, corresponding_file.?); + defer corresponding_lines.deinit(allocator); + + if (!corresponding_lines.worth_printing_note) return; + + try tty_config.setColor(writer, .bold); + if (corresponding_file) |file| { + try writer.writeAll(file); + } else { + try tty_config.setColor(writer, .dim); + try writer.writeAll(""); + try tty_config.setColor(writer, .reset); + try tty_config.setColor(writer, .bold); + } + try writer.print(":{d}:{d}: ", .{ err_line, column }); + try tty_config.setColor(writer, .cyan); + try writer.writeAll("note: "); + try tty_config.setColor(writer, .reset); + try tty_config.setColor(writer, .bold); + try writer.writeAll("this line originated from line"); + if (corresponding_span.?.start_line != corresponding_span.?.end_line) { + try writer.print("s {}-{}", .{ corresponding_span.?.start_line, corresponding_span.?.end_line }); + } else { + try writer.print(" {}", .{corresponding_span.?.start_line}); + } + try writer.print(" of file '{s}'\n", .{corresponding_file.?}); + try tty_config.setColor(writer, .reset); + + if (!corresponding_lines.worth_printing_lines) return; + + if (corresponding_lines.lines_is_error_message) { + try tty_config.setColor(writer, .red); + try writer.writeAll(" | "); + try tty_config.setColor(writer, .reset); + try tty_config.setColor(writer, .dim); + try writer.writeAll(corresponding_lines.lines.items); + try tty_config.setColor(writer, .reset); + try writer.writeAll("\n\n"); + return; + } + + try writer.writeAll(corresponding_lines.lines.items); + try writer.writeAll("\n\n"); + } +} + +const CorrespondingLines = struct { + worth_printing_note: bool = true, + worth_printing_lines: bool = true, + lines: std.ArrayListUnmanaged(u8) = .{}, + lines_is_error_message: bool = false, + + pub fn init(allocator: std.mem.Allocator, cwd: std.fs.Dir, err_details: ErrorDetails, lines_for_comparison: []const u8, corresponding_span: SourceMappings.SourceSpan, corresponding_file: []const u8) !CorrespondingLines { + var corresponding_lines = CorrespondingLines{}; + + // We don't do line comparison for this error, so don't print the note if the line + // number is different + if (err_details.err == .string_literal_too_long and err_details.token.line_number == corresponding_span.start_line) { + corresponding_lines.worth_printing_note = false; + return corresponding_lines; + } + + // Don't print the originating line for this error, we know it's really long + if (err_details.err == .string_literal_too_long) { + corresponding_lines.worth_printing_lines = false; + return corresponding_lines; + } + + var writer = corresponding_lines.lines.writer(allocator); + if 
(utils.openFileNotDir(cwd, corresponding_file, .{})) |file| { + defer file.close(); + var buffered_reader = std.io.bufferedReader(file.reader()); + writeLinesFromStream(writer, buffered_reader.reader(), corresponding_span.start_line, corresponding_span.end_line) catch |err| switch (err) { + error.LinesNotFound => { + corresponding_lines.lines.clearRetainingCapacity(); + try writer.print("unable to print line(s) from file: {s}", .{@errorName(err)}); + corresponding_lines.lines_is_error_message = true; + return corresponding_lines; + }, + else => |e| return e, + }; + } else |err| { + corresponding_lines.lines.clearRetainingCapacity(); + try writer.print("unable to print line(s) from file: {s}", .{@errorName(err)}); + corresponding_lines.lines_is_error_message = true; + return corresponding_lines; + } + + // If the lines are the same as they were before preprocessing, skip printing the note entirely + if (std.mem.eql(u8, lines_for_comparison, corresponding_lines.lines.items)) { + corresponding_lines.worth_printing_note = false; + } + return corresponding_lines; + } + + pub fn deinit(self: *CorrespondingLines, allocator: std.mem.Allocator) void { + self.lines.deinit(allocator); + } +}; + +fn writeSourceSlice(writer: anytype, slice: []const u8) !void { + for (slice) |c| try writeSourceByte(writer, c); +} + +inline fn writeSourceByte(writer: anytype, byte: u8) !void { + switch (byte) { + '\x00'...'\x08', '\x0E'...'\x1F', '\x7F' => try writer.writeAll("�"), + // \r is seemingly ignored by the RC compiler so skipping it when printing source lines + // could help avoid confusing output (e.g. RC\rDATA if printed verbatim would show up + // in the console as DATA but the compiler reads it as RCDATA) + // + // NOTE: This is irrelevant when using the clang preprocessor, because unpaired \r + // characters get converted to \n, but may become relevant if another + // preprocessor is used instead. + '\r' => {}, + '\t', '\x0B', '\x0C' => try writer.writeByte(' '), + else => try writer.writeByte(byte), + } +} + +pub fn writeLinesFromStream(writer: anytype, input: anytype, start_line: usize, end_line: usize) !void { + var line_num: usize = 1; + while (try readByteOrEof(input)) |byte| { + switch (byte) { + '\n' => { + if (line_num == end_line) return; + if (line_num >= start_line) try writeSourceByte(writer, byte); + line_num += 1; + }, + else => { + if (line_num >= start_line) try writeSourceByte(writer, byte); + }, + } + } + if (line_num != end_line) { + return error.LinesNotFound; + } +} + +pub fn readByteOrEof(reader: anytype) !?u8 { + return reader.readByte() catch |err| switch (err) { + error.EndOfStream => return null, + else => |e| return e, + }; +} diff --git a/src/resinator/ico.zig b/src/resinator/ico.zig new file mode 100644 index 000000000000..205f5a0e594a --- /dev/null +++ b/src/resinator/ico.zig @@ -0,0 +1,310 @@ +//! https://devblogs.microsoft.com/oldnewthing/20120720-00/?p=7083 +//! https://learn.microsoft.com/en-us/previous-versions/ms997538(v=msdn.10) +//! https://learn.microsoft.com/en-us/windows/win32/menurc/newheader +//! https://learn.microsoft.com/en-us/windows/win32/menurc/resdir +//! 
https://learn.microsoft.com/en-us/windows/win32/menurc/localheader + +const std = @import("std"); + +pub const ReadError = std.mem.Allocator.Error || error{ InvalidHeader, InvalidImageType, ImpossibleDataSize, UnexpectedEOF, ReadError }; + +pub fn read(allocator: std.mem.Allocator, reader: anytype, max_size: u64) ReadError!IconDir { + // Some Reader implementations have an empty ReadError error set which would + // cause 'unreachable else' if we tried to use an else in the switch, so we + // need to detect this case and not try to translate to ReadError + const empty_reader_errorset = @typeInfo(@TypeOf(reader).Error).ErrorSet == null or @typeInfo(@TypeOf(reader).Error).ErrorSet.?.len == 0; + if (empty_reader_errorset) { + return readAnyError(allocator, reader, max_size) catch |err| switch (err) { + error.EndOfStream => error.UnexpectedEOF, + else => |e| return e, + }; + } else { + return readAnyError(allocator, reader, max_size) catch |err| switch (err) { + error.OutOfMemory, + error.InvalidHeader, + error.InvalidImageType, + error.ImpossibleDataSize, + => |e| return e, + error.EndOfStream => error.UnexpectedEOF, + // The remaining errors are dependent on the `reader`, so + // we just translate them all to generic ReadError + else => error.ReadError, + }; + } +} + +// TODO: This seems like a somewhat strange pattern, could be a better way +// to do this. Maybe it makes more sense to handle the translation +// at the call site instead of having a helper function here. +pub fn readAnyError(allocator: std.mem.Allocator, reader: anytype, max_size: u64) !IconDir { + const reserved = try reader.readIntLittle(u16); + if (reserved != 0) { + return error.InvalidHeader; + } + + const image_type = reader.readEnum(ImageType, .Little) catch |err| switch (err) { + error.InvalidValue => return error.InvalidImageType, + else => |e| return e, + }; + + const num_images = try reader.readIntLittle(u16); + + // To avoid over-allocation in the case of a file that says it has way more + // entries than it actually does, we use an ArrayList with a conservatively + // limited initial capacity instead of allocating the entire slice at once. + const initial_capacity = @min(num_images, 8); + var entries = try std.ArrayList(Entry).initCapacity(allocator, initial_capacity); + errdefer entries.deinit(); + + var i: usize = 0; + while (i < num_images) : (i += 1) { + var entry: Entry = undefined; + entry.width = try reader.readByte(); + entry.height = try reader.readByte(); + entry.num_colors = try reader.readByte(); + entry.reserved = try reader.readByte(); + switch (image_type) { + .icon => { + entry.type_specific_data = .{ .icon = .{ + .color_planes = try reader.readIntLittle(u16), + .bits_per_pixel = try reader.readIntLittle(u16), + } }; + }, + .cursor => { + entry.type_specific_data = .{ .cursor = .{ + .hotspot_x = try reader.readIntLittle(u16), + .hotspot_y = try reader.readIntLittle(u16), + } }; + }, + } + entry.data_size_in_bytes = try reader.readIntLittle(u32); + entry.data_offset_from_start_of_file = try reader.readIntLittle(u32); + // Validate that the offset/data size is feasible + if (@as(u64, entry.data_offset_from_start_of_file) + entry.data_size_in_bytes > max_size) { + return error.ImpossibleDataSize; + } + // and that the data size is large enough for at least the header of an image + // Note: This avoids needing to deal with a miscompilation from the Win32 RC + // compiler when the data size of an image is specified as zero but there + // is data to-be-read at the offset. 
The Win32 RC compiler will output + // an ICON/CURSOR resource with a bogus size in its header but with no actual + // data bytes in it, leading to an invalid .res. Similarly, if, for example, + // there is valid PNG data at the image's offset, but the size is specified + // as fewer bytes than the PNG header, then the Win32 RC compiler will still + // treat it as a PNG (e.g. unconditionally set num_planes to 1) but the data + // of the resource will only be 1 byte so treating it as a PNG doesn't make + // sense (especially not when you have to read past the data size to determine + // that it's a PNG). + if (entry.data_size_in_bytes < 16) { + return error.ImpossibleDataSize; + } + try entries.append(entry); + } + + return .{ + .image_type = image_type, + .entries = try entries.toOwnedSlice(), + .allocator = allocator, + }; +} + +pub const ImageType = enum(u16) { + icon = 1, + cursor = 2, +}; + +pub const IconDir = struct { + image_type: ImageType, + /// Note: entries.len will always fit into a u16, since the field containing the + /// number of images in an ico file is a u16. + entries: []Entry, + allocator: std.mem.Allocator, + + pub fn deinit(self: IconDir) void { + self.allocator.free(self.entries); + } + + pub const res_header_byte_len = 6; + + pub fn getResDataSize(self: IconDir) u32 { + // maxInt(u16) * Entry.res_byte_len = 917,490 which is well within the u32 range. + // Note: self.entries.len is limited to maxInt(u16) + return @intCast(IconDir.res_header_byte_len + self.entries.len * Entry.res_byte_len); + } + + pub fn writeResData(self: IconDir, writer: anytype, first_image_id: u16) !void { + try writer.writeIntLittle(u16, 0); + try writer.writeIntLittle(u16, @intFromEnum(self.image_type)); + // We know that entries.len must fit into a u16 + try writer.writeIntLittle(u16, @as(u16, @intCast(self.entries.len))); + + var image_id = first_image_id; + for (self.entries) |entry| { + try entry.writeResData(writer, image_id); + image_id += 1; + } + } +}; + +pub const Entry = struct { + // Icons are limited to u8 sizes, cursors can have u16, + // so we store as u16 and truncate when needed. 
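+    // (writeResData below writes the full u16 width/height for cursor entries and
+    // truncates them to u8 for icon entries.)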
+ width: u16, + height: u16, + num_colors: u8, + /// This should always be zero, but whatever value it is gets + /// carried over so we need to store it + reserved: u8, + type_specific_data: union(ImageType) { + icon: struct { + color_planes: u16, + bits_per_pixel: u16, + }, + cursor: struct { + hotspot_x: u16, + hotspot_y: u16, + }, + }, + data_size_in_bytes: u32, + data_offset_from_start_of_file: u32, + + pub const res_byte_len = 14; + + pub fn writeResData(self: Entry, writer: anytype, id: u16) !void { + switch (self.type_specific_data) { + .icon => |icon_data| { + try writer.writeIntLittle(u8, @as(u8, @truncate(self.width))); + try writer.writeIntLittle(u8, @as(u8, @truncate(self.height))); + try writer.writeIntLittle(u8, self.num_colors); + try writer.writeIntLittle(u8, self.reserved); + try writer.writeIntLittle(u16, icon_data.color_planes); + try writer.writeIntLittle(u16, icon_data.bits_per_pixel); + try writer.writeIntLittle(u32, self.data_size_in_bytes); + }, + .cursor => |cursor_data| { + try writer.writeIntLittle(u16, self.width); + try writer.writeIntLittle(u16, self.height); + try writer.writeIntLittle(u16, cursor_data.hotspot_x); + try writer.writeIntLittle(u16, cursor_data.hotspot_y); + try writer.writeIntLittle(u32, self.data_size_in_bytes + 4); + }, + } + try writer.writeIntLittle(u16, id); + } +}; + +test "icon" { + const data = "\x00\x00\x01\x00\x01\x00\x10\x10\x00\x00\x01\x00\x10\x00\x10\x00\x00\x00\x16\x00\x00\x00" ++ [_]u8{0} ** 16; + var fbs = std.io.fixedBufferStream(data); + const icon = try read(std.testing.allocator, fbs.reader(), data.len); + defer icon.deinit(); + + try std.testing.expectEqual(ImageType.icon, icon.image_type); + try std.testing.expectEqual(@as(usize, 1), icon.entries.len); +} + +test "icon too many images" { + // Note that with verifying that all data sizes are within the file bounds and >= 16, + // it's not possible to hit EOF when looking for more RESDIR structures, since they are + // themselves 16 bytes long, so we'll always hit ImpossibleDataSize instead. 
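+    // Here the header claims 2 images but only one RESDIR entry (plus its data) follows,
+    // so the "second" entry is read from the trailing zeroes and fails the data size check.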
+ const data = "\x00\x00\x01\x00\x02\x00\x10\x10\x00\x00\x01\x00\x10\x00\x10\x00\x00\x00\x16\x00\x00\x00" ++ [_]u8{0} ** 16; + var fbs = std.io.fixedBufferStream(data); + try std.testing.expectError(error.ImpossibleDataSize, read(std.testing.allocator, fbs.reader(), data.len)); +} + +test "icon data size past EOF" { + const data = "\x00\x00\x01\x00\x01\x00\x10\x10\x00\x00\x01\x00\x10\x00\x10\x01\x00\x00\x16\x00\x00\x00" ++ [_]u8{0} ** 16; + var fbs = std.io.fixedBufferStream(data); + try std.testing.expectError(error.ImpossibleDataSize, read(std.testing.allocator, fbs.reader(), data.len)); +} + +test "icon data offset past EOF" { + const data = "\x00\x00\x01\x00\x01\x00\x10\x10\x00\x00\x01\x00\x10\x00\x10\x00\x00\x00\x17\x00\x00\x00" ++ [_]u8{0} ** 16; + var fbs = std.io.fixedBufferStream(data); + try std.testing.expectError(error.ImpossibleDataSize, read(std.testing.allocator, fbs.reader(), data.len)); +} + +test "icon data size too small" { + const data = "\x00\x00\x01\x00\x01\x00\x10\x10\x00\x00\x01\x00\x10\x00\x0F\x00\x00\x00\x16\x00\x00\x00"; + var fbs = std.io.fixedBufferStream(data); + try std.testing.expectError(error.ImpossibleDataSize, read(std.testing.allocator, fbs.reader(), data.len)); +} + +pub const ImageFormat = enum { + dib, + png, + riff, + + const riff_header = std.mem.readIntNative(u32, "RIFF"); + const png_signature = std.mem.readIntNative(u64, "\x89PNG\r\n\x1a\n"); + const ihdr_code = std.mem.readIntNative(u32, "IHDR"); + const acon_form_type = std.mem.readIntNative(u32, "ACON"); + + pub fn detect(header_bytes: *const [16]u8) ImageFormat { + if (std.mem.readIntNative(u32, header_bytes[0..4]) == riff_header) return .riff; + if (std.mem.readIntNative(u64, header_bytes[0..8]) == png_signature) return .png; + return .dib; + } + + pub fn validate(format: ImageFormat, header_bytes: *const [16]u8) bool { + return switch (format) { + .png => std.mem.readIntNative(u32, header_bytes[12..16]) == ihdr_code, + .riff => std.mem.readIntNative(u32, header_bytes[8..12]) == acon_form_type, + .dib => true, + }; + } +}; + +/// Contains only the fields of BITMAPINFOHEADER (WinGDI.h) that are both: +/// - relevant to what we need, and +/// - are shared between all versions of BITMAPINFOHEADER (V4, V5). 
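+/// (Later header versions add fields such as compression and color masks, but only the
+/// five fields below are stored here.)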
+pub const BitmapHeader = extern struct { + bcSize: u32, + bcWidth: i32, + bcHeight: i32, + bcPlanes: u16, + bcBitCount: u16, + + pub fn version(self: *const BitmapHeader) Version { + return Version.get(self.bcSize); + } + + /// https://en.wikipedia.org/wiki/BMP_file_format#DIB_header_(bitmap_information_header) + pub const Version = enum { + unknown, + @"win2.0", // Windows 2.0 or later + @"nt3.1", // Windows NT, 3.1x or later + @"nt4.0", // Windows NT 4.0, 95 or later + @"nt5.0", // Windows NT 5.0, 98 or later + + pub fn get(header_size: u32) Version { + return switch (header_size) { + len(.@"win2.0") => .@"win2.0", + len(.@"nt3.1") => .@"nt3.1", + len(.@"nt4.0") => .@"nt4.0", + len(.@"nt5.0") => .@"nt5.0", + else => .unknown, + }; + } + + pub fn len(comptime v: Version) comptime_int { + return switch (v) { + .@"win2.0" => 12, + .@"nt3.1" => 40, + .@"nt4.0" => 108, + .@"nt5.0" => 124, + .unknown => unreachable, + }; + } + + pub fn nameForErrorDisplay(v: Version) []const u8 { + return switch (v) { + .unknown => "unknown", + .@"win2.0" => "Windows 2.0 (BITMAPCOREHEADER)", + .@"nt3.1" => "Windows NT, 3.1x (BITMAPINFOHEADER)", + .@"nt4.0" => "Windows NT 4.0, 95 (BITMAPV4HEADER)", + .@"nt5.0" => "Windows NT 5.0, 98 (BITMAPV5HEADER)", + }; + } + }; +}; diff --git a/src/resinator/lang.zig b/src/resinator/lang.zig new file mode 100644 index 000000000000..d43380fa052b --- /dev/null +++ b/src/resinator/lang.zig @@ -0,0 +1,877 @@ +const std = @import("std"); + +/// This function is specific to how the Win32 RC command line interprets +/// language IDs specified as integers. +/// - Always interpreted as hexadecimal, but explicit 0x prefix is also allowed +/// - Wraps on overflow of u16 +/// - Stops parsing on any invalid hexadecimal digits +/// - Errors if a digit is not the first char +/// - `-` (negative) prefix is allowed +pub fn parseInt(str: []const u8) error{InvalidLanguageId}!u16 { + var result: u16 = 0; + const radix: u8 = 16; + var buf = str; + + const Prefix = enum { none, minus }; + var prefix: Prefix = .none; + switch (buf[0]) { + '-' => { + prefix = .minus; + buf = buf[1..]; + }, + else => {}, + } + + if (buf.len > 2 and buf[0] == '0' and buf[1] == 'x') { + buf = buf[2..]; + } + + for (buf, 0..) 
|c, i| { + const digit = switch (c) { + // On invalid digit for the radix, just stop parsing but don't fail + 'a'...'f', 'A'...'F', '0'...'9' => std.fmt.charToDigit(c, radix) catch break, + else => { + // First digit must be valid + if (i == 0) { + return error.InvalidLanguageId; + } + break; + }, + }; + + if (result != 0) { + result *%= radix; + } + result +%= digit; + } + + switch (prefix) { + .none => {}, + .minus => result = 0 -% result, + } + + return result; +} + +test parseInt { + try std.testing.expectEqual(@as(u16, 0x16), try parseInt("16")); + try std.testing.expectEqual(@as(u16, 0x1a), try parseInt("0x1A")); + try std.testing.expectEqual(@as(u16, 0x1a), try parseInt("0x1Azzzz")); + try std.testing.expectEqual(@as(u16, 0xffff), try parseInt("-1")); + try std.testing.expectEqual(@as(u16, 0xffea), try parseInt("-0x16")); + try std.testing.expectEqual(@as(u16, 0x0), try parseInt("0o100")); + try std.testing.expectEqual(@as(u16, 0x1), try parseInt("10001")); + try std.testing.expectError(error.InvalidLanguageId, parseInt("--1")); + try std.testing.expectError(error.InvalidLanguageId, parseInt("0xha")); + try std.testing.expectError(error.InvalidLanguageId, parseInt("¹")); + try std.testing.expectError(error.InvalidLanguageId, parseInt("~1")); +} + +/// This function is specific to how the Win32 RC command line interprets +/// language tags: invalid tags are rejected, but tags that don't have +/// a specific assigned ID but are otherwise valid enough will get +/// converted to an ID of LOCALE_CUSTOM_UNSPECIFIED. +pub fn tagToInt(tag: []const u8) error{InvalidLanguageTag}!u16 { + const maybe_id = try tagToId(tag); + if (maybe_id) |id| { + return @intFromEnum(id); + } else { + return LOCALE_CUSTOM_UNSPECIFIED; + } +} + +pub fn tagToId(tag: []const u8) error{InvalidLanguageTag}!?LanguageId { + const parsed = try parse(tag); + // There are currently no language tags with assigned IDs that have + // multiple suffixes, so we can skip the lookup. + if (parsed.multiple_suffixes) return null; + const longest_known_tag = comptime blk: { + var len = 0; + for (@typeInfo(LanguageId).Enum.fields) |field| { + if (field.name.len > len) len = field.name.len; + } + break :blk len; + }; + // If the tag is longer than the longest tag that has an assigned ID, + // then we can skip the lookup. + if (tag.len > longest_known_tag) return null; + var normalized_buf: [longest_known_tag]u8 = undefined; + // To allow e.g. `de-de_phoneb` to get looked up as `de-de`, we need to + // omit the suffix, but only if the tag contains a valid alternate sort order. + var tag_to_normalize = if (parsed.isSuffixValidSortOrder()) tag[0 .. tag.len - (parsed.suffix.?.len + 1)] else tag; + const normalized_tag = normalizeTag(tag_to_normalize, &normalized_buf); + return std.meta.stringToEnum(LanguageId, normalized_tag) orelse { + // special case for a tag that has been mapped to the same ID + // twice. 
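+        // (`ff-Latn-NG` and `ff-NG` are both assigned 0x0467 in MS-LCID, but the enum can
+        // only contain one of them, so the Latin-script spelling is mapped to `ff_ng` here.)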
+ if (std.mem.eql(u8, "ff_latn_ng", normalized_tag)) { + return LanguageId.ff_ng; + } + return null; + }; +} + +test tagToId { + try std.testing.expectEqual(LanguageId.ar_ae, (try tagToId("ar-ae")).?); + try std.testing.expectEqual(LanguageId.ar_ae, (try tagToId("AR_AE")).?); + try std.testing.expectEqual(LanguageId.ff_ng, (try tagToId("ff-ng")).?); + // Special case + try std.testing.expectEqual(LanguageId.ff_ng, (try tagToId("ff-Latn-NG")).?); +} + +test "exhaustive tagToId" { + inline for (@typeInfo(LanguageId).Enum.fields) |field| { + const id = tagToId(field.name) catch |err| { + std.debug.print("tag: {s}\n", .{field.name}); + return err; + }; + try std.testing.expectEqual(@field(LanguageId, field.name), id orelse { + std.debug.print("tag: {s}, got null\n", .{field.name}); + return error.TestExpectedEqual; + }); + } + var buf: [32]u8 = undefined; + inline for (valid_alternate_sorts) |parsed_sort| { + var fbs = std.io.fixedBufferStream(&buf); + const writer = fbs.writer(); + writer.writeAll(parsed_sort.language_code) catch unreachable; + writer.writeAll("-") catch unreachable; + writer.writeAll(parsed_sort.country_code.?) catch unreachable; + writer.writeAll("-") catch unreachable; + writer.writeAll(parsed_sort.suffix.?) catch unreachable; + const expected_field_name = comptime field: { + var name_buf: [5]u8 = undefined; + std.mem.copy(u8, &name_buf, parsed_sort.language_code); + name_buf[2] = '_'; + std.mem.copy(u8, name_buf[3..], parsed_sort.country_code.?); + break :field name_buf; + }; + const expected = @field(LanguageId, &expected_field_name); + const id = tagToId(fbs.getWritten()) catch |err| { + std.debug.print("tag: {s}\n", .{fbs.getWritten()}); + return err; + }; + try std.testing.expectEqual(expected, id orelse { + std.debug.print("tag: {s}, expected: {}, got null\n", .{ fbs.getWritten(), expected }); + return error.TestExpectedEqual; + }); + } +} + +fn normalizeTag(tag: []const u8, buf: []u8) []u8 { + std.debug.assert(buf.len >= tag.len); + for (tag, 0..) |c, i| { + if (c == '-') + buf[i] = '_' + else + buf[i] = std.ascii.toLower(c); + } + return buf[0..tag.len]; +} + +/// https://winprotocoldoc.blob.core.windows.net/productionwindowsarchives/MS-LCID/%5bMS-LCID%5d.pdf#%5B%7B%22num%22%3A72%2C%22gen%22%3A0%7D%2C%7B%22name%22%3A%22XYZ%22%7D%2C69%2C574%2C0%5D +/// "When an LCID is requested for a locale without a +/// permanent LCID assignment, nor a temporary +/// assignment as above, the protocol will respond +/// with LOCALE_CUSTOM_UNSPECIFIED for all such +/// locales. Because this single value is used for +/// numerous possible locale names, it is impossible to +/// round trip this locale, even temporarily. +/// Applications should discard this value as soon as +/// possible and never persist it. If the system is +/// forced to respond to a request for +/// LCID_CUSTOM_UNSPECIFIED, it will fall back to +/// the current user locale. This is often incorrect but +/// may prevent an application or component from +/// failing. As the meaning of this temporary LCID is +/// unstable, it should never be used for interchange +/// or persisted data. This is a 1-to-many relationship +/// that is very unstable." 
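+///
+/// (See tagToInt above: any tag that parses but has no assigned ID ends up as this value.)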
+pub const LOCALE_CUSTOM_UNSPECIFIED = 0x1000;
+
+pub const LANG_ENGLISH = 0x09;
+pub const SUBLANG_ENGLISH_US = 0x01;
+
+/// https://learn.microsoft.com/en-us/windows/win32/intl/language-identifiers
+/// (the sublanguage ID occupies the upper 6 bits, the primary language ID the lower 10 bits)
+pub fn MAKELANGID(primary: u10, sublang: u6) u16 {
+    return (@as(u16, sublang) << 10) | primary;
+}
+
+/// Language tag format expressed as a regular expression (rough approximation):
+///
+/// [a-zA-Z]{1,3}([-_][a-zA-Z]{4})?([-_][a-zA-Z]{2})?([-_][a-zA-Z0-9]{1,8})?
+///     lang    |      script     |     country     |        suffix
+///
+/// Notes:
+/// - If lang code is 1 char, it seems to mean that everything afterwards uses suffix
+///   parsing rules (e.g. `a-0` and `a-00000000` are allowed).
+/// - There can also be any number of trailing suffix parts as long as they each
+///   would be a valid suffix part, e.g. `en-us-blah-blah1-blah2-blah3` is allowed.
+/// - When doing lookups, trailing suffix parts are taken into account, e.g.
+///   `ca-es-valencia` is not considered equivalent to `ca-es-valencia-blah`.
+/// - A suffix is only allowed if:
+///   + Lang code is 1 char long, or
+///   + A country code is present, or
+///   + A script tag is not present and:
+///     - the suffix is numeric-only and has a length of 3, or
+///     - the lang is `qps` and the suffix is `ploca` or `plocm`
+pub fn parse(lang_tag: []const u8) error{InvalidLanguageTag}!Parsed {
+    var it = std.mem.splitAny(u8, lang_tag, "-_");
+    const lang_code = it.first();
+    const is_valid_lang_code = lang_code.len >= 1 and lang_code.len <= 3 and isAllAlphabetic(lang_code);
+    if (!is_valid_lang_code) return error.InvalidLanguageTag;
+    var parsed = Parsed{
+        .language_code = lang_code,
+    };
+    // The second part could be a script tag, a country code, or a suffix
+    if (it.next()) |part_str| {
+        // The lang code being length 1 behaves strangely, so fully special case it.
+        if (lang_code.len == 1) {
+            // This is almost certainly not the 'right' way to do this, but I don't have a method
+            // to determine how exactly these language tags are parsed, and it seems like
+            // suffix parsing rules apply generally (digits allowed, length of 1 to 8).
+            //
+            // However, because we want to be able to lookup `x-iv-mathan` normally without
+            // `multiple_suffixes` being set to true, we need to make sure to treat two-length
+            // alphabetic parts as a country code.
+            if (part_str.len == 2 and isAllAlphabetic(part_str)) {
+                parsed.country_code = part_str;
+            }
+            // Everything else, though, we can just throw into the suffix as long as the normal
+            // rules apply.
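+            // (This is the path that allows e.g. `a-0` and `a-00000000` from the doc comment
+            // above to parse successfully; see the 1-char lang code cases in `test "parse"` below.)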
+ else if (part_str.len > 0 and part_str.len <= 8 and isAllAlphanumeric(part_str)) { + parsed.suffix = part_str; + } else { + return error.InvalidLanguageTag; + } + } else if (part_str.len == 4 and isAllAlphabetic(part_str)) { + parsed.script_tag = part_str; + } else if (part_str.len == 2 and isAllAlphabetic(part_str)) { + parsed.country_code = part_str; + } + // Only a 3-len numeric suffix is allowed as the second part of a tag + else if (part_str.len == 3 and isAllNumeric(part_str)) { + parsed.suffix = part_str; + } + // Special case for qps-ploca and qps-plocm + else if (std.ascii.eqlIgnoreCase(lang_code, "qps") and + (std.ascii.eqlIgnoreCase(part_str, "ploca") or + std.ascii.eqlIgnoreCase(part_str, "plocm"))) + { + parsed.suffix = part_str; + } else { + return error.InvalidLanguageTag; + } + } else { + // If there's no part besides a 1-len lang code, then it is malformed + if (lang_code.len == 1) return error.InvalidLanguageTag; + return parsed; + } + if (parsed.script_tag != null) { + if (it.next()) |part_str| { + if (part_str.len == 2 and isAllAlphabetic(part_str)) { + parsed.country_code = part_str; + } else { + // Suffix is not allowed when a country code is not present. + return error.InvalidLanguageTag; + } + } else { + return parsed; + } + } + // We've now parsed any potential script tag/country codes, so anything remaining + // is a suffix + while (it.next()) |part_str| { + if (part_str.len == 0 or part_str.len > 8 or !isAllAlphanumeric(part_str)) { + return error.InvalidLanguageTag; + } + if (parsed.suffix == null) { + parsed.suffix = part_str; + } else { + // In theory we could return early here but we still want to validate + // that each part is a valid suffix all the way to the end, e.g. + // we should reject `en-us-suffix-a-b-c-!!!` because of the invalid `!!!` + // suffix part. + parsed.multiple_suffixes = true; + } + } + return parsed; +} + +pub const Parsed = struct { + language_code: []const u8, + script_tag: ?[]const u8 = null, + country_code: ?[]const u8 = null, + /// Can be a sort order (e.g. phoneb) or something like valencia, 001, etc + suffix: ?[]const u8 = null, + /// There can be any number of suffixes, but we don't need to care what their + /// values are, we just need to know if any exist so that e.g. `ca-es-valencia-blah` + /// can be seen as different from `ca-es-valencia`. Storing this as a bool + /// allows us to avoid needing either (a) dynamic allocation or (b) a limit to + /// the number of suffixes allowed when parsing. + multiple_suffixes: bool = false, + + pub fn isSuffixValidSortOrder(self: Parsed) bool { + if (self.country_code == null) return false; + if (self.suffix == null) return false; + if (self.script_tag != null) return false; + if (self.multiple_suffixes) return false; + for (valid_alternate_sorts) |valid_sort| { + if (std.ascii.eqlIgnoreCase(valid_sort.language_code, self.language_code) and + std.ascii.eqlIgnoreCase(valid_sort.country_code.?, self.country_code.?) and + std.ascii.eqlIgnoreCase(valid_sort.suffix.?, self.suffix.?)) + { + return true; + } + } + return false; + } +}; + +/// https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-lcid/70feba9f-294e-491e-b6eb-56532684c37f +/// See the table following this text: "Alternate sorts can be selected by using one of the identifiers from the following table." +const valid_alternate_sorts = [_]Parsed{ + // Note: x-IV-mathan is omitted due to how lookups are implemented. + // This table is used to make e.g. 
`de-de_phoneb` get looked up + // as `de-de` (the suffix is omitted for the lookup), but x-iv-mathan + // instead needs to be looked up with the suffix included because + // `x-iv` is not a tag with an assigned ID. + .{ .language_code = "de", .country_code = "de", .suffix = "phoneb" }, + .{ .language_code = "hu", .country_code = "hu", .suffix = "tchncl" }, + .{ .language_code = "ka", .country_code = "ge", .suffix = "modern" }, + .{ .language_code = "zh", .country_code = "cn", .suffix = "stroke" }, + .{ .language_code = "zh", .country_code = "sg", .suffix = "stroke" }, + .{ .language_code = "zh", .country_code = "mo", .suffix = "stroke" }, + .{ .language_code = "zh", .country_code = "tw", .suffix = "pronun" }, + .{ .language_code = "zh", .country_code = "tw", .suffix = "radstr" }, + .{ .language_code = "ja", .country_code = "jp", .suffix = "radstr" }, + .{ .language_code = "zh", .country_code = "hk", .suffix = "radstr" }, + .{ .language_code = "zh", .country_code = "mo", .suffix = "radstr" }, + .{ .language_code = "zh", .country_code = "cn", .suffix = "phoneb" }, + .{ .language_code = "zh", .country_code = "sg", .suffix = "phoneb" }, +}; + +test "parse" { + try std.testing.expectEqualDeep(Parsed{ + .language_code = "en", + }, try parse("en")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "en", + .country_code = "us", + }, try parse("en-us")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "en", + .suffix = "123", + }, try parse("en-123")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "en", + .suffix = "123", + .multiple_suffixes = true, + }, try parse("en-123-blah")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "en", + .country_code = "us", + .suffix = "123", + .multiple_suffixes = true, + }, try parse("en-us_123-blah")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "eng", + .script_tag = "Latn", + }, try parse("eng-Latn")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "eng", + .script_tag = "Latn", + }, try parse("eng-Latn")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "ff", + .script_tag = "Latn", + .country_code = "NG", + }, try parse("ff-Latn-NG")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "qps", + .suffix = "Plocm", + }, try parse("qps-Plocm")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "qps", + .suffix = "ploca", + }, try parse("qps-ploca")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "x", + .country_code = "IV", + .suffix = "mathan", + }, try parse("x-IV-mathan")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "a", + .suffix = "a", + }, try parse("a-a")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "a", + .suffix = "000", + }, try parse("a-000")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "a", + .suffix = "00000000", + }, try parse("a-00000000")); + // suffix not allowed if script tag is present without country code + try std.testing.expectError(error.InvalidLanguageTag, parse("eng-Latn-suffix")); + // suffix must be 3 numeric digits if neither script tag nor country code is present + try std.testing.expectError(error.InvalidLanguageTag, parse("eng-suffix")); + try std.testing.expectError(error.InvalidLanguageTag, parse("en-plocm")); + // 1-len lang code is not allowed if it's the only part + try std.testing.expectError(error.InvalidLanguageTag, parse("e")); +} + +fn isAllAlphabetic(str: []const u8) bool { + for (str) |c| { + if (!std.ascii.isAlphabetic(c)) return 
false; + } + return true; +} + +fn isAllAlphanumeric(str: []const u8) bool { + for (str) |c| { + if (!std.ascii.isAlphanumeric(c)) return false; + } + return true; +} + +fn isAllNumeric(str: []const u8) bool { + for (str) |c| { + if (!std.ascii.isDigit(c)) return false; + } + return true; +} + +/// Derived from https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-lcid/70feba9f-294e-491e-b6eb-56532684c37f +/// - Protocol Revision: 15.0 +/// - Language / Language ID / Language Tag table in Appendix A +/// - Removed all rows that have Language ID 0x1000 (LOCALE_CUSTOM_UNSPECIFIED) +/// - Normalized each language tag (lowercased, replaced all `-` with `_`) +/// - There is one special case where two tags are mapped to the same ID, the following +/// has been omitted and must be special cased during lookup to map to the ID ff_ng / 0x0467. +/// ff_latn_ng = 0x0467, // Fulah (Latin), Nigeria +/// - x_iv_mathan has been added which is not in the table but does appear in the Alternate sorts +/// table as 0x007F (LANG_INVARIANT). +pub const LanguageId = enum(u16) { + // Language tag = Language ID, // Language, Location (or type) + af = 0x0036, // Afrikaans + af_za = 0x0436, // Afrikaans, South Africa + sq = 0x001C, // Albanian + sq_al = 0x041C, // Albanian, Albania + gsw = 0x0084, // Alsatian + gsw_fr = 0x0484, // Alsatian, France + am = 0x005E, // Amharic + am_et = 0x045E, // Amharic, Ethiopia + ar = 0x0001, // Arabic + ar_dz = 0x1401, // Arabic, Algeria + ar_bh = 0x3C01, // Arabic, Bahrain + ar_eg = 0x0c01, // Arabic, Egypt + ar_iq = 0x0801, // Arabic, Iraq + ar_jo = 0x2C01, // Arabic, Jordan + ar_kw = 0x3401, // Arabic, Kuwait + ar_lb = 0x3001, // Arabic, Lebanon + ar_ly = 0x1001, // Arabic, Libya + ar_ma = 0x1801, // Arabic, Morocco + ar_om = 0x2001, // Arabic, Oman + ar_qa = 0x4001, // Arabic, Qatar + ar_sa = 0x0401, // Arabic, Saudi Arabia + ar_sy = 0x2801, // Arabic, Syria + ar_tn = 0x1C01, // Arabic, Tunisia + ar_ae = 0x3801, // Arabic, U.A.E. 
+ ar_ye = 0x2401, // Arabic, Yemen + hy = 0x002B, // Armenian + hy_am = 0x042B, // Armenian, Armenia + as = 0x004D, // Assamese + as_in = 0x044D, // Assamese, India + az_cyrl = 0x742C, // Azerbaijani (Cyrillic) + az_cyrl_az = 0x082C, // Azerbaijani (Cyrillic), Azerbaijan + az = 0x002C, // Azerbaijani (Latin) + az_latn = 0x782C, // Azerbaijani (Latin) + az_latn_az = 0x042C, // Azerbaijani (Latin), Azerbaijan + bn = 0x0045, // Bangla + bn_bd = 0x0845, // Bangla, Bangladesh + bn_in = 0x0445, // Bangla, India + ba = 0x006D, // Bashkir + ba_ru = 0x046D, // Bashkir, Russia + eu = 0x002D, // Basque + eu_es = 0x042D, // Basque, Spain + be = 0x0023, // Belarusian + be_by = 0x0423, // Belarusian, Belarus + bs_cyrl = 0x641A, // Bosnian (Cyrillic) + bs_cyrl_ba = 0x201A, // Bosnian (Cyrillic), Bosnia and Herzegovina + bs_latn = 0x681A, // Bosnian (Latin) + bs = 0x781A, // Bosnian (Latin) + bs_latn_ba = 0x141A, // Bosnian (Latin), Bosnia and Herzegovina + br = 0x007E, // Breton + br_fr = 0x047E, // Breton, France + bg = 0x0002, // Bulgarian + bg_bg = 0x0402, // Bulgarian, Bulgaria + my = 0x0055, // Burmese + my_mm = 0x0455, // Burmese, Myanmar + ca = 0x0003, // Catalan + ca_es = 0x0403, // Catalan, Spain + tzm_arab_ma = 0x045F, // Central Atlas Tamazight (Arabic), Morocco + ku = 0x0092, // Central Kurdish + ku_arab = 0x7c92, // Central Kurdish + ku_arab_iq = 0x0492, // Central Kurdish, Iraq + chr = 0x005C, // Cherokee + chr_cher = 0x7c5C, // Cherokee + chr_cher_us = 0x045C, // Cherokee, United States + zh_hans = 0x0004, // Chinese (Simplified) + zh = 0x7804, // Chinese (Simplified) + zh_cn = 0x0804, // Chinese (Simplified), People's Republic of China + zh_sg = 0x1004, // Chinese (Simplified), Singapore + zh_hant = 0x7C04, // Chinese (Traditional) + zh_hk = 0x0C04, // Chinese (Traditional), Hong Kong S.A.R. + zh_mo = 0x1404, // Chinese (Traditional), Macao S.A.R. 
+ zh_tw = 0x0404, // Chinese (Traditional), Taiwan + co = 0x0083, // Corsican + co_fr = 0x0483, // Corsican, France + hr = 0x001A, // Croatian + hr_hr = 0x041A, // Croatian, Croatia + hr_ba = 0x101A, // Croatian (Latin), Bosnia and Herzegovina + cs = 0x0005, // Czech + cs_cz = 0x0405, // Czech, Czech Republic + da = 0x0006, // Danish + da_dk = 0x0406, // Danish, Denmark + prs = 0x008C, // Dari + prs_af = 0x048C, // Dari, Afghanistan + dv = 0x0065, // Divehi + dv_mv = 0x0465, // Divehi, Maldives + nl = 0x0013, // Dutch + nl_be = 0x0813, // Dutch, Belgium + nl_nl = 0x0413, // Dutch, Netherlands + dz_bt = 0x0C51, // Dzongkha, Bhutan + en = 0x0009, // English + en_au = 0x0C09, // English, Australia + en_bz = 0x2809, // English, Belize + en_ca = 0x1009, // English, Canada + en_029 = 0x2409, // English, Caribbean + en_hk = 0x3C09, // English, Hong Kong + en_in = 0x4009, // English, India + en_ie = 0x1809, // English, Ireland + en_jm = 0x2009, // English, Jamaica + en_my = 0x4409, // English, Malaysia + en_nz = 0x1409, // English, New Zealand + en_ph = 0x3409, // English, Republic of the Philippines + en_sg = 0x4809, // English, Singapore + en_za = 0x1C09, // English, South Africa + en_tt = 0x2c09, // English, Trinidad and Tobago + en_ae = 0x4C09, // English, United Arab Emirates + en_gb = 0x0809, // English, United Kingdom + en_us = 0x0409, // English, United States + en_zw = 0x3009, // English, Zimbabwe + et = 0x0025, // Estonian + et_ee = 0x0425, // Estonian, Estonia + fo = 0x0038, // Faroese + fo_fo = 0x0438, // Faroese, Faroe Islands + fil = 0x0064, // Filipino + fil_ph = 0x0464, // Filipino, Philippines + fi = 0x000B, // Finnish + fi_fi = 0x040B, // Finnish, Finland + fr = 0x000C, // French + fr_be = 0x080C, // French, Belgium + fr_cm = 0x2c0C, // French, Cameroon + fr_ca = 0x0c0C, // French, Canada + fr_029 = 0x1C0C, // French, Caribbean + fr_cd = 0x240C, // French, Congo, DRC + fr_ci = 0x300C, // French, Côte d'Ivoire + fr_fr = 0x040C, // French, France + fr_ht = 0x3c0C, // French, Haiti + fr_lu = 0x140C, // French, Luxembourg + fr_ml = 0x340C, // French, Mali + fr_ma = 0x380C, // French, Morocco + fr_mc = 0x180C, // French, Principality of Monaco + fr_re = 0x200C, // French, Reunion + fr_sn = 0x280C, // French, Senegal + fr_ch = 0x100C, // French, Switzerland + fy = 0x0062, // Frisian + fy_nl = 0x0462, // Frisian, Netherlands + ff = 0x0067, // Fulah + ff_latn = 0x7C67, // Fulah (Latin) + ff_ng = 0x0467, // Fulah, Nigeria + ff_latn_sn = 0x0867, // Fulah, Senegal + gl = 0x0056, // Galician + gl_es = 0x0456, // Galician, Spain + ka = 0x0037, // Georgian + ka_ge = 0x0437, // Georgian, Georgia + de = 0x0007, // German + de_at = 0x0C07, // German, Austria + de_de = 0x0407, // German, Germany + de_li = 0x1407, // German, Liechtenstein + de_lu = 0x1007, // German, Luxembourg + de_ch = 0x0807, // German, Switzerland + el = 0x0008, // Greek + el_gr = 0x0408, // Greek, Greece + kl = 0x006F, // Greenlandic + kl_gl = 0x046F, // Greenlandic, Greenland + gn = 0x0074, // Guarani + gn_py = 0x0474, // Guarani, Paraguay + gu = 0x0047, // Gujarati + gu_in = 0x0447, // Gujarati, India + ha = 0x0068, // Hausa (Latin) + ha_latn = 0x7C68, // Hausa (Latin) + ha_latn_ng = 0x0468, // Hausa (Latin), Nigeria + haw = 0x0075, // Hawaiian + haw_us = 0x0475, // Hawaiian, United States + he = 0x000D, // Hebrew + he_il = 0x040D, // Hebrew, Israel + hi = 0x0039, // Hindi + hi_in = 0x0439, // Hindi, India + hu = 0x000E, // Hungarian + hu_hu = 0x040E, // Hungarian, Hungary + is = 0x000F, // Icelandic + is_is = 0x040F, // 
Icelandic, Iceland + ig = 0x0070, // Igbo + ig_ng = 0x0470, // Igbo, Nigeria + id = 0x0021, // Indonesian + id_id = 0x0421, // Indonesian, Indonesia + iu = 0x005D, // Inuktitut (Latin) + iu_latn = 0x7C5D, // Inuktitut (Latin) + iu_latn_ca = 0x085D, // Inuktitut (Latin), Canada + iu_cans = 0x785D, // Inuktitut (Syllabics) + iu_cans_ca = 0x045d, // Inuktitut (Syllabics), Canada + ga = 0x003C, // Irish + ga_ie = 0x083C, // Irish, Ireland + it = 0x0010, // Italian + it_it = 0x0410, // Italian, Italy + it_ch = 0x0810, // Italian, Switzerland + ja = 0x0011, // Japanese + ja_jp = 0x0411, // Japanese, Japan + kn = 0x004B, // Kannada + kn_in = 0x044B, // Kannada, India + kr_latn_ng = 0x0471, // Kanuri (Latin), Nigeria + ks = 0x0060, // Kashmiri + ks_arab = 0x0460, // Kashmiri, Perso-Arabic + ks_deva_in = 0x0860, // Kashmiri (Devanagari), India + kk = 0x003F, // Kazakh + kk_kz = 0x043F, // Kazakh, Kazakhstan + km = 0x0053, // Khmer + km_kh = 0x0453, // Khmer, Cambodia + quc = 0x0086, // K'iche + quc_latn_gt = 0x0486, // K'iche, Guatemala + rw = 0x0087, // Kinyarwanda + rw_rw = 0x0487, // Kinyarwanda, Rwanda + sw = 0x0041, // Kiswahili + sw_ke = 0x0441, // Kiswahili, Kenya + kok = 0x0057, // Konkani + kok_in = 0x0457, // Konkani, India + ko = 0x0012, // Korean + ko_kr = 0x0412, // Korean, Korea + ky = 0x0040, // Kyrgyz + ky_kg = 0x0440, // Kyrgyz, Kyrgyzstan + lo = 0x0054, // Lao + lo_la = 0x0454, // Lao, Lao P.D.R. + la_va = 0x0476, // Latin, Vatican City + lv = 0x0026, // Latvian + lv_lv = 0x0426, // Latvian, Latvia + lt = 0x0027, // Lithuanian + lt_lt = 0x0427, // Lithuanian, Lithuania + dsb = 0x7C2E, // Lower Sorbian + dsb_de = 0x082E, // Lower Sorbian, Germany + lb = 0x006E, // Luxembourgish + lb_lu = 0x046E, // Luxembourgish, Luxembourg + mk = 0x002F, // Macedonian + mk_mk = 0x042F, // Macedonian, North Macedonia + ms = 0x003E, // Malay + ms_bn = 0x083E, // Malay, Brunei Darussalam + ms_my = 0x043E, // Malay, Malaysia + ml = 0x004C, // Malayalam + ml_in = 0x044C, // Malayalam, India + mt = 0x003A, // Maltese + mt_mt = 0x043A, // Maltese, Malta + mi = 0x0081, // Maori + mi_nz = 0x0481, // Maori, New Zealand + arn = 0x007A, // Mapudungun + arn_cl = 0x047A, // Mapudungun, Chile + mr = 0x004E, // Marathi + mr_in = 0x044E, // Marathi, India + moh = 0x007C, // Mohawk + moh_ca = 0x047C, // Mohawk, Canada + mn = 0x0050, // Mongolian (Cyrillic) + mn_cyrl = 0x7850, // Mongolian (Cyrillic) + mn_mn = 0x0450, // Mongolian (Cyrillic), Mongolia + mn_mong = 0x7C50, // Mongolian (Traditional Mongolian) + mn_mong_cn = 0x0850, // Mongolian (Traditional Mongolian), People's Republic of China + mn_mong_mn = 0x0C50, // Mongolian (Traditional Mongolian), Mongolia + ne = 0x0061, // Nepali + ne_in = 0x0861, // Nepali, India + ne_np = 0x0461, // Nepali, Nepal + no = 0x0014, // Norwegian (Bokmal) + nb = 0x7C14, // Norwegian (Bokmal) + nb_no = 0x0414, // Norwegian (Bokmal), Norway + nn = 0x7814, // Norwegian (Nynorsk) + nn_no = 0x0814, // Norwegian (Nynorsk), Norway + oc = 0x0082, // Occitan + oc_fr = 0x0482, // Occitan, France + @"or" = 0x0048, // Odia + or_in = 0x0448, // Odia, India + om = 0x0072, // Oromo + om_et = 0x0472, // Oromo, Ethiopia + ps = 0x0063, // Pashto + ps_af = 0x0463, // Pashto, Afghanistan + fa = 0x0029, // Persian + fa_ir = 0x0429, // Persian, Iran + pl = 0x0015, // Polish + pl_pl = 0x0415, // Polish, Poland + pt = 0x0016, // Portuguese + pt_br = 0x0416, // Portuguese, Brazil + pt_pt = 0x0816, // Portuguese, Portugal + qps_ploca = 0x05FE, // Pseudo Language, Pseudo locale for east Asian/complex 
script localization testing + qps_ploc = 0x0501, // Pseudo Language, Pseudo locale used for localization testing + qps_plocm = 0x09FF, // Pseudo Language, Pseudo locale used for localization testing of mirrored locales + pa = 0x0046, // Punjabi + pa_arab = 0x7C46, // Punjabi + pa_in = 0x0446, // Punjabi, India + pa_arab_pk = 0x0846, // Punjabi, Islamic Republic of Pakistan + quz = 0x006B, // Quechua + quz_bo = 0x046B, // Quechua, Bolivia + quz_ec = 0x086B, // Quechua, Ecuador + quz_pe = 0x0C6B, // Quechua, Peru + ro = 0x0018, // Romanian + ro_md = 0x0818, // Romanian, Moldova + ro_ro = 0x0418, // Romanian, Romania + rm = 0x0017, // Romansh + rm_ch = 0x0417, // Romansh, Switzerland + ru = 0x0019, // Russian + ru_md = 0x0819, // Russian, Moldova + ru_ru = 0x0419, // Russian, Russia + sah = 0x0085, // Sakha + sah_ru = 0x0485, // Sakha, Russia + smn = 0x703B, // Sami (Inari) + smn_fi = 0x243B, // Sami (Inari), Finland + smj = 0x7C3B, // Sami (Lule) + smj_no = 0x103B, // Sami (Lule), Norway + smj_se = 0x143B, // Sami (Lule), Sweden + se = 0x003B, // Sami (Northern) + se_fi = 0x0C3B, // Sami (Northern), Finland + se_no = 0x043B, // Sami (Northern), Norway + se_se = 0x083B, // Sami (Northern), Sweden + sms = 0x743B, // Sami (Skolt) + sms_fi = 0x203B, // Sami (Skolt), Finland + sma = 0x783B, // Sami (Southern) + sma_no = 0x183B, // Sami (Southern), Norway + sma_se = 0x1C3B, // Sami (Southern), Sweden + sa = 0x004F, // Sanskrit + sa_in = 0x044F, // Sanskrit, India + gd = 0x0091, // Scottish Gaelic + gd_gb = 0x0491, // Scottish Gaelic, United Kingdom + sr_cyrl = 0x6C1A, // Serbian (Cyrillic) + sr_cyrl_ba = 0x1C1A, // Serbian (Cyrillic), Bosnia and Herzegovina + sr_cyrl_me = 0x301A, // Serbian (Cyrillic), Montenegro + sr_cyrl_rs = 0x281A, // Serbian (Cyrillic), Serbia + sr_cyrl_cs = 0x0C1A, // Serbian (Cyrillic), Serbia and Montenegro (Former) + sr_latn = 0x701A, // Serbian (Latin) + sr = 0x7C1A, // Serbian (Latin) + sr_latn_ba = 0x181A, // Serbian (Latin), Bosnia and Herzegovina + sr_latn_me = 0x2c1A, // Serbian (Latin), Montenegro + sr_latn_rs = 0x241A, // Serbian (Latin), Serbia + sr_latn_cs = 0x081A, // Serbian (Latin), Serbia and Montenegro (Former) + nso = 0x006C, // Sesotho sa Leboa + nso_za = 0x046C, // Sesotho sa Leboa, South Africa + tn = 0x0032, // Setswana + tn_bw = 0x0832, // Setswana, Botswana + tn_za = 0x0432, // Setswana, South Africa + sd = 0x0059, // Sindhi + sd_arab = 0x7C59, // Sindhi + sd_arab_pk = 0x0859, // Sindhi, Islamic Republic of Pakistan + si = 0x005B, // Sinhala + si_lk = 0x045B, // Sinhala, Sri Lanka + sk = 0x001B, // Slovak + sk_sk = 0x041B, // Slovak, Slovakia + sl = 0x0024, // Slovenian + sl_si = 0x0424, // Slovenian, Slovenia + so = 0x0077, // Somali + so_so = 0x0477, // Somali, Somalia + st = 0x0030, // Sotho + st_za = 0x0430, // Sotho, South Africa + es = 0x000A, // Spanish + es_ar = 0x2C0A, // Spanish, Argentina + es_ve = 0x200A, // Spanish, Bolivarian Republic of Venezuela + es_bo = 0x400A, // Spanish, Bolivia + es_cl = 0x340A, // Spanish, Chile + es_co = 0x240A, // Spanish, Colombia + es_cr = 0x140A, // Spanish, Costa Rica + es_cu = 0x5c0A, // Spanish, Cuba + es_do = 0x1c0A, // Spanish, Dominican Republic + es_ec = 0x300A, // Spanish, Ecuador + es_sv = 0x440A, // Spanish, El Salvador + es_gt = 0x100A, // Spanish, Guatemala + es_hn = 0x480A, // Spanish, Honduras + es_419 = 0x580A, // Spanish, Latin America + es_mx = 0x080A, // Spanish, Mexico + es_ni = 0x4C0A, // Spanish, Nicaragua + es_pa = 0x180A, // Spanish, Panama + es_py = 0x3C0A, // Spanish, Paraguay + 
es_pe = 0x280A, // Spanish, Peru + es_pr = 0x500A, // Spanish, Puerto Rico + es_es_tradnl = 0x040A, // Spanish, Spain + es_es = 0x0c0A, // Spanish, Spain + es_us = 0x540A, // Spanish, United States + es_uy = 0x380A, // Spanish, Uruguay + sv = 0x001D, // Swedish + sv_fi = 0x081D, // Swedish, Finland + sv_se = 0x041D, // Swedish, Sweden + syr = 0x005A, // Syriac + syr_sy = 0x045A, // Syriac, Syria + tg = 0x0028, // Tajik (Cyrillic) + tg_cyrl = 0x7C28, // Tajik (Cyrillic) + tg_cyrl_tj = 0x0428, // Tajik (Cyrillic), Tajikistan + tzm = 0x005F, // Tamazight (Latin) + tzm_latn = 0x7C5F, // Tamazight (Latin) + tzm_latn_dz = 0x085F, // Tamazight (Latin), Algeria + ta = 0x0049, // Tamil + ta_in = 0x0449, // Tamil, India + ta_lk = 0x0849, // Tamil, Sri Lanka + tt = 0x0044, // Tatar + tt_ru = 0x0444, // Tatar, Russia + te = 0x004A, // Telugu + te_in = 0x044A, // Telugu, India + th = 0x001E, // Thai + th_th = 0x041E, // Thai, Thailand + bo = 0x0051, // Tibetan + bo_cn = 0x0451, // Tibetan, People's Republic of China + ti = 0x0073, // Tigrinya + ti_er = 0x0873, // Tigrinya, Eritrea + ti_et = 0x0473, // Tigrinya, Ethiopia + ts = 0x0031, // Tsonga + ts_za = 0x0431, // Tsonga, South Africa + tr = 0x001F, // Turkish + tr_tr = 0x041F, // Turkish, Turkey + tk = 0x0042, // Turkmen + tk_tm = 0x0442, // Turkmen, Turkmenistan + uk = 0x0022, // Ukrainian + uk_ua = 0x0422, // Ukrainian, Ukraine + hsb = 0x002E, // Upper Sorbian + hsb_de = 0x042E, // Upper Sorbian, Germany + ur = 0x0020, // Urdu + ur_in = 0x0820, // Urdu, India + ur_pk = 0x0420, // Urdu, Islamic Republic of Pakistan + ug = 0x0080, // Uyghur + ug_cn = 0x0480, // Uyghur, People's Republic of China + uz_cyrl = 0x7843, // Uzbek (Cyrillic) + uz_cyrl_uz = 0x0843, // Uzbek (Cyrillic), Uzbekistan + uz = 0x0043, // Uzbek (Latin) + uz_latn = 0x7C43, // Uzbek (Latin) + uz_latn_uz = 0x0443, // Uzbek (Latin), Uzbekistan + ca_es_valencia = 0x0803, // Valencian, Spain + ve = 0x0033, // Venda + ve_za = 0x0433, // Venda, South Africa + vi = 0x002A, // Vietnamese + vi_vn = 0x042A, // Vietnamese, Vietnam + cy = 0x0052, // Welsh + cy_gb = 0x0452, // Welsh, United Kingdom + wo = 0x0088, // Wolof + wo_sn = 0x0488, // Wolof, Senegal + xh = 0x0034, // Xhosa + xh_za = 0x0434, // Xhosa, South Africa + ii = 0x0078, // Yi + ii_cn = 0x0478, // Yi, People's Republic of China + yi_001 = 0x043D, // Yiddish, World + yo = 0x006A, // Yoruba + yo_ng = 0x046A, // Yoruba, Nigeria + zu = 0x0035, // Zulu + zu_za = 0x0435, // Zulu, South Africa + + /// Special case + x_iv_mathan = 0x007F, // LANG_INVARIANT, "math alphanumeric sorting" +}; diff --git a/src/resinator/lex.zig b/src/resinator/lex.zig new file mode 100644 index 000000000000..98bb416a7be9 --- /dev/null +++ b/src/resinator/lex.zig @@ -0,0 +1,1104 @@ +//! Expects to be run after the C preprocessor and after `removeComments`. +//! This means that the lexer assumes that: +//! - Splices ('\' at the end of a line) have been handled/collapsed. +//! - Preprocessor directives and macros have been expanded (any remaining should be skipped with the exception of `#pragma code_page`). +//! - All comments have been removed. 
+ +const std = @import("std"); +const ErrorDetails = @import("errors.zig").ErrorDetails; +const columnsUntilTabStop = @import("literals.zig").columnsUntilTabStop; +const code_pages = @import("code_pages.zig"); +const CodePage = code_pages.CodePage; +const SourceMappings = @import("source_mapping.zig").SourceMappings; +const isNonAsciiDigit = @import("utils.zig").isNonAsciiDigit; + +const dumpTokensDuringTests = false; + +pub const default_max_string_literal_codepoints = 4097; + +pub const Token = struct { + id: Id, + start: usize, + end: usize, + line_number: usize, + + pub const Id = enum { + literal, + number, + quoted_ascii_string, + quoted_wide_string, + operator, + begin, + end, + comma, + open_paren, + close_paren, + /// This Id is only used for errors, the Lexer will never return one + /// of these from a `next` call. + preprocessor_command, + invalid, + eof, + + pub fn nameForErrorDisplay(self: Id) []const u8 { + return switch (self) { + .literal => "", + .number => "", + .quoted_ascii_string => "", + .quoted_wide_string => "", + .operator => "", + .begin => "<'{' or BEGIN>", + .end => "<'}' or END>", + .comma => ",", + .open_paren => "(", + .close_paren => ")", + .preprocessor_command => "", + .invalid => unreachable, + .eof => "", + }; + } + }; + + pub fn slice(self: Token, buffer: []const u8) []const u8 { + return buffer[self.start..self.end]; + } + + pub fn nameForErrorDisplay(self: Token, buffer: []const u8) []const u8 { + return switch (self.id) { + .eof => self.id.nameForErrorDisplay(), + else => self.slice(buffer), + }; + } + + pub fn calculateColumn(token: Token, source: []const u8, tab_columns: usize, maybe_line_start: ?usize) usize { + const line_start = maybe_line_start orelse token.getLineStart(source); + + var i: usize = line_start; + var column: usize = 0; + while (i < token.start) : (i += 1) { + const c = source[i]; + switch (c) { + '\t' => column += columnsUntilTabStop(column, tab_columns), + else => column += 1, + } + } + return column; + } + + // TODO: This doesn't necessarily match up with how we count line numbers, but where a line starts + // has a knock-on effect on calculateColumn. More testing is needed to determine what needs + // to be changed to make this both (1) match how line numbers are counted and (2) match how + // the Win32 RC compiler counts tab columns. 
+    //
+    //       (the TODO in currentIndexFormsLineEndingPair should be taken into account as well)
+    pub fn getLineStart(token: Token, source: []const u8) usize {
+        const line_start = line_start: {
+            if (token.start != 0) {
+                // start checking at the byte before the token
+                var index = token.start - 1;
+                while (true) {
+                    if (source[index] == '\n') break :line_start @min(source.len - 1, index + 1);
+                    if (index != 0) index -= 1 else break;
+                }
+            }
+            break :line_start 0;
+        };
+        return line_start;
+    }
+
+    pub fn getLine(token: Token, source: []const u8, maybe_line_start: ?usize) []const u8 {
+        const line_start = maybe_line_start orelse token.getLineStart(source);
+
+        var line_end = line_start + 1;
+        while (line_end < source.len and source[line_end] != '\n') : (line_end += 1) {}
+        while (line_end > 0 and source[line_end - 1] == '\r') : (line_end -= 1) {}
+
+        return source[line_start..line_end];
+    }
+
+    pub fn isStringLiteral(token: Token) bool {
+        return token.id == .quoted_ascii_string or token.id == .quoted_wide_string;
+    }
+};
+
+pub const LineHandler = struct {
+    line_number: usize = 1,
+    buffer: []const u8,
+    last_line_ending_index: ?usize = null,
+
+    /// Like incrementLineNumber but checks that the current char is a line ending first.
+    /// Returns the new line number if it was incremented, null otherwise.
+    pub fn maybeIncrementLineNumber(self: *LineHandler, cur_index: usize) ?usize {
+        const c = self.buffer[cur_index];
+        if (c == '\r' or c == '\n') {
+            return self.incrementLineNumber(cur_index);
+        }
+        return null;
+    }
+
+    /// Increments line_number appropriately (handling line ending pairs)
+    /// and returns the new line number if it was incremented, or null otherwise.
+    pub fn incrementLineNumber(self: *LineHandler, cur_index: usize) ?usize {
+        if (self.currentIndexFormsLineEndingPair(cur_index)) {
+            self.last_line_ending_index = null;
+            return null;
+        } else {
+            self.line_number += 1;
+            self.last_line_ending_index = cur_index;
+            return self.line_number;
+        }
+    }
+
+    /// \r\n and \n\r pairs are treated as a single line ending (but not \r\r or \n\n)
+    /// expects cur_index and last_line_ending_index (if non-null) to point at line endings
+    ///
+    /// TODO: This is not really how the Win32 RC compiler handles line endings. Instead, it
+    ///       seems to drop all \r during preprocessing and then replace all remaining line
+    ///       endings with well-formed \r\n pairs (e.g. `ab\rc\n` becomes `abc\r\n`).
+    ///       Handling this the same as the Win32 RC compiler would need control over the
+    ///       preprocessor, since Clang converts unpaired \r into unpaired \n.
+    pub fn currentIndexFormsLineEndingPair(self: *const LineHandler, cur_index: usize) bool {
+        if (self.last_line_ending_index == null) return false;
+
+        // last_line_ending_index must immediately precede the current index; we know cur_index
+        // must be >= 1 since last_line_ending_index is non-null (so if the subtraction
+        // overflows it is a bug at the callsite of this function).
+        if (self.last_line_ending_index.?
!= cur_index - 1) return false; + + const cur_line_ending = self.buffer[cur_index]; + const last_line_ending = self.buffer[self.last_line_ending_index.?]; + + // sanity check + std.debug.assert(cur_line_ending == '\r' or cur_line_ending == '\n'); + std.debug.assert(last_line_ending == '\r' or last_line_ending == '\n'); + + // can't be \n\n or \r\r + if (last_line_ending == cur_line_ending) return false; + + return true; + } +}; + +pub const LexError = error{ + UnfinishedStringLiteral, + StringLiteralTooLong, + InvalidNumberWithExponent, + InvalidDigitCharacterInNumberLiteral, + IllegalByte, + IllegalByteOutsideStringLiterals, + IllegalCodepointOutsideStringLiterals, + IllegalByteOrderMark, + IllegalPrivateUseCharacter, + FoundCStyleEscapedQuote, + CodePagePragmaMissingLeftParen, + CodePagePragmaMissingRightParen, + /// Can be caught and ignored + CodePagePragmaInvalidCodePage, + CodePagePragmaNotInteger, + CodePagePragmaOverflow, + CodePagePragmaUnsupportedCodePage, + /// Can be caught and ignored + CodePagePragmaInIncludedFile, +}; + +pub const Lexer = struct { + const Self = @This(); + + buffer: []const u8, + index: usize, + line_handler: LineHandler, + at_start_of_line: bool = true, + error_context_token: ?Token = null, + current_code_page: CodePage, + default_code_page: CodePage, + source_mappings: ?*SourceMappings, + max_string_literal_codepoints: u15, + /// Needed to determine whether or not the output code page should + /// be set in the parser. + seen_pragma_code_pages: u2 = 0, + + pub const Error = LexError; + + pub const LexerOptions = struct { + default_code_page: CodePage = .windows1252, + source_mappings: ?*SourceMappings = null, + max_string_literal_codepoints: u15 = default_max_string_literal_codepoints, + }; + + pub fn init(buffer: []const u8, options: LexerOptions) Self { + return Self{ + .buffer = buffer, + .index = 0, + .current_code_page = options.default_code_page, + .default_code_page = options.default_code_page, + .source_mappings = options.source_mappings, + .max_string_literal_codepoints = options.max_string_literal_codepoints, + .line_handler = .{ .buffer = buffer }, + }; + } + + pub fn dump(self: *Self, token: *const Token) void { + std.debug.print("{s}:{d}: {s}\n", .{ @tagName(token.id), token.line_number, std.fmt.fmtSliceEscapeLower(token.slice(self.buffer)) }); + } + + pub const LexMethod = enum { + whitespace_delimiter_only, + normal, + normal_expect_operator, + }; + + pub fn next(self: *Self, comptime method: LexMethod) LexError!Token { + switch (method) { + .whitespace_delimiter_only => return self.nextWhitespaceDelimeterOnly(), + .normal => return self.nextNormal(), + .normal_expect_operator => return self.nextNormalWithContext(.expect_operator), + } + } + + const StateWhitespaceDelimiterOnly = enum { + start, + literal, + preprocessor, + semicolon, + }; + + pub fn nextWhitespaceDelimeterOnly(self: *Self) LexError!Token { + const start_index = self.index; + var result = Token{ + .id = .eof, + .start = start_index, + .end = undefined, + .line_number = self.line_handler.line_number, + }; + var state = StateWhitespaceDelimiterOnly.start; + + while (self.current_code_page.codepointAt(self.index, self.buffer)) |codepoint| : (self.index += codepoint.byte_len) { + const c = codepoint.value; + try self.checkForIllegalCodepoint(codepoint, false); + switch (state) { + .start => switch (c) { + '\r', '\n' => { + result.start = self.index + 1; + result.line_number = self.incrementLineNumber(); + }, + ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F' => { + 
result.start = self.index + 1; + }, + // NBSP only counts as whitespace at the start of a line (but + // can be intermixed with other whitespace). Who knows why. + '\xA0' => if (self.at_start_of_line) { + result.start = self.index + codepoint.byte_len; + } else { + state = .literal; + self.at_start_of_line = false; + }, + '#' => { + if (self.at_start_of_line) { + state = .preprocessor; + } else { + state = .literal; + } + self.at_start_of_line = false; + }, + // Semi-colon acts as a line-terminator, but in this lexing mode + // that's only true if it's at the start of a line. + ';' => { + if (self.at_start_of_line) { + state = .semicolon; + } + self.at_start_of_line = false; + }, + else => { + state = .literal; + self.at_start_of_line = false; + }, + }, + .literal => switch (c) { + '\r', '\n', ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F' => { + result.id = .literal; + break; + }, + else => {}, + }, + .preprocessor => switch (c) { + '\r', '\n' => { + try self.evaluatePreprocessorCommand(result.start, self.index); + result.start = self.index + 1; + state = .start; + result.line_number = self.incrementLineNumber(); + }, + else => {}, + }, + .semicolon => switch (c) { + '\r', '\n' => { + result.start = self.index + 1; + state = .start; + result.line_number = self.incrementLineNumber(); + }, + else => {}, + }, + } + } else { // got EOF + switch (state) { + .start, .semicolon => {}, + .literal => { + result.id = .literal; + }, + .preprocessor => { + try self.evaluatePreprocessorCommand(result.start, self.index); + result.start = self.index; + }, + } + } + + result.end = self.index; + return result; + } + + const StateNormal = enum { + start, + literal_or_quoted_wide_string, + quoted_ascii_string, + quoted_wide_string, + quoted_ascii_string_escape, + quoted_wide_string_escape, + quoted_ascii_string_maybe_end, + quoted_wide_string_maybe_end, + literal, + number_literal, + preprocessor, + semicolon, + // end + e, + en, + // begin + b, + be, + beg, + begi, + }; + + /// TODO: A not-terrible name + pub fn nextNormal(self: *Self) LexError!Token { + return self.nextNormalWithContext(.any); + } + + pub fn nextNormalWithContext(self: *Self, context: enum { expect_operator, any }) LexError!Token { + const start_index = self.index; + var result = Token{ + .id = .eof, + .start = start_index, + .end = undefined, + .line_number = self.line_handler.line_number, + }; + var state = StateNormal.start; + + // Note: The Windows RC compiler uses a non-standard method of computing + // length for its 'string literal too long' errors; it isn't easily + // explained or intuitive (it's sort-of pre-parsed byte length but with + // a few of exceptions/edge cases). + // + // It also behaves strangely with non-ASCII codepoints, e.g. even though the default + // limit is 4097, you can only have 4094 € codepoints (1 UTF-16 code unit each), + // and 2048 𐐷 codepoints (2 UTF-16 code units each). + // + // TODO: Understand this more, bring it more in line with how the Win32 limits work. + // Alternatively, do something that makes more sense but may be more permissive. 
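+        // As a rough illustration of how this implementation counts (not the Win32 one):
+        // with the default limit of 4097, a plain-ASCII string with 4097 characters between
+        // the quotes lexes fine, while one more character results in error.StringLiteralTooLong.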
+ var string_literal_length: usize = 0; + var string_literal_collapsing_whitespace: bool = false; + var still_could_have_exponent: bool = true; + var exponent_index: ?usize = null; + while (self.current_code_page.codepointAt(self.index, self.buffer)) |codepoint| : (self.index += codepoint.byte_len) { + const c = codepoint.value; + const in_string_literal = switch (state) { + .quoted_ascii_string, + .quoted_wide_string, + .quoted_ascii_string_escape, + .quoted_wide_string_escape, + .quoted_ascii_string_maybe_end, + .quoted_wide_string_maybe_end, + => + // If the current line is not the same line as the start of the string literal, + // then we want to treat the current codepoint as 'not in a string literal' + // for the purposes of detecting illegal codepoints. This means that we will + // error on illegal-outside-string-literal characters that are outside string + // literals from the perspective of a C preprocessor, but that may be + // inside string literals from the perspective of the RC lexer. For example, + // "hello + // @" + // will be treated as a single string literal by the RC lexer but the Win32 + // preprocessor will consider this an unclosed string literal followed by + // the character @ and ", and will therefore error since the Win32 RC preprocessor + // errors on the @ character outside string literals. + // + // By doing this here, we can effectively emulate the Win32 RC preprocessor behavior + // at lex-time, and avoid the need for a separate step that checks for this edge-case + // specifically. + result.line_number == self.line_handler.line_number, + else => false, + }; + try self.checkForIllegalCodepoint(codepoint, in_string_literal); + switch (state) { + .start => switch (c) { + '\r', '\n' => { + result.start = self.index + 1; + result.line_number = self.incrementLineNumber(); + }, + ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F' => { + result.start = self.index + 1; + }, + // NBSP only counts as whitespace at the start of a line (but + // can be intermixed with other whitespace). Who knows why. 
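+                    // (e.g. an NBSP before the first token on a line is skipped like the other
+                    // whitespace above, but an NBSP anywhere else starts or continues a literal)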
+ '\xA0' => if (self.at_start_of_line) { + result.start = self.index + codepoint.byte_len; + } else { + state = .literal; + self.at_start_of_line = false; + }, + 'L', 'l' => { + state = .literal_or_quoted_wide_string; + self.at_start_of_line = false; + }, + 'E', 'e' => { + state = .e; + self.at_start_of_line = false; + }, + 'B', 'b' => { + state = .b; + self.at_start_of_line = false; + }, + '"' => { + state = .quoted_ascii_string; + self.at_start_of_line = false; + string_literal_collapsing_whitespace = false; + string_literal_length = 0; + }, + '+', '&', '|' => { + self.index += 1; + result.id = .operator; + self.at_start_of_line = false; + break; + }, + '-' => { + if (context == .expect_operator) { + self.index += 1; + result.id = .operator; + self.at_start_of_line = false; + break; + } else { + state = .number_literal; + still_could_have_exponent = true; + exponent_index = null; + self.at_start_of_line = false; + } + }, + '0'...'9', '~' => { + state = .number_literal; + still_could_have_exponent = true; + exponent_index = null; + self.at_start_of_line = false; + }, + '#' => { + if (self.at_start_of_line) { + state = .preprocessor; + } else { + state = .literal; + } + self.at_start_of_line = false; + }, + ';' => { + state = .semicolon; + self.at_start_of_line = false; + }, + '{', '}' => { + self.index += 1; + result.id = if (c == '{') .begin else .end; + self.at_start_of_line = false; + break; + }, + '(', ')' => { + self.index += 1; + result.id = if (c == '(') .open_paren else .close_paren; + self.at_start_of_line = false; + break; + }, + ',' => { + self.index += 1; + result.id = .comma; + self.at_start_of_line = false; + break; + }, + else => { + if (isNonAsciiDigit(c)) { + self.error_context_token = .{ + .id = .number, + .start = result.start, + .end = self.index + 1, + .line_number = self.line_handler.line_number, + }; + return error.InvalidDigitCharacterInNumberLiteral; + } + state = .literal; + self.at_start_of_line = false; + }, + }, + .preprocessor => switch (c) { + '\r', '\n' => { + try self.evaluatePreprocessorCommand(result.start, self.index); + result.start = self.index + 1; + state = .start; + result.line_number = self.incrementLineNumber(); + }, + else => {}, + }, + // Semi-colon acts as a line-terminator--everything is skipped until + // the next line. + .semicolon => switch (c) { + '\r', '\n' => { + result.start = self.index + 1; + state = .start; + result.line_number = self.incrementLineNumber(); + }, + else => {}, + }, + .number_literal => switch (c) { + // zig fmt: off + ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F', + '\r', '\n', '"', ',', '{', '}', '+', '-', '|', '&', '~', '(', ')', + '\'', ';', '=', + => { + // zig fmt: on + result.id = .number; + break; + }, + '0'...'9' => { + if (exponent_index) |exp_i| { + if (self.index - 1 == exp_i) { + // Note: This being an error is a quirk of the preprocessor used by + // the Win32 RC compiler. 
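+                                // (e.g. `1e1` hits this error, while `1ea` still lexes as a
+                                // number token; see the doc comment on parseNumberLiteral in
+                                // literals.zig)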
+ self.error_context_token = .{ + .id = .number, + .start = result.start, + .end = self.index + 1, + .line_number = self.line_handler.line_number, + }; + return error.InvalidNumberWithExponent; + } + } + }, + 'e', 'E' => { + if (still_could_have_exponent) { + exponent_index = self.index; + still_could_have_exponent = false; + } + }, + else => { + if (isNonAsciiDigit(c)) { + self.error_context_token = .{ + .id = .number, + .start = result.start, + .end = self.index + 1, + .line_number = self.line_handler.line_number, + }; + return error.InvalidDigitCharacterInNumberLiteral; + } + still_could_have_exponent = false; + }, + }, + .literal_or_quoted_wide_string => switch (c) { + // zig fmt: off + ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F', + '\r', '\n', ',', '{', '}', '+', '-', '|', '&', '~', '(', ')', + '\'', ';', '=', + // zig fmt: on + => { + result.id = .literal; + break; + }, + '"' => { + state = .quoted_wide_string; + string_literal_collapsing_whitespace = false; + string_literal_length = 0; + }, + else => { + state = .literal; + }, + }, + .literal => switch (c) { + // zig fmt: off + ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F', + '\r', '\n', '"', ',', '{', '}', '+', '-', '|', '&', '~', '(', ')', + '\'', ';', '=', + => { + // zig fmt: on + result.id = .literal; + break; + }, + else => {}, + }, + .e => switch (c) { + 'N', 'n' => { + state = .en; + }, + else => { + state = .literal; + self.index -= 1; + }, + }, + .en => switch (c) { + 'D', 'd' => { + result.id = .end; + self.index += 1; + break; + }, + else => { + state = .literal; + self.index -= 1; + }, + }, + .b => switch (c) { + 'E', 'e' => { + state = .be; + }, + else => { + state = .literal; + self.index -= 1; + }, + }, + .be => switch (c) { + 'G', 'g' => { + state = .beg; + }, + else => { + state = .literal; + self.index -= 1; + }, + }, + .beg => switch (c) { + 'I', 'i' => { + state = .begi; + }, + else => { + state = .literal; + self.index -= 1; + }, + }, + .begi => switch (c) { + 'N', 'n' => { + result.id = .begin; + self.index += 1; + break; + }, + else => { + state = .literal; + self.index -= 1; + }, + }, + .quoted_ascii_string, .quoted_wide_string => switch (c) { + '"' => { + state = if (state == .quoted_ascii_string) .quoted_ascii_string_maybe_end else .quoted_wide_string_maybe_end; + }, + '\\' => { + state = if (state == .quoted_ascii_string) .quoted_ascii_string_escape else .quoted_wide_string_escape; + }, + '\r' => { + // \r doesn't count towards string literal length + + // Increment line number but don't affect the result token's line number + _ = self.incrementLineNumber(); + }, + '\n' => { + // first \n expands to <\n> + if (!string_literal_collapsing_whitespace) { + string_literal_length += 2; + string_literal_collapsing_whitespace = true; + } + // the rest are collapsed into the <\n> + + // Increment line number but don't affect the result token's line number + _ = self.incrementLineNumber(); + }, + // only \t, space, Vertical Tab, and Form Feed count as whitespace when collapsing + '\t', ' ', '\x0b', '\x0c' => { + if (!string_literal_collapsing_whitespace) { + if (c == '\t') { + // Literal tab characters are counted as the number of space characters + // needed to reach the next 8-column tab stop. + // + // This implemention is ineffecient but hopefully it's enough of an + // edge case that it doesn't matter too much. 
Literal tab characters in + // string literals being replaced by a variable number of spaces depending + // on which column the tab character is located in the source .rc file seems + // like it has extremely limited use-cases, so it seems unlikely that it's used + // in real .rc files. + var dummy_token = Token{ + .start = self.index, + .end = self.index, + .line_number = self.line_handler.line_number, + .id = .invalid, + }; + dummy_token.start = self.index; + const current_column = dummy_token.calculateColumn(self.buffer, 8, null); + string_literal_length += columnsUntilTabStop(current_column, 8); + } else { + string_literal_length += 1; + } + } + }, + else => { + string_literal_collapsing_whitespace = false; + string_literal_length += 1; + }, + }, + .quoted_ascii_string_escape, .quoted_wide_string_escape => switch (c) { + '"' => { + self.error_context_token = .{ + .id = .invalid, + .start = self.index - 1, + .end = self.index + 1, + .line_number = self.line_handler.line_number, + }; + return error.FoundCStyleEscapedQuote; + }, + else => { + state = if (state == .quoted_ascii_string_escape) .quoted_ascii_string else .quoted_wide_string; + }, + }, + .quoted_ascii_string_maybe_end, .quoted_wide_string_maybe_end => switch (c) { + '"' => { + state = if (state == .quoted_ascii_string_maybe_end) .quoted_ascii_string else .quoted_wide_string; + // Escaped quotes only count as 1 char for string literal length checks, + // so we don't increment string_literal_length here. + }, + else => { + result.id = if (state == .quoted_ascii_string_maybe_end) .quoted_ascii_string else .quoted_wide_string; + break; + }, + }, + } + } else { // got EOF + switch (state) { + .start, .semicolon => {}, + .literal_or_quoted_wide_string, .literal, .e, .en, .b, .be, .beg, .begi => { + result.id = .literal; + }, + .preprocessor => { + try self.evaluatePreprocessorCommand(result.start, self.index); + result.start = self.index; + }, + .number_literal => { + result.id = .number; + }, + .quoted_ascii_string_maybe_end, .quoted_wide_string_maybe_end => { + result.id = if (state == .quoted_ascii_string_maybe_end) .quoted_ascii_string else .quoted_wide_string; + }, + .quoted_ascii_string, + .quoted_wide_string, + .quoted_ascii_string_escape, + .quoted_wide_string_escape, + => { + self.error_context_token = .{ + .id = .eof, + .start = self.index, + .end = self.index, + .line_number = self.line_handler.line_number, + }; + return LexError.UnfinishedStringLiteral; + }, + } + } + + if (result.id == .quoted_ascii_string or result.id == .quoted_wide_string) { + if (string_literal_length > self.max_string_literal_codepoints) { + self.error_context_token = result; + return LexError.StringLiteralTooLong; + } + } + + result.end = self.index; + return result; + } + + /// Increments line_number appropriately (handling line ending pairs) + /// and returns the new line number. + fn incrementLineNumber(self: *Self) usize { + _ = self.line_handler.incrementLineNumber(self.index); + self.at_start_of_line = true; + return self.line_handler.line_number; + } + + fn checkForIllegalCodepoint(self: *Self, codepoint: code_pages.Codepoint, in_string_literal: bool) LexError!void { + const err = switch (codepoint.value) { + // 0x00 = NUL + // 0x1A = Substitute (treated as EOF) + // NOTE: 0x1A gets treated as EOF by the clang preprocessor so after a .rc file + // is run through the clang preprocessor it will no longer have 0x1A characters in it. 
+ // 0x7F = DEL (treated as a context-specific terminator by the Windows RC compiler) + 0x00, 0x1A, 0x7F => error.IllegalByte, + // 0x01...0x03 result in strange 'macro definition too big' errors when used outside of string literals + // 0x04 is valid but behaves strangely (sort of acts as a 'skip the next character' instruction) + 0x01...0x04 => if (!in_string_literal) error.IllegalByteOutsideStringLiterals else return, + // @ and ` both result in error RC2018: unknown character '0x60' (and subsequently + // fatal error RC1116: RC terminating after preprocessor errors) if they are ever used + // outside of string literals. Not exactly sure why this would be the case, though. + // TODO: Make sure there aren't any exceptions + '@', '`' => if (!in_string_literal) error.IllegalByteOutsideStringLiterals else return, + // The Byte Order Mark is mostly skipped over by the Windows RC compiler, but + // there are edge cases where it leads to cryptic 'compiler limit : macro definition too big' + // errors (e.g. a BOM within a number literal). By making this illegal we avoid having to + // deal with a lot of edge cases and remove the potential footgun of the bytes of a BOM + // being 'missing' when included in a string literal (the Windows RC compiler acts as + // if the codepoint was never part of the string literal). + '\u{FEFF}' => error.IllegalByteOrderMark, + // Similar deal with this private use codepoint, it gets skipped/ignored by the + // RC compiler (but without the cryptic errors). Silently dropping bytes still seems like + // enough of a footgun with no real use-cases that it's still worth erroring instead of + // emulating the RC compiler's behavior, though. + '\u{E000}' => error.IllegalPrivateUseCharacter, + // These codepoints lead to strange errors when used outside of string literals, + // and miscompilations when used within string literals. We avoid the miscompilation + // within string literals and emit a warning, but outside of string literals it makes + // more sense to just disallow these codepoints. 
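+            // (the same set of codepoints is warned about by IterativeStringParser in
+            // literals.zig when they do appear inside string literals)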
+ 0x900, 0xA00, 0xA0D, 0x2000, 0xFFFE, 0xD00 => if (!in_string_literal) error.IllegalCodepointOutsideStringLiterals else return, + else => return, + }; + self.error_context_token = .{ + .id = .invalid, + .start = self.index, + .end = self.index + codepoint.byte_len, + .line_number = self.line_handler.line_number, + }; + return err; + } + + fn evaluatePreprocessorCommand(self: *Self, start: usize, end: usize) !void { + const token = Token{ + .id = .preprocessor_command, + .start = start, + .end = end, + .line_number = self.line_handler.line_number, + }; + const full_command = self.buffer[start..end]; + var command = full_command; + + // Anything besides exactly this is ignored by the Windows RC implementation + const expected_directive = "#pragma"; + if (!std.mem.startsWith(u8, command, expected_directive)) return; + command = command[expected_directive.len..]; + + if (command.len == 0 or !std.ascii.isWhitespace(command[0])) return; + while (command.len > 0 and std.ascii.isWhitespace(command[0])) { + command = command[1..]; + } + + // Note: CoDe_PaGeZ is also treated as "code_page" by the Windows RC implementation, + // and it will error with 'Missing left parenthesis in code_page #pragma' + const expected_extension = "code_page"; + if (!std.ascii.startsWithIgnoreCase(command, expected_extension)) return; + command = command[expected_extension.len..]; + + while (command.len > 0 and std.ascii.isWhitespace(command[0])) { + command = command[1..]; + } + + if (command.len == 0 or command[0] != '(') { + self.error_context_token = token; + return error.CodePagePragmaMissingLeftParen; + } + command = command[1..]; + + while (command.len > 0 and std.ascii.isWhitespace(command[0])) { + command = command[1..]; + } + + var num_str: []u8 = command[0..0]; + while (command.len > 0 and (command[0] != ')' and !std.ascii.isWhitespace(command[0]))) { + command = command[1..]; + num_str.len += 1; + } + + if (num_str.len == 0) { + self.error_context_token = token; + return error.CodePagePragmaNotInteger; + } + + while (command.len > 0 and std.ascii.isWhitespace(command[0])) { + command = command[1..]; + } + + if (command.len == 0 or command[0] != ')') { + self.error_context_token = token; + return error.CodePagePragmaMissingRightParen; + } + + const code_page = code_page: { + if (std.ascii.eqlIgnoreCase("DEFAULT", num_str)) { + break :code_page self.default_code_page; + } + + // The Win32 compiler behaves fairly strangely around maxInt(u32): + // - If the overflowed u32 wraps and becomes a known code page ID, then + // it will error/warn with "Codepage not valid: ignored" (depending on /w) + // - If the overflowed u32 wraps and does not become a known code page ID, + // then it will error with 'constant too big' and 'Codepage not integer' + // + // Instead of that, we just have a separate error specifically for overflow. + const num = parseCodePageNum(num_str) catch |err| switch (err) { + error.InvalidCharacter => { + self.error_context_token = token; + return error.CodePagePragmaNotInteger; + }, + error.Overflow => { + self.error_context_token = token; + return error.CodePagePragmaOverflow; + }, + }; + + // Anything that starts with 0 but does not resolve to 0 is treated as invalid, e.g. 01252 + if (num_str[0] == '0' and num != 0) { + self.error_context_token = token; + return error.CodePagePragmaInvalidCodePage; + } + // Anything that resolves to 0 is treated as 'not an integer' by the Win32 implementation. 
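+            // (so e.g. `#pragma code_page(01252)` is invalid, while `#pragma code_page(0)`
+            // falls through to the 'not an integer' error below)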
+ else if (num == 0) { + self.error_context_token = token; + return error.CodePagePragmaNotInteger; + } + // Anything above u16 max is not going to be found since our CodePage enum is backed by a u16. + if (num > std.math.maxInt(u16)) { + self.error_context_token = token; + return error.CodePagePragmaInvalidCodePage; + } + + break :code_page code_pages.CodePage.getByIdentifierEnsureSupported(@intCast(num)) catch |err| switch (err) { + error.InvalidCodePage => { + self.error_context_token = token; + return error.CodePagePragmaInvalidCodePage; + }, + error.UnsupportedCodePage => { + self.error_context_token = token; + return error.CodePagePragmaUnsupportedCodePage; + }, + }; + }; + + // https://learn.microsoft.com/en-us/windows/win32/menurc/pragma-directives + // > This pragma is not supported in an included resource file (.rc) + // + // Even though the Win32 behavior is to just ignore such directives silently, + // this is an error in the lexer to allow for emitting warnings/errors when + // such directives are found if that's wanted. The intention is for the lexer + // to still be able to work correctly after this error is returned. + if (self.source_mappings) |source_mappings| { + if (!source_mappings.isRootFile(token.line_number)) { + self.error_context_token = token; + return error.CodePagePragmaInIncludedFile; + } + } + + self.seen_pragma_code_pages +|= 1; + self.current_code_page = code_page; + } + + fn parseCodePageNum(str: []const u8) !u32 { + var x: u32 = 0; + for (str) |c| { + const digit = try std.fmt.charToDigit(c, 10); + if (x != 0) x = try std.math.mul(u32, x, 10); + x = try std.math.add(u32, x, digit); + } + return x; + } + + pub fn getErrorDetails(self: Self, lex_err: LexError) ErrorDetails { + const err = switch (lex_err) { + error.UnfinishedStringLiteral => ErrorDetails.Error.unfinished_string_literal, + error.StringLiteralTooLong => return .{ + .err = .string_literal_too_long, + .token = self.error_context_token.?, + .extra = .{ .number = self.max_string_literal_codepoints }, + }, + error.InvalidNumberWithExponent => ErrorDetails.Error.invalid_number_with_exponent, + error.InvalidDigitCharacterInNumberLiteral => ErrorDetails.Error.invalid_digit_character_in_number_literal, + error.IllegalByte => ErrorDetails.Error.illegal_byte, + error.IllegalByteOutsideStringLiterals => ErrorDetails.Error.illegal_byte_outside_string_literals, + error.IllegalCodepointOutsideStringLiterals => ErrorDetails.Error.illegal_codepoint_outside_string_literals, + error.IllegalByteOrderMark => ErrorDetails.Error.illegal_byte_order_mark, + error.IllegalPrivateUseCharacter => ErrorDetails.Error.illegal_private_use_character, + error.FoundCStyleEscapedQuote => ErrorDetails.Error.found_c_style_escaped_quote, + error.CodePagePragmaMissingLeftParen => ErrorDetails.Error.code_page_pragma_missing_left_paren, + error.CodePagePragmaMissingRightParen => ErrorDetails.Error.code_page_pragma_missing_right_paren, + error.CodePagePragmaInvalidCodePage => ErrorDetails.Error.code_page_pragma_invalid_code_page, + error.CodePagePragmaNotInteger => ErrorDetails.Error.code_page_pragma_not_integer, + error.CodePagePragmaOverflow => ErrorDetails.Error.code_page_pragma_overflow, + error.CodePagePragmaUnsupportedCodePage => ErrorDetails.Error.code_page_pragma_unsupported_code_page, + error.CodePagePragmaInIncludedFile => ErrorDetails.Error.code_page_pragma_in_included_file, + }; + return .{ + .err = err, + .token = self.error_context_token.?, + }; + } +}; + +fn testLexNormal(source: []const u8, expected_tokens: []const 
Token.Id) !void { + var lexer = Lexer.init(source, .{}); + if (dumpTokensDuringTests) std.debug.print("\n----------------------\n{s}\n----------------------\n", .{lexer.buffer}); + for (expected_tokens) |expected_token_id| { + const token = try lexer.nextNormal(); + if (dumpTokensDuringTests) lexer.dump(&token); + try std.testing.expectEqual(expected_token_id, token.id); + } + const last_token = try lexer.nextNormal(); + try std.testing.expectEqual(Token.Id.eof, last_token.id); +} + +fn expectLexError(expected: LexError, actual: anytype) !void { + try std.testing.expectError(expected, actual); + if (dumpTokensDuringTests) std.debug.print("{!}\n", .{actual}); +} + +test "normal: numbers" { + try testLexNormal("1", &.{.number}); + try testLexNormal("-1", &.{.number}); + try testLexNormal("- 1", &.{ .number, .number }); + try testLexNormal("-a", &.{.number}); +} + +test "normal: string literals" { + try testLexNormal("\"\"", &.{.quoted_ascii_string}); + // "" is an escaped " + try testLexNormal("\" \"\" \"", &.{.quoted_ascii_string}); +} + +test "superscript chars and code pages" { + const firstToken = struct { + pub fn firstToken(source: []const u8, default_code_page: CodePage, comptime lex_method: Lexer.LexMethod) LexError!Token { + var lexer = Lexer.init(source, .{ .default_code_page = default_code_page }); + return lexer.next(lex_method); + } + }.firstToken; + const utf8_source = "²"; + const windows1252_source = "\xB2"; + + const windows1252_encoded_as_windows1252 = firstToken(windows1252_source, .windows1252, .normal); + try std.testing.expectError(error.InvalidDigitCharacterInNumberLiteral, windows1252_encoded_as_windows1252); + + const utf8_encoded_as_windows1252 = try firstToken(utf8_source, .windows1252, .normal); + try std.testing.expectEqual(Token{ + .id = .literal, + .start = 0, + .end = 2, + .line_number = 1, + }, utf8_encoded_as_windows1252); + + const utf8_encoded_as_utf8 = firstToken(utf8_source, .utf8, .normal); + try std.testing.expectError(error.InvalidDigitCharacterInNumberLiteral, utf8_encoded_as_utf8); + + const windows1252_encoded_as_utf8 = try firstToken(windows1252_source, .utf8, .normal); + try std.testing.expectEqual(Token{ + .id = .literal, + .start = 0, + .end = 1, + .line_number = 1, + }, windows1252_encoded_as_utf8); +} diff --git a/src/resinator/literals.zig b/src/resinator/literals.zig new file mode 100644 index 000000000000..1d5258455bb2 --- /dev/null +++ b/src/resinator/literals.zig @@ -0,0 +1,904 @@ +const std = @import("std"); +const code_pages = @import("code_pages.zig"); +const CodePage = code_pages.CodePage; +const windows1252 = @import("windows1252.zig"); +const ErrorDetails = @import("errors.zig").ErrorDetails; +const DiagnosticsContext = @import("errors.zig").DiagnosticsContext; +const Token = @import("lex.zig").Token; + +/// rc is maximally liberal in terms of what it accepts as a number literal +/// for data values. As long as it starts with a number or - or ~, that's good enough. 
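+/// For example, `-`, `~`, `3`, and `1garbageL` are all accepted here, since only the
+/// first character is checked.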
+pub fn isValidNumberDataLiteral(str: []const u8) bool {
+    if (str.len == 0) return false;
+    switch (str[0]) {
+        '~', '-', '0'...'9' => return true,
+        else => return false,
+    }
+}
+
+pub const SourceBytes = struct {
+    slice: []const u8,
+    code_page: CodePage,
+};
+
+pub const StringType = enum { ascii, wide };
+
+/// Valid escapes:
+///  "" -> "
+///  \a, \A => 0x08 (not 0x07 like in C)
+///  \n => 0x0A
+///  \r => 0x0D
+///  \t, \T => 0x09
+///  \\ => \
+///  \nnn => byte with numeric value given by nnn interpreted as octal
+///          (wraps on overflow, number of digits can be 1-3 for ASCII strings
+///          and 1-7 for wide strings)
+///  \xhh => byte with numeric value given by hh interpreted as hex
+///          (number of digits can be 0-2 for ASCII strings and 0-4 for
+///          wide strings)
+///  \<\r+> => \
+///  \<[\r\n\t ]+> => <nothing>
+///
+/// Special cases:
+///  <\t> => 1-8 spaces, dependent on columns in the source rc file itself
+///  <\r> => <nothing>
+///  <\n+><\w+?\n?> => <space><\n>
+///
+/// Special, especially weird case:
+///  \"" => "
+/// NOTE: This leads to footguns because the preprocessor can start parsing things
+/// out-of-sync with the RC compiler, expanding macros within string literals, etc.
+/// This parse function handles this case the same as the Windows RC compiler, but
+/// \" within a string literal is treated as an error by the lexer, so the relevant
+/// branches should never actually be hit during this function.
+pub const IterativeStringParser = struct {
+    source: []const u8,
+    code_page: CodePage,
+    /// The type of the string inferred by the prefix (L"" or "")
+    /// This is what matters for things like the maximum digits in an
+    /// escape sequence, whether or not invalid escape sequences are skipped, etc.
+    declared_string_type: StringType,
+    pending_codepoint: ?u21 = null,
+    num_pending_spaces: u8 = 0,
+    index: usize = 0,
+    column: usize = 0,
+    diagnostics: ?DiagnosticsContext = null,
+    seen_tab: bool = false,
+
+    const State = enum {
+        normal,
+        quote,
+        newline,
+        escaped,
+        escaped_cr,
+        escaped_newlines,
+        escaped_octal,
+        escaped_hex,
+    };
+
+    pub fn init(bytes: SourceBytes, options: StringParseOptions) IterativeStringParser {
+        const declared_string_type: StringType = switch (bytes.slice[0]) {
+            'L', 'l' => .wide,
+            else => .ascii,
+        };
+        var source = bytes.slice[1 ..
bytes.slice.len - 1]; // remove "" + var column = options.start_column + 1; // for the removed " + if (declared_string_type == .wide) { + source = source[1..]; // remove L + column += 1; // for the removed L + } + return .{ + .source = source, + .code_page = bytes.code_page, + .declared_string_type = declared_string_type, + .column = column, + .diagnostics = options.diagnostics, + }; + } + + pub const ParsedCodepoint = struct { + codepoint: u21, + from_escaped_integer: bool = false, + }; + + pub fn next(self: *IterativeStringParser) std.mem.Allocator.Error!?ParsedCodepoint { + const result = try self.nextUnchecked(); + if (self.diagnostics != null and result != null and !result.?.from_escaped_integer) { + switch (result.?.codepoint) { + 0x900, 0xA00, 0xA0D, 0x2000, 0xFFFE, 0xD00 => { + const err: ErrorDetails.Error = if (result.?.codepoint == 0xD00) + .rc_would_miscompile_codepoint_skip + else + .rc_would_miscompile_codepoint_byte_swap; + try self.diagnostics.?.diagnostics.append(ErrorDetails{ + .err = err, + .type = .warning, + .token = self.diagnostics.?.token, + .extra = .{ .number = result.?.codepoint }, + }); + try self.diagnostics.?.diagnostics.append(ErrorDetails{ + .err = err, + .type = .note, + .token = self.diagnostics.?.token, + .print_source_line = false, + .extra = .{ .number = result.?.codepoint }, + }); + }, + else => {}, + } + } + return result; + } + + pub fn nextUnchecked(self: *IterativeStringParser) std.mem.Allocator.Error!?ParsedCodepoint { + if (self.num_pending_spaces > 0) { + // Ensure that we don't get into this predicament so we can ensure that + // the order of processing any pending stuff doesn't matter + std.debug.assert(self.pending_codepoint == null); + self.num_pending_spaces -= 1; + return .{ .codepoint = ' ' }; + } + if (self.pending_codepoint) |pending_codepoint| { + self.pending_codepoint = null; + return .{ .codepoint = pending_codepoint }; + } + if (self.index >= self.source.len) return null; + + var state: State = .normal; + var string_escape_n: u16 = 0; + var string_escape_i: u8 = 0; + const max_octal_escape_digits: u8 = switch (self.declared_string_type) { + .ascii => 3, + .wide => 7, + }; + const max_hex_escape_digits: u8 = switch (self.declared_string_type) { + .ascii => 2, + .wide => 4, + }; + + while (self.code_page.codepointAt(self.index, self.source)) |codepoint| : (self.index += codepoint.byte_len) { + const c = codepoint.value; + var backtrack = false; + defer { + if (backtrack) { + self.index -= codepoint.byte_len; + } else { + if (c == '\t') { + self.column += columnsUntilTabStop(self.column, 8); + } else { + self.column += codepoint.byte_len; + } + } + } + switch (state) { + .normal => switch (c) { + '\\' => state = .escaped, + '"' => state = .quote, + '\r' => {}, + '\n' => state = .newline, + '\t' => { + // Only warn about a tab getting converted to spaces once per string + if (self.diagnostics != null and !self.seen_tab) { + try self.diagnostics.?.diagnostics.append(ErrorDetails{ + .err = .tab_converted_to_spaces, + .type = .warning, + .token = self.diagnostics.?.token, + }); + try self.diagnostics.?.diagnostics.append(ErrorDetails{ + .err = .tab_converted_to_spaces, + .type = .note, + .token = self.diagnostics.?.token, + .print_source_line = false, + }); + self.seen_tab = true; + } + const cols = columnsUntilTabStop(self.column, 8); + self.num_pending_spaces = @intCast(cols - 1); + self.index += codepoint.byte_len; + return .{ .codepoint = ' ' }; + }, + else => { + self.index += codepoint.byte_len; + return .{ .codepoint = c }; + }, + 
}, + .quote => switch (c) { + '"' => { + // "" => " + self.index += codepoint.byte_len; + return .{ .codepoint = '"' }; + }, + else => unreachable, // this is a bug in the lexer + }, + .newline => switch (c) { + '\r', ' ', '\t', '\n', '\x0b', '\x0c', '\xa0' => {}, + else => { + // backtrack so that we handle the current char properly + backtrack = true; + // + self.index += codepoint.byte_len; + self.pending_codepoint = '\n'; + return .{ .codepoint = ' ' }; + }, + }, + .escaped => switch (c) { + '\r' => state = .escaped_cr, + '\n' => state = .escaped_newlines, + '0'...'7' => { + string_escape_n = std.fmt.charToDigit(@intCast(c), 8) catch unreachable; + string_escape_i = 1; + state = .escaped_octal; + }, + 'x', 'X' => { + string_escape_n = 0; + string_escape_i = 0; + state = .escaped_hex; + }, + else => { + switch (c) { + 'a', 'A' => { + self.index += codepoint.byte_len; + return .{ .codepoint = '\x08' }; + }, // might be a bug in RC, but matches its behavior + 'n' => { + self.index += codepoint.byte_len; + return .{ .codepoint = '\n' }; + }, + 'r' => { + self.index += codepoint.byte_len; + return .{ .codepoint = '\r' }; + }, + 't', 'T' => { + self.index += codepoint.byte_len; + return .{ .codepoint = '\t' }; + }, + '\\' => { + self.index += codepoint.byte_len; + return .{ .codepoint = '\\' }; + }, + '"' => { + // \" is a special case that doesn't get the \ included, + backtrack = true; + }, + else => switch (self.declared_string_type) { + .wide => {}, // invalid escape sequences are skipped in wide strings + .ascii => { + // backtrack so that we handle the current char properly + backtrack = true; + self.index += codepoint.byte_len; + return .{ .codepoint = '\\' }; + }, + }, + } + state = .normal; + }, + }, + .escaped_cr => switch (c) { + '\r' => {}, + '\n' => state = .escaped_newlines, + else => { + // backtrack so that we handle the current char properly + backtrack = true; + self.index += codepoint.byte_len; + return .{ .codepoint = '\\' }; + }, + }, + .escaped_newlines => switch (c) { + '\r', '\n', '\t', ' ', '\x0b', '\x0c', '\xa0' => {}, + else => { + // backtrack so that we handle the current char properly + backtrack = true; + state = .normal; + }, + }, + .escaped_octal => switch (c) { + '0'...'7' => { + string_escape_n *%= 8; + string_escape_n +%= std.fmt.charToDigit(@intCast(c), 8) catch unreachable; + string_escape_i += 1; + if (string_escape_i == max_octal_escape_digits) { + const escaped_value = switch (self.declared_string_type) { + .ascii => @as(u8, @truncate(string_escape_n)), + .wide => string_escape_n, + }; + self.index += codepoint.byte_len; + return .{ .codepoint = escaped_value, .from_escaped_integer = true }; + } + }, + else => { + // backtrack so that we handle the current char properly + backtrack = true; + // write out whatever byte we have parsed so far + const escaped_value = switch (self.declared_string_type) { + .ascii => @as(u8, @truncate(string_escape_n)), + .wide => string_escape_n, + }; + self.index += codepoint.byte_len; + return .{ .codepoint = escaped_value, .from_escaped_integer = true }; + }, + }, + .escaped_hex => switch (c) { + '0'...'9', 'a'...'f', 'A'...'F' => { + string_escape_n *= 16; + string_escape_n += std.fmt.charToDigit(@intCast(c), 16) catch unreachable; + string_escape_i += 1; + if (string_escape_i == max_hex_escape_digits) { + const escaped_value = switch (self.declared_string_type) { + .ascii => @as(u8, @truncate(string_escape_n)), + .wide => string_escape_n, + }; + self.index += codepoint.byte_len; + return .{ .codepoint = escaped_value, 
.from_escaped_integer = true }; + } + }, + else => { + // backtrack so that we handle the current char properly + backtrack = true; + // write out whatever byte we have parsed so far + // (even with 0 actual digits, \x alone parses to 0) + const escaped_value = switch (self.declared_string_type) { + .ascii => @as(u8, @truncate(string_escape_n)), + .wide => string_escape_n, + }; + self.index += codepoint.byte_len; + return .{ .codepoint = escaped_value, .from_escaped_integer = true }; + }, + }, + } + } + + switch (state) { + .normal, .escaped_newlines => {}, + .newline => { + // + self.pending_codepoint = '\n'; + return .{ .codepoint = ' ' }; + }, + .escaped, .escaped_cr => return .{ .codepoint = '\\' }, + .escaped_octal, .escaped_hex => { + const escaped_value = switch (self.declared_string_type) { + .ascii => @as(u8, @truncate(string_escape_n)), + .wide => string_escape_n, + }; + return .{ .codepoint = escaped_value, .from_escaped_integer = true }; + }, + .quote => unreachable, // this is a bug in the lexer + } + + return null; + } +}; + +pub const StringParseOptions = struct { + start_column: usize = 0, + diagnostics: ?DiagnosticsContext = null, + output_code_page: CodePage = .windows1252, +}; + +pub fn parseQuotedString( + comptime literal_type: StringType, + allocator: std.mem.Allocator, + bytes: SourceBytes, + options: StringParseOptions, +) !(switch (literal_type) { + .ascii => []u8, + .wide => [:0]u16, +}) { + const T = if (literal_type == .ascii) u8 else u16; + std.debug.assert(bytes.slice.len >= 2); // must at least have 2 double quote chars + + var buf = try std.ArrayList(T).initCapacity(allocator, bytes.slice.len); + errdefer buf.deinit(); + + var iterative_parser = IterativeStringParser.init(bytes, options); + + while (try iterative_parser.next()) |parsed| { + const c = parsed.codepoint; + if (parsed.from_escaped_integer) { + try buf.append(@intCast(c)); + } else { + switch (literal_type) { + .ascii => switch (options.output_code_page) { + .windows1252 => { + if (windows1252.bestFitFromCodepoint(c)) |best_fit| { + try buf.append(best_fit); + } else if (c < 0x10000 or c == code_pages.Codepoint.invalid) { + try buf.append('?'); + } else { + try buf.appendSlice("??"); + } + }, + .utf8 => { + var codepoint_to_encode = c; + if (c == code_pages.Codepoint.invalid) { + codepoint_to_encode = '�'; + } + var utf8_buf: [4]u8 = undefined; + const utf8_len = std.unicode.utf8Encode(codepoint_to_encode, &utf8_buf) catch unreachable; + try buf.appendSlice(utf8_buf[0..utf8_len]); + }, + else => unreachable, // Unsupported code page + }, + .wide => { + if (c == code_pages.Codepoint.invalid) { + try buf.append(std.mem.nativeToLittle(u16, '�')); + } else if (c < 0x10000) { + const short: u16 = @intCast(c); + try buf.append(std.mem.nativeToLittle(u16, short)); + } else { + const high = @as(u16, @intCast((c - 0x10000) >> 10)) + 0xD800; + try buf.append(std.mem.nativeToLittle(u16, high)); + const low = @as(u16, @intCast(c & 0x3FF)) + 0xDC00; + try buf.append(std.mem.nativeToLittle(u16, low)); + } + }, + } + } + } + + if (literal_type == .wide) { + return buf.toOwnedSliceSentinel(0); + } else { + return buf.toOwnedSlice(); + } +} + +pub fn parseQuotedAsciiString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![]u8 { + std.debug.assert(bytes.slice.len >= 2); // "" + return parseQuotedString(.ascii, allocator, bytes, options); +} + +pub fn parseQuotedWideString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![:0]u16 { + 
std.debug.assert(bytes.slice.len >= 3); // L"" + return parseQuotedString(.wide, allocator, bytes, options); +} + +pub fn parseQuotedStringAsWideString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![:0]u16 { + std.debug.assert(bytes.slice.len >= 2); // "" + return parseQuotedString(.wide, allocator, bytes, options); +} + +pub fn parseQuotedStringAsAsciiString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![]u8 { + std.debug.assert(bytes.slice.len >= 2); // "" + return parseQuotedString(.ascii, allocator, bytes, options); +} + +test "parse quoted ascii string" { + var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + try std.testing.expectEqualSlices(u8, "hello", try parseQuotedAsciiString(arena, .{ + .slice = + \\"hello" + , + .code_page = .windows1252, + }, .{})); + // hex with 0 digits + try std.testing.expectEqualSlices(u8, "\x00", try parseQuotedAsciiString(arena, .{ + .slice = + \\"\x" + , + .code_page = .windows1252, + }, .{})); + // hex max of 2 digits + try std.testing.expectEqualSlices(u8, "\xFFf", try parseQuotedAsciiString(arena, .{ + .slice = + \\"\XfFf" + , + .code_page = .windows1252, + }, .{})); + // octal with invalid octal digit + try std.testing.expectEqualSlices(u8, "\x019", try parseQuotedAsciiString(arena, .{ + .slice = + \\"\19" + , + .code_page = .windows1252, + }, .{})); + // escaped quotes + try std.testing.expectEqualSlices(u8, " \" ", try parseQuotedAsciiString(arena, .{ + .slice = + \\" "" " + , + .code_page = .windows1252, + }, .{})); + // backslash right before escaped quotes + try std.testing.expectEqualSlices(u8, "\"", try parseQuotedAsciiString(arena, .{ + .slice = + \\"\""" + , + .code_page = .windows1252, + }, .{})); + // octal overflow + try std.testing.expectEqualSlices(u8, "\x01", try parseQuotedAsciiString(arena, .{ + .slice = + \\"\401" + , + .code_page = .windows1252, + }, .{})); + // escapes + try std.testing.expectEqualSlices(u8, "\x08\n\r\t\\", try parseQuotedAsciiString(arena, .{ + .slice = + \\"\a\n\r\t\\" + , + .code_page = .windows1252, + }, .{})); + // uppercase escapes + try std.testing.expectEqualSlices(u8, "\x08\\N\\R\t\\", try parseQuotedAsciiString(arena, .{ + .slice = + \\"\A\N\R\T\\" + , + .code_page = .windows1252, + }, .{})); + // backslash on its own + try std.testing.expectEqualSlices(u8, "\\", try parseQuotedAsciiString(arena, .{ + .slice = + \\"\" + , + .code_page = .windows1252, + }, .{})); + // unrecognized escapes + try std.testing.expectEqualSlices(u8, "\\b", try parseQuotedAsciiString(arena, .{ + .slice = + \\"\b" + , + .code_page = .windows1252, + }, .{})); + // escaped carriage returns + try std.testing.expectEqualSlices(u8, "\\", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\\\r\r\r\r\r\"", .code_page = .windows1252 }, + .{}, + )); + // escaped newlines + try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\\\n\n\n\n\n\"", .code_page = .windows1252 }, + .{}, + )); + // escaped CRLF pairs + try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\\\r\n\r\n\r\n\r\n\r\n\"", .code_page = .windows1252 }, + .{}, + )); + // escaped newlines with other whitespace + try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\\\n \t\r\n \r\t\n \t\"", .code_page = .windows1252 }, + .{}, + )); + // literal tab characters get converted to 
spaces (dependent on source file columns) + try std.testing.expectEqualSlices(u8, " ", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\t\"", .code_page = .windows1252 }, + .{}, + )); + try std.testing.expectEqualSlices(u8, "abc ", try parseQuotedAsciiString( + arena, + .{ .slice = "\"abc\t\"", .code_page = .windows1252 }, + .{}, + )); + try std.testing.expectEqualSlices(u8, "abcdefg ", try parseQuotedAsciiString( + arena, + .{ .slice = "\"abcdefg\t\"", .code_page = .windows1252 }, + .{}, + )); + try std.testing.expectEqualSlices(u8, "\\ ", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\\\t\"", .code_page = .windows1252 }, + .{}, + )); + // literal CR's get dropped + try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\r\r\r\r\r\"", .code_page = .windows1252 }, + .{}, + )); + // contiguous newlines and whitespace get collapsed to + try std.testing.expectEqualSlices(u8, " \n", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\n\r\r \r\n \t \"", .code_page = .windows1252 }, + .{}, + )); +} + +test "parse quoted ascii string with utf8 code page" { + var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\"", .code_page = .utf8 }, + .{}, + )); + // Codepoints that don't have a Windows-1252 representation get converted to ? + try std.testing.expectEqualSlices(u8, "?????????", try parseQuotedAsciiString( + arena, + .{ .slice = "\"кириллица\"", .code_page = .utf8 }, + .{}, + )); + // Codepoints that have a best fit mapping get converted accordingly, + // these are box drawing codepoints + try std.testing.expectEqualSlices(u8, "\x2b\x2d\x2b", try parseQuotedAsciiString( + arena, + .{ .slice = "\"┌─┐\"", .code_page = .utf8 }, + .{}, + )); + // Invalid UTF-8 gets converted to ? depending on well-formedness + try std.testing.expectEqualSlices(u8, "????", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 }, + .{}, + )); + // Codepoints that would require a UTF-16 surrogate pair get converted to ?? + try std.testing.expectEqualSlices(u8, "??", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\xF2\xAF\xBA\xB4\"", .code_page = .utf8 }, + .{}, + )); + + // Output code page changes how invalid UTF-8 gets converted, since it + // now encodes the result as UTF-8 so it can write replacement characters. 
+ try std.testing.expectEqualSlices(u8, "����", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 }, + .{ .output_code_page = .utf8 }, + )); + try std.testing.expectEqualSlices(u8, "\xF2\xAF\xBA\xB4", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\xF2\xAF\xBA\xB4\"", .code_page = .utf8 }, + .{ .output_code_page = .utf8 }, + )); +} + +test "parse quoted wide string" { + var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ 'h', 'e', 'l', 'l', 'o' }, try parseQuotedWideString(arena, .{ + .slice = + \\L"hello" + , + .code_page = .windows1252, + }, .{})); + // hex with 0 digits + try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{0x0}, try parseQuotedWideString(arena, .{ + .slice = + \\L"\x" + , + .code_page = .windows1252, + }, .{})); + // hex max of 4 digits + try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ 0xFFFF, 'f' }, try parseQuotedWideString(arena, .{ + .slice = + \\L"\XfFfFf" + , + .code_page = .windows1252, + }, .{})); + // octal max of 7 digits + try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ 0x9493, '3', '3' }, try parseQuotedWideString(arena, .{ + .slice = + \\L"\111222333" + , + .code_page = .windows1252, + }, .{})); + // octal overflow + try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{0xFF01}, try parseQuotedWideString(arena, .{ + .slice = + \\L"\777401" + , + .code_page = .windows1252, + }, .{})); + // literal tab characters get converted to spaces (dependent on source file columns) + try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("abcdefg "), try parseQuotedWideString( + arena, + .{ .slice = "L\"abcdefg\t\"", .code_page = .windows1252 }, + .{}, + )); + // Windows-1252 conversion + try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("ðð€€€"), try parseQuotedWideString( + arena, + .{ .slice = "L\"\xf0\xf0\x80\x80\x80\"", .code_page = .windows1252 }, + .{}, + )); + // Invalid escape sequences are skipped + try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral(""), try parseQuotedWideString( + arena, + .{ .slice = "L\"\\H\"", .code_page = .windows1252 }, + .{}, + )); +} + +test "parse quoted wide string with utf8 code page" { + var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{}, try parseQuotedWideString( + arena, + .{ .slice = "L\"\"", .code_page = .utf8 }, + .{}, + )); + try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("кириллица"), try parseQuotedWideString( + arena, + .{ .slice = "L\"кириллица\"", .code_page = .utf8 }, + .{}, + )); + // Invalid UTF-8 gets converted to � depending on well-formedness + try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("����"), try parseQuotedWideString( + arena, + .{ .slice = "L\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 }, + .{}, + )); +} + +test "parse quoted ascii string as wide string" { + var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("кириллица"), try parseQuotedStringAsWideString( + arena, + 
.{ .slice = "\"кириллица\"", .code_page = .utf8 }, + .{}, + )); + // Whether or not invalid escapes are skipped is still determined by the L prefix + try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("\\H"), try parseQuotedStringAsWideString( + arena, + .{ .slice = "\"\\H\"", .code_page = .windows1252 }, + .{}, + )); + try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral(""), try parseQuotedStringAsWideString( + arena, + .{ .slice = "L\"\\H\"", .code_page = .windows1252 }, + .{}, + )); + // Maximum escape sequence value is also determined by the L prefix + try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("\x1234"), try parseQuotedStringAsWideString( + arena, + .{ .slice = "\"\\x1234\"", .code_page = .windows1252 }, + .{}, + )); + try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{0x1234}, try parseQuotedStringAsWideString( + arena, + .{ .slice = "L\"\\x1234\"", .code_page = .windows1252 }, + .{}, + )); +} + +pub fn columnsUntilTabStop(column: usize, tab_columns: usize) usize { + // 0 => 8, 1 => 7, 2 => 6, 3 => 5, 4 => 4 + // 5 => 3, 6 => 2, 7 => 1, 8 => 8 + return tab_columns - (column % tab_columns); +} + +pub const Number = struct { + value: u32, + is_long: bool = false, + + pub fn asWord(self: Number) u16 { + return @truncate(self.value); + } + + pub fn evaluateOperator(lhs: Number, operator_char: u8, rhs: Number) Number { + const result = switch (operator_char) { + '-' => lhs.value -% rhs.value, + '+' => lhs.value +% rhs.value, + '|' => lhs.value | rhs.value, + '&' => lhs.value & rhs.value, + else => unreachable, // invalid operator, this would be a lexer/parser bug + }; + return .{ + .value = result, + .is_long = lhs.is_long or rhs.is_long, + }; + } +}; + +/// Assumes that number literals normally rejected by RC's preprocessor +/// are similarly rejected before being parsed. 
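+/// (in this codebase, that rejection happens at lex time via error.InvalidNumberWithExponent)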
+/// +/// Relevant RC preprocessor errors: +/// RC2021: expected exponent value, not '' +/// example that is rejected: 1e1 +/// example that is accepted: 1ea +/// (this function will parse the two examples above the same) +pub fn parseNumberLiteral(bytes: SourceBytes) Number { + std.debug.assert(bytes.slice.len > 0); + var result = Number{ .value = 0, .is_long = false }; + var radix: u8 = 10; + var buf = bytes.slice; + + const Prefix = enum { none, minus, complement }; + var prefix: Prefix = .none; + switch (buf[0]) { + '-' => { + prefix = .minus; + buf = buf[1..]; + }, + '~' => { + prefix = .complement; + buf = buf[1..]; + }, + else => {}, + } + + if (buf.len > 2 and buf[0] == '0') { + switch (buf[1]) { + 'o' => { // octal radix prefix is case-sensitive + radix = 8; + buf = buf[2..]; + }, + 'x', 'X' => { + radix = 16; + buf = buf[2..]; + }, + else => {}, + } + } + + var i: usize = 0; + while (bytes.code_page.codepointAt(i, buf)) |codepoint| : (i += codepoint.byte_len) { + const c = codepoint.value; + if (c == 'L' or c == 'l') { + result.is_long = true; + break; + } + const digit = switch (c) { + // On invalid digit for the radix, just stop parsing but don't fail + 0x00...0x7F => std.fmt.charToDigit(@intCast(c), radix) catch break, + else => break, + }; + + if (result.value != 0) { + result.value *%= radix; + } + result.value +%= digit; + } + + switch (prefix) { + .none => {}, + .minus => result.value = 0 -% result.value, + .complement => result.value = ~result.value, + } + + return result; +} + +test "parse number literal" { + try std.testing.expectEqual(Number{ .value = 0, .is_long = false }, parseNumberLiteral(.{ .slice = "0", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 1, .is_long = false }, parseNumberLiteral(.{ .slice = "1", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 1, .is_long = true }, parseNumberLiteral(.{ .slice = "1L", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 1, .is_long = true }, parseNumberLiteral(.{ .slice = "1l", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 1, .is_long = false }, parseNumberLiteral(.{ .slice = "1garbageL", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 4294967295, .is_long = false }, parseNumberLiteral(.{ .slice = "4294967295", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0, .is_long = false }, parseNumberLiteral(.{ .slice = "4294967296", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 1, .is_long = true }, parseNumberLiteral(.{ .slice = "4294967297L", .code_page = .windows1252 })); + + // can handle any length of number, wraps on overflow appropriately + const big_overflow = parseNumberLiteral(.{ .slice = "1000000000000000000000000000000000000000000000000000000000000000000000000000000090000000001", .code_page = .windows1252 }); + try std.testing.expectEqual(Number{ .value = 4100654081, .is_long = false }, big_overflow); + try std.testing.expectEqual(@as(u16, 1025), big_overflow.asWord()); + + try std.testing.expectEqual(Number{ .value = 0x20, .is_long = false }, parseNumberLiteral(.{ .slice = "0x20", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2AL", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2aL", .code_page = .windows1252 })); + try 
std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2aL", .code_page = .windows1252 })); + + try std.testing.expectEqual(Number{ .value = 0o20, .is_long = false }, parseNumberLiteral(.{ .slice = "0o20", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0o20, .is_long = true }, parseNumberLiteral(.{ .slice = "0o20L", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0o2, .is_long = false }, parseNumberLiteral(.{ .slice = "0o29", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0, .is_long = false }, parseNumberLiteral(.{ .slice = "0O29", .code_page = .windows1252 })); + + try std.testing.expectEqual(Number{ .value = 0xFFFFFFFF, .is_long = false }, parseNumberLiteral(.{ .slice = "-1", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0xFFFFFFFE, .is_long = false }, parseNumberLiteral(.{ .slice = "~1", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0xFFFFFFFF, .is_long = true }, parseNumberLiteral(.{ .slice = "-4294967297L", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0xFFFFFFFE, .is_long = true }, parseNumberLiteral(.{ .slice = "~4294967297L", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0xFFFFFFFD, .is_long = false }, parseNumberLiteral(.{ .slice = "-0X3", .code_page = .windows1252 })); + + // anything after L is ignored + try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2aL5", .code_page = .windows1252 })); +} diff --git a/src/resinator/parse.zig b/src/resinator/parse.zig new file mode 100644 index 000000000000..2e528bea657b --- /dev/null +++ b/src/resinator/parse.zig @@ -0,0 +1,1880 @@ +const std = @import("std"); +const Lexer = @import("lex.zig").Lexer; +const Token = @import("lex.zig").Token; +const Node = @import("ast.zig").Node; +const Tree = @import("ast.zig").Tree; +const CodePageLookup = @import("ast.zig").CodePageLookup; +const Resource = @import("rc.zig").Resource; +const Allocator = std.mem.Allocator; +const ErrorDetails = @import("errors.zig").ErrorDetails; +const Diagnostics = @import("errors.zig").Diagnostics; +const SourceBytes = @import("literals.zig").SourceBytes; +const Compiler = @import("compile.zig").Compiler; +const rc = @import("rc.zig"); +const res = @import("res.zig"); + +// TODO: Make these configurable? 
+pub const max_nested_menu_level: u32 = 512; +pub const max_nested_version_level: u32 = 512; +pub const max_nested_expression_level: u32 = 200; + +pub const Parser = struct { + const Self = @This(); + + lexer: *Lexer, + /// values that need to be initialized per-parse + state: Parser.State = undefined, + options: Parser.Options, + + pub const Error = error{ParseError} || Allocator.Error; + + pub const Options = struct { + warn_instead_of_error_on_invalid_code_page: bool = false, + }; + + pub fn init(lexer: *Lexer, options: Options) Parser { + return Parser{ + .lexer = lexer, + .options = options, + }; + } + + pub const State = struct { + token: Token, + lookahead_lexer: Lexer, + allocator: Allocator, + arena: Allocator, + diagnostics: *Diagnostics, + input_code_page_lookup: CodePageLookup, + output_code_page_lookup: CodePageLookup, + }; + + pub fn parse(self: *Self, allocator: Allocator, diagnostics: *Diagnostics) Error!*Tree { + var arena = std.heap.ArenaAllocator.init(allocator); + errdefer arena.deinit(); + + self.state = Parser.State{ + .token = undefined, + .lookahead_lexer = undefined, + .allocator = allocator, + .arena = arena.allocator(), + .diagnostics = diagnostics, + .input_code_page_lookup = CodePageLookup.init(arena.allocator(), self.lexer.default_code_page), + .output_code_page_lookup = CodePageLookup.init(arena.allocator(), self.lexer.default_code_page), + }; + + const parsed_root = try self.parseRoot(); + + const tree = try self.state.arena.create(Tree); + tree.* = .{ + .node = parsed_root, + .input_code_pages = self.state.input_code_page_lookup, + .output_code_pages = self.state.output_code_page_lookup, + .source = self.lexer.buffer, + .arena = arena.state, + .allocator = allocator, + }; + return tree; + } + + fn parseRoot(self: *Self) Error!*Node { + var statements = std.ArrayList(*Node).init(self.state.allocator); + defer statements.deinit(); + + try self.parseStatements(&statements); + try self.check(.eof); + + const node = try self.state.arena.create(Node.Root); + node.* = .{ + .body = try self.state.arena.dupe(*Node, statements.items), + }; + return &node.base; + } + + fn parseStatements(self: *Self, statements: *std.ArrayList(*Node)) Error!void { + while (true) { + try self.nextToken(.whitespace_delimiter_only); + if (self.state.token.id == .eof) break; + // The Win32 compiler will sometimes try to recover from errors + // and then restart parsing afterwards. We don't ever do this + // because it almost always leads to unhelpful error messages + // (usually it will end up with bogus things like 'file + // not found: {') + var statement = try self.parseStatement(); + try statements.append(statement); + } + } + + /// Expects the current token to be the token before possible common resource attributes. + /// After return, the current token will be the token immediately before the end of the + /// common resource attributes (if any). If there are no common resource attributes, the + /// current token is unchanged. 
+ /// The returned slice is allocated by the parser's arena + fn parseCommonResourceAttributes(self: *Self) ![]Token { + var common_resource_attributes = std.ArrayListUnmanaged(Token){}; + while (true) { + const maybe_common_resource_attribute = try self.lookaheadToken(.normal); + if (maybe_common_resource_attribute.id == .literal and rc.CommonResourceAttributes.map.has(maybe_common_resource_attribute.slice(self.lexer.buffer))) { + try common_resource_attributes.append(self.state.arena, maybe_common_resource_attribute); + self.nextToken(.normal) catch unreachable; + } else { + break; + } + } + return common_resource_attributes.toOwnedSlice(self.state.arena); + } + + /// Expects the current token to have already been dealt with, and that the + /// optional statements will potentially start on the next token. + /// After return, the current token will be the token immediately before the end of the + /// optional statements (if any). If there are no optional statements, the + /// current token is unchanged. + /// The returned slice is allocated by the parser's arena + fn parseOptionalStatements(self: *Self, resource: Resource) ![]*Node { + var optional_statements = std.ArrayListUnmanaged(*Node){}; + while (true) { + const lookahead_token = try self.lookaheadToken(.normal); + if (lookahead_token.id != .literal) break; + const slice = lookahead_token.slice(self.lexer.buffer); + const optional_statement_type = rc.OptionalStatements.map.get(slice) orelse switch (resource) { + .dialog, .dialogex => rc.OptionalStatements.dialog_map.get(slice) orelse break, + else => break, + }; + self.nextToken(.normal) catch unreachable; + switch (optional_statement_type) { + .language => { + const language = try self.parseLanguageStatement(); + try optional_statements.append(self.state.arena, language); + }, + // Number only + .version, .characteristics, .style, .exstyle => { + const identifier = self.state.token; + const value = try self.parseExpression(.{ + .can_contain_not_expressions = optional_statement_type == .style or optional_statement_type == .exstyle, + .allowed_types = .{ .number = true }, + }); + const node = try self.state.arena.create(Node.SimpleStatement); + node.* = .{ + .identifier = identifier, + .value = value, + }; + try optional_statements.append(self.state.arena, &node.base); + }, + // String only + .caption => { + const identifier = self.state.token; + try self.nextToken(.normal); + const value = self.state.token; + if (!value.isStringLiteral()) { + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .expected_something_else, + .token = value, + .extra = .{ .expected_types = .{ + .string_literal = true, + } }, + }); + } + // TODO: Wrapping this in a Node.Literal is superfluous but necessary + // to put it in a SimpleStatement + const value_node = try self.state.arena.create(Node.Literal); + value_node.* = .{ + .token = value, + }; + const node = try self.state.arena.create(Node.SimpleStatement); + node.* = .{ + .identifier = identifier, + .value = &value_node.base, + }; + try optional_statements.append(self.state.arena, &node.base); + }, + // String or number + .class => { + const identifier = self.state.token; + const value = try self.parseExpression(.{ .allowed_types = .{ .number = true, .string = true } }); + const node = try self.state.arena.create(Node.SimpleStatement); + node.* = .{ + .identifier = identifier, + .value = value, + }; + try optional_statements.append(self.state.arena, &node.base); + }, + // Special case + .menu => { + const identifier = self.state.token; + try 
self.nextToken(.whitespace_delimiter_only); + try self.check(.literal); + // TODO: Wrapping this in a Node.Literal is superfluous but necessary + // to put it in a SimpleStatement + const value_node = try self.state.arena.create(Node.Literal); + value_node.* = .{ + .token = self.state.token, + }; + const node = try self.state.arena.create(Node.SimpleStatement); + node.* = .{ + .identifier = identifier, + .value = &value_node.base, + }; + try optional_statements.append(self.state.arena, &node.base); + }, + .font => { + const identifier = self.state.token; + const point_size = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + + // The comma between point_size and typeface is both optional and + // there can be any number of them + try self.skipAnyCommas(); + + try self.nextToken(.normal); + const typeface = self.state.token; + if (!typeface.isStringLiteral()) { + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .expected_something_else, + .token = typeface, + .extra = .{ .expected_types = .{ + .string_literal = true, + } }, + }); + } + + const ExSpecificValues = struct { + weight: ?*Node = null, + italic: ?*Node = null, + char_set: ?*Node = null, + }; + var ex_specific = ExSpecificValues{}; + ex_specific: { + var optional_param_parser = OptionalParamParser{ .parser = self }; + switch (resource) { + .dialogex => { + { + ex_specific.weight = try optional_param_parser.parse(.{}); + if (optional_param_parser.finished) break :ex_specific; + } + { + if (!(try self.parseOptionalToken(.comma))) break :ex_specific; + ex_specific.italic = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + } + { + ex_specific.char_set = try optional_param_parser.parse(.{}); + if (optional_param_parser.finished) break :ex_specific; + } + }, + .dialog => {}, + else => unreachable, // only DIALOG and DIALOGEX have FONT optional-statements + } + } + + const node = try self.state.arena.create(Node.FontStatement); + node.* = .{ + .identifier = identifier, + .point_size = point_size, + .typeface = typeface, + .weight = ex_specific.weight, + .italic = ex_specific.italic, + .char_set = ex_specific.char_set, + }; + try optional_statements.append(self.state.arena, &node.base); + }, + } + } + return optional_statements.toOwnedSlice(self.state.arena); + } + + /// Expects the current token to be the first token of the statement. 
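+    /// For example (hypothetical input), a top-level statement might be a
+    /// LANGUAGE statement, a STRINGTABLE block, or a resource definition such
+    /// as `1 ICON "icon.ico"`.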
+ fn parseStatement(self: *Self) Error!*Node { + const first_token = self.state.token; + std.debug.assert(first_token.id == .literal); + + if (rc.TopLevelKeywords.map.get(first_token.slice(self.lexer.buffer))) |keyword| switch (keyword) { + .language => { + const language_statement = try self.parseLanguageStatement(); + return language_statement; + }, + .version, .characteristics => { + const identifier = self.state.token; + const value = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + const node = try self.state.arena.create(Node.SimpleStatement); + node.* = .{ + .identifier = identifier, + .value = value, + }; + return &node.base; + }, + .stringtable => { + // common resource attributes must all be contiguous and come before optional-statements + const common_resource_attributes = try self.parseCommonResourceAttributes(); + const optional_statements = try self.parseOptionalStatements(.stringtable); + + try self.nextToken(.normal); + const begin_token = self.state.token; + try self.check(.begin); + + var strings = std.ArrayList(*Node).init(self.state.allocator); + defer strings.deinit(); + while (true) { + const maybe_end_token = try self.lookaheadToken(.normal); + switch (maybe_end_token.id) { + .end => { + self.nextToken(.normal) catch unreachable; + break; + }, + .eof => { + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .unfinished_string_table_block, + .token = maybe_end_token, + }); + }, + else => {}, + } + const id_expression = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + + const comma_token: ?Token = if (try self.parseOptionalToken(.comma)) self.state.token else null; + + try self.nextToken(.normal); + if (self.state.token.id != .quoted_ascii_string and self.state.token.id != .quoted_wide_string) { + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .expected_something_else, + .token = self.state.token, + .extra = .{ .expected_types = .{ .string_literal = true } }, + }); + } + + const string_node = try self.state.arena.create(Node.StringTableString); + string_node.* = .{ + .id = id_expression, + .maybe_comma = comma_token, + .string = self.state.token, + }; + try strings.append(&string_node.base); + } + + if (strings.items.len == 0) { + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .expected_token, // TODO: probably a more specific error message + .token = self.state.token, + .extra = .{ .expected = .number }, + }); + } + + const end_token = self.state.token; + try self.check(.end); + + const node = try self.state.arena.create(Node.StringTable); + node.* = .{ + .type = first_token, + .common_resource_attributes = common_resource_attributes, + .optional_statements = optional_statements, + .begin_token = begin_token, + .strings = try self.state.arena.dupe(*Node, strings.items), + .end_token = end_token, + }; + return &node.base; + }, + }; + + // The Win32 RC compiler allows for a 'dangling' literal at the end of a file + // (as long as it's not a valid top-level keyword), and there is actually an + // .rc file with a such a dangling literal in the Windows-classic-samples set + // of projects. So, we have special compatibility for this particular case. 
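+        // For example (hypothetical input), a file whose final token is a bare
+        // literal like `foo` with nothing after it is accepted here and turned
+        // into an Invalid node rather than being rejected outright.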
+ const maybe_eof = try self.lookaheadToken(.whitespace_delimiter_only); + if (maybe_eof.id == .eof) { + // TODO: emit warning + var context = try self.state.arena.alloc(Token, 2); + context[0] = first_token; + context[1] = maybe_eof; + const invalid_node = try self.state.arena.create(Node.Invalid); + invalid_node.* = .{ + .context = context, + }; + return &invalid_node.base; + } + + const id_token = first_token; + const id_code_page = self.lexer.current_code_page; + try self.nextToken(.whitespace_delimiter_only); + const resource = try self.checkResource(); + const type_token = self.state.token; + + if (resource == .string_num) { + try self.addErrorDetails(.{ + .err = .string_resource_as_numeric_type, + .token = type_token, + }); + return self.addErrorDetailsAndFail(.{ + .err = .string_resource_as_numeric_type, + .token = type_token, + .type = .note, + .print_source_line = false, + }); + } + + if (resource == .font) { + const id_bytes = SourceBytes{ + .slice = id_token.slice(self.lexer.buffer), + .code_page = id_code_page, + }; + const maybe_ordinal = res.NameOrOrdinal.maybeOrdinalFromString(id_bytes); + if (maybe_ordinal == null) { + const would_be_win32_rc_ordinal = res.NameOrOrdinal.maybeNonAsciiOrdinalFromString(id_bytes); + if (would_be_win32_rc_ordinal) |win32_rc_ordinal| { + try self.addErrorDetails(ErrorDetails{ + .err = .id_must_be_ordinal, + .token = id_token, + .extra = .{ .resource = resource }, + }); + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .win32_non_ascii_ordinal, + .token = id_token, + .type = .note, + .print_source_line = false, + .extra = .{ .number = win32_rc_ordinal.ordinal }, + }); + } else { + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .id_must_be_ordinal, + .token = id_token, + .extra = .{ .resource = resource }, + }); + } + } + } + + switch (resource) { + .accelerators => { + // common resource attributes must all be contiguous and come before optional-statements + const common_resource_attributes = try self.parseCommonResourceAttributes(); + const optional_statements = try self.parseOptionalStatements(resource); + + try self.nextToken(.normal); + const begin_token = self.state.token; + try self.check(.begin); + + var accelerators = std.ArrayListUnmanaged(*Node){}; + + while (true) { + const lookahead = try self.lookaheadToken(.normal); + switch (lookahead.id) { + .end, .eof => { + self.nextToken(.normal) catch unreachable; + break; + }, + else => {}, + } + const event = try self.parseExpression(.{ .allowed_types = .{ .number = true, .string = true } }); + + try self.nextToken(.normal); + try self.check(.comma); + + const idvalue = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + + var type_and_options = std.ArrayListUnmanaged(Token){}; + while (true) { + if (!(try self.parseOptionalToken(.comma))) break; + + try self.nextToken(.normal); + if (!rc.AcceleratorTypeAndOptions.map.has(self.tokenSlice())) { + return self.addErrorDetailsAndFail(.{ + .err = .expected_something_else, + .token = self.state.token, + .extra = .{ .expected_types = .{ + .accelerator_type_or_option = true, + } }, + }); + } + try type_and_options.append(self.state.arena, self.state.token); + } + + const node = try self.state.arena.create(Node.Accelerator); + node.* = .{ + .event = event, + .idvalue = idvalue, + .type_and_options = try type_and_options.toOwnedSlice(self.state.arena), + }; + try accelerators.append(self.state.arena, &node.base); + } + + const end_token = self.state.token; + try self.check(.end); + + const node = try 
self.state.arena.create(Node.Accelerators); + node.* = .{ + .id = id_token, + .type = type_token, + .common_resource_attributes = common_resource_attributes, + .optional_statements = optional_statements, + .begin_token = begin_token, + .accelerators = try accelerators.toOwnedSlice(self.state.arena), + .end_token = end_token, + }; + return &node.base; + }, + .dialog, .dialogex => { + // common resource attributes must all be contiguous and come before optional-statements + const common_resource_attributes = try self.parseCommonResourceAttributes(); + + const x = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + _ = try self.parseOptionalToken(.comma); + + const y = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + _ = try self.parseOptionalToken(.comma); + + const width = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + _ = try self.parseOptionalToken(.comma); + + const height = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + + var optional_param_parser = OptionalParamParser{ .parser = self }; + const help_id: ?*Node = try optional_param_parser.parse(.{}); + + const optional_statements = try self.parseOptionalStatements(resource); + + try self.nextToken(.normal); + const begin_token = self.state.token; + try self.check(.begin); + + var controls = std.ArrayListUnmanaged(*Node){}; + defer controls.deinit(self.state.allocator); + while (try self.parseControlStatement(resource)) |control_node| { + // The number of controls must fit in a u16 in order for it to + // be able to be written into the relevant field in the .res data. + if (controls.items.len >= std.math.maxInt(u16)) { + try self.addErrorDetails(.{ + .err = .too_many_dialog_controls, + .token = id_token, + .extra = .{ .resource = resource }, + }); + return self.addErrorDetailsAndFail(.{ + .err = .too_many_dialog_controls, + .type = .note, + .token = control_node.getFirstToken(), + .token_span_end = control_node.getLastToken(), + .extra = .{ .resource = resource }, + }); + } + + try controls.append(self.state.allocator, control_node); + } + + try self.nextToken(.normal); + const end_token = self.state.token; + try self.check(.end); + + const node = try self.state.arena.create(Node.Dialog); + node.* = .{ + .id = id_token, + .type = type_token, + .common_resource_attributes = common_resource_attributes, + .x = x, + .y = y, + .width = width, + .height = height, + .help_id = help_id, + .optional_statements = optional_statements, + .begin_token = begin_token, + .controls = try self.state.arena.dupe(*Node, controls.items), + .end_token = end_token, + }; + return &node.base; + }, + .toolbar => { + // common resource attributes must all be contiguous and come before optional-statements + const common_resource_attributes = try self.parseCommonResourceAttributes(); + + const button_width = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + + try self.nextToken(.normal); + try self.check(.comma); + + const button_height = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + + try self.nextToken(.normal); + const begin_token = self.state.token; + try self.check(.begin); + + var buttons = std.ArrayListUnmanaged(*Node){}; + while (try self.parseToolbarButtonStatement()) |button_node| { + try buttons.append(self.state.arena, button_node); + } + + try self.nextToken(.normal); + const end_token = self.state.token; + try self.check(.end); + + const node = try self.state.arena.create(Node.Toolbar); + node.* = .{ + .id = id_token, + 
.type = type_token, + .common_resource_attributes = common_resource_attributes, + .button_width = button_width, + .button_height = button_height, + .begin_token = begin_token, + .buttons = try buttons.toOwnedSlice(self.state.arena), + .end_token = end_token, + }; + return &node.base; + }, + .menu, .menuex => { + // common resource attributes must all be contiguous and come before optional-statements + const common_resource_attributes = try self.parseCommonResourceAttributes(); + // help id is optional but must come between common resource attributes and optional-statements + var help_id: ?*Node = null; + // Note: No comma is allowed before or after help_id of MENUEX and help_id is not + // a possible field of MENU. + if (resource == .menuex and try self.lookaheadCouldBeNumberExpression(.not_disallowed)) { + help_id = try self.parseExpression(.{ + .is_known_to_be_number_expression = true, + }); + } + const optional_statements = try self.parseOptionalStatements(.stringtable); + + try self.nextToken(.normal); + const begin_token = self.state.token; + try self.check(.begin); + + var items = std.ArrayListUnmanaged(*Node){}; + defer items.deinit(self.state.allocator); + while (try self.parseMenuItemStatement(resource, id_token, 1)) |item_node| { + try items.append(self.state.allocator, item_node); + } + + try self.nextToken(.normal); + const end_token = self.state.token; + try self.check(.end); + + if (items.items.len == 0) { + return self.addErrorDetailsAndFail(.{ + .err = .empty_menu_not_allowed, + .token = type_token, + }); + } + + const node = try self.state.arena.create(Node.Menu); + node.* = .{ + .id = id_token, + .type = type_token, + .common_resource_attributes = common_resource_attributes, + .optional_statements = optional_statements, + .help_id = help_id, + .begin_token = begin_token, + .items = try self.state.arena.dupe(*Node, items.items), + .end_token = end_token, + }; + return &node.base; + }, + .versioninfo => { + // common resource attributes must all be contiguous and come before optional-statements + const common_resource_attributes = try self.parseCommonResourceAttributes(); + + var fixed_info = std.ArrayListUnmanaged(*Node){}; + while (try self.parseVersionStatement()) |version_statement| { + try fixed_info.append(self.state.arena, version_statement); + } + + try self.nextToken(.normal); + const begin_token = self.state.token; + try self.check(.begin); + + var block_statements = std.ArrayListUnmanaged(*Node){}; + while (try self.parseVersionBlockOrValue(id_token, 1)) |block_node| { + try block_statements.append(self.state.arena, block_node); + } + + try self.nextToken(.normal); + const end_token = self.state.token; + try self.check(.end); + + const node = try self.state.arena.create(Node.VersionInfo); + node.* = .{ + .id = id_token, + .versioninfo = type_token, + .common_resource_attributes = common_resource_attributes, + .fixed_info = try fixed_info.toOwnedSlice(self.state.arena), + .begin_token = begin_token, + .block_statements = try block_statements.toOwnedSlice(self.state.arena), + .end_token = end_token, + }; + return &node.base; + }, + .dlginclude => { + const common_resource_attributes = try self.parseCommonResourceAttributes(); + + var filename_expression = try self.parseExpression(.{ + .allowed_types = .{ .string = true }, + }); + + const node = try self.state.arena.create(Node.ResourceExternal); + node.* = .{ + .id = id_token, + .type = type_token, + .common_resource_attributes = common_resource_attributes, + .filename = filename_expression, + }; + return 
&node.base; + }, + .stringtable => { + return self.addErrorDetailsAndFail(.{ + .err = .name_or_id_not_allowed, + .token = id_token, + .extra = .{ .resource = resource }, + }); + }, + // Just try everything as a 'generic' resource (raw data or external file) + // TODO: More fine-grained switch cases as necessary + else => { + const common_resource_attributes = try self.parseCommonResourceAttributes(); + + const maybe_begin = try self.lookaheadToken(.normal); + if (maybe_begin.id == .begin) { + self.nextToken(.normal) catch unreachable; + + if (!resource.canUseRawData()) { + try self.addErrorDetails(ErrorDetails{ + .err = .resource_type_cant_use_raw_data, + .token = maybe_begin, + .extra = .{ .resource = resource }, + }); + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .resource_type_cant_use_raw_data, + .type = .note, + .print_source_line = false, + .token = maybe_begin, + }); + } + + const raw_data = try self.parseRawDataBlock(); + const end_token = self.state.token; + + const node = try self.state.arena.create(Node.ResourceRawData); + node.* = .{ + .id = id_token, + .type = type_token, + .common_resource_attributes = common_resource_attributes, + .begin_token = maybe_begin, + .raw_data = raw_data, + .end_token = end_token, + }; + return &node.base; + } + + var filename_expression = try self.parseExpression(.{ + // Don't tell the user that numbers are accepted since we error on + // number expressions and regular number literals are treated as unquoted + // literals rather than numbers, so from the users perspective + // numbers aren't really allowed. + .expected_types_override = .{ + .literal = true, + .string_literal = true, + }, + }); + + const node = try self.state.arena.create(Node.ResourceExternal); + node.* = .{ + .id = id_token, + .type = type_token, + .common_resource_attributes = common_resource_attributes, + .filename = filename_expression, + }; + return &node.base; + }, + } + } + + /// Expects the current token to be a begin token. + /// After return, the current token will be the end token. 
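+    /// For example (hypothetical input), given `1 RCDATA { 1, "abc", (2+3) }`,
+    /// this parses the `1, "abc", (2+3)` list and returns with the closing
+    /// brace/END as the current token.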
+ fn parseRawDataBlock(self: *Self) Error![]*Node { + var raw_data = std.ArrayList(*Node).init(self.state.allocator); + defer raw_data.deinit(); + while (true) { + const maybe_end_token = try self.lookaheadToken(.normal); + switch (maybe_end_token.id) { + .comma => { + // comma as the first token in a raw data block is an error + if (raw_data.items.len == 0) { + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .expected_something_else, + .token = maybe_end_token, + .extra = .{ .expected_types = .{ + .number = true, + .number_expression = true, + .string_literal = true, + } }, + }); + } + // otherwise just skip over commas + self.nextToken(.normal) catch unreachable; + continue; + }, + .end => { + self.nextToken(.normal) catch unreachable; + break; + }, + .eof => { + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .unfinished_raw_data_block, + .token = maybe_end_token, + }); + }, + else => {}, + } + const expression = try self.parseExpression(.{ .allowed_types = .{ .number = true, .string = true } }); + try raw_data.append(expression); + + if (expression.isNumberExpression()) { + const maybe_close_paren = try self.lookaheadToken(.normal); + if (maybe_close_paren.id == .close_paren) { + // ) is an error + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .expected_token, + .token = maybe_close_paren, + .extra = .{ .expected = .operator }, + }); + } + } + } + return try self.state.arena.dupe(*Node, raw_data.items); + } + + /// Expects the current token to be handled, and that the control statement will + /// begin on the next token. + /// After return, the current token will be the token immediately before the end of the + /// control statement (or unchanged if the function returns null). + fn parseControlStatement(self: *Self, resource: Resource) Error!?*Node { + const control_token = try self.lookaheadToken(.normal); + const control = rc.Control.map.get(control_token.slice(self.lexer.buffer)) orelse return null; + self.nextToken(.normal) catch unreachable; + + try self.skipAnyCommas(); + + var text: ?Token = null; + if (control.hasTextParam()) { + try self.nextToken(.normal); + switch (self.state.token.id) { + .quoted_ascii_string, .quoted_wide_string, .number => { + text = self.state.token; + }, + else => { + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .expected_something_else, + .token = self.state.token, + .extra = .{ .expected_types = .{ + .number = true, + .string_literal = true, + } }, + }); + }, + } + try self.skipAnyCommas(); + } + + const id = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + + try self.skipAnyCommas(); + + var class: ?*Node = null; + var style: ?*Node = null; + if (control == .control) { + class = try self.parseExpression(.{}); + if (class.?.id == .literal) { + const class_literal = @fieldParentPtr(Node.Literal, "base", class.?); + const is_invalid_control_class = class_literal.token.id == .literal and !rc.ControlClass.map.has(class_literal.token.slice(self.lexer.buffer)); + if (is_invalid_control_class) { + return self.addErrorDetailsAndFail(.{ + .err = .expected_something_else, + .token = self.state.token, + .extra = .{ .expected_types = .{ + .control_class = true, + } }, + }); + } + } + try self.skipAnyCommas(); + style = try self.parseExpression(.{ + .can_contain_not_expressions = true, + .allowed_types = .{ .number = true }, + }); + // If there is no comma after the style paramter, the Win32 RC compiler + // could misinterpret the statement and end up skipping over at least one token + // that 
should have been interepeted as the next parameter (x). For example: + // CONTROL "text", 1, BUTTON, 15 30, 1, 2, 3, 4 + // the `15` is the style parameter, but in the Win32 implementation the `30` + // is completely ignored (i.e. the `1, 2, 3, 4` are `x`, `y`, `w`, `h`). + // If a comma is added after the `15`, then `30` gets interpreted (correctly) + // as the `x` value. + // + // Instead of emulating this behavior, we just warn about the potential for + // weird behavior in the Win32 implementation whenever there isn't a comma after + // the style parameter. + const lookahead_token = try self.lookaheadToken(.normal); + if (lookahead_token.id != .comma and lookahead_token.id != .eof) { + try self.addErrorDetails(.{ + .err = .rc_could_miscompile_control_params, + .type = .warning, + .token = lookahead_token, + }); + try self.addErrorDetails(.{ + .err = .rc_could_miscompile_control_params, + .type = .note, + .token = style.?.getFirstToken(), + .token_span_end = style.?.getLastToken(), + }); + } + try self.skipAnyCommas(); + } + + const x = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + _ = try self.parseOptionalToken(.comma); + const y = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + _ = try self.parseOptionalToken(.comma); + const width = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + _ = try self.parseOptionalToken(.comma); + const height = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + + var optional_param_parser = OptionalParamParser{ .parser = self }; + if (control != .control) { + style = try optional_param_parser.parse(.{ .not_expression_allowed = true }); + } + + var exstyle: ?*Node = try optional_param_parser.parse(.{ .not_expression_allowed = true }); + var help_id: ?*Node = switch (resource) { + .dialogex => try optional_param_parser.parse(.{}), + else => null, + }; + + var extra_data: []*Node = &[_]*Node{}; + var extra_data_begin: ?Token = null; + var extra_data_end: ?Token = null; + // extra data is DIALOGEX-only + if (resource == .dialogex and try self.parseOptionalToken(.begin)) { + extra_data_begin = self.state.token; + extra_data = try self.parseRawDataBlock(); + extra_data_end = self.state.token; + } + + const node = try self.state.arena.create(Node.ControlStatement); + node.* = .{ + .type = control_token, + .text = text, + .class = class, + .id = id, + .x = x, + .y = y, + .width = width, + .height = height, + .style = style, + .exstyle = exstyle, + .help_id = help_id, + .extra_data_begin = extra_data_begin, + .extra_data = extra_data, + .extra_data_end = extra_data_end, + }; + return &node.base; + } + + fn parseToolbarButtonStatement(self: *Self) Error!?*Node { + const keyword_token = try self.lookaheadToken(.normal); + const button_type = rc.ToolbarButton.map.get(keyword_token.slice(self.lexer.buffer)) orelse return null; + self.nextToken(.normal) catch unreachable; + + switch (button_type) { + .separator => { + const node = try self.state.arena.create(Node.Literal); + node.* = .{ + .token = keyword_token, + }; + return &node.base; + }, + .button => { + const button_id = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + + const node = try self.state.arena.create(Node.SimpleStatement); + node.* = .{ + .identifier = keyword_token, + .value = button_id, + }; + return &node.base; + }, + } + } + + /// Expects the current token to be handled, and that the menuitem/popup statement will + /// begin on the next token. 
+ /// After return, the current token will be the token immediately before the end of the + /// menuitem statement (or unchanged if the function returns null). + fn parseMenuItemStatement(self: *Self, resource: Resource, top_level_menu_id_token: Token, nesting_level: u32) Error!?*Node { + const menuitem_token = try self.lookaheadToken(.normal); + const menuitem = rc.MenuItem.map.get(menuitem_token.slice(self.lexer.buffer)) orelse return null; + self.nextToken(.normal) catch unreachable; + + if (nesting_level > max_nested_menu_level) { + try self.addErrorDetails(.{ + .err = .nested_resource_level_exceeds_max, + .token = top_level_menu_id_token, + .extra = .{ .resource = resource }, + }); + return self.addErrorDetailsAndFail(.{ + .err = .nested_resource_level_exceeds_max, + .type = .note, + .token = menuitem_token, + .extra = .{ .resource = resource }, + }); + } + + switch (resource) { + .menu => switch (menuitem) { + .menuitem => { + try self.nextToken(.normal); + if (rc.MenuItem.isSeparator(self.state.token.slice(self.lexer.buffer))) { + const separator_token = self.state.token; + // There can be any number of trailing commas after SEPARATOR + try self.skipAnyCommas(); + const node = try self.state.arena.create(Node.MenuItemSeparator); + node.* = .{ + .menuitem = menuitem_token, + .separator = separator_token, + }; + return &node.base; + } else { + const text = self.state.token; + if (!text.isStringLiteral()) { + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .expected_something_else, + .token = text, + .extra = .{ .expected_types = .{ + .string_literal = true, + } }, + }); + } + try self.skipAnyCommas(); + + const result = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + + _ = try self.parseOptionalToken(.comma); + + var options = std.ArrayListUnmanaged(Token){}; + while (true) { + const option_token = try self.lookaheadToken(.normal); + if (!rc.MenuItem.Option.map.has(option_token.slice(self.lexer.buffer))) { + break; + } + self.nextToken(.normal) catch unreachable; + try options.append(self.state.arena, option_token); + try self.skipAnyCommas(); + } + + const node = try self.state.arena.create(Node.MenuItem); + node.* = .{ + .menuitem = menuitem_token, + .text = text, + .result = result, + .option_list = try options.toOwnedSlice(self.state.arena), + }; + return &node.base; + } + }, + .popup => { + try self.nextToken(.normal); + const text = self.state.token; + if (!text.isStringLiteral()) { + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .expected_something_else, + .token = text, + .extra = .{ .expected_types = .{ + .string_literal = true, + } }, + }); + } + try self.skipAnyCommas(); + + var options = std.ArrayListUnmanaged(Token){}; + while (true) { + const option_token = try self.lookaheadToken(.normal); + if (!rc.MenuItem.Option.map.has(option_token.slice(self.lexer.buffer))) { + break; + } + self.nextToken(.normal) catch unreachable; + try options.append(self.state.arena, option_token); + try self.skipAnyCommas(); + } + + try self.nextToken(.normal); + const begin_token = self.state.token; + try self.check(.begin); + + var items = std.ArrayListUnmanaged(*Node){}; + while (try self.parseMenuItemStatement(resource, top_level_menu_id_token, nesting_level + 1)) |item_node| { + try items.append(self.state.arena, item_node); + } + + try self.nextToken(.normal); + const end_token = self.state.token; + try self.check(.end); + + if (items.items.len == 0) { + return self.addErrorDetailsAndFail(.{ + .err = .empty_menu_not_allowed, + .token = 
menuitem_token, + }); + } + + const node = try self.state.arena.create(Node.Popup); + node.* = .{ + .popup = menuitem_token, + .text = text, + .option_list = try options.toOwnedSlice(self.state.arena), + .begin_token = begin_token, + .items = try items.toOwnedSlice(self.state.arena), + .end_token = end_token, + }; + return &node.base; + }, + }, + .menuex => { + try self.nextToken(.normal); + const text = self.state.token; + if (!text.isStringLiteral()) { + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .expected_something_else, + .token = text, + .extra = .{ .expected_types = .{ + .string_literal = true, + } }, + }); + } + + var param_parser = OptionalParamParser{ .parser = self }; + const id = try param_parser.parse(.{}); + const item_type = try param_parser.parse(.{}); + const state = try param_parser.parse(.{}); + + if (menuitem == .menuitem) { + // trailing comma is allowed, skip it + _ = try self.parseOptionalToken(.comma); + + const node = try self.state.arena.create(Node.MenuItemEx); + node.* = .{ + .menuitem = menuitem_token, + .text = text, + .id = id, + .type = item_type, + .state = state, + }; + return &node.base; + } + + const help_id = try param_parser.parse(.{}); + + // trailing comma is allowed, skip it + _ = try self.parseOptionalToken(.comma); + + try self.nextToken(.normal); + const begin_token = self.state.token; + try self.check(.begin); + + var items = std.ArrayListUnmanaged(*Node){}; + while (try self.parseMenuItemStatement(resource, top_level_menu_id_token, nesting_level + 1)) |item_node| { + try items.append(self.state.arena, item_node); + } + + try self.nextToken(.normal); + const end_token = self.state.token; + try self.check(.end); + + if (items.items.len == 0) { + return self.addErrorDetailsAndFail(.{ + .err = .empty_menu_not_allowed, + .token = menuitem_token, + }); + } + + const node = try self.state.arena.create(Node.PopupEx); + node.* = .{ + .popup = menuitem_token, + .text = text, + .id = id, + .type = item_type, + .state = state, + .help_id = help_id, + .begin_token = begin_token, + .items = try items.toOwnedSlice(self.state.arena), + .end_token = end_token, + }; + return &node.base; + }, + else => unreachable, + } + @compileError("unreachable"); + } + + pub const OptionalParamParser = struct { + finished: bool = false, + parser: *Self, + + pub const Options = struct { + not_expression_allowed: bool = false, + }; + + pub fn parse(self: *OptionalParamParser, options: OptionalParamParser.Options) Error!?*Node { + if (self.finished) return null; + if (!(try self.parser.parseOptionalToken(.comma))) { + self.finished = true; + return null; + } + // If the next lookahead token could be part of a number expression, + // then parse it. Otherwise, treat it as an 'empty' expression and + // continue parsing, since 'empty' values are allowed. + if (try self.parser.lookaheadCouldBeNumberExpression(switch (options.not_expression_allowed) { + true => .not_allowed, + false => .not_disallowed, + })) { + const node = try self.parser.parseExpression(.{ + .allowed_types = .{ .number = true }, + .can_contain_not_expressions = options.not_expression_allowed, + }); + return node; + } + return null; + } + }; + + /// Expects the current token to be handled, and that the version statement will + /// begin on the next token. + /// After return, the current token will be the token immediately before the end of the + /// version statement (or unchanged if the function returns null). 
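+    /// For example (hypothetical input), `FILEVERSION 1,2,3,4` and
+    /// `PRODUCTVERSION 1, 2, 3, 4` parse as up-to-four-part version statements,
+    /// while something like `FILEFLAGSMASK 0x3F` parses as a single-value statement.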
+ fn parseVersionStatement(self: *Self) Error!?*Node { + const type_token = try self.lookaheadToken(.normal); + const statement_type = rc.VersionInfo.map.get(type_token.slice(self.lexer.buffer)) orelse return null; + self.nextToken(.normal) catch unreachable; + switch (statement_type) { + .file_version, .product_version => { + var parts = std.BoundedArray(*Node, 4){}; + + while (parts.len < 4) { + const value = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + parts.addOneAssumeCapacity().* = value; + + if (parts.len == 4 or !(try self.parseOptionalToken(.comma))) { + break; + } + } + + const node = try self.state.arena.create(Node.VersionStatement); + node.* = .{ + .type = type_token, + .parts = try self.state.arena.dupe(*Node, parts.slice()), + }; + return &node.base; + }, + else => { + const value = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + + const node = try self.state.arena.create(Node.SimpleStatement); + node.* = .{ + .identifier = type_token, + .value = value, + }; + return &node.base; + }, + } + } + + /// Expects the current token to be handled, and that the version BLOCK/VALUE will + /// begin on the next token. + /// After return, the current token will be the token immediately before the end of the + /// version BLOCK/VALUE (or unchanged if the function returns null). + fn parseVersionBlockOrValue(self: *Self, top_level_version_id_token: Token, nesting_level: u32) Error!?*Node { + const keyword_token = try self.lookaheadToken(.normal); + const keyword = rc.VersionBlock.map.get(keyword_token.slice(self.lexer.buffer)) orelse return null; + self.nextToken(.normal) catch unreachable; + + if (nesting_level > max_nested_version_level) { + try self.addErrorDetails(.{ + .err = .nested_resource_level_exceeds_max, + .token = top_level_version_id_token, + .extra = .{ .resource = .versioninfo }, + }); + return self.addErrorDetailsAndFail(.{ + .err = .nested_resource_level_exceeds_max, + .type = .note, + .token = keyword_token, + .extra = .{ .resource = .versioninfo }, + }); + } + + try self.nextToken(.normal); + const key = self.state.token; + if (!key.isStringLiteral()) { + return self.addErrorDetailsAndFail(.{ + .err = .expected_something_else, + .token = key, + .extra = .{ .expected_types = .{ + .string_literal = true, + } }, + }); + } + // Need to keep track of this to detect a potential miscompilation when + // the comma is omitted and the first value is a quoted string. 
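+        // For example (hypothetical input), `VALUE "key" "value"` (no comma
+        // before the first quoted value) is treated differently by the Win32 RC
+        // compiler than `VALUE "key", "value"`, so parseBlockValuesList below
+        // may emit a padding-related warning for the comma-less form.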
+ const had_comma_before_first_value = try self.parseOptionalToken(.comma); + try self.skipAnyCommas(); + + const values = try self.parseBlockValuesList(had_comma_before_first_value); + + switch (keyword) { + .block => { + try self.nextToken(.normal); + const begin_token = self.state.token; + try self.check(.begin); + + var children = std.ArrayListUnmanaged(*Node){}; + while (try self.parseVersionBlockOrValue(top_level_version_id_token, nesting_level + 1)) |value_node| { + try children.append(self.state.arena, value_node); + } + + try self.nextToken(.normal); + const end_token = self.state.token; + try self.check(.end); + + const node = try self.state.arena.create(Node.Block); + node.* = .{ + .identifier = keyword_token, + .key = key, + .values = values, + .begin_token = begin_token, + .children = try children.toOwnedSlice(self.state.arena), + .end_token = end_token, + }; + return &node.base; + }, + .value => { + const node = try self.state.arena.create(Node.BlockValue); + node.* = .{ + .identifier = keyword_token, + .key = key, + .values = values, + }; + return &node.base; + }, + } + } + + fn parseBlockValuesList(self: *Self, had_comma_before_first_value: bool) Error![]*Node { + var values = std.ArrayListUnmanaged(*Node){}; + var seen_number: bool = false; + var first_string_value: ?*Node = null; + while (true) { + const lookahead_token = try self.lookaheadToken(.normal); + switch (lookahead_token.id) { + .operator, + .number, + .open_paren, + .quoted_ascii_string, + .quoted_wide_string, + => {}, + else => break, + } + const value = try self.parseExpression(.{}); + + if (value.isNumberExpression()) { + seen_number = true; + } else if (first_string_value == null) { + std.debug.assert(value.isStringLiteral()); + first_string_value = value; + } + + const has_trailing_comma = try self.parseOptionalToken(.comma); + try self.skipAnyCommas(); + + const value_value = try self.state.arena.create(Node.BlockValueValue); + value_value.* = .{ + .expression = value, + .trailing_comma = has_trailing_comma, + }; + try values.append(self.state.arena, &value_value.base); + } + if (seen_number and first_string_value != null) { + // The Win32 RC compiler does some strange stuff with the data size: + // Strings are counted as UTF-16 code units including the null-terminator + // Numbers are counted as their byte lengths + // So, when both strings and numbers are within a single value, + // it incorrectly sets the value's type as binary, but then gives the + // data length as a mixture of bytes and UTF-16 code units. This means that + // when the length is read, it will be treated as byte length and will + // not read the full value. We don't reproduce this behavior, so we warn + // of the miscompilation here. 
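+            // For example (hypothetical input), `VALUE "key", "str", 1` mixes a
+            // quoted string and a number within one value, which triggers the
+            // mismatched byte-count behavior described above in the Win32 RC
+            // compiler.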
+ try self.addErrorDetails(.{ + .err = .rc_would_miscompile_version_value_byte_count, + .type = .warning, + .token = first_string_value.?.getFirstToken(), + .token_span_start = values.items[0].getFirstToken(), + .token_span_end = values.items[values.items.len - 1].getLastToken(), + }); + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_version_value_byte_count, + .type = .note, + .token = first_string_value.?.getFirstToken(), + .token_span_start = values.items[0].getFirstToken(), + .token_span_end = values.items[values.items.len - 1].getLastToken(), + .print_source_line = false, + }); + } + if (!had_comma_before_first_value and values.items.len > 0 and values.items[0].cast(.block_value_value).?.expression.isStringLiteral()) { + const token = values.items[0].cast(.block_value_value).?.expression.cast(.literal).?.token; + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_version_value_padding, + .type = .warning, + .token = token, + }); + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_version_value_padding, + .type = .note, + .token = token, + .print_source_line = false, + }); + } + return values.toOwnedSlice(self.state.arena); + } + + fn numberExpressionContainsAnyLSuffixes(expression_node: *Node, source: []const u8, code_page_lookup: *const CodePageLookup) bool { + // TODO: This could probably be done without evaluating the whole expression + return Compiler.evaluateNumberExpression(expression_node, source, code_page_lookup).is_long; + } + + /// Expects the current token to be a literal token that contains the string LANGUAGE + fn parseLanguageStatement(self: *Self) Error!*Node { + const language_token = self.state.token; + + const primary_language = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + + try self.nextToken(.normal); + try self.check(.comma); + + const sublanguage = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + + // The Win32 RC compiler errors if either parameter contains any number with an L + // suffix. Instead of that, we want to warn and then let the values get truncated. + // The warning is done here to allow the compiler logic to not have to deal with this. 
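+        // For example (hypothetical input), `LANGUAGE 0x09L, 0x01` is an error
+        // in the Win32 RC compiler; here it produces a warning and the value is
+        // truncated to a u16.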
+ if (numberExpressionContainsAnyLSuffixes(primary_language, self.lexer.buffer, &self.state.input_code_page_lookup)) { + try self.addErrorDetails(.{ + .err = .rc_would_error_u16_with_l_suffix, + .type = .warning, + .token = primary_language.getFirstToken(), + .token_span_end = primary_language.getLastToken(), + .extra = .{ .statement_with_u16_param = .language }, + }); + try self.addErrorDetails(.{ + .err = .rc_would_error_u16_with_l_suffix, + .print_source_line = false, + .type = .note, + .token = primary_language.getFirstToken(), + .token_span_end = primary_language.getLastToken(), + .extra = .{ .statement_with_u16_param = .language }, + }); + } + if (numberExpressionContainsAnyLSuffixes(sublanguage, self.lexer.buffer, &self.state.input_code_page_lookup)) { + try self.addErrorDetails(.{ + .err = .rc_would_error_u16_with_l_suffix, + .type = .warning, + .token = sublanguage.getFirstToken(), + .token_span_end = sublanguage.getLastToken(), + .extra = .{ .statement_with_u16_param = .language }, + }); + try self.addErrorDetails(.{ + .err = .rc_would_error_u16_with_l_suffix, + .print_source_line = false, + .type = .note, + .token = sublanguage.getFirstToken(), + .token_span_end = sublanguage.getLastToken(), + .extra = .{ .statement_with_u16_param = .language }, + }); + } + + const node = try self.state.arena.create(Node.LanguageStatement); + node.* = .{ + .language_token = language_token, + .primary_language_id = primary_language, + .sublanguage_id = sublanguage, + }; + return &node.base; + } + + pub const ParseExpressionOptions = struct { + is_known_to_be_number_expression: bool = false, + can_contain_not_expressions: bool = false, + nesting_context: NestingContext = .{}, + allowed_types: AllowedTypes = .{ .literal = true, .number = true, .string = true }, + expected_types_override: ?ErrorDetails.ExpectedTypes = null, + + pub const AllowedTypes = struct { + literal: bool = false, + number: bool = false, + string: bool = false, + }; + + pub const NestingContext = struct { + first_token: ?Token = null, + last_token: ?Token = null, + level: u32 = 0, + + /// Returns a new NestingContext with values modified appropriately for an increased nesting level + fn incremented(ctx: NestingContext, first_token: Token, most_recent_token: Token) NestingContext { + return .{ + .first_token = ctx.first_token orelse first_token, + .last_token = most_recent_token, + .level = ctx.level + 1, + }; + } + }; + + pub fn toErrorDetails(options: ParseExpressionOptions, token: Token) ErrorDetails { + // TODO: expected_types_override interaction with is_known_to_be_number_expression? + var expected_types = options.expected_types_override orelse ErrorDetails.ExpectedTypes{ + .number = options.allowed_types.number, + .number_expression = options.allowed_types.number, + .string_literal = options.allowed_types.string and !options.is_known_to_be_number_expression, + .literal = options.allowed_types.literal and !options.is_known_to_be_number_expression, + }; + return ErrorDetails{ + .err = .expected_something_else, + .token = token, + .extra = .{ .expected_types = expected_types }, + }; + } + }; + + /// Returns true if the next lookahead token is a number or could be the start of a number expression. + /// Only useful when looking for empty expressions in optional fields. 
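+    /// For example, a lookahead of `3`, `(`, or a unary `+` could start a number
+    /// expression, while a string literal or an unrelated keyword could not, so
+    /// the corresponding optional parameter is treated as empty.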
+ fn lookaheadCouldBeNumberExpression(self: *Self, not_allowed: enum { not_allowed, not_disallowed }) Error!bool { + var lookahead_token = try self.lookaheadToken(.normal); + switch (lookahead_token.id) { + .literal => if (not_allowed == .not_allowed) { + return std.ascii.eqlIgnoreCase("NOT", lookahead_token.slice(self.lexer.buffer)); + } else return false, + .number => return true, + .open_paren => return true, + .operator => { + // + can be a unary operator, see parseExpression's handling of unary + + const operator_char = lookahead_token.slice(self.lexer.buffer)[0]; + return operator_char == '+'; + }, + else => return false, + } + } + + fn parsePrimary(self: *Self, options: ParseExpressionOptions) Error!*Node { + try self.nextToken(.normal); + const first_token = self.state.token; + var is_close_paren_expression = false; + var is_unary_plus_expression = false; + switch (self.state.token.id) { + .quoted_ascii_string, .quoted_wide_string => { + if (!options.allowed_types.string) return self.addErrorDetailsAndFail(options.toErrorDetails(self.state.token)); + const node = try self.state.arena.create(Node.Literal); + node.* = .{ .token = self.state.token }; + return &node.base; + }, + .literal => { + if (options.can_contain_not_expressions and std.ascii.eqlIgnoreCase("NOT", self.state.token.slice(self.lexer.buffer))) { + const not_token = self.state.token; + try self.nextToken(.normal); + try self.check(.number); + if (!options.allowed_types.number) return self.addErrorDetailsAndFail(options.toErrorDetails(self.state.token)); + const node = try self.state.arena.create(Node.NotExpression); + node.* = .{ + .not_token = not_token, + .number_token = self.state.token, + }; + return &node.base; + } + if (!options.allowed_types.literal) return self.addErrorDetailsAndFail(options.toErrorDetails(self.state.token)); + const node = try self.state.arena.create(Node.Literal); + node.* = .{ .token = self.state.token }; + return &node.base; + }, + .number => { + if (!options.allowed_types.number) return self.addErrorDetailsAndFail(options.toErrorDetails(self.state.token)); + const node = try self.state.arena.create(Node.Literal); + node.* = .{ .token = self.state.token }; + return &node.base; + }, + .open_paren => { + const open_paren_token = self.state.token; + + const expression = try self.parseExpression(.{ + .is_known_to_be_number_expression = true, + .can_contain_not_expressions = options.can_contain_not_expressions, + .nesting_context = options.nesting_context.incremented(first_token, open_paren_token), + .allowed_types = .{ .number = true }, + }); + + try self.nextToken(.normal); + // TODO: Add context to error about where the open paren is + try self.check(.close_paren); + + if (!options.allowed_types.number) return self.addErrorDetailsAndFail(options.toErrorDetails(open_paren_token)); + const node = try self.state.arena.create(Node.GroupedExpression); + node.* = .{ + .open_token = open_paren_token, + .expression = expression, + .close_token = self.state.token, + }; + return &node.base; + }, + .close_paren => { + // Note: In the Win32 implementation, a single close paren + // counts as a valid "expression", but only when its the first and + // only token in the expression. Such an expression is then treated + // as a 'skip this expression' instruction. For example: + // 1 RCDATA { 1, ), ), ), 2 } + // will be evaluated as if it were `1 RCDATA { 1, 2 }` and only + // 0x0001 and 0x0002 will be written to the .res data. 
+ // + // This behavior is not emulated because it almost certainly has + // no valid use cases and only introduces edge cases that are + // not worth the effort to track down and deal with. Instead, + // we error but also add a note about the Win32 RC behavior if + // this edge case is detected. + if (!options.is_known_to_be_number_expression) { + is_close_paren_expression = true; + } + }, + .operator => { + // In the Win32 implementation, something akin to a unary + + // is allowed but it doesn't behave exactly like a unary +. + // Instead of emulating the Win32 behavior, we instead error + // and add a note about unary plus not being allowed. + // + // This is done because unary + only works in some places, + // and there's no real use-case for it since it's so limited + // in how it can be used (e.g. +1 is accepted but (+1) will error) + // + // Even understanding when unary plus is allowed is difficult, so + // we don't do any fancy detection of when the Win32 RC compiler would + // allow a unary + and instead just output the note in all cases. + // + // Some examples of allowed expressions by the Win32 compiler: + // +1 + // 0|+5 + // +1+2 + // +~-5 + // +(1) + // + // Some examples of disallowed expressions by the Win32 compiler: + // (+1) + // ++5 + // + // TODO: Potentially re-evaluate and support the unary plus in a bug-for-bug + // compatible way. + const operator_char = self.state.token.slice(self.lexer.buffer)[0]; + if (operator_char == '+') { + is_unary_plus_expression = true; + } + }, + else => {}, + } + + try self.addErrorDetails(options.toErrorDetails(self.state.token)); + if (is_close_paren_expression) { + try self.addErrorDetails(ErrorDetails{ + .err = .close_paren_expression, + .type = .note, + .token = self.state.token, + .print_source_line = false, + }); + } + if (is_unary_plus_expression) { + try self.addErrorDetails(ErrorDetails{ + .err = .unary_plus_expression, + .type = .note, + .token = self.state.token, + .print_source_line = false, + }); + } + return error.ParseError; + } + + /// Expects the current token to have already been dealt with, and that the + /// expression will start on the next token. + /// After return, the current token will have been dealt with. 
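+    /// For example (hypothetical input), `(1 | 2) + 3` parses into a
+    /// BinaryExpression whose left side is a GroupedExpression and whose right
+    /// side is a number literal.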
+ fn parseExpression(self: *Self, options: ParseExpressionOptions) Error!*Node { + if (options.nesting_context.level > max_nested_expression_level) { + try self.addErrorDetails(.{ + .err = .nested_expression_level_exceeds_max, + .token = options.nesting_context.first_token.?, + }); + return self.addErrorDetailsAndFail(.{ + .err = .nested_expression_level_exceeds_max, + .type = .note, + .token = options.nesting_context.last_token.?, + }); + } + var expr: *Node = try self.parsePrimary(options); + const first_token = expr.getFirstToken(); + + // Non-number expressions can't have operators, so we can just return + if (!expr.isNumberExpression()) return expr; + + while (try self.parseOptionalTokenAdvanced(.operator, .normal_expect_operator)) { + const operator = self.state.token; + const rhs_node = try self.parsePrimary(.{ + .is_known_to_be_number_expression = true, + .can_contain_not_expressions = options.can_contain_not_expressions, + .nesting_context = options.nesting_context.incremented(first_token, operator), + .allowed_types = options.allowed_types, + }); + + if (!rhs_node.isNumberExpression()) { + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .expected_something_else, + .token = rhs_node.getFirstToken(), + .token_span_end = rhs_node.getLastToken(), + .extra = .{ .expected_types = .{ + .number = true, + .number_expression = true, + } }, + }); + } + + const node = try self.state.arena.create(Node.BinaryExpression); + node.* = .{ + .left = expr, + .operator = operator, + .right = rhs_node, + }; + expr = &node.base; + } + + return expr; + } + + /// Skips any amount of commas (including zero) + /// In other words, it will skip the regex `,*` + /// Assumes the token(s) should be parsed with `.normal` as the method. + fn skipAnyCommas(self: *Self) !void { + while (try self.parseOptionalToken(.comma)) {} + } + + /// Advances the current token only if the token's id matches the specified `id`. + /// Assumes the token should be parsed with `.normal` as the method. + /// Returns true if the token matched, false otherwise. + fn parseOptionalToken(self: *Self, id: Token.Id) Error!bool { + return self.parseOptionalTokenAdvanced(id, .normal); + } + + /// Advances the current token only if the token's id matches the specified `id`. + /// Returns true if the token matched, false otherwise. 
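+    /// If the token does not match, only a lookahead is performed and the
+    /// lexer state is left unchanged.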
+    fn parseOptionalTokenAdvanced(self: *Self, id: Token.Id, comptime method: Lexer.LexMethod) Error!bool {
+        const maybe_token = try self.lookaheadToken(method);
+        if (maybe_token.id != id) return false;
+        self.nextToken(method) catch unreachable;
+        return true;
+    }
+
+    fn addErrorDetails(self: *Self, details: ErrorDetails) Allocator.Error!void {
+        try self.state.diagnostics.append(details);
+    }
+
+    fn addErrorDetailsAndFail(self: *Self, details: ErrorDetails) Error {
+        try self.addErrorDetails(details);
+        return error.ParseError;
+    }
+
+    fn nextToken(self: *Self, comptime method: Lexer.LexMethod) Error!void {
+        self.state.token = token: while (true) {
+            const token = self.lexer.next(method) catch |err| switch (err) {
+                error.CodePagePragmaInIncludedFile => {
+                    // The Win32 RC compiler silently ignores such `#pragma code_page` directives,
+                    // but we want to both ignore them *and* emit a warning
+                    try self.addErrorDetails(.{
+                        .err = .code_page_pragma_in_included_file,
+                        .type = .warning,
+                        .token = self.lexer.error_context_token.?,
+                    });
+                    continue;
+                },
+                error.CodePagePragmaInvalidCodePage => {
+                    var details = self.lexer.getErrorDetails(err);
+                    if (!self.options.warn_instead_of_error_on_invalid_code_page) {
+                        return self.addErrorDetailsAndFail(details);
+                    }
+                    details.type = .warning;
+                    try self.addErrorDetails(details);
+                    continue;
+                },
+                error.InvalidDigitCharacterInNumberLiteral => {
+                    const details = self.lexer.getErrorDetails(err);
+                    try self.addErrorDetails(details);
+                    return self.addErrorDetailsAndFail(.{
+                        .err = details.err,
+                        .type = .note,
+                        .token = details.token,
+                        .print_source_line = false,
+                    });
+                },
+                else => return self.addErrorDetailsAndFail(self.lexer.getErrorDetails(err)),
+            };
+            break :token token;
+        };
+        // After every token, set the input code page for its line
+        try self.state.input_code_page_lookup.setForToken(self.state.token, self.lexer.current_code_page);
+        // But only set the output code page to the current code page if we are past the first code_page pragma in the file.
+        // Otherwise, we want to fill the lookup using the default code page so that lookups still work for lines that
+        // don't have an explicit output code page set.
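+        // (in other words, the first #pragma code_page in a file only affects the
+        // input code page; the output code page only starts following the current
+        // code page once a second #pragma code_page has been seen)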
+ const output_code_page = if (self.lexer.seen_pragma_code_pages > 1) self.lexer.current_code_page else self.state.output_code_page_lookup.default_code_page; + try self.state.output_code_page_lookup.setForToken(self.state.token, output_code_page); + } + + fn lookaheadToken(self: *Self, comptime method: Lexer.LexMethod) Error!Token { + self.state.lookahead_lexer = self.lexer.*; + return token: while (true) { + break :token self.state.lookahead_lexer.next(method) catch |err| switch (err) { + // Ignore this error and get the next valid token, we'll deal with this + // properly when getting the token for real + error.CodePagePragmaInIncludedFile => continue, + else => return self.addErrorDetailsAndFail(self.state.lookahead_lexer.getErrorDetails(err)), + }; + }; + } + + fn tokenSlice(self: *Self) []const u8 { + return self.state.token.slice(self.lexer.buffer); + } + + /// Check that the current token is something that can be used as an ID + fn checkId(self: *Self) !void { + switch (self.state.token.id) { + .literal => {}, + else => { + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .expected_token, + .token = self.state.token, + .extra = .{ .expected = .literal }, + }); + }, + } + } + + fn check(self: *Self, expected_token_id: Token.Id) !void { + if (self.state.token.id != expected_token_id) { + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .expected_token, + .token = self.state.token, + .extra = .{ .expected = expected_token_id }, + }); + } + } + + fn checkResource(self: *Self) !Resource { + switch (self.state.token.id) { + .literal => return Resource.fromString(.{ + .slice = self.state.token.slice(self.lexer.buffer), + .code_page = self.lexer.current_code_page, + }), + else => { + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .expected_token, + .token = self.state.token, + .extra = .{ .expected = .literal }, + }); + }, + } + } +}; diff --git a/src/resinator/rc.zig b/src/resinator/rc.zig new file mode 100644 index 000000000000..00cb455058bd --- /dev/null +++ b/src/resinator/rc.zig @@ -0,0 +1,407 @@ +const std = @import("std"); +const utils = @import("utils.zig"); +const res = @import("res.zig"); +const SourceBytes = @import("literals.zig").SourceBytes; + +// https://learn.microsoft.com/en-us/windows/win32/menurc/about-resource-files + +pub const Resource = enum { + accelerators, + bitmap, + cursor, + dialog, + dialogex, + /// As far as I can tell, this is undocumented; the most I could find was this: + /// https://www.betaarchive.com/wiki/index.php/Microsoft_KB_Archive/91697 + dlginclude, + /// Undocumented, basically works exactly like RCDATA + dlginit, + font, + html, + icon, + menu, + menuex, + messagetable, + plugplay, // Obsolete + rcdata, + stringtable, + /// Undocumented + toolbar, + user_defined, + versioninfo, + vxd, // Obsolete + + // Types that are treated as a user-defined type when encountered, but have + // special meaning without the Visual Studio GUI. We match the Win32 RC compiler + // behavior by acting as if these keyword don't exist when compiling the .rc + // (thereby treating them as user-defined). + //textinclude, // A special resource that is interpreted by Visual C++. 
+ //typelib, // A special resource that is used with the /TLBID and /TLBOUT linker options + + // Types that can only be specified by numbers, they don't have keywords + cursor_num, + icon_num, + string_num, + anicursor_num, + aniicon_num, + fontdir_num, + manifest_num, + + const map = std.ComptimeStringMapWithEql(Resource, .{ + .{ "ACCELERATORS", .accelerators }, + .{ "BITMAP", .bitmap }, + .{ "CURSOR", .cursor }, + .{ "DIALOG", .dialog }, + .{ "DIALOGEX", .dialogex }, + .{ "DLGINCLUDE", .dlginclude }, + .{ "DLGINIT", .dlginit }, + .{ "FONT", .font }, + .{ "HTML", .html }, + .{ "ICON", .icon }, + .{ "MENU", .menu }, + .{ "MENUEX", .menuex }, + .{ "MESSAGETABLE", .messagetable }, + .{ "PLUGPLAY", .plugplay }, + .{ "RCDATA", .rcdata }, + .{ "STRINGTABLE", .stringtable }, + .{ "TOOLBAR", .toolbar }, + .{ "VERSIONINFO", .versioninfo }, + .{ "VXD", .vxd }, + }, std.comptime_string_map.eqlAsciiIgnoreCase); + + pub fn fromString(bytes: SourceBytes) Resource { + const maybe_ordinal = res.NameOrOrdinal.maybeOrdinalFromString(bytes); + if (maybe_ordinal) |ordinal| { + if (ordinal.ordinal >= 256) return .user_defined; + return fromRT(@enumFromInt(ordinal.ordinal)); + } + return map.get(bytes.slice) orelse .user_defined; + } + + // TODO: Some comptime validation that RT <-> Resource conversion is synced? + pub fn fromRT(rt: res.RT) Resource { + return switch (rt) { + .ACCELERATOR => .accelerators, + .ANICURSOR => .anicursor_num, + .ANIICON => .aniicon_num, + .BITMAP => .bitmap, + .CURSOR => .cursor_num, + .DIALOG => .dialog, + .DLGINCLUDE => .dlginclude, + .DLGINIT => .dlginit, + .FONT => .font, + .FONTDIR => .fontdir_num, + .GROUP_CURSOR => .cursor, + .GROUP_ICON => .icon, + .HTML => .html, + .ICON => .icon_num, + .MANIFEST => .manifest_num, + .MENU => .menu, + .MESSAGETABLE => .messagetable, + .PLUGPLAY => .plugplay, + .RCDATA => .rcdata, + .STRING => .string_num, + .TOOLBAR => .toolbar, + .VERSION => .versioninfo, + .VXD => .vxd, + _ => .user_defined, + }; + } + + pub fn canUseRawData(resource: Resource) bool { + return switch (resource) { + .user_defined, + .html, + .plugplay, // Obsolete + .rcdata, + .vxd, // Obsolete + .manifest_num, + .dlginit, + => true, + else => false, + }; + } + + pub fn nameForErrorDisplay(resource: Resource) []const u8 { + return switch (resource) { + // zig fmt: off + .accelerators, .bitmap, .cursor, .dialog, .dialogex, .dlginclude, .dlginit, .font, + .html, .icon, .menu, .menuex, .messagetable, .plugplay, .rcdata, .stringtable, + .toolbar, .versioninfo, .vxd => @tagName(resource), + // zig fmt: on + .user_defined => "user-defined", + .cursor_num => std.fmt.comptimePrint("{d} (cursor)", .{@intFromEnum(res.RT.CURSOR)}), + .icon_num => std.fmt.comptimePrint("{d} (icon)", .{@intFromEnum(res.RT.ICON)}), + .string_num => std.fmt.comptimePrint("{d} (string)", .{@intFromEnum(res.RT.STRING)}), + .anicursor_num => std.fmt.comptimePrint("{d} (anicursor)", .{@intFromEnum(res.RT.ANICURSOR)}), + .aniicon_num => std.fmt.comptimePrint("{d} (aniicon)", .{@intFromEnum(res.RT.ANIICON)}), + .fontdir_num => std.fmt.comptimePrint("{d} (fontdir)", .{@intFromEnum(res.RT.FONTDIR)}), + .manifest_num => std.fmt.comptimePrint("{d} (manifest)", .{@intFromEnum(res.RT.MANIFEST)}), + }; + } +}; + +/// https://learn.microsoft.com/en-us/windows/win32/menurc/stringtable-resource#parameters +/// https://learn.microsoft.com/en-us/windows/win32/menurc/dialog-resource#parameters +/// https://learn.microsoft.com/en-us/windows/win32/menurc/dialogex-resource#parameters +pub const OptionalStatements = enum { + 
characteristics, + language, + version, + + // DIALOG + caption, + class, + exstyle, + font, + menu, + style, + + pub const map = std.ComptimeStringMapWithEql(OptionalStatements, .{ + .{ "CHARACTERISTICS", .characteristics }, + .{ "LANGUAGE", .language }, + .{ "VERSION", .version }, + }, std.comptime_string_map.eqlAsciiIgnoreCase); + + pub const dialog_map = std.ComptimeStringMapWithEql(OptionalStatements, .{ + .{ "CAPTION", .caption }, + .{ "CLASS", .class }, + .{ "EXSTYLE", .exstyle }, + .{ "FONT", .font }, + .{ "MENU", .menu }, + .{ "STYLE", .style }, + }, std.comptime_string_map.eqlAsciiIgnoreCase); +}; + +pub const Control = enum { + auto3state, + autocheckbox, + autoradiobutton, + checkbox, + combobox, + control, + ctext, + defpushbutton, + edittext, + hedit, + iedit, + groupbox, + icon, + listbox, + ltext, + pushbox, + pushbutton, + radiobutton, + rtext, + scrollbar, + state3, + userbutton, + + pub const map = std.ComptimeStringMapWithEql(Control, .{ + .{ "AUTO3STATE", .auto3state }, + .{ "AUTOCHECKBOX", .autocheckbox }, + .{ "AUTORADIOBUTTON", .autoradiobutton }, + .{ "CHECKBOX", .checkbox }, + .{ "COMBOBOX", .combobox }, + .{ "CONTROL", .control }, + .{ "CTEXT", .ctext }, + .{ "DEFPUSHBUTTON", .defpushbutton }, + .{ "EDITTEXT", .edittext }, + .{ "HEDIT", .hedit }, + .{ "IEDIT", .iedit }, + .{ "GROUPBOX", .groupbox }, + .{ "ICON", .icon }, + .{ "LISTBOX", .listbox }, + .{ "LTEXT", .ltext }, + .{ "PUSHBOX", .pushbox }, + .{ "PUSHBUTTON", .pushbutton }, + .{ "RADIOBUTTON", .radiobutton }, + .{ "RTEXT", .rtext }, + .{ "SCROLLBAR", .scrollbar }, + .{ "STATE3", .state3 }, + .{ "USERBUTTON", .userbutton }, + }, std.comptime_string_map.eqlAsciiIgnoreCase); + + pub fn hasTextParam(control: Control) bool { + switch (control) { + .scrollbar, .listbox, .iedit, .hedit, .edittext, .combobox => return false, + else => return true, + } + } +}; + +pub const ControlClass = struct { + pub const map = std.ComptimeStringMapWithEql(res.ControlClass, .{ + .{ "BUTTON", .button }, + .{ "EDIT", .edit }, + .{ "STATIC", .static }, + .{ "LISTBOX", .listbox }, + .{ "SCROLLBAR", .scrollbar }, + .{ "COMBOBOX", .combobox }, + }, std.comptime_string_map.eqlAsciiIgnoreCase); + + /// Like `map.get` but works on WTF16 strings, for use with parsed + /// string literals ("BUTTON", or even "\x42UTTON") + pub fn fromWideString(str: []const u16) ?res.ControlClass { + const utf16Literal = std.unicode.utf8ToUtf16LeStringLiteral; + return if (ascii.eqlIgnoreCaseW(str, utf16Literal("BUTTON"))) + .button + else if (ascii.eqlIgnoreCaseW(str, utf16Literal("EDIT"))) + .edit + else if (ascii.eqlIgnoreCaseW(str, utf16Literal("STATIC"))) + .static + else if (ascii.eqlIgnoreCaseW(str, utf16Literal("LISTBOX"))) + .listbox + else if (ascii.eqlIgnoreCaseW(str, utf16Literal("SCROLLBAR"))) + .scrollbar + else if (ascii.eqlIgnoreCaseW(str, utf16Literal("COMBOBOX"))) + .combobox + else + null; + } +}; + +const ascii = struct { + /// Compares ASCII values case-insensitively, non-ASCII values are compared directly + pub fn eqlIgnoreCaseW(a: []const u16, b: []const u16) bool { + if (a.len != b.len) return false; + for (a, b) |a_c, b_c| { + if (a_c < 128) { + if (std.ascii.toLower(@intCast(a_c)) != std.ascii.toLower(@intCast(b_c))) return false; + } else { + if (a_c != b_c) return false; + } + } + return true; + } +}; + +pub const MenuItem = enum { + menuitem, + popup, + + pub const map = std.ComptimeStringMapWithEql(MenuItem, .{ + .{ "MENUITEM", .menuitem }, + .{ "POPUP", .popup }, + }, std.comptime_string_map.eqlAsciiIgnoreCase); + + pub fn 
isSeparator(bytes: []const u8) bool { + return std.ascii.eqlIgnoreCase(bytes, "SEPARATOR"); + } + + pub const Option = enum { + checked, + grayed, + help, + inactive, + menubarbreak, + menubreak, + + pub const map = std.ComptimeStringMapWithEql(Option, .{ + .{ "CHECKED", .checked }, + .{ "GRAYED", .grayed }, + .{ "HELP", .help }, + .{ "INACTIVE", .inactive }, + .{ "MENUBARBREAK", .menubarbreak }, + .{ "MENUBREAK", .menubreak }, + }, std.comptime_string_map.eqlAsciiIgnoreCase); + }; +}; + +pub const ToolbarButton = enum { + button, + separator, + + pub const map = std.ComptimeStringMapWithEql(ToolbarButton, .{ + .{ "BUTTON", .button }, + .{ "SEPARATOR", .separator }, + }, std.comptime_string_map.eqlAsciiIgnoreCase); +}; + +pub const VersionInfo = enum { + file_version, + product_version, + file_flags_mask, + file_flags, + file_os, + file_type, + file_subtype, + + pub const map = std.ComptimeStringMapWithEql(VersionInfo, .{ + .{ "FILEVERSION", .file_version }, + .{ "PRODUCTVERSION", .product_version }, + .{ "FILEFLAGSMASK", .file_flags_mask }, + .{ "FILEFLAGS", .file_flags }, + .{ "FILEOS", .file_os }, + .{ "FILETYPE", .file_type }, + .{ "FILESUBTYPE", .file_subtype }, + }, std.comptime_string_map.eqlAsciiIgnoreCase); +}; + +pub const VersionBlock = enum { + block, + value, + + pub const map = std.ComptimeStringMapWithEql(VersionBlock, .{ + .{ "BLOCK", .block }, + .{ "VALUE", .value }, + }, std.comptime_string_map.eqlAsciiIgnoreCase); +}; + +/// Keywords that are be the first token in a statement and (if so) dictate how the rest +/// of the statement is parsed. +pub const TopLevelKeywords = enum { + language, + version, + characteristics, + stringtable, + + pub const map = std.ComptimeStringMapWithEql(TopLevelKeywords, .{ + .{ "LANGUAGE", .language }, + .{ "VERSION", .version }, + .{ "CHARACTERISTICS", .characteristics }, + .{ "STRINGTABLE", .stringtable }, + }, std.comptime_string_map.eqlAsciiIgnoreCase); +}; + +pub const CommonResourceAttributes = enum { + preload, + loadoncall, + fixed, + moveable, + discardable, + pure, + impure, + shared, + nonshared, + + pub const map = std.ComptimeStringMapWithEql(CommonResourceAttributes, .{ + .{ "PRELOAD", .preload }, + .{ "LOADONCALL", .loadoncall }, + .{ "FIXED", .fixed }, + .{ "MOVEABLE", .moveable }, + .{ "DISCARDABLE", .discardable }, + .{ "PURE", .pure }, + .{ "IMPURE", .impure }, + .{ "SHARED", .shared }, + .{ "NONSHARED", .nonshared }, + }, std.comptime_string_map.eqlAsciiIgnoreCase); +}; + +pub const AcceleratorTypeAndOptions = enum { + virtkey, + ascii, + noinvert, + alt, + shift, + control, + + pub const map = std.ComptimeStringMapWithEql(AcceleratorTypeAndOptions, .{ + .{ "VIRTKEY", .virtkey }, + .{ "ASCII", .ascii }, + .{ "NOINVERT", .noinvert }, + .{ "ALT", .alt }, + .{ "SHIFT", .shift }, + .{ "CONTROL", .control }, + }, std.comptime_string_map.eqlAsciiIgnoreCase); +}; diff --git a/src/resinator/res.zig b/src/resinator/res.zig new file mode 100644 index 000000000000..48edeeccbcf8 --- /dev/null +++ b/src/resinator/res.zig @@ -0,0 +1,1108 @@ +const std = @import("std"); +const rc = @import("rc.zig"); +const Resource = rc.Resource; +const CommonResourceAttributes = rc.CommonResourceAttributes; +const Allocator = std.mem.Allocator; +const windows1252 = @import("windows1252.zig"); +const CodePage = @import("code_pages.zig").CodePage; +const literals = @import("literals.zig"); +const SourceBytes = literals.SourceBytes; +const Codepoint = @import("code_pages.zig").Codepoint; +const lang = @import("lang.zig"); +const isNonAsciiDigit = 
@import("utils.zig").isNonAsciiDigit; + +/// https://learn.microsoft.com/en-us/windows/win32/menurc/resource-types +pub const RT = enum(u8) { + ACCELERATOR = 9, + ANICURSOR = 21, + ANIICON = 22, + BITMAP = 2, + CURSOR = 1, + DIALOG = 5, + DLGINCLUDE = 17, + DLGINIT = 240, + FONT = 8, + FONTDIR = 7, + GROUP_CURSOR = 1 + 11, // CURSOR + 11 + GROUP_ICON = 3 + 11, // ICON + 11 + HTML = 23, + ICON = 3, + MANIFEST = 24, + MENU = 4, + MESSAGETABLE = 11, + PLUGPLAY = 19, + RCDATA = 10, + STRING = 6, + TOOLBAR = 241, + VERSION = 16, + VXD = 20, + _, + + /// Returns null if the resource type is user-defined + /// Asserts that the resource is not `stringtable` + pub fn fromResource(resource: Resource) ?RT { + return switch (resource) { + .accelerators => .ACCELERATOR, + .bitmap => .BITMAP, + .cursor => .GROUP_CURSOR, + .dialog => .DIALOG, + .dialogex => .DIALOG, + .dlginclude => .DLGINCLUDE, + .dlginit => .DLGINIT, + .font => .FONT, + .html => .HTML, + .icon => .GROUP_ICON, + .menu => .MENU, + .menuex => .MENU, + .messagetable => .MESSAGETABLE, + .plugplay => .PLUGPLAY, + .rcdata => .RCDATA, + .stringtable => unreachable, + .toolbar => .TOOLBAR, + .user_defined => null, + .versioninfo => .VERSION, + .vxd => .VXD, + + .cursor_num => .CURSOR, + .icon_num => .ICON, + .string_num => .STRING, + .anicursor_num => .ANICURSOR, + .aniicon_num => .ANIICON, + .fontdir_num => .FONTDIR, + .manifest_num => .MANIFEST, + }; + } +}; + +/// https://learn.microsoft.com/en-us/windows/win32/menurc/common-resource-attributes +/// https://learn.microsoft.com/en-us/windows/win32/menurc/resourceheader +pub const MemoryFlags = packed struct(u16) { + value: u16, + + pub const MOVEABLE: u16 = 0x10; + // TODO: SHARED and PURE seem to be the same thing? Testing seems to confirm this but + // would like to find mention of it somewhere. + pub const SHARED: u16 = 0x20; + pub const PURE: u16 = 0x20; + pub const PRELOAD: u16 = 0x40; + pub const DISCARDABLE: u16 = 0x1000; + + /// Note: The defaults can have combinations that are not possible to specify within + /// an .rc file, as the .rc attributes imply other values (i.e. specifying + /// DISCARDABLE always implies MOVEABLE and PURE/SHARED, and yet RT_ICON + /// has a default of only MOVEABLE | DISCARDABLE). + pub fn defaults(predefined_resource_type: ?RT) MemoryFlags { + if (predefined_resource_type == null) { + return MemoryFlags{ .value = MOVEABLE | SHARED }; + } else { + return switch (predefined_resource_type.?) 
{ + // zig fmt: off + .RCDATA, .BITMAP, .HTML, .MANIFEST, + .ACCELERATOR, .VERSION, .MESSAGETABLE, + .DLGINIT, .TOOLBAR, .PLUGPLAY, + .VXD, => MemoryFlags{ .value = MOVEABLE | SHARED }, + + .GROUP_ICON, .GROUP_CURSOR, + .STRING, .FONT, .DIALOG, .MENU, + .DLGINCLUDE, => MemoryFlags{ .value = MOVEABLE | SHARED | DISCARDABLE }, + + .ICON, .CURSOR, .ANIICON, .ANICURSOR => MemoryFlags{ .value = MOVEABLE | DISCARDABLE }, + .FONTDIR => MemoryFlags{ .value = MOVEABLE | PRELOAD }, + // zig fmt: on + // Same as predefined_resource_type == null + _ => return MemoryFlags{ .value = MOVEABLE | SHARED }, + }; + } + } + + pub fn set(self: *MemoryFlags, attribute: CommonResourceAttributes) void { + switch (attribute) { + .preload => self.value |= PRELOAD, + .loadoncall => self.value &= ~PRELOAD, + .moveable => self.value |= MOVEABLE, + .fixed => self.value &= ~(MOVEABLE | DISCARDABLE), + .shared => self.value |= SHARED, + .nonshared => self.value &= ~(SHARED | DISCARDABLE), + .pure => self.value |= PURE, + .impure => self.value &= ~(PURE | DISCARDABLE), + .discardable => self.value |= DISCARDABLE | MOVEABLE | PURE, + } + } + + pub fn setGroup(self: *MemoryFlags, attribute: CommonResourceAttributes, implied_shared_or_pure: bool) void { + switch (attribute) { + .preload => { + self.value |= PRELOAD; + if (implied_shared_or_pure) self.value &= ~SHARED; + }, + .loadoncall => { + self.value &= ~PRELOAD; + if (implied_shared_or_pure) self.value |= SHARED; + }, + else => self.set(attribute), + } + } +}; + +/// https://learn.microsoft.com/en-us/windows/win32/intl/language-identifiers +pub const Language = packed struct(u16) { + // Note: This is the default no matter what locale the current system is set to, + // e.g. even if the system's locale is en-GB, en-US will still be the + // default language for resources in the Win32 rc compiler. 
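+    // The packed layout puts primary_language_id in the low 10 bits and
+    // sublanguage_id in the high 6 bits, so the default resolves to
+    // (SUBLANG_ENGLISH_US << 10) | LANG_ENGLISH = (0x01 << 10) | 0x09 = 0x0409 (en-US).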
+ primary_language_id: u10 = lang.LANG_ENGLISH, + sublanguage_id: u6 = lang.SUBLANG_ENGLISH_US, + + /// Default language ID as a u16 + pub const default: u16 = (Language{}).asInt(); + + pub fn fromInt(int: u16) Language { + return @bitCast(int); + } + + pub fn asInt(self: Language) u16 { + return @bitCast(self); + } +}; + +/// https://learn.microsoft.com/en-us/windows/win32/api/winuser/ns-winuser-dlgitemtemplate#remarks +pub const ControlClass = enum(u16) { + button = 0x80, + edit = 0x81, + static = 0x82, + listbox = 0x83, + scrollbar = 0x84, + combobox = 0x85, + + pub fn fromControl(control: rc.Control) ?ControlClass { + return switch (control) { + // zig fmt: off + .auto3state, .autocheckbox, .autoradiobutton, + .checkbox, .defpushbutton, .groupbox, .pushbox, + .pushbutton, .radiobutton, .state3, .userbutton => .button, + // zig fmt: on + .combobox => .combobox, + .control => null, + .ctext, .icon, .ltext, .rtext => .static, + .edittext, .hedit, .iedit => .edit, + .listbox => .listbox, + .scrollbar => .scrollbar, + }; + } + + pub fn getImpliedStyle(control: rc.Control) u32 { + var style = WS.CHILD | WS.VISIBLE; + switch (control) { + .auto3state => style |= BS.AUTO3STATE | WS.TABSTOP, + .autocheckbox => style |= BS.AUTOCHECKBOX | WS.TABSTOP, + .autoradiobutton => style |= BS.AUTORADIOBUTTON, + .checkbox => style |= BS.CHECKBOX | WS.TABSTOP, + .combobox => {}, + .control => {}, + .ctext => style |= SS.CENTER | WS.GROUP, + .defpushbutton => style |= BS.DEFPUSHBUTTON | WS.TABSTOP, + .edittext, .hedit, .iedit => style |= WS.TABSTOP | WS.BORDER, + .groupbox => style |= BS.GROUPBOX, + .icon => style |= SS.ICON, + .listbox => style |= LBS.NOTIFY | WS.BORDER, + .ltext => style |= WS.GROUP, + .pushbox => style |= BS.PUSHBOX | WS.TABSTOP, + .pushbutton => style |= WS.TABSTOP, + .radiobutton => style |= BS.RADIOBUTTON, + .rtext => style |= SS.RIGHT | WS.GROUP, + .scrollbar => {}, + .state3 => style |= BS.@"3STATE" | WS.TABSTOP, + .userbutton => style |= BS.USERBUTTON | WS.TABSTOP, + } + return style; + } +}; + +pub const NameOrOrdinal = union(enum) { + name: [:0]const u16, + ordinal: u16, + + pub fn deinit(self: NameOrOrdinal, allocator: Allocator) void { + switch (self) { + .name => |name| { + allocator.free(name); + }, + .ordinal => {}, + } + } + + /// Returns the full length of the amount of bytes that would be written by `write` + /// (e.g. for an ordinal it will return the length including the 0xFFFF indicator) + pub fn byteLen(self: NameOrOrdinal) usize { + switch (self) { + .name => |name| { + // + 1 for 0-terminated + return (name.len + 1) * @sizeOf(u16); + }, + .ordinal => return 4, + } + } + + pub fn write(self: NameOrOrdinal, writer: anytype) !void { + switch (self) { + .name => |name| { + for (name[0 .. 
name.len + 1]) |code_unit| { + try writer.writeIntLittle(u16, code_unit); + } + }, + .ordinal => |ordinal| { + try writer.writeIntLittle(u16, 0xffff); + try writer.writeIntLittle(u16, ordinal); + }, + } + } + + pub fn writeEmpty(writer: anytype) !void { + try writer.writeIntLittle(u16, 0); + } + + pub fn fromString(allocator: Allocator, bytes: SourceBytes) !NameOrOrdinal { + if (maybeOrdinalFromString(bytes)) |ordinal| { + return ordinal; + } + return nameFromString(allocator, bytes); + } + + pub fn nameFromString(allocator: Allocator, bytes: SourceBytes) !NameOrOrdinal { + // Names have a limit of 256 UTF-16 code units + null terminator + var buf = try std.ArrayList(u16).initCapacity(allocator, @min(257, bytes.slice.len)); + errdefer buf.deinit(); + + var i: usize = 0; + while (bytes.code_page.codepointAt(i, bytes.slice)) |codepoint| : (i += codepoint.byte_len) { + if (buf.items.len == 256) break; + + const c = codepoint.value; + if (c == Codepoint.invalid) { + try buf.append(std.mem.nativeToLittle(u16, '�')); + } else if (c < 0x7F) { + // ASCII chars in names are always converted to uppercase + try buf.append(std.ascii.toUpper(@intCast(c))); + } else if (c < 0x10000) { + const short: u16 = @intCast(c); + try buf.append(std.mem.nativeToLittle(u16, short)); + } else { + const high = @as(u16, @intCast((c - 0x10000) >> 10)) + 0xD800; + try buf.append(std.mem.nativeToLittle(u16, high)); + + // Note: This can cut-off in the middle of a UTF-16 surrogate pair, + // i.e. it can make the string end with an unpaired high surrogate + if (buf.items.len == 256) break; + + const low = @as(u16, @intCast(c & 0x3FF)) + 0xDC00; + try buf.append(std.mem.nativeToLittle(u16, low)); + } + } + + return NameOrOrdinal{ .name = try buf.toOwnedSliceSentinel(0) }; + } + + /// Returns `null` if the bytes do not form a valid number. + /// Does not allow non-ASCII digits (which the Win32 RC compiler does allow + /// in base 10 numbers, see `maybeNonAsciiOrdinalFromString`). + pub fn maybeOrdinalFromString(bytes: SourceBytes) ?NameOrOrdinal { + var buf = bytes.slice; + var radix: u8 = 10; + if (buf.len > 2 and buf[0] == '0') { + switch (buf[1]) { + '0'...'9' => {}, + 'x', 'X' => { + radix = 16; + buf = buf[2..]; + // only the first 4 hex digits matter, anything else is ignored + // i.e. 0x12345 is treated as if it were 0x1234 + buf.len = @min(buf.len, 4); + }, + else => return null, + } + } + + var i: usize = 0; + var result: u16 = 0; + while (bytes.code_page.codepointAt(i, buf)) |codepoint| : (i += codepoint.byte_len) { + const c = codepoint.value; + const digit: u8 = switch (c) { + 0x00...0x7F => std.fmt.charToDigit(@intCast(c), radix) catch switch (radix) { + 10 => return null, + // non-hex-digits are treated as a terminator rather than invalidating + // the number (note: if there are no valid hex digits then the result + // will be zero which is not treated as a valid number) + 16 => break, + else => unreachable, + }, + else => if (radix == 10) return null else break, + }; + + if (result != 0) { + result *%= radix; + } + result +%= digit; + } + + // Anything that resolves to zero is not interpretted as a number + if (result == 0) return null; + return NameOrOrdinal{ .ordinal = result }; + } + + /// The Win32 RC compiler uses `iswdigit` for digit detection for base 10 + /// numbers, which means that non-ASCII digits are 'accepted' but handled + /// in a totally unintuitive manner, leading to arbitrary results. 
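+    /// (the emulation below treats each accepted digit as `codepoint - '0'`,
+    /// with wrapping arithmetic on overflow)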
+ /// + /// This function will return the value that such an ordinal 'would' have + /// if it was run through the Win32 RC compiler. This allows us to disallow + /// non-ASCII digits in number literals but still detect when the Win32 + /// RC compiler would have allowed them, so that a proper warning/error + /// can be emitted. + pub fn maybeNonAsciiOrdinalFromString(bytes: SourceBytes) ?NameOrOrdinal { + var buf = bytes.slice; + const radix = 10; + if (buf.len > 2 and buf[0] == '0') { + switch (buf[1]) { + // We only care about base 10 numbers here + 'x', 'X' => return null, + else => {}, + } + } + + var i: usize = 0; + var result: u16 = 0; + while (bytes.code_page.codepointAt(i, buf)) |codepoint| : (i += codepoint.byte_len) { + const c = codepoint.value; + const digit: u16 = digit: { + const is_digit = (c >= '0' and c <= '9') or isNonAsciiDigit(c); + if (!is_digit) return null; + break :digit @intCast(c - '0'); + }; + + if (result != 0) { + result *%= radix; + } + result +%= digit; + } + + // Anything that resolves to zero is not interpretted as a number + if (result == 0) return null; + return NameOrOrdinal{ .ordinal = result }; + } + + pub fn predefinedResourceType(self: NameOrOrdinal) ?RT { + switch (self) { + .ordinal => |ordinal| { + if (ordinal >= 256) return null; + switch (@as(RT, @enumFromInt(ordinal))) { + .ACCELERATOR, + .ANICURSOR, + .ANIICON, + .BITMAP, + .CURSOR, + .DIALOG, + .DLGINCLUDE, + .DLGINIT, + .FONT, + .FONTDIR, + .GROUP_CURSOR, + .GROUP_ICON, + .HTML, + .ICON, + .MANIFEST, + .MENU, + .MESSAGETABLE, + .PLUGPLAY, + .RCDATA, + .STRING, + .TOOLBAR, + .VERSION, + .VXD, + => |rt| return rt, + _ => return null, + } + }, + .name => return null, + } + } +}; + +fn expectNameOrOrdinal(expected: NameOrOrdinal, actual: NameOrOrdinal) !void { + switch (expected) { + .name => { + if (actual != .name) return error.TestExpectedEqual; + try std.testing.expectEqualSlices(u16, expected.name, actual.name); + }, + .ordinal => { + if (actual != .ordinal) return error.TestExpectedEqual; + try std.testing.expectEqual(expected.ordinal, actual.ordinal); + }, + } +} + +test "NameOrOrdinal" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + const allocator = arena.allocator(); + + // zero is treated as a string + try expectNameOrOrdinal( + NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("0") }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "0", .code_page = .windows1252 }), + ); + // any non-digit byte invalidates the number + try expectNameOrOrdinal( + NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("1A") }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "1a", .code_page = .windows1252 }), + ); + try expectNameOrOrdinal( + NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("1ÿ") }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "1\xff", .code_page = .windows1252 }), + ); + try expectNameOrOrdinal( + NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("1€") }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "1€", .code_page = .utf8 }), + ); + try expectNameOrOrdinal( + NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("1�") }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "1\x80", .code_page = .utf8 }), + ); + // same with overflow that resolves to 0 + try expectNameOrOrdinal( + NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("65536") }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "65536", .code_page = .windows1252 }), + ); + // 
hex zero is also treated as a string + try expectNameOrOrdinal( + NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("0X0") }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "0x0", .code_page = .windows1252 }), + ); + // hex numbers work + try expectNameOrOrdinal( + NameOrOrdinal{ .ordinal = 0x100 }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "0x100", .code_page = .windows1252 }), + ); + // only the first 4 hex digits matter + try expectNameOrOrdinal( + NameOrOrdinal{ .ordinal = 0x1234 }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "0X12345", .code_page = .windows1252 }), + ); + // octal is not supported so it gets treated as a string + try expectNameOrOrdinal( + NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("0O1234") }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "0o1234", .code_page = .windows1252 }), + ); + // overflow wraps + try expectNameOrOrdinal( + NameOrOrdinal{ .ordinal = @truncate(65635) }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "65635", .code_page = .windows1252 }), + ); + // non-hex-digits in a hex literal are treated as a terminator + try expectNameOrOrdinal( + NameOrOrdinal{ .ordinal = 0x4 }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "0x4n", .code_page = .windows1252 }), + ); + try expectNameOrOrdinal( + NameOrOrdinal{ .ordinal = 0xFA }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "0xFAZ92348", .code_page = .windows1252 }), + ); + // 0 at the start is allowed + try expectNameOrOrdinal( + NameOrOrdinal{ .ordinal = 50 }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "050", .code_page = .windows1252 }), + ); + // limit of 256 UTF-16 code units, can cut off between a surrogate pair + { + var expected = blk: { + // the input before the 𐐷 character, but uppercased + var expected_u8_bytes = "00614982008907933748980730280674788429543776231864944218790698304852300002973622122844631429099469274282385299397783838528QFFL7SHNSIETG0QKLR1UYPBTUV1PMFQRRA0VJDG354GQEDJMUPGPP1W1EXVNTZVEIZ6K3IPQM1AWGEYALMEODYVEZGOD3MFMGEY8FNR4JUETTB1PZDEWSNDRGZUA8SNXP3NGO"; + var buf: [256:0]u16 = undefined; + for (expected_u8_bytes, 0..) |byte, i| { + buf[i] = byte; + } + // surrogate pair that is now orphaned + buf[255] = 0xD801; + break :blk buf; + }; + try expectNameOrOrdinal( + NameOrOrdinal{ .name = &expected }, + try NameOrOrdinal.fromString(allocator, .{ + .slice = "00614982008907933748980730280674788429543776231864944218790698304852300002973622122844631429099469274282385299397783838528qffL7ShnSIETg0qkLr1UYpbtuv1PMFQRRa0VjDG354GQedJmUPgpp1w1ExVnTzVEiz6K3iPqM1AWGeYALmeODyvEZGOD3MfmGey8fnR4jUeTtB1PzdeWsNDrGzuA8Snxp3NGO𐐷", + .code_page = .utf8, + }), + ); + } +} + +test "NameOrOrdinal code page awareness" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + const allocator = arena.allocator(); + + try expectNameOrOrdinal( + NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("��𐐷") }, + try NameOrOrdinal.fromString(allocator, .{ + .slice = "\xF0\x80\x80𐐷", + .code_page = .utf8, + }), + ); + try expectNameOrOrdinal( + // The UTF-8 representation of 𐐷 is 0xF0 0x90 0x90 0xB7. In order to provide valid + // UTF-8 to utf8ToUtf16LeStringLiteral, it uses the UTF-8 representation of the codepoint + // which is 0xC2 0x90. 
The code units in the expected UTF-16 string are: + // { 0x00F0, 0x20AC, 0x20AC, 0x00F0, 0x0090, 0x0090, 0x00B7 } + NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("ð€€ð\xC2\x90\xC2\x90·") }, + try NameOrOrdinal.fromString(allocator, .{ + .slice = "\xF0\x80\x80𐐷", + .code_page = .windows1252, + }), + ); +} + +/// https://learn.microsoft.com/en-us/windows/win32/api/winuser/ns-winuser-accel#members +/// https://devblogs.microsoft.com/oldnewthing/20070316-00/?p=27593 +pub const AcceleratorModifiers = struct { + value: u8 = 0, + explicit_ascii_or_virtkey: bool = false, + + pub const ASCII = 0; + pub const VIRTKEY = 1; + pub const NOINVERT = 1 << 1; + pub const SHIFT = 1 << 2; + pub const CONTROL = 1 << 3; + pub const ALT = 1 << 4; + /// Marker for the last accelerator in an accelerator table + pub const last_accelerator_in_table = 1 << 7; + + pub fn apply(self: *AcceleratorModifiers, modifier: rc.AcceleratorTypeAndOptions) void { + if (modifier == .ascii or modifier == .virtkey) self.explicit_ascii_or_virtkey = true; + self.value |= modifierValue(modifier); + } + + pub fn isSet(self: AcceleratorModifiers, modifier: rc.AcceleratorTypeAndOptions) bool { + // ASCII is set whenever VIRTKEY is not + if (modifier == .ascii) return self.value & modifierValue(.virtkey) == 0; + return self.value & modifierValue(modifier) != 0; + } + + fn modifierValue(modifier: rc.AcceleratorTypeAndOptions) u8 { + return switch (modifier) { + .ascii => ASCII, + .virtkey => VIRTKEY, + .noinvert => NOINVERT, + .shift => SHIFT, + .control => CONTROL, + .alt => ALT, + }; + } + + pub fn markLast(self: *AcceleratorModifiers) void { + self.value |= last_accelerator_in_table; + } +}; + +const AcceleratorKeyCodepointTranslator = struct { + string_type: literals.StringType, + + pub fn translate(self: @This(), maybe_parsed: ?literals.IterativeStringParser.ParsedCodepoint) ?u21 { + const parsed = maybe_parsed orelse return null; + if (parsed.codepoint == Codepoint.invalid) return 0xFFFD; + if (parsed.from_escaped_integer and self.string_type == .ascii) { + return windows1252.toCodepoint(@intCast(parsed.codepoint)); + } + return parsed.codepoint; + } +}; + +pub const ParseAcceleratorKeyStringError = error{ EmptyAccelerator, AcceleratorTooLong, InvalidControlCharacter, ControlCharacterOutOfRange }; + +/// Expects bytes to be the full bytes of a string literal token (e.g. including the "" or L""). +pub fn parseAcceleratorKeyString(bytes: SourceBytes, is_virt: bool, options: literals.StringParseOptions) (ParseAcceleratorKeyStringError || Allocator.Error)!u16 { + if (bytes.slice.len == 0) { + return error.EmptyAccelerator; + } + + var parser = literals.IterativeStringParser.init(bytes, options); + var translator = AcceleratorKeyCodepointTranslator{ .string_type = parser.declared_string_type }; + + const first_codepoint = translator.translate(try parser.next()) orelse return error.EmptyAccelerator; + // 0 is treated as a terminator, so this is equivalent to an empty string + if (first_codepoint == 0) return error.EmptyAccelerator; + + if (first_codepoint == '^') { + // Note: Emitting this warning unconditonally whenever ^ is the first character + // matches the Win32 RC behavior, but it's questionable whether or not + // the warning should be emitted for ^^ since that results in the ASCII + // character ^ being written to the .res. 
+ if (is_virt and options.diagnostics != null) { + try options.diagnostics.?.diagnostics.append(.{ + .err = .ascii_character_not_equivalent_to_virtual_key_code, + .type = .warning, + .token = options.diagnostics.?.token, + }); + } + + const c = translator.translate(try parser.next()) orelse return error.InvalidControlCharacter; + switch (c) { + '^' => return '^', // special case + 'a'...'z', 'A'...'Z' => return std.ascii.toUpper(@intCast(c)) - 0x40, + // Note: The Windows RC compiler allows more than just A-Z, but what it allows + // seems to be tied to some sort of Unicode-aware 'is character' function or something. + // The full list of codepoints that trigger an out-of-range error can be found here: + // https://gist.github.com/squeek502/2e9d0a4728a83eed074ad9785a209fd0 + // For codepoints >= 0x80 that don't trigger the error, the Windows RC compiler takes the + // codepoint and does the `- 0x40` transformation as if it were A-Z which couldn't lead + // to anything useable, so there's no point in emulating that behavior--erroring for + // all non-[a-zA-Z] makes much more sense and is what was probably intended by the + // Windows RC compiler. + else => return error.ControlCharacterOutOfRange, + } + @compileError("this should be unreachable"); + } + + const second_codepoint = translator.translate(try parser.next()); + + var result: u32 = initial_value: { + if (first_codepoint >= 0x10000) { + if (second_codepoint != null and second_codepoint.? != 0) return error.AcceleratorTooLong; + // No idea why it works this way, but this seems to match the Windows RC + // behavior for codepoints >= 0x10000 + const low = @as(u16, @intCast(first_codepoint & 0x3FF)) + 0xDC00; + const extra = (first_codepoint - 0x10000) / 0x400; + break :initial_value low + extra * 0x100; + } + break :initial_value first_codepoint; + }; + + // 0 is treated as a terminator + if (second_codepoint != null and second_codepoint.? == 0) return @truncate(result); + + const third_codepoint = translator.translate(try parser.next()); + // 0 is treated as a terminator, so a 0 in the third position is fine but + // anything else is too many codepoints for an accelerator + if (third_codepoint != null and third_codepoint.? 
!= 0) return error.AcceleratorTooLong; + + if (second_codepoint) |c| { + if (c >= 0x10000) return error.AcceleratorTooLong; + result <<= 8; + result += c; + } else if (is_virt) { + switch (result) { + 'a'...'z' => result -= 0x20, // toUpper + else => {}, + } + } + return @truncate(result); +} + +test "accelerator keys" { + try std.testing.expectEqual(@as(u16, 1), try parseAcceleratorKeyString( + .{ .slice = "\"^a\"", .code_page = .windows1252 }, + false, + .{}, + )); + try std.testing.expectEqual(@as(u16, 1), try parseAcceleratorKeyString( + .{ .slice = "\"^A\"", .code_page = .windows1252 }, + false, + .{}, + )); + try std.testing.expectEqual(@as(u16, 26), try parseAcceleratorKeyString( + .{ .slice = "\"^Z\"", .code_page = .windows1252 }, + false, + .{}, + )); + try std.testing.expectEqual(@as(u16, '^'), try parseAcceleratorKeyString( + .{ .slice = "\"^^\"", .code_page = .windows1252 }, + false, + .{}, + )); + + try std.testing.expectEqual(@as(u16, 'a'), try parseAcceleratorKeyString( + .{ .slice = "\"a\"", .code_page = .windows1252 }, + false, + .{}, + )); + try std.testing.expectEqual(@as(u16, 0x6162), try parseAcceleratorKeyString( + .{ .slice = "\"ab\"", .code_page = .windows1252 }, + false, + .{}, + )); + + try std.testing.expectEqual(@as(u16, 'C'), try parseAcceleratorKeyString( + .{ .slice = "\"c\"", .code_page = .windows1252 }, + true, + .{}, + )); + try std.testing.expectEqual(@as(u16, 0x6363), try parseAcceleratorKeyString( + .{ .slice = "\"cc\"", .code_page = .windows1252 }, + true, + .{}, + )); + + // \x00 or any escape that evaluates to zero acts as a terminator, everything past it + // is ignored + try std.testing.expectEqual(@as(u16, 'a'), try parseAcceleratorKeyString( + .{ .slice = "\"a\\0bcdef\"", .code_page = .windows1252 }, + false, + .{}, + )); + + // \x80 is € in Windows-1252, which is Unicode codepoint 20AC + try std.testing.expectEqual(@as(u16, 0x20AC), try parseAcceleratorKeyString( + .{ .slice = "\"\x80\"", .code_page = .windows1252 }, + false, + .{}, + )); + // This depends on the code page, though, with codepage 65001, \x80 + // on its own is invalid UTF-8 so it gets converted to the replacement character + try std.testing.expectEqual(@as(u16, 0xFFFD), try parseAcceleratorKeyString( + .{ .slice = "\"\x80\"", .code_page = .utf8 }, + false, + .{}, + )); + try std.testing.expectEqual(@as(u16, 0xCCAC), try parseAcceleratorKeyString( + .{ .slice = "\"\x80\x80\"", .code_page = .windows1252 }, + false, + .{}, + )); + // This also behaves the same with escaped characters + try std.testing.expectEqual(@as(u16, 0x20AC), try parseAcceleratorKeyString( + .{ .slice = "\"\\x80\"", .code_page = .windows1252 }, + false, + .{}, + )); + // Even with utf8 code page + try std.testing.expectEqual(@as(u16, 0x20AC), try parseAcceleratorKeyString( + .{ .slice = "\"\\x80\"", .code_page = .utf8 }, + false, + .{}, + )); + try std.testing.expectEqual(@as(u16, 0xCCAC), try parseAcceleratorKeyString( + .{ .slice = "\"\\x80\\x80\"", .code_page = .windows1252 }, + false, + .{}, + )); + // Wide string with the actual characters behaves like the ASCII string version + try std.testing.expectEqual(@as(u16, 0xCCAC), try parseAcceleratorKeyString( + .{ .slice = "L\"\x80\x80\"", .code_page = .windows1252 }, + false, + .{}, + )); + // But wide string with escapes behaves differently + try std.testing.expectEqual(@as(u16, 0x8080), try parseAcceleratorKeyString( + .{ .slice = "L\"\\x80\\x80\"", .code_page = .windows1252 }, + false, + .{}, + )); + // and invalid escapes within wide strings get skipped + 
try std.testing.expectEqual(@as(u16, 'z'), try parseAcceleratorKeyString( + .{ .slice = "L\"\\Hz\"", .code_page = .windows1252 }, + false, + .{}, + )); + + // any non-A-Z codepoints are illegal + try std.testing.expectError(error.ControlCharacterOutOfRange, parseAcceleratorKeyString( + .{ .slice = "\"^\x83\"", .code_page = .windows1252 }, + false, + .{}, + )); + try std.testing.expectError(error.ControlCharacterOutOfRange, parseAcceleratorKeyString( + .{ .slice = "\"^1\"", .code_page = .windows1252 }, + false, + .{}, + )); + try std.testing.expectError(error.InvalidControlCharacter, parseAcceleratorKeyString( + .{ .slice = "\"^\"", .code_page = .windows1252 }, + false, + .{}, + )); + try std.testing.expectError(error.EmptyAccelerator, parseAcceleratorKeyString( + .{ .slice = "\"\"", .code_page = .windows1252 }, + false, + .{}, + )); + try std.testing.expectError(error.AcceleratorTooLong, parseAcceleratorKeyString( + .{ .slice = "\"hello\"", .code_page = .windows1252 }, + false, + .{}, + )); + try std.testing.expectError(error.ControlCharacterOutOfRange, parseAcceleratorKeyString( + .{ .slice = "\"^\x80\"", .code_page = .windows1252 }, + false, + .{}, + )); + + // Invalid UTF-8 gets converted to 0xFFFD, multiple invalids get shifted and added together + // The behavior is the same for ascii and wide strings + try std.testing.expectEqual(@as(u16, 0xFCFD), try parseAcceleratorKeyString( + .{ .slice = "\"\x80\x80\"", .code_page = .utf8 }, + false, + .{}, + )); + try std.testing.expectEqual(@as(u16, 0xFCFD), try parseAcceleratorKeyString( + .{ .slice = "L\"\x80\x80\"", .code_page = .utf8 }, + false, + .{}, + )); + + // Codepoints >= 0x10000 + try std.testing.expectEqual(@as(u16, 0xDD00), try parseAcceleratorKeyString( + .{ .slice = "\"\xF0\x90\x84\x80\"", .code_page = .utf8 }, + false, + .{}, + )); + try std.testing.expectEqual(@as(u16, 0xDD00), try parseAcceleratorKeyString( + .{ .slice = "L\"\xF0\x90\x84\x80\"", .code_page = .utf8 }, + false, + .{}, + )); + try std.testing.expectEqual(@as(u16, 0x9C01), try parseAcceleratorKeyString( + .{ .slice = "\"\xF4\x80\x80\x81\"", .code_page = .utf8 }, + false, + .{}, + )); + // anything before or after a codepoint >= 0x10000 causes an error + try std.testing.expectError(error.AcceleratorTooLong, parseAcceleratorKeyString( + .{ .slice = "\"a\xF0\x90\x80\x80\"", .code_page = .utf8 }, + false, + .{}, + )); + try std.testing.expectError(error.AcceleratorTooLong, parseAcceleratorKeyString( + .{ .slice = "\"\xF0\x90\x80\x80a\"", .code_page = .utf8 }, + false, + .{}, + )); +} + +pub const ForcedOrdinal = struct { + pub fn fromBytes(bytes: SourceBytes) u16 { + var i: usize = 0; + var result: u21 = 0; + while (bytes.code_page.codepointAt(i, bytes.slice)) |codepoint| : (i += codepoint.byte_len) { + const c = switch (codepoint.value) { + // Codepoints that would need a surrogate pair in UTF-16 are + // broken up into their UTF-16 code units and each code unit + // is interpreted as a digit. 
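+                // e.g. U+10002 becomes the code units 0xD800 and 0xDC02, and each
+                // code unit then contributes `code_unit -% '0'` to the result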
+ 0x10000...0x10FFFF => { + const high = @as(u16, @intCast((codepoint.value - 0x10000) >> 10)) + 0xD800; + if (result != 0) result *%= 10; + result +%= high -% '0'; + + const low = @as(u16, @intCast(codepoint.value & 0x3FF)) + 0xDC00; + if (result != 0) result *%= 10; + result +%= low -% '0'; + continue; + }, + Codepoint.invalid => 0xFFFD, + else => codepoint.value, + }; + if (result != 0) result *%= 10; + result +%= c -% '0'; + } + return @truncate(result); + } + + pub fn fromUtf16Le(utf16: [:0]const u16) u16 { + var result: u16 = 0; + for (utf16) |code_unit| { + if (result != 0) result *%= 10; + result +%= code_unit -% '0'; + } + return result; + } +}; + +test "forced ordinal" { + try std.testing.expectEqual(@as(u16, 3200), ForcedOrdinal.fromBytes(.{ .slice = "3200", .code_page = .windows1252 })); + try std.testing.expectEqual(@as(u16, 0x33), ForcedOrdinal.fromBytes(.{ .slice = "1+1", .code_page = .windows1252 })); + try std.testing.expectEqual(@as(u16, 65531), ForcedOrdinal.fromBytes(.{ .slice = "1!", .code_page = .windows1252 })); + + try std.testing.expectEqual(@as(u16, 0x122), ForcedOrdinal.fromBytes(.{ .slice = "0\x8C", .code_page = .windows1252 })); + try std.testing.expectEqual(@as(u16, 0x122), ForcedOrdinal.fromBytes(.{ .slice = "0Œ", .code_page = .utf8 })); + + // invalid UTF-8 gets converted to 0xFFFD (replacement char) and then interpreted as a digit + try std.testing.expectEqual(@as(u16, 0xFFCD), ForcedOrdinal.fromBytes(.{ .slice = "0\x81", .code_page = .utf8 })); + // codepoints >= 0x10000 + try std.testing.expectEqual(@as(u16, 0x49F2), ForcedOrdinal.fromBytes(.{ .slice = "0\u{10002}", .code_page = .utf8 })); + try std.testing.expectEqual(@as(u16, 0x4AF0), ForcedOrdinal.fromBytes(.{ .slice = "0\u{10100}", .code_page = .utf8 })); + + // From UTF-16 + try std.testing.expectEqual(@as(u16, 0x122), ForcedOrdinal.fromUtf16Le(&[_:0]u16{ '0', 'Œ' })); + try std.testing.expectEqual(@as(u16, 0x4AF0), ForcedOrdinal.fromUtf16Le(std.unicode.utf8ToUtf16LeStringLiteral("0\u{10100}"))); +} + +/// https://learn.microsoft.com/en-us/windows/win32/api/verrsrc/ns-verrsrc-vs_fixedfileinfo +pub const FixedFileInfo = struct { + file_version: Version = .{}, + product_version: Version = .{}, + file_flags_mask: u32 = 0, + file_flags: u32 = 0, + file_os: u32 = 0, + file_type: u32 = 0, + file_subtype: u32 = 0, + file_date: Version = .{}, // TODO: I think this is always all zeroes? 
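+
+    // Serialized as 13 little-endian u32 values (see `write` below), which is
+    // where `byte_len` (0x34 = 52 bytes) comes from.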
+ + pub const signature = 0xFEEF04BD; + // Note: This corresponds to a version of 1.0 + pub const version = 0x00010000; + + pub const byte_len = 0x34; + pub const key = std.unicode.utf8ToUtf16LeStringLiteral("VS_VERSION_INFO"); + + pub const Version = struct { + parts: [4]u16 = [_]u16{0} ** 4, + + pub fn mostSignificantCombinedParts(self: Version) u32 { + return (@as(u32, self.parts[0]) << 16) + self.parts[1]; + } + + pub fn leastSignificantCombinedParts(self: Version) u32 { + return (@as(u32, self.parts[2]) << 16) + self.parts[3]; + } + }; + + pub fn write(self: FixedFileInfo, writer: anytype) !void { + try writer.writeIntLittle(u32, signature); + try writer.writeIntLittle(u32, version); + try writer.writeIntLittle(u32, self.file_version.mostSignificantCombinedParts()); + try writer.writeIntLittle(u32, self.file_version.leastSignificantCombinedParts()); + try writer.writeIntLittle(u32, self.product_version.mostSignificantCombinedParts()); + try writer.writeIntLittle(u32, self.product_version.leastSignificantCombinedParts()); + try writer.writeIntLittle(u32, self.file_flags_mask); + try writer.writeIntLittle(u32, self.file_flags); + try writer.writeIntLittle(u32, self.file_os); + try writer.writeIntLittle(u32, self.file_type); + try writer.writeIntLittle(u32, self.file_subtype); + try writer.writeIntLittle(u32, self.file_date.mostSignificantCombinedParts()); + try writer.writeIntLittle(u32, self.file_date.leastSignificantCombinedParts()); + } +}; + +test "FixedFileInfo.Version" { + const version = FixedFileInfo.Version{ + .parts = .{ 1, 2, 3, 4 }, + }; + try std.testing.expectEqual(@as(u32, 0x00010002), version.mostSignificantCombinedParts()); + try std.testing.expectEqual(@as(u32, 0x00030004), version.leastSignificantCombinedParts()); +} + +pub const VersionNode = struct { + pub const type_string: u16 = 1; + pub const type_binary: u16 = 0; +}; + +pub const MenuItemFlags = struct { + value: u16 = 0, + + pub fn apply(self: *MenuItemFlags, option: rc.MenuItem.Option) void { + self.value |= optionValue(option); + } + + pub fn isSet(self: MenuItemFlags, option: rc.MenuItem.Option) bool { + return self.value & optionValue(option) != 0; + } + + fn optionValue(option: rc.MenuItem.Option) u16 { + return @intCast(switch (option) { + .checked => MF.CHECKED, + .grayed => MF.GRAYED, + .help => MF.HELP, + .inactive => MF.DISABLED, + .menubarbreak => MF.MENUBARBREAK, + .menubreak => MF.MENUBREAK, + }); + } + + pub fn markLast(self: *MenuItemFlags) void { + self.value |= @intCast(MF.END); + } +}; + +/// Menu Flags from WinUser.h +/// This is not complete, it only contains what is needed +pub const MF = struct { + pub const GRAYED: u32 = 0x00000001; + pub const DISABLED: u32 = 0x00000002; + pub const CHECKED: u32 = 0x00000008; + pub const POPUP: u32 = 0x00000010; + pub const MENUBARBREAK: u32 = 0x00000020; + pub const MENUBREAK: u32 = 0x00000040; + pub const HELP: u32 = 0x00004000; + pub const END: u32 = 0x00000080; +}; + +/// Window Styles from WinUser.h +pub const WS = struct { + pub const OVERLAPPED: u32 = 0x00000000; + pub const POPUP: u32 = 0x80000000; + pub const CHILD: u32 = 0x40000000; + pub const MINIMIZE: u32 = 0x20000000; + pub const VISIBLE: u32 = 0x10000000; + pub const DISABLED: u32 = 0x08000000; + pub const CLIPSIBLINGS: u32 = 0x04000000; + pub const CLIPCHILDREN: u32 = 0x02000000; + pub const MAXIMIZE: u32 = 0x01000000; + pub const CAPTION: u32 = BORDER | DLGFRAME; + pub const BORDER: u32 = 0x00800000; + pub const DLGFRAME: u32 = 0x00400000; + pub const VSCROLL: u32 = 0x00200000; + pub 
const HSCROLL: u32 = 0x00100000; + pub const SYSMENU: u32 = 0x00080000; + pub const THICKFRAME: u32 = 0x00040000; + pub const GROUP: u32 = 0x00020000; + pub const TABSTOP: u32 = 0x00010000; + + pub const MINIMIZEBOX: u32 = 0x00020000; + pub const MAXIMIZEBOX: u32 = 0x00010000; + + pub const TILED: u32 = OVERLAPPED; + pub const ICONIC: u32 = MINIMIZE; + pub const SIZEBOX: u32 = THICKFRAME; + pub const TILEDWINDOW: u32 = OVERLAPPEDWINDOW; + + // Common Window Styles + pub const OVERLAPPEDWINDOW: u32 = OVERLAPPED | CAPTION | SYSMENU | THICKFRAME | MINIMIZEBOX | MAXIMIZEBOX; + pub const POPUPWINDOW: u32 = POPUP | BORDER | SYSMENU; + pub const CHILDWINDOW: u32 = CHILD; +}; + +/// Dialog Box Template Styles from WinUser.h +pub const DS = struct { + pub const SETFONT: u32 = 0x40; +}; + +/// Button Control Styles from WinUser.h +/// This is not complete, it only contains what is needed +pub const BS = struct { + pub const PUSHBUTTON: u32 = 0x00000000; + pub const DEFPUSHBUTTON: u32 = 0x00000001; + pub const CHECKBOX: u32 = 0x00000002; + pub const AUTOCHECKBOX: u32 = 0x00000003; + pub const RADIOBUTTON: u32 = 0x00000004; + pub const @"3STATE": u32 = 0x00000005; + pub const AUTO3STATE: u32 = 0x00000006; + pub const GROUPBOX: u32 = 0x00000007; + pub const USERBUTTON: u32 = 0x00000008; + pub const AUTORADIOBUTTON: u32 = 0x00000009; + pub const PUSHBOX: u32 = 0x0000000A; + pub const OWNERDRAW: u32 = 0x0000000B; + pub const TYPEMASK: u32 = 0x0000000F; + pub const LEFTTEXT: u32 = 0x00000020; +}; + +/// Static Control Constants from WinUser.h +/// This is not complete, it only contains what is needed +pub const SS = struct { + pub const LEFT: u32 = 0x00000000; + pub const CENTER: u32 = 0x00000001; + pub const RIGHT: u32 = 0x00000002; + pub const ICON: u32 = 0x00000003; +}; + +/// Listbox Styles from WinUser.h +/// This is not complete, it only contains what is needed +pub const LBS = struct { + pub const NOTIFY: u32 = 0x0001; +}; diff --git a/src/resinator/source_mapping.zig b/src/resinator/source_mapping.zig new file mode 100644 index 000000000000..babd41295b99 --- /dev/null +++ b/src/resinator/source_mapping.zig @@ -0,0 +1,684 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const UncheckedSliceWriter = @import("utils.zig").UncheckedSliceWriter; +const parseQuotedAsciiString = @import("literals.zig").parseQuotedAsciiString; +const lex = @import("lex.zig"); + +pub const ParseLineCommandsResult = struct { + result: []u8, + mappings: SourceMappings, +}; + +const CurrentMapping = struct { + line_num: usize = 1, + filename: std.ArrayListUnmanaged(u8) = .{}, + pending: bool = true, + ignore_contents: bool = false, +}; + +pub const ParseAndRemoveLineCommandsOptions = struct { + initial_filename: ?[]const u8 = null, +}; + +/// Parses and removes #line commands as well as all source code that is within a file +/// with .c or .h extensions. +/// +/// > RC treats files with the .c and .h extensions in a special manner. It +/// > assumes that a file with one of these extensions does not contain +/// > resources. If a file has the .c or .h file name extension, RC ignores all +/// > lines in the file except the preprocessor directives. Therefore, to +/// > include a file that contains resources in another resource script, give +/// > the file to be included an extension other than .c or .h. 
+/// from https://learn.microsoft.com/en-us/windows/win32/menurc/preprocessor-directives
+///
+/// Returns a slice of `buf` with the aforementioned stuff removed as well as a mapping
+/// between the lines and their corresponding lines in their original files.
+///
+/// `buf` must be at least as long as `source`
+/// In-place transformation is supported (i.e. `source` and `buf` can be the same slice)
+///
+/// If `options.initial_filename` is provided, that filename is guaranteed to be
+/// within the `mappings.files` table and `root_filename_offset` will be set appropriately.
+pub fn parseAndRemoveLineCommands(allocator: Allocator, source: []const u8, buf: []u8, options: ParseAndRemoveLineCommandsOptions) !ParseLineCommandsResult {
+    var parse_result = ParseLineCommandsResult{
+        .result = undefined,
+        .mappings = .{},
+    };
+    errdefer parse_result.mappings.deinit(allocator);
+
+    var current_mapping: CurrentMapping = .{};
+    defer current_mapping.filename.deinit(allocator);
+
+    if (options.initial_filename) |initial_filename| {
+        try current_mapping.filename.appendSlice(allocator, initial_filename);
+        parse_result.mappings.root_filename_offset = try parse_result.mappings.files.put(allocator, initial_filename);
+    }
+
+    std.debug.assert(buf.len >= source.len);
+    var result = UncheckedSliceWriter{ .slice = buf };
+    const State = enum {
+        line_start,
+        preprocessor,
+        non_preprocessor,
+    };
+    var state: State = .line_start;
+    var index: usize = 0;
+    var pending_start: ?usize = null;
+    var preprocessor_start: usize = 0;
+    var line_number: usize = 1;
+    while (index < source.len) : (index += 1) {
+        const c = source[index];
+        switch (state) {
+            .line_start => switch (c) {
+                '#' => {
+                    preprocessor_start = index;
+                    state = .preprocessor;
+                    if (pending_start == null) {
+                        pending_start = index;
+                    }
+                },
+                '\r', '\n' => {
+                    const is_crlf = formsLineEndingPair(source, c, index + 1);
+                    try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
+                    if (!current_mapping.ignore_contents) {
+                        result.write(c);
+                        if (is_crlf) result.write(source[index + 1]);
+                        line_number += 1;
+                    }
+                    if (is_crlf) index += 1;
+                    pending_start = null;
+                },
+                ' ', '\t', '\x0b', '\x0c' => {
+                    if (pending_start == null) {
+                        pending_start = index;
+                    }
+                },
+                else => {
+                    state = .non_preprocessor;
+                    if (pending_start != null) {
+                        if (!current_mapping.ignore_contents) {
+                            result.writeSlice(source[pending_start.? .. index + 1]);
+                        }
+                        pending_start = null;
+                        continue;
+                    }
+                    if (!current_mapping.ignore_contents) {
+                        result.write(c);
+                    }
+                },
+            },
+            .preprocessor => switch (c) {
+                '\r', '\n' => {
+                    // Now that we have the full line we can decide what to do with it
+                    const preprocessor_str = source[preprocessor_start..index];
+                    const is_crlf = formsLineEndingPair(source, c, index + 1);
+                    if (std.mem.startsWith(u8, preprocessor_str, "#line")) {
+                        try handleLineCommand(allocator, preprocessor_str, &current_mapping);
+                    } else {
+                        try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
+                        if (!current_mapping.ignore_contents) {
+                            const line_ending_len: usize = if (is_crlf) 2 else 1;
+                            result.writeSlice(source[pending_start.? .. index + line_ending_len]);
+                            line_number += 1;
+                        }
+                    }
+                    if (is_crlf) index += 1;
+                    state = .line_start;
+                    pending_start = null;
+                },
+                else => {},
+            },
+            .non_preprocessor => switch (c) {
+                '\r', '\n' => {
+                    const is_crlf = formsLineEndingPair(source, c, index + 1);
+                    try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
+                    if (!current_mapping.ignore_contents) {
+                        result.write(c);
+                        if (is_crlf) result.write(source[index + 1]);
+                        line_number += 1;
+                    }
+                    if (is_crlf) index += 1;
+                    state = .line_start;
+                    pending_start = null;
+                },
+                else => {
+                    if (!current_mapping.ignore_contents) {
+                        result.write(c);
+                    }
+                },
+            },
+        }
+    } else {
+        switch (state) {
+            .line_start => {},
+            .non_preprocessor => {
+                try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
+            },
+            .preprocessor => {
+                // Now that we have the full line we can decide what to do with it
+                const preprocessor_str = source[preprocessor_start..index];
+                if (std.mem.startsWith(u8, preprocessor_str, "#line")) {
+                    try handleLineCommand(allocator, preprocessor_str, &current_mapping);
+                } else {
+                    try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
+                    if (!current_mapping.ignore_contents) {
+                        result.writeSlice(source[pending_start.?..index]);
+                    }
+                }
+            },
+        }
+    }
+
+    parse_result.result = result.getWritten();
+
+    // Remove whitespace from the end of the result. This avoids issues when the
+    // preprocessor adds a newline to the end of the file, since then the
+    // post-preprocessed source could have more lines than the corresponding input source and
+    // the inserted line can't be mapped to any lines in the original file.
+    // There's no way that whitespace at the end of a file can affect the parsing
+    // of the RC script so this is okay to do unconditionally.
+    // TODO: There might be a better way around this
+    while (parse_result.result.len > 0 and std.ascii.isWhitespace(parse_result.result[parse_result.result.len - 1])) {
+        parse_result.result.len -= 1;
+    }
+
+    // If there have been no line mappings at all, then we're dealing with an empty file.
+    // In this case, we want to fake a line mapping just so that we return something
+    // that is useable in the same way that a non-empty mapping would be.
+    if (parse_result.mappings.mapping.items.len == 0) {
+        try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
+    }
+
+    return parse_result;
+}
+
+/// Note: This should function the same as lex.LineHandler.currentIndexFormsLineEndingPair
+pub fn formsLineEndingPair(source: []const u8, line_ending: u8, next_index: usize) bool {
+    if (next_index >= source.len) return false;
+
+    const next_ending = source[next_index];
+    if (next_ending != '\r' and next_ending != '\n') return false;
+
+    // can't be \n\n or \r\r
+    if (line_ending == next_ending) return false;
+
+    return true;
+}
+
+pub fn handleLineEnd(allocator: Allocator, post_processed_line_number: usize, mapping: *SourceMappings, current_mapping: *CurrentMapping) !void {
+    const filename_offset = try mapping.files.put(allocator, current_mapping.filename.items);
+
+    try mapping.set(allocator, post_processed_line_number, .{
+        .start_line = current_mapping.line_num,
+        .end_line = current_mapping.line_num,
+        .filename_offset = filename_offset,
+    });
+
+    current_mapping.line_num += 1;
+    current_mapping.pending = false;
+}
+
+// TODO: Might want to provide diagnostics on invalid line commands instead of just returning
+pub fn handleLineCommand(allocator: Allocator, line_command: []const u8, current_mapping: *CurrentMapping) error{OutOfMemory}!void {
+    // TODO: Are there other whitespace characters that should be included?
+    var tokenizer = std.mem.tokenize(u8, line_command, " \t");
+    const line_directive = tokenizer.next() orelse return; // #line
+    if (!std.mem.eql(u8, line_directive, "#line")) return;
+    const linenum_str = tokenizer.next() orelse return;
+    const linenum = std.fmt.parseUnsigned(usize, linenum_str, 10) catch return;
+
+    var filename_literal = tokenizer.rest();
+    while (filename_literal.len > 0 and std.ascii.isWhitespace(filename_literal[filename_literal.len - 1])) {
+        filename_literal.len -= 1;
+    }
+    if (filename_literal.len < 2) return;
+    const is_quoted = filename_literal[0] == '"' and filename_literal[filename_literal.len - 1] == '"';
+    if (!is_quoted) return;
+    const filename = parseFilename(allocator, filename_literal[1 .. filename_literal.len - 1]) catch |err| switch (err) {
+        error.OutOfMemory => |e| return e,
+        else => return,
+    };
+    defer allocator.free(filename);
+
+    current_mapping.line_num = linenum;
+    current_mapping.filename.clearRetainingCapacity();
+    try current_mapping.filename.appendSlice(allocator, filename);
+    current_mapping.pending = true;
+    current_mapping.ignore_contents = std.ascii.endsWithIgnoreCase(filename, ".c") or std.ascii.endsWithIgnoreCase(filename, ".h");
+}
+
+pub fn parseAndRemoveLineCommandsAlloc(allocator: Allocator, source: []const u8, options: ParseAndRemoveLineCommandsOptions) !ParseLineCommandsResult {
+    var buf = try allocator.alloc(u8, source.len);
+    errdefer allocator.free(buf);
+    var result = try parseAndRemoveLineCommands(allocator, source, buf, options);
+    result.result = try allocator.realloc(buf, result.result.len);
+    return result;
+}
+
+/// C-style string parsing with a few caveats:
+/// - The str cannot contain newlines or carriage returns
+/// - Hex and octal escape are limited to u8
+/// - No handling/support for L, u, or U prefixed strings
+/// - The start and end double quotes should be omitted from the `str`
+/// - Other than the above, does not assume any validity of the strings (i.e. there
+///   may be unescaped double quotes within the str) and will return error.InvalidString
+///   on any problems found.
+/// +/// The result is a UTF-8 encoded string. +fn parseFilename(allocator: Allocator, str: []const u8) error{ OutOfMemory, InvalidString }![]u8 { + const State = enum { + string, + escape, + escape_hex, + escape_octal, + escape_u, + }; + + var filename = try std.ArrayList(u8).initCapacity(allocator, str.len); + errdefer filename.deinit(); + var state: State = .string; + var index: usize = 0; + var escape_len: usize = undefined; + var escape_val: u64 = undefined; + var escape_expected_len: u8 = undefined; + while (index < str.len) : (index += 1) { + const c = str[index]; + switch (state) { + .string => switch (c) { + '\\' => state = .escape, + '"' => return error.InvalidString, + else => filename.appendAssumeCapacity(c), + }, + .escape => switch (c) { + '\'', '"', '\\', '?', 'n', 'r', 't', 'a', 'b', 'e', 'f', 'v' => { + const escaped_c = switch (c) { + '\'', '"', '\\', '?' => c, + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + 'a' => '\x07', + 'b' => '\x08', + 'e' => '\x1b', // non-standard + 'f' => '\x0c', + 'v' => '\x0b', + else => unreachable, + }; + filename.appendAssumeCapacity(escaped_c); + state = .string; + }, + 'x' => { + escape_val = 0; + escape_len = 0; + state = .escape_hex; + }, + '0'...'7' => { + escape_val = std.fmt.charToDigit(c, 8) catch unreachable; + escape_len = 1; + state = .escape_octal; + }, + 'u' => { + escape_val = 0; + escape_len = 0; + state = .escape_u; + escape_expected_len = 4; + }, + 'U' => { + escape_val = 0; + escape_len = 0; + state = .escape_u; + escape_expected_len = 8; + }, + else => return error.InvalidString, + }, + .escape_hex => switch (c) { + '0'...'9', 'a'...'f', 'A'...'F' => { + const digit = std.fmt.charToDigit(c, 16) catch unreachable; + if (escape_val != 0) escape_val = std.math.mul(u8, @as(u8, @intCast(escape_val)), 16) catch return error.InvalidString; + escape_val = std.math.add(u8, @as(u8, @intCast(escape_val)), digit) catch return error.InvalidString; + escape_len += 1; + }, + else => { + if (escape_len == 0) return error.InvalidString; + filename.appendAssumeCapacity(@intCast(escape_val)); + state = .string; + index -= 1; // reconsume + }, + }, + .escape_octal => switch (c) { + '0'...'7' => { + const digit = std.fmt.charToDigit(c, 8) catch unreachable; + if (escape_val != 0) escape_val = std.math.mul(u8, @as(u8, @intCast(escape_val)), 8) catch return error.InvalidString; + escape_val = std.math.add(u8, @as(u8, @intCast(escape_val)), digit) catch return error.InvalidString; + escape_len += 1; + if (escape_len == 3) { + filename.appendAssumeCapacity(@intCast(escape_val)); + state = .string; + } + }, + else => { + if (escape_len == 0) return error.InvalidString; + filename.appendAssumeCapacity(@intCast(escape_val)); + state = .string; + index -= 1; // reconsume + }, + }, + .escape_u => switch (c) { + '0'...'9', 'a'...'f', 'A'...'F' => { + const digit = std.fmt.charToDigit(c, 16) catch unreachable; + if (escape_val != 0) escape_val = std.math.mul(u21, @as(u21, @intCast(escape_val)), 16) catch return error.InvalidString; + escape_val = std.math.add(u21, @as(u21, @intCast(escape_val)), digit) catch return error.InvalidString; + escape_len += 1; + if (escape_len == escape_expected_len) { + var buf: [4]u8 = undefined; + const utf8_len = std.unicode.utf8Encode(@intCast(escape_val), &buf) catch return error.InvalidString; + filename.appendSliceAssumeCapacity(buf[0..utf8_len]); + state = .string; + } + }, + // Requires escape_expected_len valid hex digits + else => return error.InvalidString, + }, + } + } else { + switch (state) { + .string => {}, + 
.escape, .escape_u => return error.InvalidString, + .escape_hex => { + if (escape_len == 0) return error.InvalidString; + filename.appendAssumeCapacity(@intCast(escape_val)); + }, + .escape_octal => { + filename.appendAssumeCapacity(@intCast(escape_val)); + }, + } + } + + return filename.toOwnedSlice(); +} + +fn testParseFilename(expected: []const u8, input: []const u8) !void { + const parsed = try parseFilename(std.testing.allocator, input); + defer std.testing.allocator.free(parsed); + + return std.testing.expectEqualSlices(u8, expected, parsed); +} + +test parseFilename { + try testParseFilename("'\"?\\\t\n\r\x11", "\\'\\\"\\?\\\\\\t\\n\\r\\x11"); + try testParseFilename("\xABz\x53", "\\xABz\\123"); + try testParseFilename("⚡⚡", "\\u26A1\\U000026A1"); + try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\"")); + try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\")); + try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\u")); + try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\U")); + try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\x")); + try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\xZZ")); + try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\xABCDEF")); + try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\777")); +} + +pub const SourceMappings = struct { + /// line number -> span where the index is (line number - 1) + mapping: std.ArrayListUnmanaged(SourceSpan) = .{}, + files: StringTable = .{}, + /// The default assumes that the first filename added is the root file. + /// The value should be set to the correct offset if that assumption does not hold. + root_filename_offset: u32 = 0, + + pub const SourceSpan = struct { + start_line: usize, + end_line: usize, + filename_offset: u32, + }; + + pub fn deinit(self: *SourceMappings, allocator: Allocator) void { + self.files.deinit(allocator); + self.mapping.deinit(allocator); + } + + pub fn set(self: *SourceMappings, allocator: Allocator, line_num: usize, span: SourceSpan) !void { + var ptr = try self.expandAndGet(allocator, line_num); + ptr.* = span; + } + + pub fn has(self: *SourceMappings, line_num: usize) bool { + return self.mapping.items.len >= line_num; + } + + /// Note: `line_num` is 1-indexed + pub fn get(self: SourceMappings, line_num: usize) SourceSpan { + return self.mapping.items[line_num - 1]; + } + + pub fn getPtr(self: SourceMappings, line_num: usize) *SourceSpan { + return &self.mapping.items[line_num - 1]; + } + + /// Expands the number of lines in the mapping to include the requested + /// line number (if necessary) and returns a pointer to the value at that + /// line number. 
+ /// + /// Note: `line_num` is 1-indexed + pub fn expandAndGet(self: *SourceMappings, allocator: Allocator, line_num: usize) !*SourceSpan { + try self.mapping.resize(allocator, line_num); + return &self.mapping.items[line_num - 1]; + } + + pub fn collapse(self: *SourceMappings, line_num: usize, num_following_lines_to_collapse: usize) void { + std.debug.assert(num_following_lines_to_collapse > 0); + + var span_to_collapse_into = self.getPtr(line_num); + const last_collapsed_span = self.get(line_num + num_following_lines_to_collapse); + span_to_collapse_into.end_line = last_collapsed_span.end_line; + + const after_collapsed_start = line_num + num_following_lines_to_collapse; + const new_num_lines = self.mapping.items.len - num_following_lines_to_collapse; + std.mem.copy(SourceSpan, self.mapping.items[line_num..new_num_lines], self.mapping.items[after_collapsed_start..]); + + self.mapping.items.len = new_num_lines; + } + + /// Returns true if the line is from the main/root file (i.e. not a file that has been + /// `#include`d). + pub fn isRootFile(self: *SourceMappings, line_num: usize) bool { + const line_mapping = self.get(line_num); + if (line_mapping.filename_offset == self.root_filename_offset) return true; + return false; + } +}; + +test "SourceMappings collapse" { + const allocator = std.testing.allocator; + + var mappings = SourceMappings{}; + defer mappings.deinit(allocator); + const filename_offset = try mappings.files.put(allocator, "test.rc"); + + try mappings.set(allocator, 1, .{ .start_line = 1, .end_line = 1, .filename_offset = filename_offset }); + try mappings.set(allocator, 2, .{ .start_line = 2, .end_line = 3, .filename_offset = filename_offset }); + try mappings.set(allocator, 3, .{ .start_line = 4, .end_line = 4, .filename_offset = filename_offset }); + try mappings.set(allocator, 4, .{ .start_line = 5, .end_line = 5, .filename_offset = filename_offset }); + + mappings.collapse(1, 2); + + try std.testing.expectEqual(@as(usize, 2), mappings.mapping.items.len); + try std.testing.expectEqual(@as(usize, 4), mappings.mapping.items[0].end_line); + try std.testing.expectEqual(@as(usize, 5), mappings.mapping.items[1].end_line); +} + +/// Same thing as StringTable in Zig's src/Wasm.zig +pub const StringTable = struct { + data: std.ArrayListUnmanaged(u8) = .{}, + map: std.HashMapUnmanaged(u32, void, std.hash_map.StringIndexContext, std.hash_map.default_max_load_percentage) = .{}, + + pub fn deinit(self: *StringTable, allocator: Allocator) void { + self.data.deinit(allocator); + self.map.deinit(allocator); + } + + pub fn put(self: *StringTable, allocator: Allocator, value: []const u8) !u32 { + const result = try self.map.getOrPutContextAdapted( + allocator, + value, + std.hash_map.StringIndexAdapter{ .bytes = &self.data }, + .{ .bytes = &self.data }, + ); + if (result.found_existing) { + return result.key_ptr.*; + } + + try self.data.ensureUnusedCapacity(allocator, value.len + 1); + const offset: u32 = @intCast(self.data.items.len); + + self.data.appendSliceAssumeCapacity(value); + self.data.appendAssumeCapacity(0); + + result.key_ptr.* = offset; + + return offset; + } + + pub fn get(self: StringTable, offset: u32) []const u8 { + std.debug.assert(offset < self.data.items.len); + return std.mem.sliceTo(@as([*:0]const u8, @ptrCast(self.data.items.ptr + offset)), 0); + } + + pub fn getOffset(self: *StringTable, value: []const u8) ?u32 { + return self.map.getKeyAdapted( + value, + std.hash_map.StringIndexAdapter{ .bytes = &self.data }, + ); + } +}; + +const ExpectedSourceSpan = struct 
{ + start_line: usize, + end_line: usize, + filename: []const u8, +}; + +fn testParseAndRemoveLineCommands( + expected: []const u8, + comptime expected_spans: []const ExpectedSourceSpan, + source: []const u8, + options: ParseAndRemoveLineCommandsOptions, +) !void { + var results = try parseAndRemoveLineCommandsAlloc(std.testing.allocator, source, options); + defer std.testing.allocator.free(results.result); + defer results.mappings.deinit(std.testing.allocator); + + try std.testing.expectEqualStrings(expected, results.result); + + expectEqualMappings(expected_spans, results.mappings) catch |err| { + std.debug.print("\nexpected mappings:\n", .{}); + for (expected_spans, 0..) |span, i| { + const line_num = i + 1; + std.debug.print("{}: {s}:{}-{}\n", .{ line_num, span.filename, span.start_line, span.end_line }); + } + std.debug.print("\nactual mappings:\n", .{}); + for (results.mappings.mapping.items, 0..) |span, i| { + const line_num = i + 1; + const filename = results.mappings.files.get(span.filename_offset); + std.debug.print("{}: {s}:{}-{}\n", .{ line_num, filename, span.start_line, span.end_line }); + } + std.debug.print("\n", .{}); + return err; + }; +} + +fn expectEqualMappings(expected_spans: []const ExpectedSourceSpan, mappings: SourceMappings) !void { + try std.testing.expectEqual(expected_spans.len, mappings.mapping.items.len); + for (expected_spans, 0..) |expected_span, i| { + const line_num = i + 1; + const span = mappings.get(line_num); + const filename = mappings.files.get(span.filename_offset); + try std.testing.expectEqual(expected_span.start_line, span.start_line); + try std.testing.expectEqual(expected_span.end_line, span.end_line); + try std.testing.expectEqualStrings(expected_span.filename, filename); + } +} + +test "basic" { + try testParseAndRemoveLineCommands("", &[_]ExpectedSourceSpan{ + .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" }, + }, "#line 1 \"blah.rc\"", .{}); +} + +test "only removes line commands" { + try testParseAndRemoveLineCommands( + \\#pragma code_page(65001) + , &[_]ExpectedSourceSpan{ + .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" }, + }, + \\#line 1 "blah.rc" + \\#pragma code_page(65001) + , .{}); +} + +test "whitespace and line endings" { + try testParseAndRemoveLineCommands("", &[_]ExpectedSourceSpan{ + .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" }, + }, "#line \t 1 \t \"blah.rc\"\r\n", .{}); +} + +test "example" { + try testParseAndRemoveLineCommands( + \\ + \\included RCDATA {"hello"} + , &[_]ExpectedSourceSpan{ + .{ .start_line = 1, .end_line = 1, .filename = "./included.rc" }, + .{ .start_line = 2, .end_line = 2, .filename = "./included.rc" }, + }, + \\#line 1 "rcdata.rc" + \\#line 1 "" + \\#line 1 "" + \\#line 355 "" + \\#line 1 "" + \\#line 1 "" + \\#line 1 "rcdata.rc" + \\#line 1 "./header.h" + \\ + \\ + \\2 RCDATA {"blah"} + \\ + \\ + \\#line 1 "./included.rc" + \\ + \\included RCDATA {"hello"} + \\#line 7 "./header.h" + \\#line 1 "rcdata.rc" + , .{}); +} + +test "CRLF and other line endings" { + try testParseAndRemoveLineCommands( + "hello\r\n#pragma code_page(65001)\r\nworld", + &[_]ExpectedSourceSpan{ + .{ .start_line = 1, .end_line = 1, .filename = "crlf.rc" }, + .{ .start_line = 2, .end_line = 2, .filename = "crlf.rc" }, + .{ .start_line = 3, .end_line = 3, .filename = "crlf.rc" }, + }, + "#line 1 \"crlf.rc\"\r\n#line 1 \"\"\r#line 1 \"crlf.rc\"\n\rhello\r\n#pragma code_page(65001)\r\nworld\r\n", + .{}, + ); +} + +test "no line commands" { + try testParseAndRemoveLineCommands( + \\1 RCDATA {"blah"} 
+ \\2 RCDATA {"blah"} + , &[_]ExpectedSourceSpan{ + .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" }, + .{ .start_line = 2, .end_line = 2, .filename = "blah.rc" }, + }, + \\1 RCDATA {"blah"} + \\2 RCDATA {"blah"} + , .{ .initial_filename = "blah.rc" }); +} + +test "in place" { + var mut_source = "#line 1 \"blah.rc\"".*; + var result = try parseAndRemoveLineCommands(std.testing.allocator, &mut_source, &mut_source, .{}); + defer result.mappings.deinit(std.testing.allocator); + try std.testing.expectEqualStrings("", result.result); +} diff --git a/src/resinator/utils.zig b/src/resinator/utils.zig new file mode 100644 index 000000000000..a29f068aeaf8 --- /dev/null +++ b/src/resinator/utils.zig @@ -0,0 +1,83 @@ +const std = @import("std"); +const builtin = @import("builtin"); + +/// Like std.io.FixedBufferStream but does no bounds checking +pub const UncheckedSliceWriter = struct { + const Self = @This(); + + pos: usize = 0, + slice: []u8, + + pub fn write(self: *Self, char: u8) void { + self.slice[self.pos] = char; + self.pos += 1; + } + + pub fn writeSlice(self: *Self, slice: []const u8) void { + for (slice) |c| { + self.write(c); + } + } + + pub fn getWritten(self: Self) []u8 { + return self.slice[0..self.pos]; + } +}; + +/// Cross-platform 'std.fs.Dir.openFile' wrapper that will always return IsDir if +/// a directory is attempted to be opened. +/// TODO: Remove once https://github.com/ziglang/zig/issues/5732 is addressed. +pub fn openFileNotDir(cwd: std.fs.Dir, path: []const u8, flags: std.fs.File.OpenFlags) std.fs.File.OpenError!std.fs.File { + const file = try cwd.openFile(path, flags); + errdefer file.close(); + // https://github.com/ziglang/zig/issues/5732 + if (builtin.os.tag != .windows) { + const stat = try file.stat(); + + if (stat.kind == .directory) + return error.IsDir; + } + return file; +} + +/// Emulates the Windows implementation of `iswdigit`, but only returns true +/// for the non-ASCII digits that `iswdigit` on Windows would return true for. 
+pub fn isNonAsciiDigit(c: u21) bool { + return switch (c) { + '²', + '³', + '¹', + '\u{660}'...'\u{669}', + '\u{6F0}'...'\u{6F9}', + '\u{7C0}'...'\u{7C9}', + '\u{966}'...'\u{96F}', + '\u{9E6}'...'\u{9EF}', + '\u{A66}'...'\u{A6F}', + '\u{AE6}'...'\u{AEF}', + '\u{B66}'...'\u{B6F}', + '\u{BE6}'...'\u{BEF}', + '\u{C66}'...'\u{C6F}', + '\u{CE6}'...'\u{CEF}', + '\u{D66}'...'\u{D6F}', + '\u{E50}'...'\u{E59}', + '\u{ED0}'...'\u{ED9}', + '\u{F20}'...'\u{F29}', + '\u{1040}'...'\u{1049}', + '\u{1090}'...'\u{1099}', + '\u{17E0}'...'\u{17E9}', + '\u{1810}'...'\u{1819}', + '\u{1946}'...'\u{194F}', + '\u{19D0}'...'\u{19D9}', + '\u{1B50}'...'\u{1B59}', + '\u{1BB0}'...'\u{1BB9}', + '\u{1C40}'...'\u{1C49}', + '\u{1C50}'...'\u{1C59}', + '\u{A620}'...'\u{A629}', + '\u{A8D0}'...'\u{A8D9}', + '\u{A900}'...'\u{A909}', + '\u{AA50}'...'\u{AA59}', + '\u{FF10}'...'\u{FF19}', + => true, + else => false, + }; +} diff --git a/src/resinator/windows1252.zig b/src/resinator/windows1252.zig new file mode 100644 index 000000000000..81e4dfa4549e --- /dev/null +++ b/src/resinator/windows1252.zig @@ -0,0 +1,588 @@ +const std = @import("std"); + +pub fn windows1252ToUtf8Stream(writer: anytype, reader: anytype) !usize { + var bytes_written: usize = 0; + var utf8_buf: [3]u8 = undefined; + while (true) { + const c = reader.readByte() catch |err| switch (err) { + error.EndOfStream => return bytes_written, + else => |e| return e, + }; + const codepoint = toCodepoint(c); + if (codepoint <= 0x7F) { + try writer.writeByte(c); + bytes_written += 1; + } else { + const utf8_len = std.unicode.utf8Encode(codepoint, &utf8_buf) catch unreachable; + try writer.writeAll(utf8_buf[0..utf8_len]); + bytes_written += utf8_len; + } + } +} + +/// Returns the number of code units written to the writer +pub fn windows1252ToUtf16AllocZ(allocator: std.mem.Allocator, win1252_str: []const u8) ![:0]u16 { + // Guaranteed to need exactly the same number of code units as Windows-1252 bytes + var utf16_slice = try allocator.allocSentinel(u16, win1252_str.len, 0); + errdefer allocator.free(utf16_slice); + for (win1252_str, 0..) 
|c, i| { + utf16_slice[i] = toCodepoint(c); + } + return utf16_slice; +} + +/// https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/bestfit1252.txt +pub fn toCodepoint(c: u8) u16 { + return switch (c) { + 0x80 => 0x20ac, // Euro Sign + 0x82 => 0x201a, // Single Low-9 Quotation Mark + 0x83 => 0x0192, // Latin Small Letter F With Hook + 0x84 => 0x201e, // Double Low-9 Quotation Mark + 0x85 => 0x2026, // Horizontal Ellipsis + 0x86 => 0x2020, // Dagger + 0x87 => 0x2021, // Double Dagger + 0x88 => 0x02c6, // Modifier Letter Circumflex Accent + 0x89 => 0x2030, // Per Mille Sign + 0x8a => 0x0160, // Latin Capital Letter S With Caron + 0x8b => 0x2039, // Single Left-Pointing Angle Quotation Mark + 0x8c => 0x0152, // Latin Capital Ligature Oe + 0x8e => 0x017d, // Latin Capital Letter Z With Caron + 0x91 => 0x2018, // Left Single Quotation Mark + 0x92 => 0x2019, // Right Single Quotation Mark + 0x93 => 0x201c, // Left Double Quotation Mark + 0x94 => 0x201d, // Right Double Quotation Mark + 0x95 => 0x2022, // Bullet + 0x96 => 0x2013, // En Dash + 0x97 => 0x2014, // Em Dash + 0x98 => 0x02dc, // Small Tilde + 0x99 => 0x2122, // Trade Mark Sign + 0x9a => 0x0161, // Latin Small Letter S With Caron + 0x9b => 0x203a, // Single Right-Pointing Angle Quotation Mark + 0x9c => 0x0153, // Latin Small Ligature Oe + 0x9e => 0x017e, // Latin Small Letter Z With Caron + 0x9f => 0x0178, // Latin Capital Letter Y With Diaeresis + else => c, + }; +} + +/// https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/bestfit1252.txt +/// Plus some mappings found empirically by iterating all codepoints: +/// 0x2007 => 0xA0, // Figure Space +/// 0x2008 => ' ', // Punctuation Space +/// 0x2009 => ' ', // Thin Space +/// 0x200A => ' ', // Hair Space +/// 0x2012 => '-', // Figure Dash +/// 0x2015 => '-', // Horizontal Bar +/// 0x201B => '\'', // Single High-reversed-9 Quotation Mark +/// 0x201F => '"', // Double High-reversed-9 Quotation Mark +/// 0x202F => 0xA0, // Narrow No-Break Space +/// 0x2033 => '"', // Double Prime +/// 0x2036 => '"', // Reversed Double Prime +pub fn bestFitFromCodepoint(codepoint: u21) ?u8 { + return switch (codepoint) { + 0x00...0x7F, + 0x81, + 0x8D, + 0x8F, + 0x90, + 0x9D, + 0xA0...0xFF, + => @intCast(codepoint), + 0x0100 => 0x41, // Latin Capital Letter A With Macron + 0x0101 => 0x61, // Latin Small Letter A With Macron + 0x0102 => 0x41, // Latin Capital Letter A With Breve + 0x0103 => 0x61, // Latin Small Letter A With Breve + 0x0104 => 0x41, // Latin Capital Letter A With Ogonek + 0x0105 => 0x61, // Latin Small Letter A With Ogonek + 0x0106 => 0x43, // Latin Capital Letter C With Acute + 0x0107 => 0x63, // Latin Small Letter C With Acute + 0x0108 => 0x43, // Latin Capital Letter C With Circumflex + 0x0109 => 0x63, // Latin Small Letter C With Circumflex + 0x010a => 0x43, // Latin Capital Letter C With Dot Above + 0x010b => 0x63, // Latin Small Letter C With Dot Above + 0x010c => 0x43, // Latin Capital Letter C With Caron + 0x010d => 0x63, // Latin Small Letter C With Caron + 0x010e => 0x44, // Latin Capital Letter D With Caron + 0x010f => 0x64, // Latin Small Letter D With Caron + 0x0110 => 0xd0, // Latin Capital Letter D With Stroke + 0x0111 => 0x64, // Latin Small Letter D With Stroke + 0x0112 => 0x45, // Latin Capital Letter E With Macron + 0x0113 => 0x65, // Latin Small Letter E With Macron + 0x0114 => 0x45, // Latin Capital Letter E With Breve + 0x0115 => 0x65, // Latin Small Letter E With Breve + 0x0116 => 0x45, // Latin Capital Letter E With Dot Above + 0x0117 
=> 0x65, // Latin Small Letter E With Dot Above + 0x0118 => 0x45, // Latin Capital Letter E With Ogonek + 0x0119 => 0x65, // Latin Small Letter E With Ogonek + 0x011a => 0x45, // Latin Capital Letter E With Caron + 0x011b => 0x65, // Latin Small Letter E With Caron + 0x011c => 0x47, // Latin Capital Letter G With Circumflex + 0x011d => 0x67, // Latin Small Letter G With Circumflex + 0x011e => 0x47, // Latin Capital Letter G With Breve + 0x011f => 0x67, // Latin Small Letter G With Breve + 0x0120 => 0x47, // Latin Capital Letter G With Dot Above + 0x0121 => 0x67, // Latin Small Letter G With Dot Above + 0x0122 => 0x47, // Latin Capital Letter G With Cedilla + 0x0123 => 0x67, // Latin Small Letter G With Cedilla + 0x0124 => 0x48, // Latin Capital Letter H With Circumflex + 0x0125 => 0x68, // Latin Small Letter H With Circumflex + 0x0126 => 0x48, // Latin Capital Letter H With Stroke + 0x0127 => 0x68, // Latin Small Letter H With Stroke + 0x0128 => 0x49, // Latin Capital Letter I With Tilde + 0x0129 => 0x69, // Latin Small Letter I With Tilde + 0x012a => 0x49, // Latin Capital Letter I With Macron + 0x012b => 0x69, // Latin Small Letter I With Macron + 0x012c => 0x49, // Latin Capital Letter I With Breve + 0x012d => 0x69, // Latin Small Letter I With Breve + 0x012e => 0x49, // Latin Capital Letter I With Ogonek + 0x012f => 0x69, // Latin Small Letter I With Ogonek + 0x0130 => 0x49, // Latin Capital Letter I With Dot Above + 0x0131 => 0x69, // Latin Small Letter Dotless I + 0x0134 => 0x4a, // Latin Capital Letter J With Circumflex + 0x0135 => 0x6a, // Latin Small Letter J With Circumflex + 0x0136 => 0x4b, // Latin Capital Letter K With Cedilla + 0x0137 => 0x6b, // Latin Small Letter K With Cedilla + 0x0139 => 0x4c, // Latin Capital Letter L With Acute + 0x013a => 0x6c, // Latin Small Letter L With Acute + 0x013b => 0x4c, // Latin Capital Letter L With Cedilla + 0x013c => 0x6c, // Latin Small Letter L With Cedilla + 0x013d => 0x4c, // Latin Capital Letter L With Caron + 0x013e => 0x6c, // Latin Small Letter L With Caron + 0x0141 => 0x4c, // Latin Capital Letter L With Stroke + 0x0142 => 0x6c, // Latin Small Letter L With Stroke + 0x0143 => 0x4e, // Latin Capital Letter N With Acute + 0x0144 => 0x6e, // Latin Small Letter N With Acute + 0x0145 => 0x4e, // Latin Capital Letter N With Cedilla + 0x0146 => 0x6e, // Latin Small Letter N With Cedilla + 0x0147 => 0x4e, // Latin Capital Letter N With Caron + 0x0148 => 0x6e, // Latin Small Letter N With Caron + 0x014c => 0x4f, // Latin Capital Letter O With Macron + 0x014d => 0x6f, // Latin Small Letter O With Macron + 0x014e => 0x4f, // Latin Capital Letter O With Breve + 0x014f => 0x6f, // Latin Small Letter O With Breve + 0x0150 => 0x4f, // Latin Capital Letter O With Double Acute + 0x0151 => 0x6f, // Latin Small Letter O With Double Acute + 0x0152 => 0x8c, // Latin Capital Ligature Oe + 0x0153 => 0x9c, // Latin Small Ligature Oe + 0x0154 => 0x52, // Latin Capital Letter R With Acute + 0x0155 => 0x72, // Latin Small Letter R With Acute + 0x0156 => 0x52, // Latin Capital Letter R With Cedilla + 0x0157 => 0x72, // Latin Small Letter R With Cedilla + 0x0158 => 0x52, // Latin Capital Letter R With Caron + 0x0159 => 0x72, // Latin Small Letter R With Caron + 0x015a => 0x53, // Latin Capital Letter S With Acute + 0x015b => 0x73, // Latin Small Letter S With Acute + 0x015c => 0x53, // Latin Capital Letter S With Circumflex + 0x015d => 0x73, // Latin Small Letter S With Circumflex + 0x015e => 0x53, // Latin Capital Letter S With Cedilla + 0x015f => 0x73, // 
Latin Small Letter S With Cedilla + 0x0160 => 0x8a, // Latin Capital Letter S With Caron + 0x0161 => 0x9a, // Latin Small Letter S With Caron + 0x0162 => 0x54, // Latin Capital Letter T With Cedilla + 0x0163 => 0x74, // Latin Small Letter T With Cedilla + 0x0164 => 0x54, // Latin Capital Letter T With Caron + 0x0165 => 0x74, // Latin Small Letter T With Caron + 0x0166 => 0x54, // Latin Capital Letter T With Stroke + 0x0167 => 0x74, // Latin Small Letter T With Stroke + 0x0168 => 0x55, // Latin Capital Letter U With Tilde + 0x0169 => 0x75, // Latin Small Letter U With Tilde + 0x016a => 0x55, // Latin Capital Letter U With Macron + 0x016b => 0x75, // Latin Small Letter U With Macron + 0x016c => 0x55, // Latin Capital Letter U With Breve + 0x016d => 0x75, // Latin Small Letter U With Breve + 0x016e => 0x55, // Latin Capital Letter U With Ring Above + 0x016f => 0x75, // Latin Small Letter U With Ring Above + 0x0170 => 0x55, // Latin Capital Letter U With Double Acute + 0x0171 => 0x75, // Latin Small Letter U With Double Acute + 0x0172 => 0x55, // Latin Capital Letter U With Ogonek + 0x0173 => 0x75, // Latin Small Letter U With Ogonek + 0x0174 => 0x57, // Latin Capital Letter W With Circumflex + 0x0175 => 0x77, // Latin Small Letter W With Circumflex + 0x0176 => 0x59, // Latin Capital Letter Y With Circumflex + 0x0177 => 0x79, // Latin Small Letter Y With Circumflex + 0x0178 => 0x9f, // Latin Capital Letter Y With Diaeresis + 0x0179 => 0x5a, // Latin Capital Letter Z With Acute + 0x017a => 0x7a, // Latin Small Letter Z With Acute + 0x017b => 0x5a, // Latin Capital Letter Z With Dot Above + 0x017c => 0x7a, // Latin Small Letter Z With Dot Above + 0x017d => 0x8e, // Latin Capital Letter Z With Caron + 0x017e => 0x9e, // Latin Small Letter Z With Caron + 0x0180 => 0x62, // Latin Small Letter B With Stroke + 0x0189 => 0xd0, // Latin Capital Letter African D + 0x0191 => 0x83, // Latin Capital Letter F With Hook + 0x0192 => 0x83, // Latin Small Letter F With Hook + 0x0197 => 0x49, // Latin Capital Letter I With Stroke + 0x019a => 0x6c, // Latin Small Letter L With Bar + 0x019f => 0x4f, // Latin Capital Letter O With Middle Tilde + 0x01a0 => 0x4f, // Latin Capital Letter O With Horn + 0x01a1 => 0x6f, // Latin Small Letter O With Horn + 0x01ab => 0x74, // Latin Small Letter T With Palatal Hook + 0x01ae => 0x54, // Latin Capital Letter T With Retroflex Hook + 0x01af => 0x55, // Latin Capital Letter U With Horn + 0x01b0 => 0x75, // Latin Small Letter U With Horn + 0x01b6 => 0x7a, // Latin Small Letter Z With Stroke + 0x01c0 => 0x7c, // Latin Letter Dental Click + 0x01c3 => 0x21, // Latin Letter Retroflex Click + 0x01cd => 0x41, // Latin Capital Letter A With Caron + 0x01ce => 0x61, // Latin Small Letter A With Caron + 0x01cf => 0x49, // Latin Capital Letter I With Caron + 0x01d0 => 0x69, // Latin Small Letter I With Caron + 0x01d1 => 0x4f, // Latin Capital Letter O With Caron + 0x01d2 => 0x6f, // Latin Small Letter O With Caron + 0x01d3 => 0x55, // Latin Capital Letter U With Caron + 0x01d4 => 0x75, // Latin Small Letter U With Caron + 0x01d5 => 0x55, // Latin Capital Letter U With Diaeresis And Macron + 0x01d6 => 0x75, // Latin Small Letter U With Diaeresis And Macron + 0x01d7 => 0x55, // Latin Capital Letter U With Diaeresis And Acute + 0x01d8 => 0x75, // Latin Small Letter U With Diaeresis And Acute + 0x01d9 => 0x55, // Latin Capital Letter U With Diaeresis And Caron + 0x01da => 0x75, // Latin Small Letter U With Diaeresis And Caron + 0x01db => 0x55, // Latin Capital Letter U With Diaeresis And Grave 
+ 0x01dc => 0x75, // Latin Small Letter U With Diaeresis And Grave + 0x01de => 0x41, // Latin Capital Letter A With Diaeresis And Macron + 0x01df => 0x61, // Latin Small Letter A With Diaeresis And Macron + 0x01e4 => 0x47, // Latin Capital Letter G With Stroke + 0x01e5 => 0x67, // Latin Small Letter G With Stroke + 0x01e6 => 0x47, // Latin Capital Letter G With Caron + 0x01e7 => 0x67, // Latin Small Letter G With Caron + 0x01e8 => 0x4b, // Latin Capital Letter K With Caron + 0x01e9 => 0x6b, // Latin Small Letter K With Caron + 0x01ea => 0x4f, // Latin Capital Letter O With Ogonek + 0x01eb => 0x6f, // Latin Small Letter O With Ogonek + 0x01ec => 0x4f, // Latin Capital Letter O With Ogonek And Macron + 0x01ed => 0x6f, // Latin Small Letter O With Ogonek And Macron + 0x01f0 => 0x6a, // Latin Small Letter J With Caron + 0x0261 => 0x67, // Latin Small Letter Script G + 0x02b9 => 0x27, // Modifier Letter Prime + 0x02ba => 0x22, // Modifier Letter Double Prime + 0x02bc => 0x27, // Modifier Letter Apostrophe + 0x02c4 => 0x5e, // Modifier Letter Up Arrowhead + 0x02c6 => 0x88, // Modifier Letter Circumflex Accent + 0x02c8 => 0x27, // Modifier Letter Vertical Line + 0x02c9 => 0xaf, // Modifier Letter Macron + 0x02ca => 0xb4, // Modifier Letter Acute Accent + 0x02cb => 0x60, // Modifier Letter Grave Accent + 0x02cd => 0x5f, // Modifier Letter Low Macron + 0x02da => 0xb0, // Ring Above + 0x02dc => 0x98, // Small Tilde + 0x0300 => 0x60, // Combining Grave Accent + 0x0301 => 0xb4, // Combining Acute Accent + 0x0302 => 0x5e, // Combining Circumflex Accent + 0x0303 => 0x7e, // Combining Tilde + 0x0304 => 0xaf, // Combining Macron + 0x0305 => 0xaf, // Combining Overline + 0x0308 => 0xa8, // Combining Diaeresis + 0x030a => 0xb0, // Combining Ring Above + 0x030e => 0x22, // Combining Double Vertical Line Above + 0x0327 => 0xb8, // Combining Cedilla + 0x0331 => 0x5f, // Combining Macron Below + 0x0332 => 0x5f, // Combining Low Line + 0x037e => 0x3b, // Greek Question Mark + 0x0393 => 0x47, // Greek Capital Letter Gamma + 0x0398 => 0x54, // Greek Capital Letter Theta + 0x03a3 => 0x53, // Greek Capital Letter Sigma + 0x03a6 => 0x46, // Greek Capital Letter Phi + 0x03a9 => 0x4f, // Greek Capital Letter Omega + 0x03b1 => 0x61, // Greek Small Letter Alpha + 0x03b2 => 0xdf, // Greek Small Letter Beta + 0x03b4 => 0x64, // Greek Small Letter Delta + 0x03b5 => 0x65, // Greek Small Letter Epsilon + 0x03bc => 0xb5, // Greek Small Letter Mu + 0x03c0 => 0x70, // Greek Small Letter Pi + 0x03c3 => 0x73, // Greek Small Letter Sigma + 0x03c4 => 0x74, // Greek Small Letter Tau + 0x03c6 => 0x66, // Greek Small Letter Phi + 0x04bb => 0x68, // Cyrillic Small Letter Shha + 0x0589 => 0x3a, // Armenian Full Stop + 0x066a => 0x25, // Arabic Percent Sign + 0x2000 => 0x20, // En Quad + 0x2001 => 0x20, // Em Quad + 0x2002 => 0x20, // En Space + 0x2003 => 0x20, // Em Space + 0x2004 => 0x20, // Three-Per-Em Space + 0x2005 => 0x20, // Four-Per-Em Space + 0x2006 => 0x20, // Six-Per-Em Space + 0x2010 => 0x2d, // Hyphen + 0x2011 => 0x2d, // Non-Breaking Hyphen + 0x2013 => 0x96, // En Dash + 0x2014 => 0x97, // Em Dash + 0x2017 => 0x3d, // Double Low Line + 0x2018 => 0x91, // Left Single Quotation Mark + 0x2019 => 0x92, // Right Single Quotation Mark + 0x201a => 0x82, // Single Low-9 Quotation Mark + 0x201c => 0x93, // Left Double Quotation Mark + 0x201d => 0x94, // Right Double Quotation Mark + 0x201e => 0x84, // Double Low-9 Quotation Mark + 0x2020 => 0x86, // Dagger + 0x2021 => 0x87, // Double Dagger + 0x2022 => 0x95, // Bullet + 0x2024 => 
0xb7, // One Dot Leader + 0x2026 => 0x85, // Horizontal Ellipsis + 0x2030 => 0x89, // Per Mille Sign + 0x2032 => 0x27, // Prime + 0x2035 => 0x60, // Reversed Prime + 0x2039 => 0x8b, // Single Left-Pointing Angle Quotation Mark + 0x203a => 0x9b, // Single Right-Pointing Angle Quotation Mark + 0x2044 => 0x2f, // Fraction Slash + 0x2070 => 0xb0, // Superscript Zero + 0x2074 => 0x34, // Superscript Four + 0x2075 => 0x35, // Superscript Five + 0x2076 => 0x36, // Superscript Six + 0x2077 => 0x37, // Superscript Seven + 0x2078 => 0x38, // Superscript Eight + 0x207f => 0x6e, // Superscript Latin Small Letter N + 0x2080 => 0x30, // Subscript Zero + 0x2081 => 0x31, // Subscript One + 0x2082 => 0x32, // Subscript Two + 0x2083 => 0x33, // Subscript Three + 0x2084 => 0x34, // Subscript Four + 0x2085 => 0x35, // Subscript Five + 0x2086 => 0x36, // Subscript Six + 0x2087 => 0x37, // Subscript Seven + 0x2088 => 0x38, // Subscript Eight + 0x2089 => 0x39, // Subscript Nine + 0x20ac => 0x80, // Euro Sign + 0x20a1 => 0xa2, // Colon Sign + 0x20a4 => 0xa3, // Lira Sign + 0x20a7 => 0x50, // Peseta Sign + 0x2102 => 0x43, // Double-Struck Capital C + 0x2107 => 0x45, // Euler Constant + 0x210a => 0x67, // Script Small G + 0x210b => 0x48, // Script Capital H + 0x210c => 0x48, // Black-Letter Capital H + 0x210d => 0x48, // Double-Struck Capital H + 0x210e => 0x68, // Planck Constant + 0x2110 => 0x49, // Script Capital I + 0x2111 => 0x49, // Black-Letter Capital I + 0x2112 => 0x4c, // Script Capital L + 0x2113 => 0x6c, // Script Small L + 0x2115 => 0x4e, // Double-Struck Capital N + 0x2118 => 0x50, // Script Capital P + 0x2119 => 0x50, // Double-Struck Capital P + 0x211a => 0x51, // Double-Struck Capital Q + 0x211b => 0x52, // Script Capital R + 0x211c => 0x52, // Black-Letter Capital R + 0x211d => 0x52, // Double-Struck Capital R + 0x2122 => 0x99, // Trade Mark Sign + 0x2124 => 0x5a, // Double-Struck Capital Z + 0x2128 => 0x5a, // Black-Letter Capital Z + 0x212a => 0x4b, // Kelvin Sign + 0x212b => 0xc5, // Angstrom Sign + 0x212c => 0x42, // Script Capital B + 0x212d => 0x43, // Black-Letter Capital C + 0x212e => 0x65, // Estimated Symbol + 0x212f => 0x65, // Script Small E + 0x2130 => 0x45, // Script Capital E + 0x2131 => 0x46, // Script Capital F + 0x2133 => 0x4d, // Script Capital M + 0x2134 => 0x6f, // Script Small O + 0x2205 => 0xd8, // Empty Set + 0x2212 => 0x2d, // Minus Sign + 0x2213 => 0xb1, // Minus-Or-Plus Sign + 0x2215 => 0x2f, // Division Slash + 0x2216 => 0x5c, // Set Minus + 0x2217 => 0x2a, // Asterisk Operator + 0x2218 => 0xb0, // Ring Operator + 0x2219 => 0xb7, // Bullet Operator + 0x221a => 0x76, // Square Root + 0x221e => 0x38, // Infinity + 0x2223 => 0x7c, // Divides + 0x2229 => 0x6e, // Intersection + 0x2236 => 0x3a, // Ratio + 0x223c => 0x7e, // Tilde Operator + 0x2248 => 0x98, // Almost Equal To + 0x2261 => 0x3d, // Identical To + 0x2264 => 0x3d, // Less-Than Or Equal To + 0x2265 => 0x3d, // Greater-Than Or Equal To + 0x226a => 0xab, // Much Less-Than + 0x226b => 0xbb, // Much Greater-Than + 0x22c5 => 0xb7, // Dot Operator + 0x2302 => 0xa6, // House + 0x2303 => 0x5e, // Up Arrowhead + 0x2310 => 0xac, // Reversed Not Sign + 0x2320 => 0x28, // Top Half Integral + 0x2321 => 0x29, // Bottom Half Integral + 0x2329 => 0x3c, // Left-Pointing Angle Bracket + 0x232a => 0x3e, // Right-Pointing Angle Bracket + 0x2500 => 0x2d, // Box Drawings Light Horizontal + 0x2502 => 0xa6, // Box Drawings Light Vertical + 0x250c => 0x2b, // Box Drawings Light Down And Right + 0x2510 => 0x2b, // Box Drawings Light Down 
And Left + 0x2514 => 0x2b, // Box Drawings Light Up And Right + 0x2518 => 0x2b, // Box Drawings Light Up And Left + 0x251c => 0x2b, // Box Drawings Light Vertical And Right + 0x2524 => 0xa6, // Box Drawings Light Vertical And Left + 0x252c => 0x2d, // Box Drawings Light Down And Horizontal + 0x2534 => 0x2d, // Box Drawings Light Up And Horizontal + 0x253c => 0x2b, // Box Drawings Light Vertical And Horizontal + 0x2550 => 0x2d, // Box Drawings Double Horizontal + 0x2551 => 0xa6, // Box Drawings Double Vertical + 0x2552 => 0x2b, // Box Drawings Down Single And Right Double + 0x2553 => 0x2b, // Box Drawings Down Double And Right Single + 0x2554 => 0x2b, // Box Drawings Double Down And Right + 0x2555 => 0x2b, // Box Drawings Down Single And Left Double + 0x2556 => 0x2b, // Box Drawings Down Double And Left Single + 0x2557 => 0x2b, // Box Drawings Double Down And Left + 0x2558 => 0x2b, // Box Drawings Up Single And Right Double + 0x2559 => 0x2b, // Box Drawings Up Double And Right Single + 0x255a => 0x2b, // Box Drawings Double Up And Right + 0x255b => 0x2b, // Box Drawings Up Single And Left Double + 0x255c => 0x2b, // Box Drawings Up Double And Left Single + 0x255d => 0x2b, // Box Drawings Double Up And Left + 0x255e => 0xa6, // Box Drawings Vertical Single And Right Double + 0x255f => 0xa6, // Box Drawings Vertical Double And Right Single + 0x2560 => 0xa6, // Box Drawings Double Vertical And Right + 0x2561 => 0xa6, // Box Drawings Vertical Single And Left Double + 0x2562 => 0xa6, // Box Drawings Vertical Double And Left Single + 0x2563 => 0xa6, // Box Drawings Double Vertical And Left + 0x2564 => 0x2d, // Box Drawings Down Single And Horizontal Double + 0x2565 => 0x2d, // Box Drawings Down Double And Horizontal Single + 0x2566 => 0x2d, // Box Drawings Double Down And Horizontal + 0x2567 => 0x2d, // Box Drawings Up Single And Horizontal Double + 0x2568 => 0x2d, // Box Drawings Up Double And Horizontal Single + 0x2569 => 0x2d, // Box Drawings Double Up And Horizontal + 0x256a => 0x2b, // Box Drawings Vertical Single And Horizontal Double + 0x256b => 0x2b, // Box Drawings Vertical Double And Horizontal Single + 0x256c => 0x2b, // Box Drawings Double Vertical And Horizontal + 0x2580 => 0xaf, // Upper Half Block + 0x2584 => 0x5f, // Lower Half Block + 0x2588 => 0xa6, // Full Block + 0x258c => 0xa6, // Left Half Block + 0x2590 => 0xa6, // Right Half Block + 0x2591 => 0xa6, // Light Shade + 0x2592 => 0xa6, // Medium Shade + 0x2593 => 0xa6, // Dark Shade + 0x25a0 => 0xa6, // Black Square + 0x263c => 0xa4, // White Sun With Rays + 0x2758 => 0x7c, // Light Vertical Bar + 0x3000 => 0x20, // Ideographic Space + 0x3008 => 0x3c, // Left Angle Bracket + 0x3009 => 0x3e, // Right Angle Bracket + 0x300a => 0xab, // Left Double Angle Bracket + 0x300b => 0xbb, // Right Double Angle Bracket + 0x301a => 0x5b, // Left White Square Bracket + 0x301b => 0x5d, // Right White Square Bracket + 0x30fb => 0xb7, // Katakana Middle Dot + 0xff01 => 0x21, // Fullwidth Exclamation Mark + 0xff02 => 0x22, // Fullwidth Quotation Mark + 0xff03 => 0x23, // Fullwidth Number Sign + 0xff04 => 0x24, // Fullwidth Dollar Sign + 0xff05 => 0x25, // Fullwidth Percent Sign + 0xff06 => 0x26, // Fullwidth Ampersand + 0xff07 => 0x27, // Fullwidth Apostrophe + 0xff08 => 0x28, // Fullwidth Left Parenthesis + 0xff09 => 0x29, // Fullwidth Right Parenthesis + 0xff0a => 0x2a, // Fullwidth Asterisk + 0xff0b => 0x2b, // Fullwidth Plus Sign + 0xff0c => 0x2c, // Fullwidth Comma + 0xff0d => 0x2d, // Fullwidth Hyphen-Minus + 0xff0e => 0x2e, // Fullwidth 
Full Stop + 0xff0f => 0x2f, // Fullwidth Solidus + 0xff10 => 0x30, // Fullwidth Digit Zero + 0xff11 => 0x31, // Fullwidth Digit One + 0xff12 => 0x32, // Fullwidth Digit Two + 0xff13 => 0x33, // Fullwidth Digit Three + 0xff14 => 0x34, // Fullwidth Digit Four + 0xff15 => 0x35, // Fullwidth Digit Five + 0xff16 => 0x36, // Fullwidth Digit Six + 0xff17 => 0x37, // Fullwidth Digit Seven + 0xff18 => 0x38, // Fullwidth Digit Eight + 0xff19 => 0x39, // Fullwidth Digit Nine + 0xff1a => 0x3a, // Fullwidth Colon + 0xff1b => 0x3b, // Fullwidth Semicolon + 0xff1c => 0x3c, // Fullwidth Less-Than Sign + 0xff1d => 0x3d, // Fullwidth Equals Sign + 0xff1e => 0x3e, // Fullwidth Greater-Than Sign + 0xff1f => 0x3f, // Fullwidth Question Mark + 0xff20 => 0x40, // Fullwidth Commercial At + 0xff21 => 0x41, // Fullwidth Latin Capital Letter A + 0xff22 => 0x42, // Fullwidth Latin Capital Letter B + 0xff23 => 0x43, // Fullwidth Latin Capital Letter C + 0xff24 => 0x44, // Fullwidth Latin Capital Letter D + 0xff25 => 0x45, // Fullwidth Latin Capital Letter E + 0xff26 => 0x46, // Fullwidth Latin Capital Letter F + 0xff27 => 0x47, // Fullwidth Latin Capital Letter G + 0xff28 => 0x48, // Fullwidth Latin Capital Letter H + 0xff29 => 0x49, // Fullwidth Latin Capital Letter I + 0xff2a => 0x4a, // Fullwidth Latin Capital Letter J + 0xff2b => 0x4b, // Fullwidth Latin Capital Letter K + 0xff2c => 0x4c, // Fullwidth Latin Capital Letter L + 0xff2d => 0x4d, // Fullwidth Latin Capital Letter M + 0xff2e => 0x4e, // Fullwidth Latin Capital Letter N + 0xff2f => 0x4f, // Fullwidth Latin Capital Letter O + 0xff30 => 0x50, // Fullwidth Latin Capital Letter P + 0xff31 => 0x51, // Fullwidth Latin Capital Letter Q + 0xff32 => 0x52, // Fullwidth Latin Capital Letter R + 0xff33 => 0x53, // Fullwidth Latin Capital Letter S + 0xff34 => 0x54, // Fullwidth Latin Capital Letter T + 0xff35 => 0x55, // Fullwidth Latin Capital Letter U + 0xff36 => 0x56, // Fullwidth Latin Capital Letter V + 0xff37 => 0x57, // Fullwidth Latin Capital Letter W + 0xff38 => 0x58, // Fullwidth Latin Capital Letter X + 0xff39 => 0x59, // Fullwidth Latin Capital Letter Y + 0xff3a => 0x5a, // Fullwidth Latin Capital Letter Z + 0xff3b => 0x5b, // Fullwidth Left Square Bracket + 0xff3c => 0x5c, // Fullwidth Reverse Solidus + 0xff3d => 0x5d, // Fullwidth Right Square Bracket + 0xff3e => 0x5e, // Fullwidth Circumflex Accent + 0xff3f => 0x5f, // Fullwidth Low Line + 0xff40 => 0x60, // Fullwidth Grave Accent + 0xff41 => 0x61, // Fullwidth Latin Small Letter A + 0xff42 => 0x62, // Fullwidth Latin Small Letter B + 0xff43 => 0x63, // Fullwidth Latin Small Letter C + 0xff44 => 0x64, // Fullwidth Latin Small Letter D + 0xff45 => 0x65, // Fullwidth Latin Small Letter E + 0xff46 => 0x66, // Fullwidth Latin Small Letter F + 0xff47 => 0x67, // Fullwidth Latin Small Letter G + 0xff48 => 0x68, // Fullwidth Latin Small Letter H + 0xff49 => 0x69, // Fullwidth Latin Small Letter I + 0xff4a => 0x6a, // Fullwidth Latin Small Letter J + 0xff4b => 0x6b, // Fullwidth Latin Small Letter K + 0xff4c => 0x6c, // Fullwidth Latin Small Letter L + 0xff4d => 0x6d, // Fullwidth Latin Small Letter M + 0xff4e => 0x6e, // Fullwidth Latin Small Letter N + 0xff4f => 0x6f, // Fullwidth Latin Small Letter O + 0xff50 => 0x70, // Fullwidth Latin Small Letter P + 0xff51 => 0x71, // Fullwidth Latin Small Letter Q + 0xff52 => 0x72, // Fullwidth Latin Small Letter R + 0xff53 => 0x73, // Fullwidth Latin Small Letter S + 0xff54 => 0x74, // Fullwidth Latin Small Letter T + 0xff55 => 0x75, // Fullwidth Latin Small Letter U 
+ 0xff56 => 0x76, // Fullwidth Latin Small Letter V + 0xff57 => 0x77, // Fullwidth Latin Small Letter W + 0xff58 => 0x78, // Fullwidth Latin Small Letter X + 0xff59 => 0x79, // Fullwidth Latin Small Letter Y + 0xff5a => 0x7a, // Fullwidth Latin Small Letter Z + 0xff5b => 0x7b, // Fullwidth Left Curly Bracket + 0xff5c => 0x7c, // Fullwidth Vertical Line + 0xff5d => 0x7d, // Fullwidth Right Curly Bracket + 0xff5e => 0x7e, // Fullwidth Tilde + // Not in the best fit mapping, but RC uses these mappings too + 0x2007 => 0xA0, // Figure Space + 0x2008 => ' ', // Punctuation Space + 0x2009 => ' ', // Thin Space + 0x200A => ' ', // Hair Space + 0x2012 => '-', // Figure Dash + 0x2015 => '-', // Horizontal Bar + 0x201B => '\'', // Single High-reversed-9 Quotation Mark + 0x201F => '"', // Double High-reversed-9 Quotation Mark + 0x202F => 0xA0, // Narrow No-Break Space + 0x2033 => '"', // Double Prime + 0x2036 => '"', // Reversed Double Prime + else => null, + }; +} + +test "windows-1252 to utf8" { + var buf = std.ArrayList(u8).init(std.testing.allocator); + defer buf.deinit(); + + const input_windows1252 = "\x81pqrstuvwxyz{|}~\x80\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8e\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9e\x9f\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + const expected_utf8 = "\xc2\x81pqrstuvwxyz{|}~€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"; + + var fbs = std.io.fixedBufferStream(input_windows1252); + const bytes_written = try windows1252ToUtf8Stream(buf.writer(), fbs.reader()); + + try std.testing.expectEqualStrings(expected_utf8, buf.items); + try std.testing.expectEqual(expected_utf8.len, bytes_written); +} From 28f6559947b7acd2ef3c983c90e1215a0d75ae15 Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Tue, 5 Sep 2023 23:00:28 -0700 Subject: [PATCH 04/11] Add the ATLMFC include directory to the libc include dir list https://learn.microsoft.com/en-us/cpp/mfc/mfc-and-atl Note that this include directory gets added to %INCLUDE% by vcvarsall.bat, and is especially crucial when working with resource files (many .rc files within the https://github.com/microsoft/Windows-classic-samples/ set reference files from the ATLMFC include directory). --- src/Compilation.zig | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/Compilation.zig b/src/Compilation.zig index 2135ab824580..fdc154e0ebbe 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -5636,6 +5636,13 @@ fn detectLibCFromLibCInstallation(arena: Allocator, target: Target, lci: *const if (!is_redundant) list.appendAssumeCapacity(lci.sys_include_dir.?); if (target.os.tag == .windows) { + if (std.fs.path.dirname(lci.sys_include_dir.?)) |sys_include_dir_parent| { + // This include path will only exist when the optional "Desktop development with C++" + // is installed. It contains headers, .rc files, and resources. It is especially + // necessary when working with Windows resources. 
+ const atlmfc_dir = try std.fs.path.join(arena, &[_][]const u8{ sys_include_dir_parent, "atlmfc", "include" }); + list.appendAssumeCapacity(atlmfc_dir); + } if (std.fs.path.dirname(lci.include_dir.?)) |include_dir_parent| { const um_dir = try std.fs.path.join(arena, &[_][]const u8{ include_dir_parent, "um" }); list.appendAssumeCapacity(um_dir); From 01fc6a05ef4b4de3d351ebbc64f35eef7f3afa7a Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Tue, 5 Sep 2023 23:33:38 -0700 Subject: [PATCH 05/11] Add a standalone test for Windows resource file compilation --- test/standalone.zig | 4 ++ test/standalone/windows_resources/build.zig | 33 +++++++++++++++ test/standalone/windows_resources/main.zig | 5 +++ .../windows_resources/res/hello.bin | 1 + .../windows_resources/res/sub/sub.rc | 1 + test/standalone/windows_resources/res/zig.ico | Bin 0 -> 179271 bytes test/standalone/windows_resources/res/zig.rc | 40 ++++++++++++++++++ 7 files changed, 84 insertions(+) create mode 100644 test/standalone/windows_resources/build.zig create mode 100644 test/standalone/windows_resources/main.zig create mode 100644 test/standalone/windows_resources/res/hello.bin create mode 100644 test/standalone/windows_resources/res/sub/sub.rc create mode 100644 test/standalone/windows_resources/res/zig.ico create mode 100644 test/standalone/windows_resources/res/zig.rc diff --git a/test/standalone.zig b/test/standalone.zig index 22b9dfba49be..87022f8bfcce 100644 --- a/test/standalone.zig +++ b/test/standalone.zig @@ -194,6 +194,10 @@ pub const build_cases = [_]BuildCase{ .build_root = "test/standalone/load_dynamic_library", .import = @import("standalone/load_dynamic_library/build.zig"), }, + .{ + .build_root = "test/standalone/windows_resources", + .import = @import("standalone/windows_resources/build.zig"), + }, .{ .build_root = "test/standalone/windows_spawn", .import = @import("standalone/windows_spawn/build.zig"), diff --git a/test/standalone/windows_resources/build.zig b/test/standalone/windows_resources/build.zig new file mode 100644 index 000000000000..4c2854ffc951 --- /dev/null +++ b/test/standalone/windows_resources/build.zig @@ -0,0 +1,33 @@ +const std = @import("std"); + +pub fn build(b: *std.Build) void { + const test_step = b.step("test", "Test it"); + b.default_step = test_step; + + const native_target: std.zig.CrossTarget = .{}; + const cross_target = .{ + .cpu_arch = .x86_64, + .os_tag = .windows, + .abi = .gnu, + }; + + add(b, native_target, test_step); + add(b, cross_target, test_step); +} + +fn add(b: *std.Build, target: std.zig.CrossTarget, test_step: *std.Build.Step) void { + const exe = b.addExecutable(.{ + .name = "zig_resource_test", + .root_source_file = .{ .path = "main.zig" }, + .target = target, + .optimize = .Debug, + }); + exe.addWin32ResourceFile(.{ + .file = .{ .path = "res/zig.rc" }, + .flags = &.{"/c65001"}, // UTF-8 code page + }); + + _ = exe.getEmittedBin(); + + test_step.dependOn(&exe.step); +} diff --git a/test/standalone/windows_resources/main.zig b/test/standalone/windows_resources/main.zig new file mode 100644 index 000000000000..f92e18124bb8 --- /dev/null +++ b/test/standalone/windows_resources/main.zig @@ -0,0 +1,5 @@ +const std = @import("std"); + +pub fn main() !void { + std.debug.print("All your {s} are belong to us.\n", .{"codebase"}); +} diff --git a/test/standalone/windows_resources/res/hello.bin b/test/standalone/windows_resources/res/hello.bin new file mode 100644 index 000000000000..dda6eb4b7b86 --- /dev/null +++ b/test/standalone/windows_resources/res/hello.bin @@ -0,0 +1 @@ 
+abcdefg
\ No newline at end of file
diff --git a/test/standalone/windows_resources/res/sub/sub.rc b/test/standalone/windows_resources/res/sub/sub.rc
new file mode 100644
index 000000000000..b15ce306043b
--- /dev/null
+++ b/test/standalone/windows_resources/res/sub/sub.rc
@@ -0,0 +1 @@
+2 RCDATA hello.bin
diff --git a/test/standalone/windows_resources/res/zig.ico b/test/standalone/windows_resources/res/zig.ico
new file mode 100644
index 0000000000000000000000000000000000000000..64610cc33226e0715e4f34ade3509e4083d443ba
GIT binary patch
literal 179271
[base85-encoded binary data omitted]
From: Ryan Liptak
Date: Wed, 6 Sep 2023 02:26:26 -0700
Subject: [PATCH 06/11] Disallow .rc/.res files unless the object format is coff

---
 src/main.zig | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/src/main.zig b/src/main.zig
index 0f90e110e17e..f08e8f643334 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -927,6 +927,7 @@ fn buildOutputType(
     var symbol_wrap_set: std.StringArrayHashMapUnmanaged(void) = .{};
     var c_source_files = std.ArrayList(Compilation.CSourceFile).init(arena);
     var rc_source_files = std.ArrayList(Compilation.RcSourceFile).init(arena);
+    var res_files = std.ArrayList(Compilation.LinkObject).init(arena);
     var link_objects = std.ArrayList(Compilation.LinkObject).init(arena);
     var framework_dirs = std.ArrayList([]const u8).init(arena);
     var frameworks: std.StringArrayHashMapUnmanaged(Framework) = .{};
@@ -1602,7 +1603,8 @@ fn buildOutputType(
             }
         } else switch (file_ext orelse Compilation.classifyFileExt(arg)) {
-            .object, .static_library,
.shared_library, .res => try link_objects.append(.{ .path = arg }), + .object, .static_library, .shared_library => try link_objects.append(.{ .path = arg }), + .res => try res_files.append(.{ .path = arg }), .assembly, .assembly_with_cpp, .c, .cpp, .h, .ll, .bc, .m, .mm, .cu => { try c_source_files.append(.{ .src_path = arg, @@ -1702,7 +1704,11 @@ fn buildOutputType( .ext = file_ext, // duped while parsing the args. }); }, - .unknown, .shared_library, .object, .static_library, .res => try link_objects.append(.{ + .unknown, .shared_library, .object, .static_library => try link_objects.append(.{ + .path = it.only_arg, + .must_link = must_link, + }), + .res => try res_files.append(.{ .path = it.only_arg, .must_link = must_link, }), @@ -2473,6 +2479,12 @@ fn buildOutputType( } else if (emit_bin == .yes) { const basename = fs.path.basename(emit_bin.yes); break :blk basename[0 .. basename.len - fs.path.extension(basename).len]; + } else if (rc_source_files.items.len >= 1) { + const basename = fs.path.basename(rc_source_files.items[0].src_path); + break :blk basename[0 .. basename.len - fs.path.extension(basename).len]; + } else if (res_files.items.len >= 1) { + const basename = fs.path.basename(res_files.items[0].path); + break :blk basename[0 .. basename.len - fs.path.extension(basename).len]; } else if (show_builtin) { break :blk "builtin"; } else if (arg_mode == .run) { @@ -2551,6 +2563,21 @@ fn buildOutputType( link_libcpp = true; } + if (target_info.target.ofmt == .coff) { + // Now that we know the target supports resources, + // we can add the res files as link objects. + for (res_files.items) |res_file| { + try link_objects.append(res_file); + } + } else { + if (rc_source_files.items.len != 0) { + fatal("rc files are not allowed unless the target object format is coff (Windows/UEFI)", .{}); + } + if (res_files.items.len != 0) { + fatal("res files are not allowed unless the target object format is coff (Windows/UEFI)", .{}); + } + } + if (target_info.target.cpu.arch.isWasm()) blk: { if (single_threaded == null) { single_threaded = true; From a94d830a48ce82ea3fdf29c6c604f0972f095e61 Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Wed, 6 Sep 2023 02:26:48 -0700 Subject: [PATCH 07/11] addWin32ResourceFile: Ignore the resource file if the target object format is not coff --- lib/std/Build/Step/Compile.zig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/std/Build/Step/Compile.zig b/lib/std/Build/Step/Compile.zig index f721b52c8283..8fcdd6ec0597 100644 --- a/lib/std/Build/Step/Compile.zig +++ b/lib/std/Build/Step/Compile.zig @@ -932,6 +932,10 @@ pub fn addCSourceFile(self: *Compile, source: CSourceFile) void { } pub fn addWin32ResourceFile(self: *Compile, source: RcSourceFile) void { + // Only the PE/COFF format has a Resource Table, so for any other target + // the resource file is just ignored. 
+ if (self.target.getObjectFormat() != .coff) return; + const b = self.step.owner; const rc_source_file = b.allocator.create(RcSourceFile) catch @panic("OOM"); rc_source_file.* = source.dupe(b); From 4fac7a5263b9b14f63d2459f795ac9d2eee51c85 Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Fri, 8 Sep 2023 21:26:54 -0700 Subject: [PATCH 08/11] Only populate rc_include_dirs if there are .rc files in the compilation --- src/Compilation.zig | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Compilation.zig b/src/Compilation.zig index fdc154e0ebbe..17d298b66de1 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -1001,13 +1001,15 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { options.libc_installation, ); - // .rc preprocessor needs to know the libc dirs even if we are not linking libc const rc_dirs = try detectLibCIncludeDirs( arena, options.zig_lib_directory.path.?, options.target, options.is_native_abi, - true, + // Set "link libc" to true here whenever there are rc files to compile, since + // the .rc preprocessor will need to know the libc include dirs even if we + // are not linking libc + options.rc_source_files.len > 0, options.libc_installation, ); From 0168ed7bf1c7fc5010fa82eaf33ed1b3af817709 Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Mon, 11 Sep 2023 23:05:48 -0700 Subject: [PATCH 09/11] rc compilation: Use MSVC includes if present, fallback to mingw The include directories used when preprocessing .rc files are now separate from the target, and by default will use the system MSVC include paths if the MSVC + Windows SDK are present, otherwise it will fall back to the MinGW includes distributed with Zig. This default behavior can be overridden by the `-rcincludes` option (possible values: any (the default), msvc, gnu, or none). This behavior is useful because Windows resource files may `#include` files that only exist with in the MSVC include dirs (e.g. in `/atlmfc/include` which can contain other .rc files, images, icons, cursors, etc). So, by defaulting to the `any` behavior (MSVC if present, MinGW fallback), users will by default get behavior that is most-likely-to-work. It also should be okay that the include directories used when compiling .rc files differ from the include directories used when compiling the main binary, since the .res format is not dependent on anything ABI-related. The only relevant differences would be things like `#define` constants being different values in the MinGW headers vs the MSVC headers, but any such differences would likely be a MinGW bug. --- lib/std/Build/Step/Compile.zig | 13 +++++ src/Compilation.zig | 86 ++++++++++++++++++++++++++++++---- src/main.zig | 16 +++++++ 3 files changed, 106 insertions(+), 9 deletions(-) diff --git a/lib/std/Build/Step/Compile.zig b/lib/std/Build/Step/Compile.zig index 8fcdd6ec0597..a4f1a279ed69 100644 --- a/lib/std/Build/Step/Compile.zig +++ b/lib/std/Build/Step/Compile.zig @@ -90,6 +90,14 @@ is_linking_libc: bool, is_linking_libcpp: bool, vcpkg_bin_path: ?[]const u8 = null, +// keep in sync with src/Compilation.zig:RcIncludes +/// Behavior of automatic detection of include directories when compiling .rc files. +/// any: Use MSVC if available, fall back to MinGW. +/// msvc: Use MSVC include paths (must be present on the system). +/// gnu: Use MinGW include paths (distributed with Zig). +/// none: Do not use any autodetected include paths. 
+rc_includes: enum { any, msvc, gnu, none } = .any, + installed_path: ?[]const u8, /// Base address for an executable image. @@ -1949,6 +1957,11 @@ fn make(step: *Step, prog_node: *std.Progress.Node) !void { } } + if (self.rc_includes != .any) { + try zig_args.append("-rcincludes"); + try zig_args.append(@tagName(self.rc_includes)); + } + try addFlag(&zig_args, "valgrind", self.valgrind_support); try addFlag(&zig_args, "each-lib-rpath", self.each_lib_rpath); diff --git a/src/Compilation.zig b/src/Compilation.zig index 17d298b66de1..6a078e8581a8 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -243,6 +243,17 @@ pub const RcSourceFile = struct { extra_flags: []const []const u8 = &.{}, }; +pub const RcIncludes = enum { + /// Use MSVC if available, fall back to MinGW. + any, + /// Use MSVC include paths (MSVC install + Windows SDK, must be present on the system). + msvc, + /// Use MinGW include paths (distributed with Zig). + gnu, + /// Do not use any autodetected include paths. + none, +}; + const Job = union(enum) { /// Write the constant value for a Decl to the output file. codegen_decl: Module.Decl.Index, @@ -568,6 +579,7 @@ pub const InitOptions = struct { symbol_wrap_set: std.StringArrayHashMapUnmanaged(void) = .{}, c_source_files: []const CSourceFile = &[0]CSourceFile{}, rc_source_files: []const RcSourceFile = &[0]RcSourceFile{}, + rc_includes: RcIncludes = .any, link_objects: []LinkObject = &[0]LinkObject{}, framework_dirs: []const []const u8 = &[0][]const u8{}, frameworks: []const Framework = &.{}, @@ -1001,16 +1013,9 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { options.libc_installation, ); - const rc_dirs = try detectLibCIncludeDirs( + const rc_dirs = try detectWin32ResourceIncludeDirs( arena, - options.zig_lib_directory.path.?, - options.target, - options.is_native_abi, - // Set "link libc" to true here whenever there are rc files to compile, since - // the .rc preprocessor will need to know the libc include dirs even if we - // are not linking libc - options.rc_source_files.len > 0, - options.libc_installation, + options, ); const sysroot = options.sysroot orelse libc_dirs.sysroot; @@ -2450,6 +2455,8 @@ fn addNonIncrementalStuffToCacheManifest(comp: *Compilation, man: *Cache.Manifes man.hash.addListOfBytes(key.src.extra_flags); } + man.hash.addListOfBytes(comp.rc_include_dir_list); + cache_helpers.addOptionalEmitLoc(&man.hash, comp.emit_asm); cache_helpers.addOptionalEmitLoc(&man.hash, comp.emit_llvm_ir); cache_helpers.addOptionalEmitLoc(&man.hash, comp.emit_llvm_bc); @@ -5156,6 +5163,67 @@ fn failCObjWithOwnedErrorMsg( return error.AnalysisFail; } +/// The include directories used when preprocessing .rc files are separate from the +/// target. Which include directories are used is determined by `options.rc_includes`. +/// +/// Note: It should be okay that the include directories used when compiling .rc +/// files differ from the include directories used when compiling the main +/// binary, since the .res format is not dependent on anything ABI-related. The +/// only relevant differences would be things like `#define` constants being +/// different in the MinGW headers vs the MSVC headers, but any such +/// differences would likely be a MinGW bug. 
+fn detectWin32ResourceIncludeDirs(arena: Allocator, options: InitOptions) !LibCDirs { + // Set the includes to .none here when there are no rc files to compile + var includes = if (options.rc_source_files.len > 0) options.rc_includes else .none; + if (builtin.target.os.tag != .windows) { + switch (includes) { + // MSVC can't be found when the host isn't Windows, so short-circuit. + .msvc => return error.WindowsSdkNotFound, + // Skip straight to gnu since we won't be able to detect MSVC on non-Windows hosts. + .any => includes = .gnu, + .none, .gnu => {}, + } + } + while (true) { + switch (includes) { + .any, .msvc => return detectLibCIncludeDirs( + arena, + options.zig_lib_directory.path.?, + .{ + .cpu = options.target.cpu, + .os = options.target.os, + .abi = .msvc, + .ofmt = options.target.ofmt, + }, + options.is_native_abi, + // The .rc preprocessor will need to know the libc include dirs even if we + // are not linking libc, so force 'link_libc' to true + true, + options.libc_installation, + ) catch |err| { + if (includes == .any) { + // fall back to mingw + includes = .gnu; + continue; + } + return err; + }, + .gnu => return detectLibCFromBuilding(arena, options.zig_lib_directory.path.?, .{ + .cpu = options.target.cpu, + .os = options.target.os, + .abi = .gnu, + .ofmt = options.target.ofmt, + }), + .none => return LibCDirs{ + .libc_include_dir_list = &[0][]u8{}, + .libc_installation = null, + .libc_framework_dir_list = &.{}, + .sysroot = null, + }, + } + } +} + fn failWin32Resource(comp: *Compilation, win32_resource: *Win32Resource, comptime format: []const u8, args: anytype) SemaError { @setCold(true); var bundle: ErrorBundle.Wip = undefined; diff --git a/src/main.zig b/src/main.zig index f08e8f643334..2913ac2ea2a6 100644 --- a/src/main.zig +++ b/src/main.zig @@ -473,6 +473,11 @@ const usage_build_generic = \\ --libc [file] Provide a file which specifies libc paths \\ -cflags [flags] -- Set extra flags for the next positional C source files \\ -rcflags [flags] -- Set extra flags for the next positional .rc source files + \\ -rcincludes=[type] Set the type of includes to use when compiling .rc source files + \\ any (default) Use msvc if available, fall back to gnu + \\ msvc Use msvc include paths (must be present on the system) + \\ gnu Use mingw include paths (distributed with Zig) + \\ none Do not use any autodetected include paths \\ \\Link Options: \\ -l[lib], --library [lib] Link against system library (only if actually used) @@ -927,6 +932,7 @@ fn buildOutputType( var symbol_wrap_set: std.StringArrayHashMapUnmanaged(void) = .{}; var c_source_files = std.ArrayList(Compilation.CSourceFile).init(arena); var rc_source_files = std.ArrayList(Compilation.RcSourceFile).init(arena); + var rc_includes: Compilation.RcIncludes = .any; var res_files = std.ArrayList(Compilation.LinkObject).init(arena); var link_objects = std.ArrayList(Compilation.LinkObject).init(arena); var framework_dirs = std.ArrayList([]const u8).init(arena); @@ -1046,6 +1052,10 @@ fn buildOutputType( if (mem.eql(u8, next_arg, "--")) break; try extra_cflags.append(next_arg); } + } else if (mem.eql(u8, arg, "-rcincludes")) { + rc_includes = parseRcIncludes(args_iter.nextOrFatal()); + } else if (mem.startsWith(u8, arg, "-rcincludes=")) { + rc_includes = parseRcIncludes(arg["-rcincludes=".len..]); } else if (mem.eql(u8, arg, "-rcflags")) { extra_rcflags.shrinkRetainingCapacity(0); while (true) { @@ -3369,6 +3379,7 @@ fn buildOutputType( .symbol_wrap_set = symbol_wrap_set, .c_source_files = c_source_files.items, 
        .rc_source_files = rc_source_files.items,
+        .rc_includes = rc_includes,
         .link_objects = link_objects.items,
         .framework_dirs = framework_dirs.items,
         .frameworks = resolved_frameworks.items,
@@ -6532,3 +6543,8 @@ fn accessFrameworkPath(
     return false;
 }
+
+fn parseRcIncludes(arg: []const u8) Compilation.RcIncludes {
+    return std.meta.stringToEnum(Compilation.RcIncludes, arg) orelse
+        fatal("unsupported rc includes type: '{s}'", .{arg});
+}

From 471f279cd621cef7c826fe27f1f8d0c585cc76e2 Mon Sep 17 00:00:00 2001
From: Ryan Liptak
Date: Sun, 17 Sep 2023 13:09:16 -0700
Subject: [PATCH 10/11] Fix rc preprocessing when using the MinGW includes and targeting the GNU abi

Also update the standalone test so that this failure would have been
detected on any host system.
---
 src/Compilation.zig                         | 13 +++++++++++++
 test/standalone/windows_resources/build.zig | 13 ++++++++++---
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/src/Compilation.zig b/src/Compilation.zig
index 6a078e8581a8..a5012e71c459 100644
--- a/src/Compilation.zig
+++ b/src/Compilation.zig
@@ -4513,6 +4513,19 @@ fn updateWin32Resource(comp: *Compilation, win32_resource: *Win32Resource, win32
         "-fms-compatibility", // Allow things like "header.h" to be resolved relative to the 'root' .rc file, among other things
         "-DRC_INVOKED", // https://learn.microsoft.com/en-us/windows/win32/menurc/predefined-macros
     });
+    // Using -fms-compatibility and targeting the GNU abi interact in a strange way:
+    // - Targeting the GNU abi stops _MSC_VER from being defined
+    // - Passing -fms-compatibility stops __GNUC__ from being defined
+    // Having neither defined is a problem for things like MinGW's
+    // vadefs.h, which will fail during preprocessing if neither is defined.
+    // So, when targeting the GNU abi, we need to force __GNUC__ to be defined.
+    //
+    // TODO: This is a workaround that should be removed if possible.
+ if (comp.getTarget().isGnu()) { + // This is the same default gnuc version that Clang uses: + // https://github.com/llvm/llvm-project/blob/4b5366c9512aa273a5272af1d833961e1ed156e7/clang/lib/Driver/ToolChains/Clang.cpp#L6738 + try argv.append("-fgnuc-version=4.2.1"); + } for (options.extra_include_paths.items) |extra_include_path| { try argv.append("--include-directory"); try argv.append(extra_include_path); diff --git a/test/standalone/windows_resources/build.zig b/test/standalone/windows_resources/build.zig index 4c2854ffc951..9476fa7839db 100644 --- a/test/standalone/windows_resources/build.zig +++ b/test/standalone/windows_resources/build.zig @@ -11,11 +11,14 @@ pub fn build(b: *std.Build) void { .abi = .gnu, }; - add(b, native_target, test_step); - add(b, cross_target, test_step); + add(b, native_target, .any, test_step); + add(b, cross_target, .any, test_step); + + add(b, native_target, .gnu, test_step); + add(b, cross_target, .gnu, test_step); } -fn add(b: *std.Build, target: std.zig.CrossTarget, test_step: *std.Build.Step) void { +fn add(b: *std.Build, target: std.zig.CrossTarget, rc_includes: enum { any, gnu }, test_step: *std.Build.Step) void { const exe = b.addExecutable(.{ .name = "zig_resource_test", .root_source_file = .{ .path = "main.zig" }, @@ -26,6 +29,10 @@ fn add(b: *std.Build, target: std.zig.CrossTarget, test_step: *std.Build.Step) v .file = .{ .path = "res/zig.rc" }, .flags = &.{"/c65001"}, // UTF-8 code page }); + exe.rc_includes = switch (rc_includes) { + .any => .any, + .gnu => .gnu, + }; _ = exe.getEmittedBin(); From 572956ce240478ed6bdc5d98237e25fea0bab3e5 Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Thu, 21 Sep 2023 21:30:21 -0700 Subject: [PATCH 11/11] classifyFileExt: Use case-insensitive matching for .rc and .res --- src/Compilation.zig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Compilation.zig b/src/Compilation.zig index a5012e71c459..caff912f01ff 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -5573,9 +5573,9 @@ pub fn classifyFileExt(filename: []const u8) FileExt { return .cu; } else if (mem.endsWith(u8, filename, ".def")) { return .def; - } else if (mem.endsWith(u8, filename, ".rc")) { + } else if (std.ascii.endsWithIgnoreCase(filename, ".rc")) { return .rc; - } else if (mem.endsWith(u8, filename, ".res")) { + } else if (std.ascii.endsWithIgnoreCase(filename, ".res")) { return .res; } else { return .unknown;