diff --git a/lib/std/Build/Step/Compile.zig b/lib/std/Build/Step/Compile.zig index a3a6b61b611d..a4f1a279ed69 100644 --- a/lib/std/Build/Step/Compile.zig +++ b/lib/std/Build/Step/Compile.zig @@ -90,6 +90,14 @@ is_linking_libc: bool, is_linking_libcpp: bool, vcpkg_bin_path: ?[]const u8 = null, +// keep in sync with src/Compilation.zig:RcIncludes +/// Behavior of automatic detection of include directories when compiling .rc files. +/// any: Use MSVC if available, fall back to MinGW. +/// msvc: Use MSVC include paths (must be present on the system). +/// gnu: Use MinGW include paths (distributed with Zig). +/// none: Do not use any autodetected include paths. +rc_includes: enum { any, msvc, gnu, none } = .any, + installed_path: ?[]const u8, /// Base address for an executable image. @@ -221,6 +229,26 @@ pub const CSourceFile = struct { } }; +pub const RcSourceFile = struct { + file: LazyPath, + /// Any option that rc.exe accepts will work here, with the exception of: + /// - `/fo`: The output filename is set by the build system + /// - Any MUI-related option + /// https://learn.microsoft.com/en-us/windows/win32/menurc/using-rc-the-rc-command-line- + /// + /// Implicitly defined options: + /// /x (ignore the INCLUDE environment variable) + /// /D_DEBUG or /DNDEBUG depending on the optimization mode + flags: []const []const u8 = &.{}, + + pub fn dupe(self: RcSourceFile, b: *std.Build) RcSourceFile { + return .{ + .file = self.file.dupe(b), + .flags = b.dupeStrings(self.flags), + }; + } +}; + pub const LinkObject = union(enum) { static_path: LazyPath, other_step: *Compile, @@ -228,6 +256,7 @@ pub const LinkObject = union(enum) { assembly_file: LazyPath, c_source_file: *CSourceFile, c_source_files: *CSourceFiles, + win32_resource_file: *RcSourceFile, }; pub const SystemLib = struct { @@ -910,6 +939,18 @@ pub fn addCSourceFile(self: *Compile, source: CSourceFile) void { source.file.addStepDependencies(&self.step); } +pub fn addWin32ResourceFile(self: *Compile, source: 
RcSourceFile) void { + // Only the PE/COFF format has a Resource Table, so for any other target + // the resource file is just ignored. + if (self.target.getObjectFormat() != .coff) return; + + const b = self.step.owner; + const rc_source_file = b.allocator.create(RcSourceFile) catch @panic("OOM"); + rc_source_file.* = source.dupe(b); + self.link_objects.append(.{ .win32_resource_file = rc_source_file }) catch @panic("OOM"); + source.file.addStepDependencies(&self.step); +} + pub fn setVerboseLink(self: *Compile, value: bool) void { self.verbose_link = value; } @@ -1358,6 +1399,7 @@ fn make(step: *Step, prog_node: *std.Progress.Node) !void { try transitive_deps.add(self.link_objects.items); var prev_has_cflags = false; + var prev_has_rcflags = false; var prev_search_strategy: SystemLib.SearchStrategy = .paths_first; var prev_preferred_link_mode: std.builtin.LinkMode = .Dynamic; @@ -1500,6 +1542,24 @@ fn make(step: *Step, prog_node: *std.Progress.Node) !void { try zig_args.append(b.pathFromRoot(file)); } }, + + .win32_resource_file => |rc_source_file| { + if (rc_source_file.flags.len == 0) { + if (prev_has_rcflags) { + try zig_args.append("-rcflags"); + try zig_args.append("--"); + prev_has_rcflags = false; + } + } else { + try zig_args.append("-rcflags"); + for (rc_source_file.flags) |arg| { + try zig_args.append(arg); + } + try zig_args.append("--"); + prev_has_rcflags = true; + } + try zig_args.append(rc_source_file.file.getPath(b)); + }, } } @@ -1897,6 +1957,11 @@ fn make(step: *Step, prog_node: *std.Progress.Node) !void { } } + if (self.rc_includes != .any) { + try zig_args.append("-rcincludes"); + try zig_args.append(@tagName(self.rc_includes)); + } + try addFlag(&zig_args, "valgrind", self.valgrind_support); try addFlag(&zig_args, "each-lib-rpath", self.each_lib_rpath); diff --git a/lib/std/zig/ErrorBundle.zig b/lib/std/zig/ErrorBundle.zig index 24d304543680..141cdb119531 100644 --- a/lib/std/zig/ErrorBundle.zig +++ b/lib/std/zig/ErrorBundle.zig @@ -421,7 
+421,7 @@ pub const Wip = struct { _ = try addExtra(wip, rt); } - pub fn addBundle(wip: *Wip, other: ErrorBundle) !void { + pub fn addBundleAsNotes(wip: *Wip, other: ErrorBundle) !void { const gpa = wip.gpa; try wip.string_bytes.ensureUnusedCapacity(gpa, other.string_bytes.len); @@ -436,6 +436,21 @@ pub const Wip = struct { } } + pub fn addBundleAsRoots(wip: *Wip, other: ErrorBundle) !void { + const gpa = wip.gpa; + + try wip.string_bytes.ensureUnusedCapacity(gpa, other.string_bytes.len); + try wip.extra.ensureUnusedCapacity(gpa, other.extra.len); + + const other_list = other.getMessages(); + + try wip.root_list.ensureUnusedCapacity(gpa, other_list.len); + for (other_list) |other_msg| { + // The ensureUnusedCapacity calls above guarantee this. + wip.root_list.appendAssumeCapacity(wip.addOtherMessage(other, other_msg) catch unreachable); + } + } + pub fn reserveNotes(wip: *Wip, notes_len: u32) !u32 { try wip.extra.ensureUnusedCapacity(wip.gpa, notes_len + notes_len * @typeInfo(ErrorBundle.ErrorMessage).Struct.fields.len); @@ -474,7 +489,10 @@ pub const Wip = struct { .span_start = other_sl.span_start, .span_main = other_sl.span_main, .span_end = other_sl.span_end, - .source_line = try wip.addString(other.nullTerminatedString(other_sl.source_line)), + .source_line = if (other_sl.source_line != 0) + try wip.addString(other.nullTerminatedString(other_sl.source_line)) + else + 0, .reference_trace_len = other_sl.reference_trace_len, }); diff --git a/src/Compilation.zig b/src/Compilation.zig index 0150d615e37f..caff912f01ff 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -39,6 +39,7 @@ const libtsan = @import("libtsan.zig"); const Zir = @import("Zir.zig"); const Autodoc = @import("Autodoc.zig"); const Color = @import("main.zig").Color; +const resinator = @import("resinator.zig"); /// General-purpose allocator. Used for both temporary and long-term storage. 
gpa: Allocator, @@ -46,6 +47,7 @@ gpa: Allocator, arena_state: std.heap.ArenaAllocator.State, bin_file: *link.File, c_object_table: std.AutoArrayHashMapUnmanaged(*CObject, void) = .{}, +win32_resource_table: std.AutoArrayHashMapUnmanaged(*Win32Resource, void) = .{}, /// This is a pointer to a local variable inside `update()`. whole_cache_manifest: ?*Cache.Manifest = null, whole_cache_manifest_mutex: std.Thread.Mutex = .{}, @@ -60,6 +62,10 @@ anon_work_queue: std.fifo.LinearFifo(Job, .Dynamic), /// gets linked with the Compilation. c_object_work_queue: std.fifo.LinearFifo(*CObject, .Dynamic), +/// These jobs are to invoke the RC compiler to create a compiled resource file (.res), which +/// gets linked with the Compilation. +win32_resource_work_queue: std.fifo.LinearFifo(*Win32Resource, .Dynamic), + /// These jobs are to tokenize, parse, and astgen files, which may be outdated /// since the last compilation, as well as scan for `@import` and queue up /// additional jobs corresponding to those new files. @@ -73,6 +79,10 @@ embed_file_work_queue: std.fifo.LinearFifo(*Module.EmbedFile, .Dynamic), /// This data is accessed by multiple threads and is protected by `mutex`. failed_c_objects: std.AutoArrayHashMapUnmanaged(*CObject, *CObject.ErrorMsg) = .{}, +/// The ErrorBundle memory is owned by the `Win32Resource`, using Compilation's general purpose allocator. +/// This data is accessed by multiple threads and is protected by `mutex`. +failed_win32_resources: std.AutoArrayHashMapUnmanaged(*Win32Resource, ErrorBundle) = .{}, + /// Miscellaneous things that can fail. misc_failures: std.AutoArrayHashMapUnmanaged(MiscTask, MiscError) = .{}, @@ -109,6 +119,7 @@ last_update_was_cache_hit: bool = false, c_source_files: []const CSourceFile, clang_argv: []const []const u8, +rc_source_files: []const RcSourceFile, cache_parent: *Cache, /// Path to own executable for invoking `zig clang`. 
self_exe_path: ?[]const u8, @@ -125,6 +136,7 @@ local_cache_directory: Directory, global_cache_directory: Directory, libc_include_dir_list: []const []const u8, libc_framework_dir_list: []const []const u8, +rc_include_dir_list: []const []const u8, thread_pool: *ThreadPool, /// Populated when we build the libc++ static library. A Job to build this is placed in the queue @@ -225,6 +237,23 @@ pub const CSourceFile = struct { ext: ?FileExt = null, }; +/// For passing to resinator. +pub const RcSourceFile = struct { + src_path: []const u8, + extra_flags: []const []const u8 = &.{}, +}; + +pub const RcIncludes = enum { + /// Use MSVC if available, fall back to MinGW. + any, + /// Use MSVC include paths (MSVC install + Windows SDK, must be present on the system). + msvc, + /// Use MinGW include paths (distributed with Zig). + gnu, + /// Do not use any autodetected include paths. + none, +}; + const Job = union(enum) { /// Write the constant value for a Decl to the output file. codegen_decl: Module.Decl.Index, @@ -326,6 +355,50 @@ pub const CObject = struct { } }; +pub const Win32Resource = struct { + /// Relative to cwd. Owned by arena. + src: RcSourceFile, + status: union(enum) { + new, + success: struct { + /// The outputted result. Owned by gpa. + res_path: []u8, + /// This is a file system lock on the cache hash manifest representing this + /// object. It prevents other invocations of the Zig compiler from interfering + /// with this object until released. + lock: Cache.Lock, + }, + /// There will be a corresponding ErrorBundle in Compilation.failed_win32_resources. + failure, + /// A transient failure happened when trying to compile the resource file; it may + /// succeed if we try again. There may be a corresponding ErrorBundle in + /// Compilation.failed_win32_resources. If there is not, the failure is out of memory. + failure_retryable, + }, + + /// Returns true if there was failure. 
+ pub fn clearStatus(self: *Win32Resource, gpa: Allocator) bool { + switch (self.status) { + .new => return false, + .failure, .failure_retryable => { + self.status = .new; + return true; + }, + .success => |*success| { + gpa.free(success.res_path); + success.lock.release(); + self.status = .new; + return false; + }, + } + } + + pub fn destroy(self: *Win32Resource, gpa: Allocator) void { + _ = self.clearStatus(gpa); + gpa.destroy(self); + } +}; + pub const MiscTask = enum { write_builtin_zig, glibc_crt_file, @@ -505,6 +578,8 @@ pub const InitOptions = struct { rpath_list: []const []const u8 = &[0][]const u8{}, symbol_wrap_set: std.StringArrayHashMapUnmanaged(void) = .{}, c_source_files: []const CSourceFile = &[0]CSourceFile{}, + rc_source_files: []const RcSourceFile = &[0]RcSourceFile{}, + rc_includes: RcIncludes = .any, link_objects: []LinkObject = &[0]LinkObject{}, framework_dirs: []const []const u8 = &[0][]const u8{}, frameworks: []const Framework = &.{}, @@ -938,6 +1013,11 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { options.libc_installation, ); + const rc_dirs = try detectWin32ResourceIncludeDirs( + arena, + options, + ); + const sysroot = options.sysroot orelse libc_dirs.sysroot; const must_pie = target_util.requiresPIE(options.target); @@ -1591,16 +1671,19 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { .work_queue = std.fifo.LinearFifo(Job, .Dynamic).init(gpa), .anon_work_queue = std.fifo.LinearFifo(Job, .Dynamic).init(gpa), .c_object_work_queue = std.fifo.LinearFifo(*CObject, .Dynamic).init(gpa), + .win32_resource_work_queue = std.fifo.LinearFifo(*Win32Resource, .Dynamic).init(gpa), .astgen_work_queue = std.fifo.LinearFifo(*Module.File, .Dynamic).init(gpa), .embed_file_work_queue = std.fifo.LinearFifo(*Module.EmbedFile, .Dynamic).init(gpa), .keep_source_files_loaded = options.keep_source_files_loaded, .use_clang = use_clang, .clang_argv = options.clang_argv, .c_source_files = options.c_source_files, + 
.rc_source_files = options.rc_source_files, .cache_parent = cache, .self_exe_path = options.self_exe_path, .libc_include_dir_list = libc_dirs.libc_include_dir_list, .libc_framework_dir_list = libc_dirs.libc_framework_dir_list, + .rc_include_dir_list = rc_dirs.libc_include_dir_list, .sanitize_c = sanitize_c, .thread_pool = options.thread_pool, .clang_passthrough_mode = options.clang_passthrough_mode, @@ -1647,6 +1730,19 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { comp.c_object_table.putAssumeCapacityNoClobber(c_object, {}); } + // Add a `Win32Resource` for each `rc_source_files`. + try comp.win32_resource_table.ensureTotalCapacity(gpa, options.rc_source_files.len); + for (options.rc_source_files) |rc_source_file| { + const win32_resource = try gpa.create(Win32Resource); + errdefer gpa.destroy(win32_resource); + + win32_resource.* = .{ + .status = .{ .new = {} }, + .src = rc_source_file, + }; + comp.win32_resource_table.putAssumeCapacityNoClobber(win32_resource, {}); + } + const have_bin_emit = comp.bin_file.options.emit != null or comp.whole_bin_sub_path != null; if (have_bin_emit and !comp.bin_file.options.skip_linker_dependencies and target.ofmt != .c) { @@ -1804,6 +1900,7 @@ pub fn destroy(self: *Compilation) void { self.work_queue.deinit(); self.anon_work_queue.deinit(); self.c_object_work_queue.deinit(); + self.win32_resource_work_queue.deinit(); self.astgen_work_queue.deinit(); self.embed_file_work_queue.deinit(); @@ -1852,6 +1949,16 @@ pub fn destroy(self: *Compilation) void { } self.failed_c_objects.deinit(gpa); + for (self.win32_resource_table.keys()) |key| { + key.destroy(gpa); + } + self.win32_resource_table.deinit(gpa); + + for (self.failed_win32_resources.values()) |*value| { + value.deinit(gpa); + } + self.failed_win32_resources.deinit(gpa); + for (self.lld_errors.items) |*lld_error| { lld_error.deinit(gpa); } @@ -2014,6 +2121,13 @@ pub fn update(comp: *Compilation, main_progress_node: *std.Progress.Node) !void 
comp.c_object_work_queue.writeItemAssumeCapacity(key); } + // For compiling Win32 resources, we rely on the cache hash system to avoid duplicating work. + // Add a Job for each Win32 resource file. + try comp.win32_resource_work_queue.ensureUnusedCapacity(comp.win32_resource_table.count()); + for (comp.win32_resource_table.keys()) |key| { + comp.win32_resource_work_queue.writeItemAssumeCapacity(key); + } + if (comp.bin_file.options.module) |module| { module.compile_log_text.shrinkAndFree(module.gpa, 0); module.generation += 1; @@ -2336,6 +2450,13 @@ fn addNonIncrementalStuffToCacheManifest(comp: *Compilation, man: *Cache.Manifes man.hash.addListOfBytes(key.src.extra_flags); } + for (comp.win32_resource_table.keys()) |key| { + _ = try man.addFile(key.src.src_path, null); + man.hash.addListOfBytes(key.src.extra_flags); + } + + man.hash.addListOfBytes(comp.rc_include_dir_list); + cache_helpers.addOptionalEmitLoc(&man.hash, comp.emit_asm); cache_helpers.addOptionalEmitLoc(&man.hash, comp.emit_llvm_ir); cache_helpers.addOptionalEmitLoc(&man.hash, comp.emit_llvm_bc); @@ -2571,8 +2692,14 @@ pub fn makeBinFileWritable(self: *Compilation) !void { /// This function is temporally single-threaded. 
pub fn totalErrorCount(self: *Compilation) u32 { - var total: usize = self.failed_c_objects.count() + self.misc_failures.count() + - @intFromBool(self.alloc_failure_occurred) + self.lld_errors.items.len; + var total: usize = self.failed_c_objects.count() + + self.misc_failures.count() + + @intFromBool(self.alloc_failure_occurred) + + self.lld_errors.items.len; + + for (self.failed_win32_resources.values()) |errs| { + total += errs.errorMessageCount(); + } if (self.bin_file.options.module) |module| { total += module.failed_exports.count(); @@ -2664,6 +2791,13 @@ pub fn getAllErrorsAlloc(self: *Compilation) !ErrorBundle { } } + { + var it = self.failed_win32_resources.iterator(); + while (it.next()) |entry| { + try bundle.addBundleAsRoots(entry.value_ptr.*); + } + } + for (self.lld_errors.items) |lld_error| { const notes_len = @as(u32, @intCast(lld_error.context_lines.len)); @@ -2683,7 +2817,7 @@ pub fn getAllErrorsAlloc(self: *Compilation) !ErrorBundle { .msg = try bundle.addString(value.msg), .notes_len = if (value.children) |b| b.errorMessageCount() else 0, }); - if (value.children) |b| try bundle.addBundle(b); + if (value.children) |b| try bundle.addBundleAsNotes(b); } if (self.alloc_failure_occurred) { try bundle.addRootErrorMessage(.{ @@ -3082,6 +3216,9 @@ pub fn performAllTheWork( var c_obj_prog_node = main_progress_node.start("Compile C Objects", comp.c_source_files.len); defer c_obj_prog_node.end(); + var win32_resource_prog_node = main_progress_node.start("Compile Win32 Resources", comp.rc_source_files.len); + defer win32_resource_prog_node.end(); + var embed_file_prog_node = main_progress_node.start("Detect @embedFile updates", comp.embed_file_work_queue.count); defer embed_file_prog_node.end(); @@ -3130,6 +3267,13 @@ pub fn performAllTheWork( comp, c_object, &c_obj_prog_node, &comp.work_queue_wait_group, }); } + + while (comp.win32_resource_work_queue.readItem()) |win32_resource| { + comp.work_queue_wait_group.start(); + try 
comp.thread_pool.spawn(workerUpdateWin32Resource, .{ + comp, win32_resource, &win32_resource_prog_node, &comp.work_queue_wait_group, + }); + } } if (comp.bin_file.options.module) |mod| { @@ -3659,6 +3803,14 @@ pub fn obtainCObjectCacheManifest(comp: *const Compilation) Cache.Manifest { return man; } +pub fn obtainWin32ResourceCacheManifest(comp: *const Compilation) Cache.Manifest { + var man = comp.cache_parent.obtain(); + + man.hash.addListOfBytes(comp.rc_include_dir_list); + + return man; +} + test "cImport" { _ = cImport; } @@ -3832,6 +3984,26 @@ fn workerUpdateCObject( }; } +fn workerUpdateWin32Resource( + comp: *Compilation, + win32_resource: *Win32Resource, + progress_node: *std.Progress.Node, + wg: *WaitGroup, +) void { + defer wg.finish(); + + comp.updateWin32Resource(win32_resource, progress_node) catch |err| switch (err) { + error.AnalysisFail => return, + else => { + comp.reportRetryableWin32ResourceError(win32_resource, err) catch |oom| switch (oom) { + // Swallowing this error is OK because it's implied to be OOM when + // there is a missing failed_win32_resources error message. 
+ error.OutOfMemory => {}, + }; + }, + }; +} + fn buildCompilerRtOneShot( comp: *Compilation, output_mode: std.builtin.OutputMode, @@ -3877,6 +4049,18 @@ fn reportRetryableCObjectError( } } +fn reportRetryableWin32ResourceError( + comp: *Compilation, + win32_resource: *Win32Resource, + err: anyerror, +) error{OutOfMemory}!void { + win32_resource.status = .failure_retryable; + + // TODO: something + _ = comp; + _ = @errorName(err); +} + fn reportRetryableAstGenError( comp: *Compilation, src: AstGenSrc, @@ -4233,6 +4417,311 @@ fn updateCObject(comp: *Compilation, c_object: *CObject, c_obj_prog_node: *std.P }; } +fn updateWin32Resource(comp: *Compilation, win32_resource: *Win32Resource, win32_resource_prog_node: *std.Progress.Node) !void { + if (!build_options.have_llvm) { + return comp.failWin32Resource(win32_resource, "clang not available: compiler built without LLVM extensions", .{}); + } + const self_exe_path = comp.self_exe_path orelse + return comp.failWin32Resource(win32_resource, "clang compilation disabled", .{}); + + const tracy_trace = trace(@src()); + defer tracy_trace.end(); + + log.debug("updating win32 resource: {s}", .{win32_resource.src.src_path}); + + if (win32_resource.clearStatus(comp.gpa)) { + // There was previous failure. + comp.mutex.lock(); + defer comp.mutex.unlock(); + // If the failure was OOM, there will not be an entry here, so we do + // not assert discard. 
+ _ = comp.failed_win32_resources.swapRemove(win32_resource); + } + + var man = comp.obtainWin32ResourceCacheManifest(); + defer man.deinit(); + + _ = try man.addFile(win32_resource.src.src_path, null); + man.hash.addListOfBytes(win32_resource.src.extra_flags); + + var arena_allocator = std.heap.ArenaAllocator.init(comp.gpa); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + const rc_basename = std.fs.path.basename(win32_resource.src.src_path); + + win32_resource_prog_node.activate(); + var child_progress_node = win32_resource_prog_node.start(rc_basename, 0); + child_progress_node.activate(); + defer child_progress_node.end(); + + const rc_basename_noext = rc_basename[0 .. rc_basename.len - std.fs.path.extension(rc_basename).len]; + + const digest = if (try man.hit()) man.final() else blk: { + const rcpp_filename = try std.fmt.allocPrint(arena, "{s}.rcpp", .{rc_basename_noext}); + + const out_rcpp_path = try comp.tmpFilePath(arena, rcpp_filename); + var zig_cache_tmp_dir = try comp.local_cache_directory.handle.makeOpenPath("tmp", .{}); + defer zig_cache_tmp_dir.close(); + + const res_filename = try std.fmt.allocPrint(arena, "{s}.res", .{rc_basename_noext}); + + // We can't know the digest until we do the compilation, + // so we need a temporary filename. 
+ const out_res_path = try comp.tmpFilePath(arena, res_filename); + + var options = options: { + var resinator_args = try std.ArrayListUnmanaged([]const u8).initCapacity(comp.gpa, win32_resource.src.extra_flags.len + 4); + defer resinator_args.deinit(comp.gpa); + + resinator_args.appendAssumeCapacity(""); // dummy 'process name' arg + resinator_args.appendSliceAssumeCapacity(win32_resource.src.extra_flags); + resinator_args.appendSliceAssumeCapacity(&.{ "--", out_rcpp_path, out_res_path }); + + var cli_diagnostics = resinator.cli.Diagnostics.init(comp.gpa); + defer cli_diagnostics.deinit(); + var options = resinator.cli.parse(comp.gpa, resinator_args.items, &cli_diagnostics) catch |err| switch (err) { + error.ParseError => { + return comp.failWin32ResourceCli(win32_resource, &cli_diagnostics); + }, + else => |e| return e, + }; + break :options options; + }; + defer options.deinit(); + + var argv = std.ArrayList([]const u8).init(comp.gpa); + defer argv.deinit(); + var temp_strings = std.ArrayList([]const u8).init(comp.gpa); + defer { + for (temp_strings.items) |temp_string| { + comp.gpa.free(temp_string); + } + temp_strings.deinit(); + } + + // TODO: support options.preprocess == .no and .only + // alternatively, error if those options are used + try argv.appendSlice(&[_][]const u8{ + self_exe_path, + "clang", + "-E", // preprocessor only + "--comments", + "-fuse-line-directives", // #line instead of # + "-xc", // output c + "-Werror=null-character", // error on null characters instead of converting them to spaces + "-fms-compatibility", // Allow things like "header.h" to be resolved relative to the 'root' .rc file, among other things + "-DRC_INVOKED", // https://learn.microsoft.com/en-us/windows/win32/menurc/predefined-macros + }); + // Using -fms-compatibility and targeting the gnu abi interact in a strange way: + // - Targeting the GNU abi stops _MSC_VER from being defined + // - Passing -fms-compatibility stops __GNUC__ from being defined + // Neither being 
defined is a problem for things like MinGW's + // vadefs.h, which will fail during preprocessing if neither are defined. + // So, when targeting the GNU abi, we need to force __GNUC__ to be defined. + // + // TODO: This is a workaround that should be removed if possible. + if (comp.getTarget().isGnu()) { + // This is the same default gnuc version that Clang uses: + // https://github.com/llvm/llvm-project/blob/4b5366c9512aa273a5272af1d833961e1ed156e7/clang/lib/Driver/ToolChains/Clang.cpp#L6738 + try argv.append("-fgnuc-version=4.2.1"); + } + for (options.extra_include_paths.items) |extra_include_path| { + try argv.append("--include-directory"); + try argv.append(extra_include_path); + } + var symbol_it = options.symbols.iterator(); + while (symbol_it.next()) |entry| { + switch (entry.value_ptr.*) { + .define => |value| { + try argv.append("-D"); + const define_arg = arg: { + const arg = try std.fmt.allocPrint(comp.gpa, "{s}={s}", .{ entry.key_ptr.*, value }); + errdefer comp.gpa.free(arg); + try temp_strings.append(arg); + break :arg arg; + }; + try argv.append(define_arg); + }, + .undefine => { + try argv.append("-U"); + try argv.append(entry.key_ptr.*); + }, + } + } + try argv.append(win32_resource.src.src_path); + try argv.appendSlice(&[_][]const u8{ + "-o", + out_rcpp_path, + }); + + const out_dep_path = try std.fmt.allocPrint(arena, "{s}.d", .{out_rcpp_path}); + // Note: addCCArgs will implicitly add _DEBUG/NDEBUG depending on the optimization + // mode. While these defines are not normally present when calling rc.exe directly, + // them being defined matches the behavior of how MSVC calls rc.exe which is the more + // relevant behavior in this case. 
+ try comp.addCCArgs(arena, &argv, .rc, out_dep_path); + + if (comp.verbose_cc) { + dump_argv(argv.items); + } + + if (std.process.can_spawn) { + var child = std.ChildProcess.init(argv.items, arena); + child.stdin_behavior = .Ignore; + child.stdout_behavior = .Ignore; + child.stderr_behavior = .Pipe; + + try child.spawn(); + + const stderr_reader = child.stderr.?.reader(); + + const stderr = try stderr_reader.readAllAlloc(arena, 10 * 1024 * 1024); + + const term = child.wait() catch |err| { + return comp.failWin32Resource(win32_resource, "unable to spawn {s}: {s}", .{ argv.items[0], @errorName(err) }); + }; + + switch (term) { + .Exited => |code| { + if (code != 0) { + // TODO parse clang stderr and turn it into an error message + // and then call failCObjWithOwnedErrorMsg + log.err("clang preprocessor failed with stderr:\n{s}", .{stderr}); + return comp.failWin32Resource(win32_resource, "clang preprocessor exited with code {d}", .{code}); + } + }, + else => { + log.err("clang preprocessor terminated with stderr:\n{s}", .{stderr}); + return comp.failWin32Resource(win32_resource, "clang preprocessor terminated unexpectedly", .{}); + }, + } + } else { + const exit_code = try clangMain(arena, argv.items); + if (exit_code != 0) { + return comp.failWin32Resource(win32_resource, "clang preprocessor exited with code {d}", .{exit_code}); + } + } + + const dep_basename = std.fs.path.basename(out_dep_path); + // Add the files depended on to the cache system. + try man.addDepFilePost(zig_cache_tmp_dir, dep_basename); + if (comp.whole_cache_manifest) |whole_cache_manifest| { + comp.whole_cache_manifest_mutex.lock(); + defer comp.whole_cache_manifest_mutex.unlock(); + try whole_cache_manifest.addDepFilePost(zig_cache_tmp_dir, dep_basename); + } + // Just to save disk space, we delete the file because it is never needed again. 
+ zig_cache_tmp_dir.deleteFile(dep_basename) catch |err| { + log.warn("failed to delete '{s}': {s}", .{ out_dep_path, @errorName(err) }); + }; + + var full_input = std.fs.cwd().readFileAlloc(arena, out_rcpp_path, std.math.maxInt(usize)) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + else => |e| { + return comp.failWin32Resource(win32_resource, "failed to read preprocessed file '{s}': {s}", .{ out_rcpp_path, @errorName(e) }); + }, + }; + + var mapping_results = try resinator.source_mapping.parseAndRemoveLineCommands(arena, full_input, full_input, .{ .initial_filename = win32_resource.src.src_path }); + defer mapping_results.mappings.deinit(arena); + + var final_input = resinator.comments.removeComments(mapping_results.result, mapping_results.result, &mapping_results.mappings); + + var output_file = zig_cache_tmp_dir.createFile(out_res_path, .{}) catch |err| { + return comp.failWin32Resource(win32_resource, "failed to create output file '{s}': {s}", .{ out_res_path, @errorName(err) }); + }; + var output_file_closed = false; + defer if (!output_file_closed) output_file.close(); + + var diagnostics = resinator.errors.Diagnostics.init(arena); + defer diagnostics.deinit(); + + var dependencies_list = std.ArrayList([]const u8).init(comp.gpa); + defer { + for (dependencies_list.items) |item| { + comp.gpa.free(item); + } + dependencies_list.deinit(); + } + + var output_buffered_stream = std.io.bufferedWriter(output_file.writer()); + + resinator.compile.compile(arena, final_input, output_buffered_stream.writer(), .{ + .cwd = std.fs.cwd(), + .diagnostics = &diagnostics, + .source_mappings = &mapping_results.mappings, + .dependencies_list = &dependencies_list, + .system_include_paths = comp.rc_include_dir_list, + .ignore_include_env_var = true, + // options + .extra_include_paths = options.extra_include_paths.items, + .default_language_id = options.default_language_id, + .default_code_page = options.default_code_page orelse .windows1252, + 
.verbose = options.verbose, + .null_terminate_string_table_strings = options.null_terminate_string_table_strings, + .max_string_literal_codepoints = options.max_string_literal_codepoints, + .silent_duplicate_control_ids = options.silent_duplicate_control_ids, + .warn_instead_of_error_on_invalid_code_page = options.warn_instead_of_error_on_invalid_code_page, + }) catch |err| switch (err) { + error.ParseError, error.CompileError => { + // Delete the output file on error + output_file.close(); + output_file_closed = true; + // Failing to delete is not really a big deal, so log the deletion error and carry on + zig_cache_tmp_dir.deleteFile(out_res_path) catch |delete_err| { + log.warn("failed to delete '{s}': {s}", .{ out_res_path, @errorName(delete_err) }); + }; + return comp.failWin32ResourceCompile(win32_resource, final_input, &diagnostics, mapping_results.mappings); + }, + else => |e| return e, + }; + + try output_buffered_stream.flush(); + + for (dependencies_list.items) |dep_file_path| { + try man.addFilePost(dep_file_path); + if (comp.whole_cache_manifest) |whole_cache_manifest| { + comp.whole_cache_manifest_mutex.lock(); + defer comp.whole_cache_manifest_mutex.unlock(); + try whole_cache_manifest.addFilePost(dep_file_path); + } + } + + // Rename into place. + const digest = man.final(); + const o_sub_path = try std.fs.path.join(arena, &[_][]const u8{ "o", &digest }); + var o_dir = try comp.local_cache_directory.handle.makeOpenPath(o_sub_path, .{}); + defer o_dir.close(); + const tmp_basename = std.fs.path.basename(out_res_path); + try std.fs.rename(zig_cache_tmp_dir, tmp_basename, o_dir, res_filename); + const tmp_rcpp_basename = std.fs.path.basename(out_rcpp_path); + try std.fs.rename(zig_cache_tmp_dir, tmp_rcpp_basename, o_dir, rcpp_filename); + break :blk digest; + }; + + if (man.have_exclusive_lock) { + // Write the updated manifest. This is a no-op if the manifest is not dirty. 
Note that it is + // possible we had a hit and the manifest is dirty, for example if the file mtime changed but + // the contents were the same, we hit the cache but the manifest is dirty and we need to update + // it to prevent doing a full file content comparison the next time around. + man.writeManifest() catch |err| { + log.warn("failed to write cache manifest when compiling '{s}': {s}", .{ win32_resource.src.src_path, @errorName(err) }); + }; + } + + const res_basename = try std.fmt.allocPrint(arena, "{s}.res", .{rc_basename_noext}); + + win32_resource.status = .{ + .success = .{ + .res_path = try comp.local_cache_directory.join(comp.gpa, &[_][]const u8{ + "o", &digest, res_basename, + }), + .lock = man.toOwnedLock(), + }, + }; +} + pub fn tmpFilePath(comp: *Compilation, ally: Allocator, suffix: []const u8) error{OutOfMemory}![]const u8 { const s = std.fs.path.sep_str; const rand_int = std.crypto.random.int(u64); @@ -4347,7 +4836,7 @@ pub fn addCCArgs( try argv.appendSlice(&[_][]const u8{ "-target", llvm_triple }); switch (ext) { - .c, .cpp, .m, .mm, .h, .cu => { + .c, .cpp, .m, .mm, .h, .cu, .rc => { try argv.appendSlice(&[_][]const u8{ "-nostdinc", "-fno-spell-checking", @@ -4375,9 +4864,16 @@ pub fn addCCArgs( try argv.append("-isystem"); try argv.append(c_headers_dir); - for (comp.libc_include_dir_list) |include_dir| { - try argv.append("-isystem"); - try argv.append(include_dir); + if (ext == .rc) { + for (comp.rc_include_dir_list) |include_dir| { + try argv.append("-isystem"); + try argv.append(include_dir); + } + } else { + for (comp.libc_include_dir_list) |include_dir| { + try argv.append("-isystem"); + try argv.append(include_dir); + } } if (target.cpu.model.llvm_name) |llvm_name| { @@ -4680,6 +5176,253 @@ fn failCObjWithOwnedErrorMsg( return error.AnalysisFail; } +/// The include directories used when preprocessing .rc files are separate from the +/// target. Which include directories are used is determined by `options.rc_includes`. 
+///
+/// Note: It should be okay that the include directories used when compiling .rc
+/// files differ from the include directories used when compiling the main
+/// binary, since the .res format is not dependent on anything ABI-related. The
+/// only relevant differences would be things like `#define` constants being
+/// different in the MinGW headers vs the MSVC headers, but any such
+/// differences would likely be a MinGW bug.
+///
+/// When `options.rc_source_files` is empty, no include directories are detected.
+/// On a non-Windows host, `.msvc` fails with `error.WindowsSdkNotFound` and
+/// `.any` is treated as `.gnu`.
+fn detectWin32ResourceIncludeDirs(arena: Allocator, options: InitOptions) !LibCDirs {
+    // Set the includes to .none here when there are no rc files to compile
+    var includes = if (options.rc_source_files.len > 0) options.rc_includes else .none;
+    if (builtin.target.os.tag != .windows) {
+        switch (includes) {
+            // MSVC can't be found when the host isn't Windows, so short-circuit.
+            .msvc => return error.WindowsSdkNotFound,
+            // Skip straight to gnu since we won't be able to detect MSVC on non-Windows hosts.
+            .any => includes = .gnu,
+            .none, .gnu => {},
+        }
+    }
+    // Retry loop: the only path that comes back around is `.any` switching to `.gnu`
+    // after MSVC detection failed. Every other arm returns on its first visit.
+    while (true) {
+        switch (includes) {
+            .any, .msvc => return detectLibCIncludeDirs(
+                arena,
+                options.zig_lib_directory.path.?,
+                .{
+                    .cpu = options.target.cpu,
+                    .os = options.target.os,
+                    .abi = .msvc,
+                    .ofmt = options.target.ofmt,
+                },
+                options.is_native_abi,
+                // The .rc preprocessor will need to know the libc include dirs even if we
+                // are not linking libc, so force 'link_libc' to true
+                true,
+                options.libc_installation,
+            ) catch |err| {
+                if (includes == .any) {
+                    // fall back to mingw
+                    includes = .gnu;
+                    continue;
+                }
+                // `.msvc` was requested explicitly, so the detection failure is reported as-is.
+                return err;
+            },
+            .gnu => return detectLibCFromBuilding(arena, options.zig_lib_directory.path.?, .{
+                .cpu = options.target.cpu,
+                .os = options.target.os,
+                .abi = .gnu,
+                .ofmt = options.target.ofmt,
+            }),
+            // No autodetected include directories: every list is empty.
+            .none => return LibCDirs{
+                .libc_include_dir_list = &[0][]u8{},
+                .libc_installation = null,
+                .libc_framework_dir_list = &.{},
+                .sysroot = null,
+            },
+        }
+    }
+}
+
+fn failWin32Resource(comp: *Compilation, win32_resource:
*Win32Resource, comptime format: []const u8, args: anytype) SemaError {
+    @setCold(true);
+    var bundle: ErrorBundle.Wip = undefined;
+    try bundle.init(comp.gpa);
+    errdefer bundle.deinit();
+    // A single root message located at the resource's source path; every
+    // position field is zero.
+    try bundle.addRootErrorMessage(.{
+        .msg = try bundle.printString(format, args),
+        .src_loc = try bundle.addSourceLocation(.{
+            .src_path = try bundle.addString(win32_resource.src.src_path),
+            .line = 0,
+            .column = 0,
+            .span_start = 0,
+            .span_main = 0,
+            .span_end = 0,
+        }),
+    });
+    const finished_bundle = try bundle.toOwnedBundle("");
+    return comp.failWin32ResourceWithOwnedBundle(win32_resource, finished_bundle);
+}
+
+/// Records `err_bundle` as the failure of `win32_resource`, marks the resource
+/// as failed, and returns `error.AnalysisFail`.
+///
+/// Takes ownership of `err_bundle`: on success it is stored in
+/// `comp.failed_win32_resources`; if reserving the map slot fails, it is
+/// freed here and the allocation error is returned instead.
+/// This function never returns successfully.
+fn failWin32ResourceWithOwnedBundle(
+    comp: *Compilation,
+    win32_resource: *Win32Resource,
+    err_bundle: ErrorBundle,
+) SemaError {
+    @setCold(true);
+    {
+        comp.mutex.lock();
+        defer comp.mutex.unlock();
+        {
+            // The errdefer is scoped to this inner block on purpose: it must free the
+            // bundle when the reservation fails, but must not fire on the unconditional
+            // `error.AnalysisFail` return below, by which point the map owns the bundle.
+            var owned_bundle = err_bundle;
+            errdefer owned_bundle.deinit(comp.gpa);
+            try comp.failed_win32_resources.ensureUnusedCapacity(comp.gpa, 1);
+        }
+        comp.failed_win32_resources.putAssumeCapacityNoClobber(win32_resource, err_bundle);
+    }
+    win32_resource.status = .failure;
+    return error.AnalysisFail;
+}
+
+/// Reports the contents of `diagnostics` as the failure of `win32_resource`.
+///
+/// The bundle starts with a root message "invalid command line option(s)"
+/// located at the resource's source path, followed by root messages built from
+/// the `.err` entries of `diagnostics`, with `.note` entries attached to the
+/// error they follow. This function never returns successfully.
+fn failWin32ResourceCli(
+    comp: *Compilation,
+    win32_resource: *Win32Resource,
+    diagnostics: *resinator.cli.Diagnostics,
+) SemaError {
+    @setCold(true);
+
+    var bundle: ErrorBundle.Wip = undefined;
+    try bundle.init(comp.gpa);
+    errdefer bundle.deinit();
+
+    try bundle.addRootErrorMessage(.{
+        .msg = try bundle.addString("invalid command line option(s)"),
+        .src_loc = try bundle.addSourceLocation(.{
+            .src_path = try bundle.addString(win32_resource.src.src_path),
+            .line = 0,
+            .column = 0,
+            .span_start = 0,
+            .span_main = 0,
+            .span_end = 0,
+        }),
+    });
+
+    // `cur_err` is the most recent error that has not been added to the bundle yet;
+    // `cur_notes` collects the notes that belong to it.
+    var cur_err: ?ErrorBundle.ErrorMessage = null;
+    var cur_notes: std.ArrayListUnmanaged(ErrorBundle.ErrorMessage) = .{};
+    defer cur_notes.deinit(comp.gpa);
+    for (diagnostics.errors.items) |err_details| {
+        switch (err_details.type) {
+            .err => {
+                if (cur_err) |err| {
+                    try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items);
+                }
+                cur_err = .{
+                    .msg = try bundle.addString(err_details.msg.items),
+                };
+
cur_notes.clearRetainingCapacity();
+            },
+            // Notes that follow a warning belong to that warning, so stop collecting
+            // notes for the pending error. Flush the pending error first: it has not
+            // been added to the bundle yet and would otherwise be lost.
+            .warning => {
+                if (cur_err) |err| {
+                    try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items);
+                    cur_notes.clearRetainingCapacity();
+                }
+                cur_err = null;
+            },
+            .note => {
+                if (cur_err == null) continue;
+                cur_err.?.notes_len += 1;
+                try cur_notes.append(comp.gpa, .{
+                    .msg = try bundle.addString(err_details.msg.items),
+                });
+            },
+        }
+    }
+    if (cur_err) |err| {
+        try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items);
+    }
+
+    const finished_bundle = try bundle.toOwnedBundle("");
+    return comp.failWin32ResourceWithOwnedBundle(win32_resource, finished_bundle);
+}
+
+/// Reports the `.err` and `.note` entries of `diagnostics` as the failure of
+/// `win32_resource`.
+///
+/// Each `.err` entry becomes a root message and the `.note` entries that follow
+/// it become its notes. `.hint` entries are skipped. `.warning` entries are not
+/// reported themselves, but they end note collection for the preceding error.
+/// `source` is the text that the diagnostics' tokens refer to; `mappings` is
+/// used to translate a token's line number into a file name and line.
+/// This function never returns successfully.
+fn failWin32ResourceCompile(
+    comp: *Compilation,
+    win32_resource: *Win32Resource,
+    source: []const u8,
+    diagnostics: *resinator.errors.Diagnostics,
+    mappings: resinator.source_mapping.SourceMappings,
+) SemaError {
+    @setCold(true);
+
+    var bundle: ErrorBundle.Wip = undefined;
+    try bundle.init(comp.gpa);
+    errdefer bundle.deinit();
+
+    var msg_buf: std.ArrayListUnmanaged(u8) = .{};
+    defer msg_buf.deinit(comp.gpa);
+    var cur_err: ?ErrorBundle.ErrorMessage = null;
+    var cur_notes: std.ArrayListUnmanaged(ErrorBundle.ErrorMessage) = .{};
+    defer cur_notes.deinit(comp.gpa);
+    for (diagnostics.errors.items) |err_details| {
+        switch (err_details.type) {
+            .hint => continue,
+            // Clear the current error so that notes don't bleed into unassociated
+            // errors, but flush it first: it has not been added to the bundle yet
+            // and would otherwise be lost.
+            .warning => {
+                if (cur_err) |err| {
+                    try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items);
+                    cur_notes.clearRetainingCapacity();
+                }
+                cur_err = null;
+                continue;
+            },
+            .note => if (cur_err == null) continue,
+            .err => {},
+        }
+        const corresponding_span = mappings.get(err_details.token.line_number);
+        const corresponding_file = mappings.files.get(corresponding_span.filename_offset);
+
+        const source_line_start = err_details.token.getLineStart(source);
+        const column = err_details.token.calculateColumn(source, 1, source_line_start);
+        const err_line = corresponding_span.start_line;
+
+        msg_buf.clearRetainingCapacity();
+        try err_details.render(msg_buf.writer(comp.gpa), source, diagnostics.strings.items);
+
+        const src_loc = src_loc: {
+            var src_loc: ErrorBundle.SourceLocation = .{
+                .src_path = try bundle.addString(corresponding_file),
+                .line =
@intCast(err_line - 1), // 1-based -> 0-based
+                .column = @intCast(column),
+                .span_start = 0,
+                .span_main = 0,
+                .span_end = 0,
+            };
+            // The span fields and the source line text are only filled in when the
+            // diagnostic asks for its source line to be printed.
+            if (err_details.print_source_line) {
+                const source_line = err_details.token.getLine(source, source_line_start);
+                const visual_info = err_details.visualTokenInfo(source_line_start, source_line_start + source_line.len);
+                src_loc.span_start = @intCast(visual_info.point_offset - visual_info.before_len);
+                src_loc.span_main = @intCast(visual_info.point_offset);
+                src_loc.span_end = @intCast(visual_info.point_offset + 1 + visual_info.after_len);
+                src_loc.source_line = try bundle.addString(source_line);
+            }
+            break :src_loc try bundle.addSourceLocation(src_loc);
+        };
+
+        switch (err_details.type) {
+            .err => {
+                // A new error starts: emit the previous one together with its notes.
+                if (cur_err) |err| {
+                    try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items);
+                }
+                cur_err = .{
+                    .msg = try bundle.addString(msg_buf.items),
+                    .src_loc = src_loc,
+                };
+                cur_notes.clearRetainingCapacity();
+            },
+            .note => {
+                // `cur_err` is non-null here: notes without a current error were
+                // skipped by the first switch of this loop iteration.
+                cur_err.?.notes_len += 1;
+                try cur_notes.append(comp.gpa, .{
+                    .msg = try bundle.addString(msg_buf.items),
+                    .src_loc = src_loc,
+                });
+            },
+            // Both were handled with `continue` by the first switch of this loop iteration.
+            .warning, .hint => unreachable,
+        }
+    }
+    // Emit the last pending error, if any.
+    if (cur_err) |err| {
+        try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items);
+    }
+
+    const finished_bundle = try bundle.toOwnedBundle("");
+    return comp.failWin32ResourceWithOwnedBundle(win32_resource, finished_bundle);
+}
+
+/// Adds `msg` to `wip` as a root error message, then reserves `notes.len` note
+/// slots and fills each slot with the index of the corresponding note message.
+/// NOTE(review): callers appear to keep `msg.notes_len` equal to `notes.len`;
+/// this function does not check it.
+fn win32ResourceFlushErrorMessage(wip: *ErrorBundle.Wip, msg: ErrorBundle.ErrorMessage, notes: []const ErrorBundle.ErrorMessage) !void {
+    try wip.addRootErrorMessage(msg);
+    const notes_start = try wip.reserveNotes(@intCast(notes.len));
+    // `i` walks the reserved slots in `wip.extra`, in step with `notes`.
+    for (notes_start.., notes) |i, note| {
+        wip.extra.items[i] = @intFromEnum(wip.addErrorMessageAssumeCapacity(note));
+    }
+}
+
 pub const FileExt = enum { c, cpp, @@ -4696,6 +5439,7 @@ pub const FileExt = enum { static_library, zig, def, + rc, res, unknown, @@ -4712,6 +5456,7 @@ pub const FileExt = enum { .static_library, .zig, .def, + .rc,
.res, .unknown, => false, @@ -4735,6 +5480,7 @@ pub const FileExt = enum { .static_library => target.staticLibSuffix(), .zig => ".zig", .def => ".def", + .rc => ".rc", .res => ".res", .unknown => "", }; @@ -4827,7 +5573,9 @@ pub fn classifyFileExt(filename: []const u8) FileExt { return .cu; } else if (mem.endsWith(u8, filename, ".def")) { return .def; - } else if (mem.endsWith(u8, filename, ".res")) { + } else if (std.ascii.endsWithIgnoreCase(filename, ".rc")) { + return .rc; + } else if (std.ascii.endsWithIgnoreCase(filename, ".res")) { return .res; } else { return .unknown; @@ -4971,6 +5719,13 @@ fn detectLibCFromLibCInstallation(arena: Allocator, target: Target, lci: *const if (!is_redundant) list.appendAssumeCapacity(lci.sys_include_dir.?); if (target.os.tag == .windows) { + if (std.fs.path.dirname(lci.sys_include_dir.?)) |sys_include_dir_parent| { + // This include path will only exist when the optional "Desktop development with C++" + // is installed. It contains headers, .rc files, and resources. It is especially + // necessary when working with Windows resources. 
+ const atlmfc_dir = try std.fs.path.join(arena, &[_][]const u8{ sys_include_dir_parent, "atlmfc", "include" }); + list.appendAssumeCapacity(atlmfc_dir); + } if (std.fs.path.dirname(lci.include_dir.?)) |include_dir_parent| { const um_dir = try std.fs.path.join(arena, &[_][]const u8{ include_dir_parent, "um" }); list.appendAssumeCapacity(um_dir); diff --git a/src/link.zig b/src/link.zig index 4db946658a8d..a1c816550c2d 100644 --- a/src/link.zig +++ b/src/link.zig @@ -1027,6 +1027,9 @@ pub const File = struct { for (comp.c_object_table.keys()) |key| { _ = try man.addFile(key.status.success.object_path, null); } + for (comp.win32_resource_table.keys()) |key| { + _ = try man.addFile(key.status.success.res_path, null); + } try man.addOptionalFile(module_obj_path); try man.addOptionalFile(compiler_rt_path); @@ -1056,7 +1059,7 @@ pub const File = struct { }; } - const num_object_files = base.options.objects.len + comp.c_object_table.count() + 2; + const num_object_files = base.options.objects.len + comp.c_object_table.count() + comp.win32_resource_table.count() + 2; var object_files = try std.ArrayList([*:0]const u8).initCapacity(base.allocator, num_object_files); defer object_files.deinit(); @@ -1066,6 +1069,9 @@ pub const File = struct { for (comp.c_object_table.keys()) |key| { object_files.appendAssumeCapacity(try arena.dupeZ(u8, key.status.success.object_path)); } + for (comp.win32_resource_table.keys()) |key| { + object_files.appendAssumeCapacity(try arena.dupeZ(u8, key.status.success.res_path)); + } if (module_obj_path) |p| { object_files.appendAssumeCapacity(try arena.dupeZ(u8, p)); } diff --git a/src/link/Coff/lld.zig b/src/link/Coff/lld.zig index c0f88704e5c5..38385ceedf35 100644 --- a/src/link/Coff/lld.zig +++ b/src/link/Coff/lld.zig @@ -72,6 +72,9 @@ pub fn linkWithLLD(self: *Coff, comp: *Compilation, prog_node: *std.Progress.Nod for (comp.c_object_table.keys()) |key| { _ = try man.addFile(key.status.success.object_path, null); } + for 
(comp.win32_resource_table.keys()) |key| { + _ = try man.addFile(key.status.success.res_path, null); + } try man.addOptionalFile(module_obj_path); man.hash.addOptionalBytes(self.base.options.entry); man.hash.addOptional(self.base.options.stack_size_override); @@ -268,6 +271,10 @@ pub fn linkWithLLD(self: *Coff, comp: *Compilation, prog_node: *std.Progress.Nod try argv.append(key.status.success.object_path); } + for (comp.win32_resource_table.keys()) |key| { + try argv.append(key.status.success.res_path); + } + if (module_obj_path) |p| { try argv.append(p); } diff --git a/src/main.zig b/src/main.zig index 6e7b330c407b..2913ac2ea2a6 100644 --- a/src/main.zig +++ b/src/main.zig @@ -472,6 +472,12 @@ const usage_build_generic = \\ -D[macro]=[value] Define C [macro] to [value] (1 if [value] omitted) \\ --libc [file] Provide a file which specifies libc paths \\ -cflags [flags] -- Set extra flags for the next positional C source files + \\ -rcflags [flags] -- Set extra flags for the next positional .rc source files + \\ -rcincludes=[type] Set the type of includes to use when compiling .rc source files + \\ any (default) Use msvc if available, fall back to gnu + \\ msvc Use msvc include paths (must be present on the system) + \\ gnu Use mingw include paths (distributed with Zig) + \\ none Do not use any autodetected include paths \\ \\Link Options: \\ -l[lib], --library [lib] Link against system library (only if actually used) @@ -919,11 +925,15 @@ fn buildOutputType( var wasi_emulated_libs = std.ArrayList(wasi_libc.CRTFile).init(arena); var clang_argv = std.ArrayList([]const u8).init(arena); var extra_cflags = std.ArrayList([]const u8).init(arena); + var extra_rcflags = std.ArrayList([]const u8).init(arena); // These are before resolving sysroot. 
var lib_dir_args = std.ArrayList([]const u8).init(arena); var rpath_list = std.ArrayList([]const u8).init(arena); var symbol_wrap_set: std.StringArrayHashMapUnmanaged(void) = .{}; var c_source_files = std.ArrayList(Compilation.CSourceFile).init(arena); + var rc_source_files = std.ArrayList(Compilation.RcSourceFile).init(arena); + var rc_includes: Compilation.RcIncludes = .any; + var res_files = std.ArrayList(Compilation.LinkObject).init(arena); var link_objects = std.ArrayList(Compilation.LinkObject).init(arena); var framework_dirs = std.ArrayList([]const u8).init(arena); var frameworks: std.StringArrayHashMapUnmanaged(Framework) = .{}; @@ -1042,6 +1052,19 @@ fn buildOutputType( if (mem.eql(u8, next_arg, "--")) break; try extra_cflags.append(next_arg); } + } else if (mem.eql(u8, arg, "-rcincludes")) { + rc_includes = parseRcIncludes(args_iter.nextOrFatal()); + } else if (mem.startsWith(u8, arg, "-rcincludes=")) { + rc_includes = parseRcIncludes(arg["-rcincludes=".len..]); + } else if (mem.eql(u8, arg, "-rcflags")) { + extra_rcflags.shrinkRetainingCapacity(0); + while (true) { + const next_arg = args_iter.next() orelse { + fatal("expected -- after -rcflags", .{}); + }; + if (mem.eql(u8, next_arg, "--")) break; + try extra_rcflags.append(next_arg); + } } else if (mem.eql(u8, arg, "--color")) { const next_arg = args_iter.next() orelse { fatal("expected [auto|on|off] after --color", .{}); @@ -1590,7 +1613,8 @@ fn buildOutputType( } } else switch (file_ext orelse Compilation.classifyFileExt(arg)) { - .object, .static_library, .shared_library, .res => try link_objects.append(.{ .path = arg }), + .object, .static_library, .shared_library => try link_objects.append(.{ .path = arg }), + .res => try res_files.append(.{ .path = arg }), .assembly, .assembly_with_cpp, .c, .cpp, .h, .ll, .bc, .m, .mm, .cu => { try c_source_files.append(.{ .src_path = arg, @@ -1599,6 +1623,12 @@ fn buildOutputType( .ext = file_ext, }); }, + .rc => { + try rc_source_files.append(.{ + .src_path = 
arg, + .extra_flags = try arena.dupe([]const u8, extra_rcflags.items), + }); + }, .zig => { if (root_src_file) |other| { fatal("found another zig file '{s}' after root source file '{s}'", .{ arg, other }); @@ -1684,13 +1714,20 @@ fn buildOutputType( .ext = file_ext, // duped while parsing the args. }); }, - .unknown, .shared_library, .object, .static_library, .res => try link_objects.append(.{ + .unknown, .shared_library, .object, .static_library => try link_objects.append(.{ + .path = it.only_arg, + .must_link = must_link, + }), + .res => try res_files.append(.{ .path = it.only_arg, .must_link = must_link, }), .def => { linker_module_definition_file = it.only_arg; }, + .rc => { + try rc_source_files.append(.{ .src_path = it.only_arg }); + }, .zig => { if (root_src_file) |other| { fatal("found another zig file '{s}' after root source file '{s}'", .{ it.only_arg, other }); @@ -2452,6 +2489,12 @@ fn buildOutputType( } else if (emit_bin == .yes) { const basename = fs.path.basename(emit_bin.yes); break :blk basename[0 .. basename.len - fs.path.extension(basename).len]; + } else if (rc_source_files.items.len >= 1) { + const basename = fs.path.basename(rc_source_files.items[0].src_path); + break :blk basename[0 .. basename.len - fs.path.extension(basename).len]; + } else if (res_files.items.len >= 1) { + const basename = fs.path.basename(res_files.items[0].path); + break :blk basename[0 .. basename.len - fs.path.extension(basename).len]; } else if (show_builtin) { break :blk "builtin"; } else if (arg_mode == .run) { @@ -2530,6 +2573,21 @@ fn buildOutputType( link_libcpp = true; } + if (target_info.target.ofmt == .coff) { + // Now that we know the target supports resources, + // we can add the res files as link objects. 
+ for (res_files.items) |res_file| { + try link_objects.append(res_file); + } + } else { + if (rc_source_files.items.len != 0) { + fatal("rc files are not allowed unless the target object format is coff (Windows/UEFI)", .{}); + } + if (res_files.items.len != 0) { + fatal("res files are not allowed unless the target object format is coff (Windows/UEFI)", .{}); + } + } + if (target_info.target.cpu.arch.isWasm()) blk: { if (single_threaded == null) { single_threaded = true; @@ -2933,6 +2991,7 @@ fn buildOutputType( if (output_mode == .Obj and (object_format == .coff or object_format == .macho)) { const total_obj_count = c_source_files.items.len + @intFromBool(root_src_file != null) + + rc_source_files.items.len + link_objects.items.len; if (total_obj_count > 1) { fatal("{s} does not support linking multiple objects into one", .{@tagName(object_format)}); @@ -3319,6 +3378,8 @@ fn buildOutputType( .rpath_list = rpath_list.items, .symbol_wrap_set = symbol_wrap_set, .c_source_files = c_source_files.items, + .rc_source_files = rc_source_files.items, + .rc_includes = rc_includes, .link_objects = link_objects.items, .framework_dirs = framework_dirs.items, .frameworks = resolved_frameworks.items, @@ -6482,3 +6543,8 @@ fn accessFrameworkPath( return false; }
+
+/// Converts the value given to `-rcincludes` into a `Compilation.RcIncludes` tag.
+/// Exits via `fatal` when `arg` does not name one of the enum's tags.
+fn parseRcIncludes(arg: []const u8) Compilation.RcIncludes {
+    if (std.meta.stringToEnum(Compilation.RcIncludes, arg)) |includes| {
+        return includes;
+    }
+    fatal("unsupported rc includes type: '{s}'", .{arg});
+}
diff --git a/src/resinator.zig b/src/resinator.zig
new file mode 100644
index 000000000000..3287641a7de0
--- /dev/null
+++ b/src/resinator.zig
@@ -0,0 +1,18 @@
+pub const ani = @import("resinator/ani.zig");
+pub const ast = @import("resinator/ast.zig");
+pub const bmp = @import("resinator/bmp.zig");
+pub const cli = @import("resinator/cli.zig");
+pub const code_pages = @import("resinator/code_pages.zig");
+pub const comments = @import("resinator/comments.zig");
+pub const compile = @import("resinator/compile.zig");
+pub const errors =
@import("resinator/errors.zig"); +pub const ico = @import("resinator/ico.zig"); +pub const lang = @import("resinator/lang.zig"); +pub const lex = @import("resinator/lex.zig"); +pub const literals = @import("resinator/literals.zig"); +pub const parse = @import("resinator/parse.zig"); +pub const rc = @import("resinator/rc.zig"); +pub const res = @import("resinator/res.zig"); +pub const source_mapping = @import("resinator/source_mapping.zig"); +pub const utils = @import("resinator/utils.zig"); +pub const windows1252 = @import("resinator/windows1252.zig"); diff --git a/src/resinator/ani.zig b/src/resinator/ani.zig new file mode 100644 index 000000000000..7b8b05564fba --- /dev/null +++ b/src/resinator/ani.zig @@ -0,0 +1,58 @@ +//! https://en.wikipedia.org/wiki/Resource_Interchange_File_Format +//! https://www.moon-soft.com/program/format/windows/ani.htm +//! https://www.gdgsoft.com/anituner/help/aniformat.htm +//! https://www.lomont.org/software/aniexploit/ExploitANI.pdf +//! +//! RIFF( 'ACON' +//! [LIST( 'INFO' )] +//! [] +//! anih( ) +//! [rate( )] +//! ['seq '( )] +//! LIST( 'fram' icon( ) ... ) +//! 
)
+
+const std = @import("std");
+
+const AF_ICON: u32 = 1;
+
+/// Returns true when `reader` yields a RIFF/ACON stream whose `anih` header has
+/// the `AF_ICON` flag set. Any read or format error is reported as `false`.
+pub fn isAnimatedIcon(reader: anytype) bool {
+    const header_flags = getAniheaderFlags(reader) catch return false;
+    // AF_ICON is a single bit, so "bit set" and "masked value equals AF_ICON" agree.
+    return (header_flags & AF_ICON) != 0;
+}
+
+/// Reads the RIFF preamble from `reader`, skips chunks until the `anih` chunk,
+/// and returns the `flags` field of its ANIHEADER.
+/// Fails with `error.InvalidFormat` when the "RIFF" or "ACON" tag is missing, and
+/// with the reader's own error (e.g. end of stream) when no `anih` chunk is found.
+fn getAniheaderFlags(reader: anytype) !u32 {
+    const riff_magic = try reader.readBytesNoEof(4);
+    if (!std.mem.eql(u8, &riff_magic, "RIFF")) return error.InvalidFormat;
+
+    // size of RIFF chunk (not needed)
+    _ = try reader.readIntLittle(u32);
+
+    const form = try reader.readBytesNoEof(4);
+    if (!std.mem.eql(u8, &form, "ACON")) return error.InvalidFormat;
+
+    while (true) {
+        const fourcc = try reader.readBytesNoEof(4);
+        const payload_len = try reader.readIntLittle(u32);
+        if (std.mem.eql(u8, &fourcc, "anih")) {
+            const header = try reader.readStruct(ANIHEADER);
+            // The struct was read as raw bytes; the file stores its fields little-endian.
+            return std.mem.littleToNative(u32, header.flags);
+        }
+        // TODO: Move file cursor instead of skipBytes
+        try reader.skipBytes(payload_len, .{});
+    }
+}
+
+/// From Microsoft Multimedia Data Standards Update April 15, 1994
+const ANIHEADER = extern struct {
+    cbSizeof: u32,
+    cFrames: u32,
+    cSteps: u32,
+    cx: u32,
+    cy: u32,
+    cBitCount: u32,
+    cPlanes: u32,
+    jifRate: u32,
+    flags: u32,
+};
diff --git a/src/resinator/ast.zig b/src/resinator/ast.zig
new file mode 100644
index 000000000000..e6f6c030c08c
--- /dev/null
+++ b/src/resinator/ast.zig
@@ -0,0 +1,1084 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+const Token = @import("lex.zig").Token;
+const CodePage = @import("code_pages.zig").CodePage;
+
+pub const Tree = struct {
+    node: *Node,
+    input_code_pages: CodePageLookup,
+    output_code_pages: CodePageLookup,
+
+    /// not owned by the tree
+    source: []const u8,
+
+    arena: std.heap.ArenaAllocator.State,
+    allocator: Allocator,
+
+    pub fn deinit(self: *Tree) void {
+        self.arena.promote(self.allocator).deinit();
+    }
+
+    pub fn root(self: *Tree) *Node.Root {
+        return @fieldParentPtr(Node.Root, "base", self.node);
+    }
+
+    pub fn dump(self: *Tree, writer:
anytype) @TypeOf(writer).Error!void {
+        try self.node.dump(self, writer, 0);
+    }
+};
+
+/// Maps 1-indexed source line numbers to the code page recorded for that line.
+/// Entries are stored densely in `lookup`, where index 0 holds line 1.
+pub const CodePageLookup = struct {
+    lookup: std.ArrayListUnmanaged(CodePage) = .{},
+    allocator: Allocator,
+    default_code_page: CodePage,
+
+    pub fn init(allocator: Allocator, default_code_page: CodePage) CodePageLookup {
+        return .{
+            .allocator = allocator,
+            .default_code_page = default_code_page,
+        };
+    }
+
+    pub fn deinit(self: *CodePageLookup) void {
+        self.lookup.deinit(self.allocator);
+    }
+
+    /// line_num is 1-indexed
+    pub fn setForLineNum(self: *CodePageLookup, line_num: usize, code_page: CodePage) !void {
+        const slot = line_num - 1;
+        const prev_len = self.lookup.items.len;
+        if (slot >= prev_len) {
+            try self.lookup.resize(self.allocator, line_num);
+
+            // Growing may leave unset lines between the old end and `slot` (e.g. a
+            // string literal spanning several lines, or empty lines at the start of
+            // the file). They inherit the code page of the last line before the gap,
+            // or the default when there is no such line.
+            const gap_value = if (prev_len == 0)
+                self.default_code_page
+            else
+                self.lookup.items[prev_len - 1];
+            @memset(self.lookup.items[prev_len..slot], gap_value);
+        }
+        self.lookup.items[slot] = code_page;
+    }
+
+    pub fn setForToken(self: *CodePageLookup, token: Token, code_page: CodePage) !void {
+        return self.setForLineNum(token.line_number, code_page);
+    }
+
+    /// line_num is 1-indexed
+    pub fn getForLineNum(self: CodePageLookup, line_num: usize) CodePage {
+        const slot = line_num - 1;
+        return self.lookup.items[slot];
+    }
+
+    pub fn getForToken(self: CodePageLookup, token: Token) CodePage {
+        return self.getForLineNum(token.line_number);
+    }
+};
+
+test "CodePageLookup" {
+    var lookup = CodePageLookup.init(std.testing.allocator, .windows1252);
+    defer lookup.deinit();
+
+    try lookup.setForLineNum(5, .utf8);
+    try std.testing.expectEqual(CodePage.windows1252, lookup.getForLineNum(1));
+    try std.testing.expectEqual(CodePage.windows1252, lookup.getForLineNum(2));
+    try std.testing.expectEqual(CodePage.windows1252, lookup.getForLineNum(3));
+    try std.testing.expectEqual(CodePage.windows1252, lookup.getForLineNum(4));
+    try std.testing.expectEqual(CodePage.utf8, lookup.getForLineNum(5));
+    try std.testing.expectEqual(@as(usize, 5), lookup.lookup.items.len);
+
+    try lookup.setForLineNum(7, .windows1252);
+    try std.testing.expectEqual(CodePage.windows1252, lookup.getForLineNum(1));
+    try std.testing.expectEqual(CodePage.windows1252, lookup.getForLineNum(2));
+    try std.testing.expectEqual(CodePage.windows1252, lookup.getForLineNum(3));
+    try std.testing.expectEqual(CodePage.windows1252, lookup.getForLineNum(4));
+    try std.testing.expectEqual(CodePage.utf8, lookup.getForLineNum(5));
+    try std.testing.expectEqual(CodePage.utf8, lookup.getForLineNum(6));
+    try std.testing.expectEqual(CodePage.windows1252, lookup.getForLineNum(7));
+    try std.testing.expectEqual(@as(usize, 7),
lookup.lookup.items.len); +} + +pub const Node = struct { + id: Id, + + pub const Id = enum { + root, + resource_external, + resource_raw_data, + literal, + binary_expression, + grouped_expression, + not_expression, + accelerators, + accelerator, + dialog, + control_statement, + toolbar, + menu, + menu_item, + menu_item_separator, + menu_item_ex, + popup, + popup_ex, + version_info, + version_statement, + block, + block_value, + block_value_value, + string_table, + string_table_string, + language_statement, + font_statement, + simple_statement, + invalid, + + pub fn Type(comptime id: Id) type { + return switch (id) { + .root => Root, + .resource_external => ResourceExternal, + .resource_raw_data => ResourceRawData, + .literal => Literal, + .binary_expression => BinaryExpression, + .grouped_expression => GroupedExpression, + .not_expression => NotExpression, + .accelerators => Accelerators, + .accelerator => Accelerator, + .dialog => Dialog, + .control_statement => ControlStatement, + .toolbar => Toolbar, + .menu => Menu, + .menu_item => MenuItem, + .menu_item_separator => MenuItemSeparator, + .menu_item_ex => MenuItemEx, + .popup => Popup, + .popup_ex => PopupEx, + .version_info => VersionInfo, + .version_statement => VersionStatement, + .block => Block, + .block_value => BlockValue, + .block_value_value => BlockValueValue, + .string_table => StringTable, + .string_table_string => StringTableString, + .language_statement => LanguageStatement, + .font_statement => FontStatement, + .simple_statement => SimpleStatement, + .invalid => Invalid, + }; + } + }; + + pub fn cast(base: *Node, comptime id: Id) ?*id.Type() { + if (base.id == id) { + return @fieldParentPtr(id.Type(), "base", base); + } + return null; + } + + pub const Root = struct { + base: Node = .{ .id = .root }, + body: []*Node, + }; + + pub const ResourceExternal = struct { + base: Node = .{ .id = .resource_external }, + id: Token, + type: Token, + common_resource_attributes: []Token, + filename: *Node, + 
}; + + pub const ResourceRawData = struct { + base: Node = .{ .id = .resource_raw_data }, + id: Token, + type: Token, + common_resource_attributes: []Token, + begin_token: Token, + raw_data: []*Node, + end_token: Token, + }; + + pub const Literal = struct { + base: Node = .{ .id = .literal }, + token: Token, + }; + + pub const BinaryExpression = struct { + base: Node = .{ .id = .binary_expression }, + operator: Token, + left: *Node, + right: *Node, + }; + + pub const GroupedExpression = struct { + base: Node = .{ .id = .grouped_expression }, + open_token: Token, + expression: *Node, + close_token: Token, + }; + + pub const NotExpression = struct { + base: Node = .{ .id = .not_expression }, + not_token: Token, + number_token: Token, + }; + + pub const Accelerators = struct { + base: Node = .{ .id = .accelerators }, + id: Token, + type: Token, + common_resource_attributes: []Token, + optional_statements: []*Node, + begin_token: Token, + accelerators: []*Node, + end_token: Token, + }; + + pub const Accelerator = struct { + base: Node = .{ .id = .accelerator }, + event: *Node, + idvalue: *Node, + type_and_options: []Token, + }; + + pub const Dialog = struct { + base: Node = .{ .id = .dialog }, + id: Token, + type: Token, + common_resource_attributes: []Token, + x: *Node, + y: *Node, + width: *Node, + height: *Node, + help_id: ?*Node, + optional_statements: []*Node, + begin_token: Token, + controls: []*Node, + end_token: Token, + }; + + pub const ControlStatement = struct { + base: Node = .{ .id = .control_statement }, + type: Token, + text: ?Token, + /// Only relevant for the user-defined CONTROL control + class: ?*Node, + id: *Node, + x: *Node, + y: *Node, + width: *Node, + height: *Node, + style: ?*Node, + exstyle: ?*Node, + help_id: ?*Node, + extra_data_begin: ?Token, + extra_data: []*Node, + extra_data_end: ?Token, + + /// Returns true if this node describes a user-defined CONTROL control + /// https://learn.microsoft.com/en-us/windows/win32/menurc/control-control 
+        /// Returns true when this control statement has a `class` node, which is
+        /// how the rest of this file distinguishes user-defined CONTROL controls.
+        pub fn isUserDefined(self: *const ControlStatement) bool {
+            return self.class != null;
+        }
+    };
+
+    pub const Toolbar = struct {
+        base: Node = .{ .id = .toolbar },
+        id: Token,
+        type: Token,
+        common_resource_attributes: []Token,
+        button_width: *Node,
+        button_height: *Node,
+        begin_token: Token,
+        /// Will contain Literal and SimpleStatement nodes
+        buttons: []*Node,
+        end_token: Token,
+    };
+
+    pub const Menu = struct {
+        base: Node = .{ .id = .menu },
+        id: Token,
+        type: Token,
+        common_resource_attributes: []Token,
+        optional_statements: []*Node,
+        /// `help_id` will never be non-null if `type` is MENU
+        help_id: ?*Node,
+        begin_token: Token,
+        items: []*Node,
+        end_token: Token,
+    };
+
+    pub const MenuItem = struct {
+        base: Node = .{ .id = .menu_item },
+        menuitem: Token,
+        text: Token,
+        result: *Node,
+        option_list: []Token,
+    };
+
+    pub const MenuItemSeparator = struct {
+        base: Node = .{ .id = .menu_item_separator },
+        menuitem: Token,
+        separator: Token,
+    };
+
+    pub const MenuItemEx = struct {
+        base: Node = .{ .id = .menu_item_ex },
+        menuitem: Token,
+        text: Token,
+        id: ?*Node,
+        type: ?*Node,
+        state: ?*Node,
+    };
+
+    pub const Popup = struct {
+        base: Node = .{ .id = .popup },
+        popup: Token,
+        text: Token,
+        option_list: []Token,
+        begin_token: Token,
+        items: []*Node,
+        end_token: Token,
+    };
+
+    pub const PopupEx = struct {
+        base: Node = .{ .id = .popup_ex },
+        popup: Token,
+        text: Token,
+        id: ?*Node,
+        type: ?*Node,
+        state: ?*Node,
+        help_id: ?*Node,
+        begin_token: Token,
+        items: []*Node,
+        end_token: Token,
+    };
+
+    pub const VersionInfo = struct {
+        base: Node = .{ .id = .version_info },
+        id: Token,
+        versioninfo: Token,
+        common_resource_attributes: []Token,
+        /// Will contain VersionStatement and/or SimpleStatement nodes
+        fixed_info: []*Node,
+        begin_token: Token,
+        block_statements: []*Node,
+        end_token: Token,
+    };
+
+    /// Used for FILEVERSION and PRODUCTVERSION statements
+    pub const VersionStatement = struct {
+        base: Node = .{ .id = .version_statement },
+        type: Token,
+        /// Between 1-4 parts
+        parts: []*Node,
+    };
+
+    pub const Block = struct {
+        base: Node = .{ .id = .block },
+        /// The BLOCK token itself
+        identifier: Token,
+        key: Token,
+        /// This is undocumented but BLOCK statements support values after
+        /// the key just like VALUE statements.
+        values: []*Node,
+        begin_token: Token,
+        children: []*Node,
+        end_token: Token,
+    };
+
+    pub const BlockValue = struct {
+        base: Node = .{ .id = .block_value },
+        /// The VALUE token itself
+        identifier: Token,
+        key: Token,
+        /// These will be BlockValueValue nodes
+        values: []*Node,
+    };
+
+    pub const BlockValueValue = struct {
+        base: Node = .{ .id = .block_value_value },
+        expression: *Node,
+        /// Whether or not the value has a trailing comma is relevant
+        trailing_comma: bool,
+    };
+
+    pub const StringTable = struct {
+        base: Node = .{ .id = .string_table },
+        type: Token,
+        common_resource_attributes: []Token,
+        optional_statements: []*Node,
+        begin_token: Token,
+        strings: []*Node,
+        end_token: Token,
+    };
+
+    pub const StringTableString = struct {
+        base: Node = .{ .id = .string_table_string },
+        id: *Node,
+        maybe_comma: ?Token,
+        string: Token,
+    };
+
+    pub const LanguageStatement = struct {
+        base: Node = .{ .id = .language_statement },
+        /// The LANGUAGE token itself
+        language_token: Token,
+        primary_language_id: *Node,
+        sublanguage_id: *Node,
+    };
+
+    pub const FontStatement = struct {
+        base: Node = .{ .id = .font_statement },
+        /// The FONT token itself
+        identifier: Token,
+        point_size: *Node,
+        typeface: Token,
+        weight: ?*Node,
+        italic: ?*Node,
+        char_set: ?*Node,
+    };
+
+    /// A statement with one value associated with it.
+    /// Used for CAPTION, CHARACTERISTICS, CLASS, EXSTYLE, MENU, STYLE, VERSION,
+    /// as well as VERSIONINFO-specific statements FILEFLAGSMASK, FILEFLAGS, FILEOS,
+    /// FILETYPE, FILESUBTYPE
+    pub const SimpleStatement = struct {
+        base: Node = .{ .id = .simple_statement },
+        identifier: Token,
+        value: *Node,
+    };
+
+    pub const Invalid = struct {
+        base: Node = .{ .id = .invalid },
+        context: []Token,
+    };
+
+    /// Returns true if `node` is a literal whose token is a `.number`, or if it
+    /// is a binary, grouped, or NOT expression. Every other node kind yields false.
+    pub fn isNumberExpression(node: *const Node) bool {
+        switch (node.id) {
+            .literal => {
+                const literal = @fieldParentPtr(Node.Literal, "base", node);
+                return switch (literal.token.id) {
+                    .number => true,
+                    else => false,
+                };
+            },
+            .binary_expression, .grouped_expression, .not_expression => return true,
+            else => return false,
+        }
+    }
+
+    /// Returns true only if `node` is a literal whose token is a quoted ASCII
+    /// or quoted wide string.
+    pub fn isStringLiteral(node: *const Node) bool {
+        switch (node.id) {
+            .literal => {
+                const literal = @fieldParentPtr(Node.Literal, "base", node);
+                return switch (literal.token.id) {
+                    .quoted_ascii_string, .quoted_wide_string => true,
+                    else => false,
+                };
+            },
+            else => return false,
+        }
+    }
+
+    /// Returns the token that this node starts with, recursing into the
+    /// leading child node for kinds that do not store a leading token themselves.
+    /// Must not be called on the `.root` node (that prong is `unreachable`).
+    pub fn getFirstToken(node: *const Node) Token {
+        switch (node.id) {
+            .root => unreachable,
+            .resource_external => {
+                const casted = @fieldParentPtr(Node.ResourceExternal, "base", node);
+                return casted.id;
+            },
+            .resource_raw_data => {
+                const casted = @fieldParentPtr(Node.ResourceRawData, "base", node);
+                return casted.id;
+            },
+            .literal => {
+                const casted = @fieldParentPtr(Node.Literal, "base", node);
+                return casted.token;
+            },
+            .binary_expression => {
+                const casted = @fieldParentPtr(Node.BinaryExpression, "base", node);
+                return casted.left.getFirstToken();
+            },
+            .grouped_expression => {
+                const casted = @fieldParentPtr(Node.GroupedExpression, "base", node);
+                return casted.open_token;
+            },
+            .not_expression => {
+                const casted = @fieldParentPtr(Node.NotExpression, "base", node);
+                return casted.not_token;
+            },
+            .accelerators => {
+                const casted = @fieldParentPtr(Node.Accelerators, "base", node);
+                return casted.id;
+            },
+            .accelerator => {
+                const casted = @fieldParentPtr(Node.Accelerator, "base", node);
+                return casted.event.getFirstToken();
+            },
+            .dialog => {
+                const casted = @fieldParentPtr(Node.Dialog, "base", node);
+                return casted.id;
+            },
+            .control_statement => {
+                const casted = @fieldParentPtr(Node.ControlStatement, "base", node);
+                return casted.type;
+            },
+            .toolbar => {
+                const casted = @fieldParentPtr(Node.Toolbar, "base", node);
+                return casted.id;
+            },
+            .menu => {
+                const casted = @fieldParentPtr(Node.Menu, "base", node);
+                return casted.id;
+            },
+            // All three menu item node types have a `menuitem` field, so a
+            // single inline prong covers them.
+            inline .menu_item, .menu_item_separator, .menu_item_ex => |menu_item_type| {
+                const node_type = menu_item_type.Type();
+                const casted = @fieldParentPtr(node_type, "base", node);
+                return casted.menuitem;
+            },
+            // Both popup node types have a `popup` field.
+            inline .popup, .popup_ex => |popup_type| {
+                const node_type = popup_type.Type();
+                const casted = @fieldParentPtr(node_type, "base", node);
+                return casted.popup;
+            },
+            .version_info => {
+                const casted = @fieldParentPtr(Node.VersionInfo, "base", node);
+                return casted.id;
+            },
+            .version_statement => {
+                const casted = @fieldParentPtr(Node.VersionStatement, "base", node);
+                return casted.type;
+            },
+            .block => {
+                const casted = @fieldParentPtr(Node.Block, "base", node);
+                return casted.identifier;
+            },
+            .block_value => {
+                const casted = @fieldParentPtr(Node.BlockValue, "base", node);
+                return casted.identifier;
+            },
+            .block_value_value => {
+                const casted = @fieldParentPtr(Node.BlockValueValue, "base", node);
+                return casted.expression.getFirstToken();
+            },
+            .string_table => {
+                const casted = @fieldParentPtr(Node.StringTable, "base", node);
+                return casted.type;
+            },
+            .string_table_string => {
+                const casted = @fieldParentPtr(Node.StringTableString, "base", node);
+                return casted.id.getFirstToken();
+            },
+            .language_statement => {
+                const casted = @fieldParentPtr(Node.LanguageStatement, "base", node);
+                return casted.language_token;
+            },
+            .font_statement => {
+                const casted = @fieldParentPtr(Node.FontStatement, "base", node);
+                return casted.identifier;
+            },
+            .simple_statement => {
+                const casted = @fieldParentPtr(Node.SimpleStatement, "base", node);
+                return casted.identifier;
+            },
+            .invalid => {
+                const casted = @fieldParentPtr(Node.Invalid, "base", node);
+                // NOTE(review): indexes without a length check, so this assumes
+                // `context` is never empty -- confirm against where Invalid nodes are created.
+                return casted.context[0];
+            },
+        }
+    }
+
+    /// Returns the token that this node ends with. For kinds with optional
+    /// trailing parts, the optionals are checked from last to first and the
+    /// first one that is present determines the result.
+    /// Must not be called on the `.root` node (that prong is `unreachable`).
+    pub fn getLastToken(node: *const Node) Token {
+        switch (node.id) {
+            .root => unreachable,
+            .resource_external => {
+                const casted = @fieldParentPtr(Node.ResourceExternal, "base", node);
+                return casted.filename.getLastToken();
+            },
+            .resource_raw_data => {
+                const casted = @fieldParentPtr(Node.ResourceRawData, "base", node);
+                return casted.end_token;
+            },
+            .literal => {
+                const casted = @fieldParentPtr(Node.Literal, "base", node);
+                return casted.token;
+            },
+            .binary_expression => {
+                const casted = @fieldParentPtr(Node.BinaryExpression, "base", node);
+                return casted.right.getLastToken();
+            },
+            .grouped_expression => {
+                const casted = @fieldParentPtr(Node.GroupedExpression, "base", node);
+                return casted.close_token;
+            },
+            .not_expression => {
+                const casted = @fieldParentPtr(Node.NotExpression, "base", node);
+                return casted.number_token;
+            },
+            .accelerators => {
+                const casted = @fieldParentPtr(Node.Accelerators, "base", node);
+                return casted.end_token;
+            },
+            .accelerator => {
+                const casted = @fieldParentPtr(Node.Accelerator, "base", node);
+                if (casted.type_and_options.len > 0) return casted.type_and_options[casted.type_and_options.len - 1];
+                return casted.idvalue.getLastToken();
+            },
+            .dialog => {
+                const casted = @fieldParentPtr(Node.Dialog, "base", node);
+                return casted.end_token;
+            },
+            .control_statement => {
+                const casted = @fieldParentPtr(Node.ControlStatement, "base", node);
+                if (casted.extra_data_end) |token| return token;
+                if (casted.help_id) |help_id_node| return help_id_node.getLastToken();
+                if (casted.exstyle) |exstyle_node| return exstyle_node.getLastToken();
+                // For user-defined CONTROL controls, the style comes before 'x', but
+                // otherwise it comes after 'height' so it could be the last token if
+                // it's present.
+                if (!casted.isUserDefined()) {
+                    if (casted.style) |style_node| return style_node.getLastToken();
+                }
+                return casted.height.getLastToken();
+            },
+            .toolbar => {
+                const casted = @fieldParentPtr(Node.Toolbar, "base", node);
+                return casted.end_token;
+            },
+            .menu => {
+                const casted = @fieldParentPtr(Node.Menu, "base", node);
+                return casted.end_token;
+            },
+            .menu_item => {
+                const casted = @fieldParentPtr(Node.MenuItem, "base", node);
+                if (casted.option_list.len > 0) return casted.option_list[casted.option_list.len - 1];
+                return casted.result.getLastToken();
+            },
+            .menu_item_separator => {
+                const casted = @fieldParentPtr(Node.MenuItemSeparator, "base", node);
+                return casted.separator;
+            },
+            .menu_item_ex => {
+                const casted = @fieldParentPtr(Node.MenuItemEx, "base", node);
+                if (casted.state) |state_node| return state_node.getLastToken();
+                if (casted.type) |type_node| return type_node.getLastToken();
+                if (casted.id) |id_node| return id_node.getLastToken();
+                return casted.text;
+            },
+            // Both popup node types have an `end_token` field.
+            inline .popup, .popup_ex => |popup_type| {
+                const node_type = popup_type.Type();
+                const casted = @fieldParentPtr(node_type, "base", node);
+                return casted.end_token;
+            },
+            .version_info => {
+                const casted = @fieldParentPtr(Node.VersionInfo, "base", node);
+                return casted.end_token;
+            },
+            .version_statement => {
+                const casted = @fieldParentPtr(Node.VersionStatement, "base", node);
+                // `parts` is documented on VersionStatement as holding 1-4 entries,
+                // so `len - 1` relies on it being non-empty.
+                return casted.parts[casted.parts.len - 1].getLastToken();
+            },
+            .block => {
+                const casted = @fieldParentPtr(Node.Block, "base", node);
+                return casted.end_token;
+            },
+            .block_value => {
+                const casted = @fieldParentPtr(Node.BlockValue, "base", node);
+                if (casted.values.len > 0) return casted.values[casted.values.len - 1].getLastToken();
+                return casted.key;
+            },
+            .block_value_value => {
+                const casted = @fieldParentPtr(Node.BlockValueValue, "base", node);
+                return casted.expression.getLastToken();
+            },
+            .string_table => {
+                const casted = @fieldParentPtr(Node.StringTable, "base", node);
+                return casted.end_token;
+            },
+            .string_table_string => {
+                const casted = @fieldParentPtr(Node.StringTableString, "base", node);
+                return casted.string;
+            },
+            .language_statement => {
+                const casted = @fieldParentPtr(Node.LanguageStatement, "base", node);
+                return casted.sublanguage_id.getLastToken();
+            },
+            .font_statement => {
+                const casted = @fieldParentPtr(Node.FontStatement, "base", node);
+                if (casted.char_set) |char_set_node| return char_set_node.getLastToken();
+                if (casted.italic) |italic_node| return italic_node.getLastToken();
+                if (casted.weight) |weight_node| return weight_node.getLastToken();
+                return casted.typeface;
+            },
+            .simple_statement => {
+                const casted = @fieldParentPtr(Node.SimpleStatement, "base", node);
+                return casted.value.getLastToken();
+            },
+            .invalid => {
+                const casted = @fieldParentPtr(Node.Invalid, "base", node);
+                // NOTE(review): `len - 1` assumes `context` is never empty -- confirm
+                // against where Invalid nodes are created.
+                return casted.context[casted.context.len - 1];
+            },
+        }
+    }
+
+    /// Writes a textual dump of `node` and everything below it to `writer`.
+    /// Every node starts with `indent` spaces followed by the tag name of its
+    /// id; child nodes are dumped with a larger indent. Token text is taken
+    /// from `tree.source`. Any error returned by `writer` is propagated.
+    pub fn dump(
+        node: *const Node,
+        tree: *const Tree,
+        writer: anytype,
+        indent: usize,
+    ) @TypeOf(writer).Error!void {
+        try writer.writeByteNTimes(' ', indent);
+        try writer.writeAll(@tagName(node.id));
+        switch (node.id) {
+            .root => {
+                try writer.writeAll("\n");
+                const root = @fieldParentPtr(Node.Root, "base", node);
+                for (root.body) |body_node| {
+                    try body_node.dump(tree, writer, indent + 1);
+                }
+            },
+            .resource_external => {
+                const resource = @fieldParentPtr(Node.ResourceExternal, "base", node);
+                try writer.print(" {s} {s} [{d} common_resource_attributes]\n", .{ resource.id.slice(tree.source), resource.type.slice(tree.source), resource.common_resource_attributes.len });
+                try resource.filename.dump(tree, writer, indent + 1);
+            },
+            .resource_raw_data => {
+                const resource = @fieldParentPtr(Node.ResourceRawData, "base", node);
+                try writer.print(" {s} {s} [{d} common_resource_attributes] raw data: {}\n", .{ resource.id.slice(tree.source), resource.type.slice(tree.source), resource.common_resource_attributes.len, resource.raw_data.len });
+                for (resource.raw_data) |data_expression| {
+                    try data_expression.dump(tree, writer, indent + 1);
+                }
+            },
+            .literal => {
+                const literal = @fieldParentPtr(Node.Literal, "base", node);
+                try writer.writeAll(" ");
+                try writer.writeAll(literal.token.slice(tree.source));
+                try writer.writeAll("\n");
+            },
+            .binary_expression => {
+                const binary = @fieldParentPtr(Node.BinaryExpression, "base", node);
+                try writer.writeAll(" ");
+                try writer.writeAll(binary.operator.slice(tree.source));
+                try writer.writeAll("\n");
+                try binary.left.dump(tree, writer, indent + 1);
+                try binary.right.dump(tree, writer, indent + 1);
+            },
+            .grouped_expression => {
+                const grouped = @fieldParentPtr(Node.GroupedExpression, "base", node);
+                try writer.writeAll("\n");
+                try writer.writeByteNTimes(' ', indent);
+                try writer.writeAll(grouped.open_token.slice(tree.source));
+                try writer.writeAll("\n");
+                try grouped.expression.dump(tree, writer, indent + 1);
+                try writer.writeByteNTimes(' ', indent);
+                try writer.writeAll(grouped.close_token.slice(tree.source));
+                try writer.writeAll("\n");
+            },
+            .not_expression => {
+                const not = @fieldParentPtr(Node.NotExpression, "base", node);
+                try writer.writeAll(" ");
+                try writer.writeAll(not.not_token.slice(tree.source));
+                try writer.writeAll(" ");
+                try writer.writeAll(not.number_token.slice(tree.source));
+                try writer.writeAll("\n");
+            },
+            .accelerators => {
+                const accelerators = @fieldParentPtr(Node.Accelerators, "base", node);
+                try writer.print(" {s} {s} [{d} common_resource_attributes]\n", .{ accelerators.id.slice(tree.source), accelerators.type.slice(tree.source), accelerators.common_resource_attributes.len });
+                for (accelerators.optional_statements) |statement| {
+                    try statement.dump(tree, writer, indent + 1);
+                }
+                try writer.writeByteNTimes(' ', indent);
+                try writer.writeAll(accelerators.begin_token.slice(tree.source));
+                try writer.writeAll("\n");
+                for (accelerators.accelerators) |accelerator| {
+                    try accelerator.dump(tree, writer, indent + 1);
+                }
+                try writer.writeByteNTimes(' ', indent);
+                try writer.writeAll(accelerators.end_token.slice(tree.source));
+                try writer.writeAll("\n");
+            },
+            .accelerator => {
+                const accelerator = @fieldParentPtr(Node.Accelerator, "base", node);
+                // Options are written on the header line, separated by commas.
+                for (accelerator.type_and_options, 0..) |option, i| {
+                    if (i != 0) try writer.writeAll(",");
+                    try writer.writeByte(' ');
+                    try writer.writeAll(option.slice(tree.source));
+                }
+                try writer.writeAll("\n");
+                try accelerator.event.dump(tree, writer, indent + 1);
+                try accelerator.idvalue.dump(tree, writer, indent + 1);
+            },
+            .dialog => {
+                const dialog = @fieldParentPtr(Node.Dialog, "base", node);
+                try writer.print(" {s} {s} [{d} common_resource_attributes]\n", .{ dialog.id.slice(tree.source), dialog.type.slice(tree.source), dialog.common_resource_attributes.len });
+                inline for (.{ "x", "y", "width", "height" }) |arg| {
+                    try writer.writeByteNTimes(' ', indent + 1);
+                    try writer.writeAll(arg ++ ":\n");
+                    try @field(dialog, arg).dump(tree, writer, indent + 2);
+                }
+                if (dialog.help_id) |help_id| {
+                    try writer.writeByteNTimes(' ', indent + 1);
+                    try writer.writeAll("help_id:\n");
+                    try help_id.dump(tree, writer, indent + 2);
+                }
+                for (dialog.optional_statements) |statement| {
+                    try statement.dump(tree, writer, indent + 1);
+                }
+                try writer.writeByteNTimes(' ', indent);
+                try writer.writeAll(dialog.begin_token.slice(tree.source));
+                try writer.writeAll("\n");
+                for (dialog.controls) |control| {
+                    try control.dump(tree, writer, indent + 1);
+                }
+                try writer.writeByteNTimes(' ', indent);
+                try writer.writeAll(dialog.end_token.slice(tree.source));
+                try writer.writeAll("\n");
+            },
+            .control_statement => {
+                const control = @fieldParentPtr(Node.ControlStatement, "base", node);
+                try writer.print(" {s}", .{control.type.slice(tree.source)});
+                if (control.text) |text| {
+                    try writer.print(" text: {s}", .{text.slice(tree.source)});
+                }
+                try writer.writeByte('\n');
+                if (control.class) |class| {
+                    try writer.writeByteNTimes(' ', indent + 1);
+                    try writer.writeAll("class:\n");
+                    try class.dump(tree, writer, indent + 2);
+                }
+                inline for (.{ "id", "x", "y", "width", "height" }) |arg| {
+                    try writer.writeByteNTimes(' ', indent + 1);
+                    try writer.writeAll(arg ++ ":\n");
+                    try @field(control, arg).dump(tree, writer, indent + 2);
+                }
+                inline for (.{ "style", "exstyle", "help_id" }) |arg| {
+                    if (@field(control, arg)) |val_node| {
+                        try writer.writeByteNTimes(' ', indent + 1);
+                        try writer.writeAll(arg ++ ":\n");
+                        try val_node.dump(tree, writer, indent + 2);
+                    }
+                }
+                // NOTE(review): `extra_data_end` is unwrapped below whenever
+                // `extra_data_begin` is set, so the two are presumably always
+                // set together -- confirm against the parser.
+                if (control.extra_data_begin != null) {
+                    try writer.writeByteNTimes(' ', indent);
+                    try writer.writeAll(control.extra_data_begin.?.slice(tree.source));
+                    try writer.writeAll("\n");
+                    for (control.extra_data) |data_node| {
+                        try data_node.dump(tree, writer, indent + 1);
+                    }
+                    try writer.writeByteNTimes(' ', indent);
+                    try writer.writeAll(control.extra_data_end.?.slice(tree.source));
+                    try writer.writeAll("\n");
+                }
+            },
+            .toolbar => {
+                const toolbar = @fieldParentPtr(Node.Toolbar, "base", node);
+                try writer.print(" {s} {s} [{d} common_resource_attributes]\n", .{ toolbar.id.slice(tree.source), toolbar.type.slice(tree.source), toolbar.common_resource_attributes.len });
+                inline for (.{ "button_width", "button_height" }) |arg| {
+                    try writer.writeByteNTimes(' ', indent + 1);
+                    try writer.writeAll(arg ++ ":\n");
+                    try @field(toolbar, arg).dump(tree, writer, indent + 2);
+                }
+                try writer.writeByteNTimes(' ', indent);
+                try writer.writeAll(toolbar.begin_token.slice(tree.source));
+                try writer.writeAll("\n");
+                for (toolbar.buttons) |button_or_sep| {
+                    try button_or_sep.dump(tree, writer, indent + 1);
+                }
+                try writer.writeByteNTimes(' ', indent);
+                try writer.writeAll(toolbar.end_token.slice(tree.source));
+                try writer.writeAll("\n");
+            },
+            .menu => {
+                const menu = @fieldParentPtr(Node.Menu, "base", node);
+                try writer.print(" {s} {s} [{d} common_resource_attributes]\n", .{ menu.id.slice(tree.source), menu.type.slice(tree.source), menu.common_resource_attributes.len });
+                for (menu.optional_statements) |statement| {
+                    try statement.dump(tree, writer, indent + 1);
+                }
+                if (menu.help_id) |help_id| {
+                    try writer.writeByteNTimes(' ', indent + 1);
+                    try writer.writeAll("help_id:\n");
+                    try help_id.dump(tree, writer, indent + 2);
+                }
+                try writer.writeByteNTimes(' ', indent);
+                try writer.writeAll(menu.begin_token.slice(tree.source));
+                try writer.writeAll("\n");
+                for (menu.items) |item| {
+                    try item.dump(tree, writer, indent + 1);
+                }
+                try writer.writeByteNTimes(' ', indent);
+                try writer.writeAll(menu.end_token.slice(tree.source));
+                try writer.writeAll("\n");
+            },
+            .menu_item => {
+                const menu_item = @fieldParentPtr(Node.MenuItem, "base", node);
+                try writer.print(" {s} {s} [{d} options]\n", .{ menu_item.menuitem.slice(tree.source), menu_item.text.slice(tree.source), menu_item.option_list.len });
+                try menu_item.result.dump(tree, writer, indent + 1);
+            },
+            .menu_item_separator => {
+                const menu_item = @fieldParentPtr(Node.MenuItemSeparator, "base", node);
+                try writer.print(" {s} {s}\n", .{ menu_item.menuitem.slice(tree.source), menu_item.separator.slice(tree.source) });
+            },
+            .menu_item_ex => {
+                const menu_item = @fieldParentPtr(Node.MenuItemEx, "base", node);
+                try writer.print(" {s} {s}\n", .{ menu_item.menuitem.slice(tree.source), menu_item.text.slice(tree.source) });
+                inline for (.{ "id", "type", "state" }) |arg| {
+                    if (@field(menu_item, arg)) |val_node| {
+                        try writer.writeByteNTimes(' ', indent + 1);
+                        try writer.writeAll(arg ++ ":\n");
+                        try val_node.dump(tree, writer, indent + 2);
+                    }
+                }
+            },
+            .popup => {
+                const popup = @fieldParentPtr(Node.Popup, "base", node);
+                try writer.print(" {s} {s} [{d} options]\n", .{ popup.popup.slice(tree.source), popup.text.slice(tree.source), popup.option_list.len });
+                try writer.writeByteNTimes(' ', indent);
+                try writer.writeAll(popup.begin_token.slice(tree.source));
+                try writer.writeAll("\n");
+                for (popup.items) |item| {
+                    try item.dump(tree, writer, indent + 1);
+                }
+                try writer.writeByteNTimes(' ', indent);
+                try writer.writeAll(popup.end_token.slice(tree.source));
+                try writer.writeAll("\n");
+            },
+            .popup_ex => {
+                const popup = @fieldParentPtr(Node.PopupEx, "base", node);
+                try writer.print(" {s} {s}\n", .{ popup.popup.slice(tree.source), popup.text.slice(tree.source) });
+                inline for (.{ "id", "type", "state", "help_id" }) |arg| {
+                    if (@field(popup, arg)) |val_node| {
+                        try writer.writeByteNTimes(' ', indent + 1);
+                        try writer.writeAll(arg ++ ":\n");
+                        try val_node.dump(tree, writer, indent + 2);
+                    }
+                }
+                try writer.writeByteNTimes(' ', indent);
+                try writer.writeAll(popup.begin_token.slice(tree.source));
+                try writer.writeAll("\n");
+                for (popup.items) |item| {
+                    try item.dump(tree, writer, indent + 1);
+                }
+                try writer.writeByteNTimes(' ', indent);
+                try writer.writeAll(popup.end_token.slice(tree.source));
+                try writer.writeAll("\n");
+            },
+            .version_info => {
+                const version_info = @fieldParentPtr(Node.VersionInfo, "base", node);
+                try writer.print(" {s} {s} [{d} common_resource_attributes]\n", .{ version_info.id.slice(tree.source), version_info.versioninfo.slice(tree.source), version_info.common_resource_attributes.len });
+                for (version_info.fixed_info) |fixed_info| {
+                    try fixed_info.dump(tree, writer, indent + 1);
+                }
+                try writer.writeByteNTimes(' ', indent);
+                try writer.writeAll(version_info.begin_token.slice(tree.source));
+                try writer.writeAll("\n");
+                for (version_info.block_statements) |block| {
+                    try block.dump(tree, writer, indent + 1);
+                }
+                try writer.writeByteNTimes(' ', indent);
+                try writer.writeAll(version_info.end_token.slice(tree.source));
+                try writer.writeAll("\n");
+            },
+            .version_statement => {
+                const version_statement = @fieldParentPtr(Node.VersionStatement, "base", node);
+                try writer.print(" {s}\n", .{version_statement.type.slice(tree.source)});
+                for (version_statement.parts) |part| {
+                    try part.dump(tree, writer, indent + 1);
+                }
+            },
+            .block => {
+                const block = @fieldParentPtr(Node.Block, "base", node);
+                try writer.print(" {s} {s}\n", .{ block.identifier.slice(tree.source), block.key.slice(tree.source) });
+                for (block.values) |value| {
+                    try value.dump(tree, writer, indent + 1);
+                }
+                try writer.writeByteNTimes(' ', indent);
+                try writer.writeAll(block.begin_token.slice(tree.source));
+                try writer.writeAll("\n");
+                for (block.children) |child| {
+                    try child.dump(tree, writer, indent + 1);
+                }
+                try writer.writeByteNTimes(' ', indent);
+                try writer.writeAll(block.end_token.slice(tree.source));
+                try writer.writeAll("\n");
+            },
+            .block_value => {
+                const block_value = @fieldParentPtr(Node.BlockValue, "base", node);
+                try writer.print(" {s} {s}\n", .{ block_value.identifier.slice(tree.source), block_value.key.slice(tree.source) });
+                for (block_value.values) |value| {
+                    try value.dump(tree, writer, indent + 1);
+                }
+            },
+            .block_value_value => {
+                const block_value = @fieldParentPtr(Node.BlockValueValue, "base", node);
+                if (block_value.trailing_comma) {
+                    try writer.writeAll(" ,");
+                }
+                try writer.writeAll("\n");
+                try block_value.expression.dump(tree, writer, indent + 1);
+            },
+            .string_table => {
+                const string_table = @fieldParentPtr(Node.StringTable, "base", node);
+                try writer.print(" {s} [{d} common_resource_attributes]\n", .{ string_table.type.slice(tree.source), string_table.common_resource_attributes.len });
+                for (string_table.optional_statements) |statement| {
+                    try statement.dump(tree, writer, indent + 1);
+                }
+                try writer.writeByteNTimes(' ', indent);
+                try writer.writeAll(string_table.begin_token.slice(tree.source));
+                try writer.writeAll("\n");
+                for (string_table.strings) |string| {
+                    try string.dump(tree, writer, indent + 1);
+                }
+                try writer.writeByteNTimes(' ', indent);
+                try writer.writeAll(string_table.end_token.slice(tree.source));
+                try writer.writeAll("\n");
+            },
+            .string_table_string => {
+                try writer.writeAll("\n");
+                const string = @fieldParentPtr(Node.StringTableString, "base", node);
+                try string.id.dump(tree, writer, indent + 1);
+                try writer.writeByteNTimes(' ', indent + 1);
+                try writer.print("{s}\n", .{string.string.slice(tree.source)});
+            },
+            .language_statement => {
+                const language = @fieldParentPtr(Node.LanguageStatement, "base", node);
+                try writer.print(" {s}\n", .{language.language_token.slice(tree.source)});
+                try language.primary_language_id.dump(tree, writer, indent + 1);
+                try language.sublanguage_id.dump(tree, writer, indent + 1);
+            },
+            .font_statement => {
+                const font = @fieldParentPtr(Node.FontStatement, "base", node);
+                try writer.print(" {s} typeface: {s}\n", .{ font.identifier.slice(tree.source), font.typeface.slice(tree.source) });
+                try writer.writeByteNTimes(' ', indent + 1);
+                try writer.writeAll("point_size:\n");
+                try font.point_size.dump(tree, writer, indent + 2);
+                inline for (.{ "weight", "italic", "char_set" }) |arg| {
+                    if (@field(font, arg)) |arg_node| {
+                        try writer.writeByteNTimes(' ', indent + 1);
+                        try writer.writeAll(arg ++ ":\n");
+                        try arg_node.dump(tree, writer, indent + 2);
+                    }
+                }
+            },
+            .simple_statement => {
+                const statement = @fieldParentPtr(Node.SimpleStatement, "base", node);
+                try writer.print(" {s}\n", .{statement.identifier.slice(tree.source)});
+                try statement.value.dump(tree, writer, indent + 1);
+            },
+            .invalid => {
+                const invalid = @fieldParentPtr(Node.Invalid, "base", node);
+                try writer.print(" context.len: {}\n", .{invalid.context.len});
+                for (invalid.context) |context_token| {
+                    try writer.writeByteNTimes(' ', indent + 1);
+                    try writer.print("{s}:{s}", .{ @tagName(context_token.id), context_token.slice(tree.source) });
+                    try writer.writeByte('\n');
+                }
+            },
+        }
+    }
+};
diff --git a/src/resinator/bmp.zig b/src/resinator/bmp.zig
new file mode 100644
index
000000000000..f6fdb9f28041 --- /dev/null +++ b/src/resinator/bmp.zig @@ -0,0 +1,268 @@ +//! https://learn.microsoft.com/en-us/windows/win32/api/wingdi/ns-wingdi-bitmapinfoheader +//! https://learn.microsoft.com/en-us/previous-versions//dd183376(v=vs.85) +//! https://learn.microsoft.com/en-us/windows/win32/api/wingdi/ns-wingdi-bitmapinfo +//! https://learn.microsoft.com/en-us/windows/win32/api/wingdi/ns-wingdi-bitmapcoreheader +//! https://archive.org/details/mac_Graphics_File_Formats_Second_Edition_1996/page/n607/mode/2up +//! https://learn.microsoft.com/en-us/windows/win32/api/wingdi/ns-wingdi-bitmapv5header +//! +//! Notes: +//! - The Microsoft documentation is incredibly unclear about the color table when the +//! bit depth is >= 16. +//! + For bit depth 24 it says "the bmiColors member of BITMAPINFO is NULL" but also +//! says "the bmiColors color table is used for optimizing colors used on palette-based +//! devices, and must contain the number of entries specified by the bV5ClrUsed member" +//! + For bit depth 16 and 32, it seems to imply that if the compression is BI_BITFIELDS +//! or BI_ALPHABITFIELDS, then the color table *only* consists of the bit masks, but +//! doesn't really say this outright and the Wikipedia article seems to disagree +//! For the purposes of this implementation, color tables can always be present for any +//! bit depth and compression, and the color table follows the header + any optional +//! bit mask fields dictated by the specified compression. 
+
+const std = @import("std");
+const BitmapHeader = @import("ico.zig").BitmapHeader;
+
+/// The "BM" magic bytes read as a native-endian u16. `read` decodes the first
+/// two bytes of the file the same way, so the comparison is a plain byte
+/// comparison regardless of host endianness.
+pub const windows_format_id = std.mem.readIntNative(u16, "BM");
+/// Number of bytes `read` consumes as the BMP file header, before the DIB header.
+pub const file_header_len = 14;
+
+pub const ReadError = error{
+    UnexpectedEOF,
+    InvalidFileHeader,
+    ImpossiblePixelDataOffset,
+    UnknownBitmapVersion,
+    InvalidBitsPerPixel,
+    TooManyColorsInPalette,
+    MissingBitfieldMasks,
+};
+
+/// The subset of BMP header information produced by `read`.
+pub const BitmapInfo = struct {
+    /// Size of the DIB header as stored in its first four bytes.
+    dib_header_size: u32,
+    /// Contains the interpreted number of colors in the palette (e.g.
+    /// if the field's value is zero and the bit depth is <= 8, this
+    /// will contain the maximum number of colors for the bit depth
+    /// rather than the field's value directly).
+    colors_in_palette: u32,
+    /// `read` sets this to 4 for BITMAPINFOHEADER-style headers and to 3 for
+    /// BITMAPCOREHEADER.
+    bytes_per_color_palette_element: u8,
+    /// Offset of the pixel data, taken from bytes 10..14 of the file header.
+    pixel_data_offset: u32,
+    compression: Compression,
+
+    /// Returns `colors_in_palette * bytes_per_color_palette_element`, computed
+    /// in u64 so the multiplication cannot overflow.
+    pub fn getExpectedPaletteByteLen(self: *const BitmapInfo) u64 {
+        return @as(u64, self.colors_in_palette) * self.bytes_per_color_palette_element;
+    }
+
+    /// Returns the number of bytes between the headers and the pixel data that
+    /// are left once the bitfield masks are subtracted.
+    /// The subtraction relies on the masks fitting in that gap; `read` rejects
+    /// inputs where they do not with error.MissingBitfieldMasks.
+    pub fn getActualPaletteByteLen(self: *const BitmapInfo) u64 {
+        return self.getByteLenBetweenHeadersAndPixels() - self.getBitmasksByteLen();
+    }
+
+    /// Returns `pixel_data_offset - dib_header_size - file_header_len`.
+    /// The subtraction relies on the offset not pointing inside the headers;
+    /// `read` rejects such inputs with error.ImpossiblePixelDataOffset.
+    pub fn getByteLenBetweenHeadersAndPixels(self: *const BitmapInfo) u64 {
+        return @as(u64, self.pixel_data_offset) - self.dib_header_size - file_header_len;
+    }
+
+    /// Returns 12 for BI_BITFIELDS, 16 for BI_ALPHABITFIELDS, and 0 for every
+    /// other compression.
+    pub fn getBitmasksByteLen(self: *const BitmapInfo) u8 {
+        return switch (self.compression) {
+            .BI_BITFIELDS => 12,
+            .BI_ALPHABITFIELDS => 16,
+            else => 0,
+        };
+    }
+
+    /// Returns how many bytes the actual palette is short of the expected
+    /// palette length, or 0 if it is at least as long as expected.
+    pub fn getMissingPaletteByteLen(self: *const BitmapInfo) u64 {
+        if (self.getActualPaletteByteLen() >= self.getExpectedPaletteByteLen()) return 0;
+        return self.getExpectedPaletteByteLen() - self.getActualPaletteByteLen();
+    }
+
+    /// Returns the full byte len of the DIB header + optional bitmasks + color palette
+    pub fn getExpectedByteLenBeforePixelData(self: *const BitmapInfo) u64 {
+        return @as(u64, self.dib_header_size) + self.getBitmasksByteLen() + self.getExpectedPaletteByteLen();
+    }
+
+    /// Returns the full expected byte len
+    pub fn getExpectedByteLen(self: *const BitmapInfo, file_size: u64) u64 {
+        return self.getExpectedByteLenBeforePixelData() + self.getPixelDataLen(file_size);
+    }
+
+    /// Returns `file_size - pixel_data_offset`.
+    /// NOTE(review): this underflows if `file_size` is smaller than the offset;
+    /// presumably callers pass the same size they gave `read` as `max_size` -- confirm.
+    pub fn getPixelDataLen(self: *const BitmapInfo, file_size: u64) u64 {
+        return file_size - self.pixel_data_offset;
+    }
+};
+
+/// Reads the BMP file header (`file_header_len` bytes) and the DIB header that
+/// follows it from `reader`, and returns the information needed to size the
+/// color palette and locate the pixel data. `max_size` is the largest pixel
+/// data offset that is accepted.
+///
+/// Errors:
+/// - error.UnexpectedEOF: `reader` ended before both headers were read
+/// - error.InvalidFileHeader: the data does not start with "BM"
+/// - error.ImpossiblePixelDataOffset: the pixel data offset is greater than
+///   `max_size` or points inside the headers
+/// - error.UnknownBitmapVersion: the DIB header size maps to the `.unknown` version
+/// - error.InvalidBitsPerPixel, error.TooManyColorsInPalette: the bit depth or
+///   color count in the DIB header is not usable
+/// - error.MissingBitfieldMasks: the compression requires bitfield masks but
+///   there is not enough room for them before the pixel data
+pub fn read(reader: anytype, max_size: u64) ReadError!BitmapInfo {
+    var bitmap_info: BitmapInfo = undefined;
+    const file_header = reader.readBytesNoEof(file_header_len) catch return error.UnexpectedEOF;
+
+    // Both sides are decoded with native endianness, so this only compares bytes.
+    const id = std.mem.readIntNative(u16, file_header[0..2]);
+    if (id != windows_format_id) return error.InvalidFileHeader;
+
+    // BMP stores its integer fields little-endian, so the offset must be decoded
+    // as little-endian regardless of the host byte order (the DIB header size
+    // below is already read that way).
+    bitmap_info.pixel_data_offset = std.mem.readIntLittle(u32, file_header[10..14]);
+    if (bitmap_info.pixel_data_offset > max_size) return error.ImpossiblePixelDataOffset;
+
+    bitmap_info.dib_header_size = reader.readIntLittle(u32) catch return error.UnexpectedEOF;
+    // Widen before adding: `dib_header_size` comes straight from the file, and a
+    // value close to maxInt(u32) would overflow a u32 addition.
+    if (bitmap_info.pixel_data_offset < @as(u64, file_header_len) + bitmap_info.dib_header_size) return error.ImpossiblePixelDataOffset;
+    const dib_version = BitmapHeader.Version.get(bitmap_info.dib_header_size);
+    switch (dib_version) {
+        .@"nt3.1", .@"nt4.0", .@"nt5.0" => {
+            // The size field was already consumed above, so it is written back
+            // into the buffer and only the remaining fields are read.
+            var dib_header_buf: [@sizeOf(BITMAPINFOHEADER)]u8 align(@alignOf(BITMAPINFOHEADER)) = undefined;
+            std.mem.writeIntLittle(u32, dib_header_buf[0..4], bitmap_info.dib_header_size);
+            reader.readNoEof(dib_header_buf[4..]) catch return error.UnexpectedEOF;
+            const dib_header: *BITMAPINFOHEADER = @ptrCast(&dib_header_buf);
+            structFieldsLittleToNative(BITMAPINFOHEADER, dib_header);
+
+            bitmap_info.colors_in_palette = try dib_header.numColorsInTable();
+            bitmap_info.bytes_per_color_palette_element = 4;
+            bitmap_info.compression = @enumFromInt(dib_header.biCompression);
+
+            if (bitmap_info.getByteLenBetweenHeadersAndPixels() < bitmap_info.getBitmasksByteLen()) {
+                return error.MissingBitfieldMasks;
+            }
+        },
+        .@"win2.0" => {
+            var dib_header_buf: [@sizeOf(BITMAPCOREHEADER)]u8 align(@alignOf(BITMAPCOREHEADER)) = undefined;
+            std.mem.writeIntLittle(u32, dib_header_buf[0..4], bitmap_info.dib_header_size);
+            reader.readNoEof(dib_header_buf[4..]) catch return error.UnexpectedEOF;
+            const dib_header: *BITMAPCOREHEADER = @ptrCast(&dib_header_buf);
+            structFieldsLittleToNative(BITMAPCOREHEADER, dib_header);
+
+            // > The size of the color palette is calculated from the BitsPerPixel value.
+            // > The color palette has 2, 16, 256, or 0 entries for a BitsPerPixel of
+            // > 1, 4, 8, and 24, respectively.
+            bitmap_info.colors_in_palette = switch (dib_header.bcBitCount) {
+                inline 1, 4, 8 => |bit_count| 1 << bit_count,
+                24 => 0,
+                else => return error.InvalidBitsPerPixel,
+            };
+            bitmap_info.bytes_per_color_palette_element = 3;
+
+            bitmap_info.compression = .BI_RGB;
+        },
+        .unknown => return error.UnknownBitmapVersion,
+    }
+
+    return bitmap_info;
+}
+
+/// https://learn.microsoft.com/en-us/windows/win32/api/wingdi/ns-wingdi-bitmapcoreheader
+pub const BITMAPCOREHEADER = extern struct {
+    bcSize: u32,
+    bcWidth: u16,
+    bcHeight: u16,
+    bcPlanes: u16,
+    bcBitCount: u16,
+};
+
+/// https://learn.microsoft.com/en-us/windows/win32/api/wingdi/ns-wingdi-bitmapinfoheader
+pub const BITMAPINFOHEADER = extern struct {
+    bcSize: u32,
+    biWidth: i32,
+    biHeight: i32,
+    biPlanes: u16,
+    biBitCount: u16,
+    biCompression: u32,
+    biSizeImage: u32,
+    biXPelsPerMeter: i32,
+    biYPelsPerMeter: i32,
+    biClrUsed: u32,
+    biClrImportant: u32,
+
+    /// Returns error.TooManyColorsInPalette if the number of colors specified
+    /// exceeds the number of possible colors referenced in the pixel data (i.e.
+    /// if 1 bit is used per pixel, then the color table can't have more than 2 colors
+    /// since any more couldn't possibly be indexed in the pixel data)
+    ///
+    /// Returns error.InvalidBitsPerPixel if the bit depth is not 1, 4, 8, 16, 24, or 32.
+ pub fn numColorsInTable(self: BITMAPINFOHEADER) !u32 { + switch (self.biBitCount) { + inline 1, 4, 8 => |bit_count| switch (self.biClrUsed) { + // > If biClrUsed is zero, the array contains the maximum number of + // > colors for the given bitdepth; that is, 2^biBitCount colors + 0 => return 1 << bit_count, + // > If biClrUsed is nonzero and the biBitCount member is less than 16, + // > the biClrUsed member specifies the actual number of colors the + // > graphics engine or device driver accesses. + else => { + const max_colors = 1 << bit_count; + if (self.biClrUsed > max_colors) { + return error.TooManyColorsInPalette; + } + return self.biClrUsed; + }, + }, + // > If biBitCount is 16 or greater, the biClrUsed member specifies + // > the size of the color table used to optimize performance of the + // > system color palettes. + // + // Note: Bit depths >= 16 only use the color table 'for optimizing colors + // used on palette-based devices', but it still makes sense to limit their + // colors since the pixel data is still limited to this number of colors + // (i.e. even though the color table is not indexed by the pixel data, + // the color table having more colors than the pixel data can represent + // would never make sense and indicates a malformed bitmap). 
+ inline 16, 24, 32 => |bit_count| { + const max_colors = 1 << bit_count; + if (self.biClrUsed > max_colors) { + return error.TooManyColorsInPalette; + } + return self.biClrUsed; + }, + else => return error.InvalidBitsPerPixel, + } + } +}; + +pub const Compression = enum(u32) { + BI_RGB = 0, + BI_RLE8 = 1, + BI_RLE4 = 2, + BI_BITFIELDS = 3, + BI_JPEG = 4, + BI_PNG = 5, + BI_ALPHABITFIELDS = 6, + BI_CMYK = 11, + BI_CMYKRLE8 = 12, + BI_CMYKRLE4 = 13, + _, +}; + +fn structFieldsLittleToNative(comptime T: type, x: *T) void { + inline for (@typeInfo(T).Struct.fields) |field| { + @field(x, field.name) = std.mem.littleToNative(field.type, @field(x, field.name)); + } +} + +test "read" { + var bmp_data = "BM<\x00\x00\x00\x00\x00\x00\x006\x00\x00\x00(\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x01\x00\x10\x00\x00\x00\x00\x00\x06\x00\x00\x00\x12\x0b\x00\x00\x12\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x7f\x00\x00\x00\x00".*; + var fbs = std.io.fixedBufferStream(&bmp_data); + + { + const bitmap = try read(fbs.reader(), bmp_data.len); + try std.testing.expectEqual(@as(u32, BitmapHeader.Version.@"nt3.1".len()), bitmap.dib_header_size); + } + + { + fbs.reset(); + bmp_data[file_header_len] = 11; + try std.testing.expectError(error.UnknownBitmapVersion, read(fbs.reader(), bmp_data.len)); + + // restore + bmp_data[file_header_len] = BitmapHeader.Version.@"nt3.1".len(); + } + + { + fbs.reset(); + bmp_data[0] = 'b'; + try std.testing.expectError(error.InvalidFileHeader, read(fbs.reader(), bmp_data.len)); + + // restore + bmp_data[0] = 'B'; + } + + { + const cutoff_len = file_header_len + BitmapHeader.Version.@"nt3.1".len() - 1; + var dib_cutoff_fbs = std.io.fixedBufferStream(bmp_data[0..cutoff_len]); + try std.testing.expectError(error.UnexpectedEOF, read(dib_cutoff_fbs.reader(), bmp_data.len)); + } + + { + const cutoff_len = file_header_len - 1; + var bmp_cutoff_fbs = std.io.fixedBufferStream(bmp_data[0..cutoff_len]); + try std.testing.expectError(error.UnexpectedEOF, 
read(bmp_cutoff_fbs.reader(), bmp_data.len)); + } +} diff --git a/src/resinator/cli.zig b/src/resinator/cli.zig new file mode 100644 index 000000000000..2e244b878e48 --- /dev/null +++ b/src/resinator/cli.zig @@ -0,0 +1,1433 @@ +const std = @import("std"); +const CodePage = @import("code_pages.zig").CodePage; +const lang = @import("lang.zig"); +const res = @import("res.zig"); +const Allocator = std.mem.Allocator; +const lex = @import("lex.zig"); + +/// This is what /SL 100 will set the maximum string literal length to +pub const max_string_literal_length_100_percent = 8192; + +pub const usage_string = + \\Usage: resinator [options] [--] [] + \\ + \\The sequence -- can be used to signify when to stop parsing options. + \\This is necessary when the input path begins with a forward slash. + \\ + \\Supported Win32 RC Options: + \\ /?, /h Print this help and exit. + \\ /v Verbose (print progress messages). + \\ /d [=] Define a symbol (during preprocessing). + \\ /u Undefine a symbol (during preprocessing). + \\ /fo Specify output file path. + \\ /l Set default language using hexadecimal id (ex: 409). + \\ /ln Set default language using language name (ex: en-us). + \\ /i Add an include path. + \\ /x Ignore INCLUDE environment variable. + \\ /c Set default code page (ex: 65001). + \\ /w Warn on invalid code page in .rc (instead of error). + \\ /y Suppress warnings for duplicate control IDs. + \\ /n Null-terminate all strings in string tables. + \\ /sl Specify string literal length limit in percentage (1-100) + \\ where 100 corresponds to a limit of 8192. If the /sl + \\ option is not specified, the default limit is 4097. + \\ /p Only run the preprocessor and output a .rcpp file. + \\ + \\No-op Win32 RC Options: + \\ /nologo, /a, /r Options that are recognized but do nothing. + \\ + \\Unsupported Win32 RC Options: + \\ /fm, /q, /g, /gn, /g1, /g2 Unsupported MUI-related options. + \\ /?c, /hc, /t, /tp:, Unsupported LCX/LCE-related options. 
+ \\ /tn, /tm, /tc, /tw, /te, + \\ /ti, /ta + \\ /z Unsupported font-substitution-related option. + \\ /s Unsupported HWB-related option. + \\ + \\Custom Options (resinator-specific): + \\ /:no-preprocess Do not run the preprocessor. + \\ /:debug Output the preprocessed .rc file and the parsed AST. + \\ /:auto-includes Set the automatic include path detection behavior. + \\ any (default) Use MSVC if available, fall back to MinGW + \\ msvc Use MSVC include paths (must be present on the system) + \\ gnu Use MinGW include paths (requires Zig as the preprocessor) + \\ none Do not use any autodetected include paths + \\ + \\Note: For compatibility reasons, all custom options start with : + \\ +; + +pub const Diagnostics = struct { + errors: std.ArrayListUnmanaged(ErrorDetails) = .{}, + allocator: Allocator, + + pub const ErrorDetails = struct { + arg_index: usize, + arg_span: ArgSpan = .{}, + msg: std.ArrayListUnmanaged(u8) = .{}, + type: Type = .err, + print_args: bool = true, + + pub const Type = enum { err, warning, note }; + pub const ArgSpan = struct { + point_at_next_arg: bool = false, + name_offset: usize = 0, + prefix_len: usize = 0, + value_offset: usize = 0, + name_len: usize = 0, + }; + }; + + pub fn init(allocator: Allocator) Diagnostics { + return .{ + .allocator = allocator, + }; + } + + pub fn deinit(self: *Diagnostics) void { + for (self.errors.items) |*details| { + details.msg.deinit(self.allocator); + } + self.errors.deinit(self.allocator); + } + + pub fn append(self: *Diagnostics, error_details: ErrorDetails) !void { + try self.errors.append(self.allocator, error_details); + } + + pub fn renderToStdErr(self: *Diagnostics, args: []const []const u8, config: std.io.tty.Config) void { + std.debug.getStderrMutex().lock(); + defer std.debug.getStderrMutex().unlock(); + const stderr = std.io.getStdErr().writer(); + self.renderToWriter(args, stderr, config) catch return; + } + + pub fn renderToWriter(self: *Diagnostics, args: []const []const u8, writer: 
anytype, config: std.io.tty.Config) !void { + for (self.errors.items) |err_details| { + try renderErrorMessage(writer, config, err_details, args); + } + } + + pub fn hasError(self: *const Diagnostics) bool { + for (self.errors.items) |err| { + if (err.type == .err) return true; + } + return false; + } +}; + +pub const Options = struct { + allocator: Allocator, + input_filename: []const u8 = &[_]u8{}, + output_filename: []const u8 = &[_]u8{}, + extra_include_paths: std.ArrayListUnmanaged([]const u8) = .{}, + ignore_include_env_var: bool = false, + preprocess: Preprocess = .yes, + default_language_id: ?u16 = null, + default_code_page: ?CodePage = null, + verbose: bool = false, + symbols: std.StringArrayHashMapUnmanaged(SymbolValue) = .{}, + null_terminate_string_table_strings: bool = false, + max_string_literal_codepoints: u15 = lex.default_max_string_literal_codepoints, + silent_duplicate_control_ids: bool = false, + warn_instead_of_error_on_invalid_code_page: bool = false, + debug: bool = false, + print_help_and_exit: bool = false, + auto_includes: AutoIncludes = .any, + + pub const AutoIncludes = enum { any, msvc, gnu, none }; + pub const Preprocess = enum { no, yes, only }; + pub const SymbolAction = enum { define, undefine }; + pub const SymbolValue = union(SymbolAction) { + define: []const u8, + undefine: void, + + pub fn deinit(self: SymbolValue, allocator: Allocator) void { + switch (self) { + .define => |value| allocator.free(value), + .undefine => {}, + } + } + }; + + /// Does not check that identifier contains only valid characters + pub fn define(self: *Options, identifier: []const u8, value: []const u8) !void { + if (self.symbols.getPtr(identifier)) |val_ptr| { + // If the symbol is undefined, then that always takes precedence so + // we shouldn't change anything. + if (val_ptr.* == .undefine) return; + // Otherwise, the new value takes precedence. 
+ var duped_value = try self.allocator.dupe(u8, value); + errdefer self.allocator.free(duped_value); + val_ptr.deinit(self.allocator); + val_ptr.* = .{ .define = duped_value }; + return; + } + var duped_key = try self.allocator.dupe(u8, identifier); + errdefer self.allocator.free(duped_key); + var duped_value = try self.allocator.dupe(u8, value); + errdefer self.allocator.free(duped_value); + try self.symbols.put(self.allocator, duped_key, .{ .define = duped_value }); + } + + /// Does not check that identifier contains only valid characters + pub fn undefine(self: *Options, identifier: []const u8) !void { + if (self.symbols.getPtr(identifier)) |action| { + action.deinit(self.allocator); + action.* = .{ .undefine = {} }; + return; + } + var duped_key = try self.allocator.dupe(u8, identifier); + errdefer self.allocator.free(duped_key); + try self.symbols.put(self.allocator, duped_key, .{ .undefine = {} }); + } + + /// If the current input filename both: + /// - does not have an extension, and + /// - does not exist in the cwd + /// then this function will append `.rc` to the input filename + /// + /// Note: This behavior is different from the Win32 compiler. + /// It always appends .RC if the filename does not have + /// a `.` in it and it does not even try the verbatim name + /// in that scenario. + /// + /// The approach taken here is meant to give us a 'best of both + /// worlds' situation where we'll be compatible with most use-cases + /// of the .rc extension being omitted from the CLI args, but still + /// work fine if the file itself does not have an extension. 
+ pub fn maybeAppendRC(options: *Options, cwd: std.fs.Dir) !void { + if (std.fs.path.extension(options.input_filename).len == 0) { + cwd.access(options.input_filename, .{}) catch |err| switch (err) { + error.FileNotFound => { + var filename_bytes = try options.allocator.alloc(u8, options.input_filename.len + 3); + std.mem.copy(u8, filename_bytes, options.input_filename); + std.mem.copy(u8, filename_bytes[filename_bytes.len - 3 ..], ".rc"); + options.allocator.free(options.input_filename); + options.input_filename = filename_bytes; + }, + else => {}, + }; + } + } + + pub fn deinit(self: *Options) void { + for (self.extra_include_paths.items) |extra_include_path| { + self.allocator.free(extra_include_path); + } + self.extra_include_paths.deinit(self.allocator); + self.allocator.free(self.input_filename); + self.allocator.free(self.output_filename); + var symbol_it = self.symbols.iterator(); + while (symbol_it.next()) |entry| { + self.allocator.free(entry.key_ptr.*); + entry.value_ptr.deinit(self.allocator); + } + self.symbols.deinit(self.allocator); + } + + pub fn dumpVerbose(self: *const Options, writer: anytype) !void { + try writer.print("Input filename: {s}\n", .{self.input_filename}); + try writer.print("Output filename: {s}\n", .{self.output_filename}); + if (self.extra_include_paths.items.len > 0) { + try writer.writeAll(" Extra include paths:\n"); + for (self.extra_include_paths.items) |extra_include_path| { + try writer.print(" \"{s}\"\n", .{extra_include_path}); + } + } + if (self.ignore_include_env_var) { + try writer.writeAll(" The INCLUDE environment variable will be ignored\n"); + } + if (self.preprocess == .no) { + try writer.writeAll(" The preprocessor will not be invoked\n"); + } else if (self.preprocess == .only) { + try writer.writeAll(" Only the preprocessor will be invoked\n"); + } + if (self.symbols.count() > 0) { + try writer.writeAll(" Symbols:\n"); + var it = self.symbols.iterator(); + while (it.next()) |symbol| { + try writer.print(" {s} 
{s}", .{ switch (symbol.value_ptr.*) { + .define => "#define", + .undefine => "#undef", + }, symbol.key_ptr.* }); + if (symbol.value_ptr.* == .define) { + try writer.print(" {s}", .{symbol.value_ptr.define}); + } + try writer.writeAll("\n"); + } + } + if (self.null_terminate_string_table_strings) { + try writer.writeAll(" Strings in string tables will be null-terminated\n"); + } + if (self.max_string_literal_codepoints != lex.default_max_string_literal_codepoints) { + try writer.print(" Max string literal length: {}\n", .{self.max_string_literal_codepoints}); + } + if (self.silent_duplicate_control_ids) { + try writer.writeAll(" Duplicate control IDs will not emit warnings\n"); + } + if (self.silent_duplicate_control_ids) { + try writer.writeAll(" Invalid code page in .rc will produce a warning (instead of an error)\n"); + } + + const language_id = self.default_language_id orelse res.Language.default; + const language_name = language_name: { + if (std.meta.intToEnum(lang.LanguageId, language_id)) |lang_enum_val| { + break :language_name @tagName(lang_enum_val); + } else |_| {} + if (language_id == lang.LOCALE_CUSTOM_UNSPECIFIED) { + break :language_name "LOCALE_CUSTOM_UNSPECIFIED"; + } + break :language_name ""; + }; + try writer.print("Default language: {s} (id=0x{x})\n", .{ language_name, language_id }); + + const code_page = self.default_code_page orelse .windows1252; + try writer.print("Default codepage: {s} (id={})\n", .{ @tagName(code_page), @intFromEnum(code_page) }); + } +}; + +pub const Arg = struct { + prefix: enum { long, short, slash }, + name_offset: usize, + full: []const u8, + + pub fn fromString(str: []const u8) ?@This() { + if (std.mem.startsWith(u8, str, "--")) { + return .{ .prefix = .long, .name_offset = 2, .full = str }; + } else if (std.mem.startsWith(u8, str, "-")) { + return .{ .prefix = .short, .name_offset = 1, .full = str }; + } else if (std.mem.startsWith(u8, str, "/")) { + return .{ .prefix = .slash, .name_offset = 1, .full = str }; + } 
+ return null; + } + + pub fn prefixSlice(self: Arg) []const u8 { + return self.full[0..(if (self.prefix == .long) 2 else 1)]; + } + + pub fn name(self: Arg) []const u8 { + return self.full[self.name_offset..]; + } + + pub fn optionWithoutPrefix(self: Arg, option_len: usize) []const u8 { + return self.name()[0..option_len]; + } + + pub fn missingSpan(self: Arg) Diagnostics.ErrorDetails.ArgSpan { + return .{ + .point_at_next_arg = true, + .value_offset = 0, + .name_offset = self.name_offset, + .prefix_len = self.prefixSlice().len, + }; + } + + pub fn optionAndAfterSpan(self: Arg) Diagnostics.ErrorDetails.ArgSpan { + return self.optionSpan(0); + } + + pub fn optionSpan(self: Arg, option_len: usize) Diagnostics.ErrorDetails.ArgSpan { + return .{ + .name_offset = self.name_offset, + .prefix_len = self.prefixSlice().len, + .name_len = option_len, + }; + } + + pub const Value = struct { + slice: []const u8, + index_increment: u2 = 1, + + pub fn argSpan(self: Value, arg: Arg) Diagnostics.ErrorDetails.ArgSpan { + const prefix_len = arg.prefixSlice().len; + switch (self.index_increment) { + 1 => return .{ + .value_offset = @intFromPtr(self.slice.ptr) - @intFromPtr(arg.full.ptr), + .prefix_len = prefix_len, + .name_offset = arg.name_offset, + }, + 2 => return .{ + .point_at_next_arg = true, + .prefix_len = prefix_len, + .name_offset = arg.name_offset, + }, + else => unreachable, + } + } + + pub fn index(self: Value, arg_index: usize) usize { + if (self.index_increment == 2) return arg_index + 1; + return arg_index; + } + }; + + pub fn value(self: Arg, option_len: usize, index: usize, args: []const []const u8) error{MissingValue}!Value { + const rest = self.full[self.name_offset + option_len ..]; + if (rest.len > 0) return .{ .slice = rest }; + if (index + 1 >= args.len) return error.MissingValue; + return .{ .slice = args[index + 1], .index_increment = 2 }; + } + + pub const Context = struct { + index: usize, + arg: Arg, + value: Value, + }; +}; + +pub const ParseError = 
error{ParseError} || Allocator.Error; + +/// Note: Does not run `Options.maybeAppendRC` automatically. If that behavior is desired, +/// it must be called separately. +pub fn parse(allocator: Allocator, args: []const []const u8, diagnostics: *Diagnostics) ParseError!Options { + var options = Options{ .allocator = allocator }; + errdefer options.deinit(); + + var output_filename: ?[]const u8 = null; + var output_filename_context: Arg.Context = undefined; + + var arg_i: usize = 1; // start at 1 to skip past the exe name + next_arg: while (arg_i < args.len) { + var arg = Arg.fromString(args[arg_i]) orelse break; + if (arg.name().len == 0) { + switch (arg.prefix) { + // -- on its own ends arg parsing + .long => { + arg_i += 1; + break; + }, + // - or / on its own is an error + else => { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("invalid option: {s}", .{arg.prefixSlice()}); + try diagnostics.append(err_details); + arg_i += 1; + continue :next_arg; + }, + } + } + + while (arg.name().len > 0) { + const arg_name = arg.name(); + // Note: These cases should be in order from longest to shortest, since + // shorter options that are a substring of a longer one could make + // the longer option's branch unreachable. 
+ if (std.ascii.startsWithIgnoreCase(arg_name, ":no-preprocess")) { + options.preprocess = .no; + arg.name_offset += ":no-preprocess".len; + } else if (std.ascii.startsWithIgnoreCase(arg_name, ":auto-includes")) { + const value = arg.value(":auto-includes".len, arg_i, args) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(":auto-includes".len) }); + try diagnostics.append(err_details); + arg_i += 1; + break :next_arg; + }; + options.auto_includes = std.meta.stringToEnum(Options.AutoIncludes, value.slice) orelse blk: { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("invalid auto includes setting: {s} ", .{value.slice}); + try diagnostics.append(err_details); + break :blk options.auto_includes; + }; + arg_i += value.index_increment; + continue :next_arg; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "nologo")) { + // No-op, we don't display any 'logo' to suppress + arg.name_offset += "nologo".len; + } else if (std.ascii.startsWithIgnoreCase(arg_name, ":debug")) { + options.debug = true; + arg.name_offset += ":debug".len; + } + // Unsupported LCX/LCE options that need a value (within the same arg only) + else if (std.ascii.startsWithIgnoreCase(arg_name, "tp:")) { + const rest = arg.full[arg.name_offset + 3 ..]; + if (rest.len == 0) { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = .{ + .name_offset = arg.name_offset, + .prefix_len = arg.prefixSlice().len, + .value_offset = arg.name_offset + 3, + } }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing value for {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(3) }); + try 
diagnostics.append(err_details); + } + var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(3) }); + try diagnostics.append(err_details); + arg_i += 1; + continue :next_arg; + } + // Unsupported LCX/LCE options that need a value + else if (std.ascii.startsWithIgnoreCase(arg_name, "tn")) { + const value = arg.value(2, arg_i, args) catch no_value: { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); + try diagnostics.append(err_details); + // dummy zero-length slice starting where the value would have been + const value_start = arg.name_offset + 2; + break :no_value Arg.Value{ .slice = arg.full[value_start..value_start] }; + }; + var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); + try diagnostics.append(err_details); + arg_i += value.index_increment; + continue :next_arg; + } + // Unsupported MUI options that need a value + else if (std.ascii.startsWithIgnoreCase(arg_name, "fm") or + std.ascii.startsWithIgnoreCase(arg_name, "gn") or + std.ascii.startsWithIgnoreCase(arg_name, "g2")) + { + const value = arg.value(2, arg_i, args) catch no_value: { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); + 
try diagnostics.append(err_details); + // dummy zero-length slice starting where the value would have been + const value_start = arg.name_offset + 2; + break :no_value Arg.Value{ .slice = arg.full[value_start..value_start] }; + }; + var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); + try diagnostics.append(err_details); + arg_i += value.index_increment; + continue :next_arg; + } + // Unsupported MUI options that do not need a value + else if (std.ascii.startsWithIgnoreCase(arg_name, "g1")) { + var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionSpan(2) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); + try diagnostics.append(err_details); + arg.name_offset += 2; + } + // Unsupported LCX/LCE options that do not need a value + else if (std.ascii.startsWithIgnoreCase(arg_name, "tm") or + std.ascii.startsWithIgnoreCase(arg_name, "tc") or + std.ascii.startsWithIgnoreCase(arg_name, "tw") or + std.ascii.startsWithIgnoreCase(arg_name, "te") or + std.ascii.startsWithIgnoreCase(arg_name, "ti") or + std.ascii.startsWithIgnoreCase(arg_name, "ta")) + { + var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionSpan(2) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); + try diagnostics.append(err_details); + arg.name_offset += 2; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "fo")) { + const value = arg.value(2, arg_i, args) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = 
arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing output path after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); + try diagnostics.append(err_details); + arg_i += 1; + break :next_arg; + }; + output_filename_context = .{ .index = arg_i, .arg = arg, .value = value }; + output_filename = value.slice; + arg_i += value.index_increment; + continue :next_arg; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "sl")) { + const value = arg.value(2, arg_i, args) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing language tag after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); + try diagnostics.append(err_details); + arg_i += 1; + break :next_arg; + }; + const percent_str = value.slice; + const percent: u32 = parsePercent(percent_str) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("invalid percent format '{s}'", .{percent_str}); + try diagnostics.append(err_details); + var note_details = Diagnostics.ErrorDetails{ .type = .note, .print_args = false, .arg_index = arg_i }; + var note_writer = note_details.msg.writer(allocator); + try note_writer.writeAll("string length percent must be an integer between 1 and 100 (inclusive)"); + try diagnostics.append(note_details); + arg_i += value.index_increment; + continue :next_arg; + }; + if (percent == 0 or percent > 100) { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("percent out of range: {} (parsed from '{s}')", .{ percent, percent_str }); + try diagnostics.append(err_details); + var note_details = Diagnostics.ErrorDetails{ 
.type = .note, .print_args = false, .arg_index = arg_i }; + var note_writer = note_details.msg.writer(allocator); + try note_writer.writeAll("string length percent must be an integer between 1 and 100 (inclusive)"); + try diagnostics.append(note_details); + arg_i += value.index_increment; + continue :next_arg; + } + const percent_float = @as(f32, @floatFromInt(percent)) / 100; + options.max_string_literal_codepoints = @intFromFloat(percent_float * max_string_literal_length_100_percent); + arg_i += value.index_increment; + continue :next_arg; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "ln")) { + const value = arg.value(2, arg_i, args) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing language tag after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); + try diagnostics.append(err_details); + arg_i += 1; + break :next_arg; + }; + const tag = value.slice; + options.default_language_id = lang.tagToInt(tag) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("invalid language tag: {s}", .{tag}); + try diagnostics.append(err_details); + arg_i += value.index_increment; + continue :next_arg; + }; + if (options.default_language_id.? 
== lang.LOCALE_CUSTOM_UNSPECIFIED) { + var err_details = Diagnostics.ErrorDetails{ .type = .warning, .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("language tag '{s}' does not have an assigned ID so it will be resolved to LOCALE_CUSTOM_UNSPECIFIED (id=0x{x})", .{ tag, lang.LOCALE_CUSTOM_UNSPECIFIED }); + try diagnostics.append(err_details); + } + arg_i += value.index_increment; + continue :next_arg; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "l")) { + const value = arg.value(1, arg_i, args) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing language ID after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + arg_i += 1; + break :next_arg; + }; + const num_str = value.slice; + options.default_language_id = lang.parseInt(num_str) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("invalid language ID: {s}", .{num_str}); + try diagnostics.append(err_details); + arg_i += value.index_increment; + continue :next_arg; + }; + arg_i += value.index_increment; + continue :next_arg; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "h") or std.mem.startsWith(u8, arg_name, "?")) { + options.print_help_and_exit = true; + // If there's been an error to this point, then we still want to fail + if (diagnostics.hasError()) return error.ParseError; + return options; + } + // 1 char unsupported MUI options that need a value + else if (std.ascii.startsWithIgnoreCase(arg_name, "q") or + std.ascii.startsWithIgnoreCase(arg_name, "g")) + { + const value = arg.value(1, arg_i, args) catch no_value: { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, 
.arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + // dummy zero-length slice starting where the value would have been + const value_start = arg.name_offset + 1; + break :no_value Arg.Value{ .slice = arg.full[value_start..value_start] }; + }; + var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + arg_i += value.index_increment; + continue :next_arg; + } + // Undocumented (and unsupported) options that need a value + // /z has to do something with font substitution + // /s has something to do with HWB resources being inserted into the .res + else if (std.ascii.startsWithIgnoreCase(arg_name, "z") or + std.ascii.startsWithIgnoreCase(arg_name, "s")) + { + const value = arg.value(1, arg_i, args) catch no_value: { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + // dummy zero-length slice starting where the value would have been + const value_start = arg.name_offset + 1; + break :no_value Arg.Value{ .slice = arg.full[value_start..value_start] }; + }; + var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try 
diagnostics.append(err_details); + arg_i += value.index_increment; + continue :next_arg; + } + // 1 char unsupported LCX/LCE options that do not need a value + else if (std.ascii.startsWithIgnoreCase(arg_name, "t")) { + var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionSpan(1) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + arg.name_offset += 1; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "c")) { + const value = arg.value(1, arg_i, args) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing code page ID after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + arg_i += 1; + break :next_arg; + }; + const num_str = value.slice; + const code_page_id = std.fmt.parseUnsigned(u16, num_str, 10) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("invalid code page ID: {s}", .{num_str}); + try diagnostics.append(err_details); + arg_i += value.index_increment; + continue :next_arg; + }; + options.default_code_page = CodePage.getByIdentifierEnsureSupported(code_page_id) catch |err| switch (err) { + error.InvalidCodePage => { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("invalid or unknown code page ID: {}", .{code_page_id}); + try diagnostics.append(err_details); + arg_i += value.index_increment; + continue :next_arg; + }, + error.UnsupportedCodePage => { + var err_details = Diagnostics.ErrorDetails{ 
.arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("unsupported code page: {s} (id={})", .{ + @tagName(CodePage.getByIdentifier(code_page_id) catch unreachable), + code_page_id, + }); + try diagnostics.append(err_details); + arg_i += value.index_increment; + continue :next_arg; + }, + }; + arg_i += value.index_increment; + continue :next_arg; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "v")) { + options.verbose = true; + arg.name_offset += 1; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "x")) { + options.ignore_include_env_var = true; + arg.name_offset += 1; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "p")) { + options.preprocess = .only; + arg.name_offset += 1; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "i")) { + const value = arg.value(1, arg_i, args) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing include path after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + arg_i += 1; + break :next_arg; + }; + const path = value.slice; + const duped = try allocator.dupe(u8, path); + errdefer allocator.free(duped); + try options.extra_include_paths.append(options.allocator, duped); + arg_i += value.index_increment; + continue :next_arg; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "r")) { + // From https://learn.microsoft.com/en-us/windows/win32/menurc/using-rc-the-rc-command-line- + // "Ignored. Provided for compatibility with existing makefiles." 
+ arg.name_offset += 1; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "n")) { + options.null_terminate_string_table_strings = true; + arg.name_offset += 1; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "y")) { + options.silent_duplicate_control_ids = true; + arg.name_offset += 1; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "w")) { + options.warn_instead_of_error_on_invalid_code_page = true; + arg.name_offset += 1; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "a")) { + // Undocumented option with unknown function + // TODO: More investigation to figure out what it does (if anything) + var err_details = Diagnostics.ErrorDetails{ .type = .warning, .arg_index = arg_i, .arg_span = arg.optionSpan(1) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("option {s}{s} has no effect (it is undocumented and its function is unknown in the Win32 RC compiler)", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + arg.name_offset += 1; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "d")) { + const value = arg.value(1, arg_i, args) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing symbol to define after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + arg_i += 1; + break :next_arg; + }; + var tokenizer = std.mem.tokenize(u8, value.slice, "="); + // guaranteed to exist since an empty value.slice would invoke + // the 'missing symbol to define' branch above + const symbol = tokenizer.next().?; + const symbol_value = tokenizer.next() orelse "1"; + + if (isValidIdentifier(symbol)) { + try options.define(symbol, symbol_value); + } else { + var err_details = Diagnostics.ErrorDetails{ .type = .warning, .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = 
err_details.msg.writer(allocator); + try msg_writer.print("symbol \"{s}\" is not a valid identifier and therefore cannot be defined", .{symbol}); + try diagnostics.append(err_details); + } + arg_i += value.index_increment; + continue :next_arg; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "u")) { + const value = arg.value(1, arg_i, args) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing symbol to undefine after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + arg_i += 1; + break :next_arg; + }; + const symbol = value.slice; + if (isValidIdentifier(symbol)) { + try options.undefine(symbol); + } else { + var err_details = Diagnostics.ErrorDetails{ .type = .warning, .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("symbol \"{s}\" is not a valid identifier and therefore cannot be undefined", .{symbol}); + try diagnostics.append(err_details); + } + arg_i += value.index_increment; + continue :next_arg; + } else { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("invalid option: {s}{s}", .{ arg.prefixSlice(), arg.name() }); + try diagnostics.append(err_details); + arg_i += 1; + continue :next_arg; + } + } else { + // The while loop exited via its conditional, meaning we are done with + // the current arg and can move on the the next + arg_i += 1; + continue; + } + } + + var positionals = args[arg_i..]; + + if (positionals.len < 1) { + var err_details = Diagnostics.ErrorDetails{ .print_args = false, .arg_index = arg_i }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.writeAll("missing input filename"); + try 
diagnostics.append(err_details); + + const last_arg = args[args.len - 1]; + if (arg_i > 1 and last_arg.len > 0 and last_arg[0] == '/' and std.ascii.endsWithIgnoreCase(last_arg, ".rc")) { + var note_details = Diagnostics.ErrorDetails{ .type = .note, .print_args = true, .arg_index = arg_i - 1 }; + var note_writer = note_details.msg.writer(allocator); + try note_writer.writeAll("if this argument was intended to be the input filename, then -- should be specified in front of it to exclude it from option parsing"); + try diagnostics.append(note_details); + } + + // This is a fatal enough problem to justify an early return, since + // things after this rely on the value of the input filename. + return error.ParseError; + } + options.input_filename = try allocator.dupe(u8, positionals[0]); + + if (positionals.len > 1) { + if (output_filename != null) { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i + 1 }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.writeAll("output filename already specified"); + try diagnostics.append(err_details); + var note_details = Diagnostics.ErrorDetails{ + .type = .note, + .arg_index = output_filename_context.value.index(output_filename_context.index), + .arg_span = output_filename_context.value.argSpan(output_filename_context.arg), + }; + var note_writer = note_details.msg.writer(allocator); + try note_writer.writeAll("output filename previously specified here"); + try diagnostics.append(note_details); + } else { + output_filename = positionals[1]; + } + } + if (output_filename == null) { + var buf = std.ArrayList(u8).init(allocator); + errdefer buf.deinit(); + + if (std.fs.path.dirname(options.input_filename)) |dirname| { + var end_pos = dirname.len; + // We want to ensure that we write a path separator at the end, so if the dirname + // doesn't end with a path sep then include the char after the dirname + // which must be a path sep. 
/// Returns true if the str is a valid C identifier for use in a #define/#undef macro.
/// An identifier is non-empty, consists only of ASCII letters, digits and `_`,
/// and does not start with a digit.
pub fn isValidIdentifier(str: []const u8) bool {
    // An empty string would otherwise fall through the loop and be accepted.
    if (str.len == 0) return false;
    for (str, 0..) |c, i| switch (c) {
        '0'...'9' => if (i == 0) return false,
        'a'...'z', 'A'...'Z', '_' => {},
        else => return false,
    };
    return true;
}

/// This function is specific to how the Win32 RC command line interprets
/// max string literal length percent.
/// - Wraps on overflow of u32
/// - Stops parsing on any invalid decimal digit
/// - Errors if the first char (after the optional prefix) is not a digit
/// - `-` (negative) prefix is allowed; the result is negated with wrapping
/// - A lone `-` (no digits at all after the prefix) yields 0
/// - An empty string is an error
pub fn parsePercent(str: []const u8) error{InvalidFormat}!u32 {
    // Nothing to parse; without this check `str[0]` below is out of bounds.
    if (str.len == 0) return error.InvalidFormat;

    const radix: u8 = 10;
    const negative = str[0] == '-';
    const digits = if (negative) str[1..] else str;

    var result: u32 = 0;
    for (digits, 0..) |c, i| {
        const digit: u32 = switch (c) {
            '0'...'9' => c - '0',
            else => {
                // First digit must be valid
                if (i == 0) return error.InvalidFormat;
                // On any later invalid digit, just stop parsing but don't fail
                break;
            },
        };
        result = result *% radix +% digit;
    }

    return if (negative) 0 -% result else result;
}

test parsePercent {
    try std.testing.expectEqual(@as(u32, 16), try parsePercent("16"));
    try std.testing.expectEqual(@as(u32, 0), try parsePercent("0x1A"));
    try std.testing.expectEqual(@as(u32, 0x1), try parsePercent("1zzzz"));
    try std.testing.expectEqual(@as(u32, 0xffffffff), try parsePercent("-1"));
    try std.testing.expectEqual(@as(u32, 0xfffffff0), try parsePercent("-16"));
    try std.testing.expectEqual(@as(u32, 1), try parsePercent("4294967297"));
    try std.testing.expectEqual(@as(u32, 0), try parsePercent("-"));
    try std.testing.expectError(error.InvalidFormat, parsePercent(""));
    try std.testing.expectError(error.InvalidFormat, parsePercent("--1"));
    try std.testing.expectError(error.InvalidFormat, parsePercent("ha"));
    try std.testing.expectError(error.InvalidFormat, parsePercent("¹"));
    try std.testing.expectError(error.InvalidFormat, parsePercent("~1"));
}

/// Writes one diagnostic to `writer`, using `config` for coloring.
///
/// The first output line is a header made of the severity taken from
/// `err_details.type` followed by `err_details.msg`. If `err_details.print_args`
/// is false, an empty line follows and the function returns. Otherwise the
/// argument at `args[err_details.arg_index]` is printed (plus the following
/// argument when `arg_span.point_at_next_arg` is set), and a marker line made of
/// `~` and `^` characters is written beneath it, positioned from the offsets in
/// `err_details.arg_span`.
pub fn renderErrorMessage(writer: anytype, config: std.io.tty.Config, err_details: Diagnostics.ErrorDetails, args: []const []const u8) !void {
    try config.setColor(writer, .dim);
    // NOTE(review): writing an empty string between dim/reset is a no-op as shown
    // here; this looks like a source-name placeholder that went missing — confirm.
    try writer.writeAll("");
    try config.setColor(writer, .reset);
    try config.setColor(writer, .bold);
    try writer.writeAll(": ");
    switch (err_details.type) {
        .err => {
            try config.setColor(writer, .red);
            try writer.writeAll("error: ");
        },
        .warning => {
            try config.setColor(writer, .yellow);
            try writer.writeAll("warning: ");
        },
        .note => {
            try config.setColor(writer, .cyan);
            try writer.writeAll("note: ");
        },
    }
    try config.setColor(writer, .reset);
    try config.setColor(writer, .bold);
    try writer.writeAll(err_details.msg.items);
    try writer.writeByte('\n');
    try config.setColor(writer, .reset);

    // Message-only diagnostics end with a blank line instead of the arg display.
    if (!err_details.print_args) {
        try writer.writeByte('\n');
        return;
    }

    try config.setColor(writer, .dim);
    const prefix = " ... ";
    try writer.writeAll(prefix);
    try config.setColor(writer, .reset);

    // Split the argument into prefix / skipped-over part / name / remainder so
    // that everything except the prefix and the name can be dimmed.
    const arg_with_name = args[err_details.arg_index];
    const prefix_slice = arg_with_name[0..err_details.arg_span.prefix_len];
    const before_name_slice = arg_with_name[err_details.arg_span.prefix_len..err_details.arg_span.name_offset];
    var name_slice = arg_with_name[err_details.arg_span.name_offset..];
    // A name_len of 0 means "the name extends to the end of the argument".
    if (err_details.arg_span.name_len > 0) name_slice.len = err_details.arg_span.name_len;
    const after_name_slice = arg_with_name[err_details.arg_span.name_offset + name_slice.len ..];

    try writer.writeAll(prefix_slice);
    if (before_name_slice.len > 0) {
        try config.setColor(writer, .dim);
        try writer.writeAll(before_name_slice);
        try config.setColor(writer, .reset);
    }
    try writer.writeAll(name_slice);
    if (after_name_slice.len > 0) {
        try config.setColor(writer, .dim);
        try writer.writeAll(after_name_slice);
        try config.setColor(writer, .reset);
    }

    var next_arg_len: usize = 0;
    if (err_details.arg_span.point_at_next_arg and err_details.arg_index + 1 < args.len) {
        const next_arg = args[err_details.arg_index + 1];
        try writer.writeByte(' ');
        try writer.writeAll(next_arg);
        next_arg_len = next_arg.len;
    }

    // Show a trailing ellipsis when more arguments follow the ones displayed.
    const last_shown_arg_index = if (err_details.arg_span.point_at_next_arg) err_details.arg_index + 1 else err_details.arg_index;
    if (last_shown_arg_index + 1 < args.len) {
        // special case for when pointing to a missing value within the same arg
        // as the name
        if (err_details.arg_span.value_offset >= arg_with_name.len) {
            try writer.writeByte(' ');
        }
        try config.setColor(writer, .dim);
        try writer.writeAll(" ...");
        try config.setColor(writer, .reset);
    }
    try writer.writeByte('\n');

    // Marker line: indent by the width of the leading ellipsis, then underline.
    try config.setColor(writer, .green);
    try writer.writeByteNTimes(' ', prefix.len);
    // Special case for when the option is *only* a prefix (e.g. invalid option: -)
    if (err_details.arg_span.prefix_len == arg_with_name.len) {
        try writer.writeByteNTimes('^', err_details.arg_span.prefix_len);
    } else {
        try writer.writeByteNTimes('~', err_details.arg_span.prefix_len);
        try writer.writeByteNTimes(' ', err_details.arg_span.name_offset - err_details.arg_span.prefix_len);
        if (!err_details.arg_span.point_at_next_arg and err_details.arg_span.value_offset == 0) {
            // Point at the option name itself.
            try writer.writeByte('^');
            try writer.writeByteNTimes('~', name_slice.len - 1);
        } else if (err_details.arg_span.value_offset > 0) {
            // Point at a value located inside the same argument.
            try writer.writeByteNTimes('~', err_details.arg_span.value_offset - err_details.arg_span.name_offset);
            try writer.writeByte('^');
            if (err_details.arg_span.value_offset < arg_with_name.len) {
                try writer.writeByteNTimes('~', arg_with_name.len - err_details.arg_span.value_offset - 1);
            }
        } else if (err_details.arg_span.point_at_next_arg) {
            // Point at the start of the following argument (+ 1 for the separating space).
            try writer.writeByteNTimes('~', arg_with_name.len - err_details.arg_span.name_offset + 1);
            try writer.writeByte('^');
            if (next_arg_len > 0) {
                try writer.writeByteNTimes('~', next_arg_len - 1);
            }
        }
    }
    try writer.writeByte('\n');
    try config.setColor(writer, .reset);
}

/// Parses `args` and expects success with no diagnostic output at all.
/// Caller owns the returned Options and must call `deinit` on it.
fn testParse(args: []const []const u8) !Options {
    // Fail the test (rather than panic on a null unwrap) if parsing errored.
    return (try testParseOutput(args, "")) orelse error.TestUnexpectedResult;
}

/// Parses `args` and expects success while the rendered diagnostics equal
/// `expected_output`. Caller owns the returned Options and must call `deinit` on it.
fn testParseWarning(args: []const []const u8, expected_output: []const u8) !Options {
    // Fail the test (rather than panic on a null unwrap) if parsing errored.
    return (try testParseOutput(args, expected_output)) orelse error.TestUnexpectedResult;
}

/// Parses `args` and expects a parse error whose rendered diagnostics equal
/// `expected_output`. Returns `error.TestExpectedError` if parsing succeeded.
fn testParseError(args: []const []const u8, expected_output: []const u8) !void {
    var maybe_options = try testParseOutput(args, expected_output);
    if (maybe_options) |*options| {
        std.debug.print("expected error, got options: {}\n", .{options.*});
        options.deinit();
        return error.TestExpectedError;
    }
}

/// Runs `parse` on `args` with the testing allocator and compares the rendered
/// (uncolored) diagnostics against `expected_output`.
/// Returns null if `parse` failed with `error.ParseError`, the parsed Options
/// otherwise (owned by the caller). Any other error is propagated.
fn testParseOutput(args: []const []const u8, expected_output: []const u8) !?Options {
    var diagnostics = Diagnostics.init(std.testing.allocator);
    defer diagnostics.deinit();

    var output = std.ArrayList(u8).init(std.testing.allocator);
    defer output.deinit();

    var options = parse(std.testing.allocator, args, &diagnostics) catch |err| switch (err) {
        error.ParseError => {
            try diagnostics.renderToWriter(args, output.writer(), .no_color);
            try std.testing.expectEqualStrings(expected_output, output.items);
            return null;
        },
        else => |e| return e,
    };
    // Don't leak the options if the output comparison below fails.
    errdefer options.deinit();

    try diagnostics.renderToWriter(args, output.writer(), .no_color);
    try std.testing.expectEqualStrings(expected_output, output.items);
    return options;
}
/v_not-an-option + \\ ~ ^~~~~~~~~~~~~~ + \\: error: missing input filename + \\ + \\ + ); + try testParseError(&.{ "foo.exe", "-v_not-an-option" }, + \\: error: invalid option: -_not-an-option + \\ ... -v_not-an-option + \\ ~ ^~~~~~~~~~~~~~ + \\: error: missing input filename + \\ + \\ + ); + try testParseError(&.{ "foo.exe", "--v_not-an-option" }, + \\: error: invalid option: --_not-an-option + \\ ... --v_not-an-option + \\ ~~ ^~~~~~~~~~~~~~ + \\: error: missing input filename + \\ + \\ + ); + try testParseError(&.{ "foo.exe", "/some/absolute/path/parsed/as/an/option.rc" }, + \\: error: the /s option is unsupported + \\ ... /some/absolute/path/parsed/as/an/option.rc + \\ ~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + \\: error: missing input filename + \\ + \\: note: if this argument was intended to be the input filename, then -- should be specified in front of it to exclude it from option parsing + \\ ... /some/absolute/path/parsed/as/an/option.rc + \\ ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + \\ + ); +} + +test "parse errors: /ln" { + try testParseError(&.{ "foo.exe", "/ln", "invalid", "foo.rc" }, + \\: error: invalid language tag: invalid + \\ ... /ln invalid ... + \\ ~~~~^~~~~~~ + \\ + ); + try testParseError(&.{ "foo.exe", "/lninvalid", "foo.rc" }, + \\: error: invalid language tag: invalid + \\ ... /lninvalid ... 
test "parse: options" {
    // One entry per command line. Every case must set `verbose` and keep
    // "foo.rc" as the input filename; `ignore_include_env_var` is only checked
    // for the cases that specify an expectation for it.
    const Case = struct {
        args: []const []const u8,
        ignore_include_env_var: ?bool = null,
        output_filename: []const u8 = "foo.res",
    };
    const cases = [_]Case{
        .{ .args = &.{ "foo.exe", "/v", "foo.rc" } },
        .{ .args = &.{ "foo.exe", "/vx", "foo.rc" }, .ignore_include_env_var = true },
        .{ .args = &.{ "foo.exe", "/xv", "foo.rc" }, .ignore_include_env_var = true },
        .{
            .args = &.{ "foo.exe", "/xvFObar.res", "foo.rc" },
            .ignore_include_env_var = true,
            .output_filename = "bar.res",
        },
    };

    for (cases) |case| {
        var parsed = try testParse(case.args);
        defer parsed.deinit();

        try std.testing.expectEqual(true, parsed.verbose);
        if (case.ignore_include_env_var) |expected| {
            try std.testing.expectEqual(expected, parsed.ignore_include_env_var);
        }
        try std.testing.expectEqualStrings("foo.rc", parsed.input_filename);
        try std.testing.expectEqualStrings(case.output_filename, parsed.output_filename);
    }
}
std.testing.expectEqual(Options.SymbolAction.define, action); + try std.testing.expectEqualStrings("baz", action.define); + } + { + var options = try testParse(&.{ "foo.exe", "/ufoo", "foo.rc" }); + defer options.deinit(); + + const action = options.symbols.get("foo").?; + try std.testing.expectEqual(Options.SymbolAction.undefine, action); + } + { + // Once undefined, future defines are ignored + var options = try testParse(&.{ "foo.exe", "/ufoo", "/dfoo", "foo.rc" }); + defer options.deinit(); + + const action = options.symbols.get("foo").?; + try std.testing.expectEqual(Options.SymbolAction.undefine, action); + } + { + // Undefined always takes precedence + var options = try testParse(&.{ "foo.exe", "/dfoo", "/ufoo", "/dfoo", "foo.rc" }); + defer options.deinit(); + + const action = options.symbols.get("foo").?; + try std.testing.expectEqual(Options.SymbolAction.undefine, action); + } + { + // Warn + ignore invalid identifiers + var options = try testParseWarning( + &.{ "foo.exe", "/dfoo bar", "/u", "0leadingdigit", "foo.rc" }, + \\: warning: symbol "foo bar" is not a valid identifier and therefore cannot be defined + \\ ... /dfoo bar ... + \\ ~~^~~~~~~ + \\: warning: symbol "0leadingdigit" is not a valid identifier and therefore cannot be undefined + \\ ... /u 0leadingdigit ... + \\ ~~~^~~~~~~~~~~~~ + \\ + , + ); + defer options.deinit(); + + try std.testing.expectEqual(@as(usize, 0), options.symbols.count()); + } +} + +test "parse: /sl" { + try testParseError(&.{ "foo.exe", "/sl", "0", "foo.rc" }, + \\: error: percent out of range: 0 (parsed from '0') + \\ ... /sl 0 ... + \\ ~~~~^ + \\: note: string length percent must be an integer between 1 and 100 (inclusive) + \\ + \\ + ); + try testParseError(&.{ "foo.exe", "/sl", "abcd", "foo.rc" }, + \\: error: invalid percent format 'abcd' + \\ ... /sl abcd ... 
+ \\ ~~~~^~~~ + \\: note: string length percent must be an integer between 1 and 100 (inclusive) + \\ + \\ + ); + { + var options = try testParse(&.{ "foo.exe", "foo.rc" }); + defer options.deinit(); + + try std.testing.expectEqual(@as(u15, lex.default_max_string_literal_codepoints), options.max_string_literal_codepoints); + } + { + var options = try testParse(&.{ "foo.exe", "/sl100", "foo.rc" }); + defer options.deinit(); + + try std.testing.expectEqual(@as(u15, max_string_literal_length_100_percent), options.max_string_literal_codepoints); + } + { + var options = try testParse(&.{ "foo.exe", "-SL33", "foo.rc" }); + defer options.deinit(); + + try std.testing.expectEqual(@as(u15, 2703), options.max_string_literal_codepoints); + } + { + var options = try testParse(&.{ "foo.exe", "/sl15", "foo.rc" }); + defer options.deinit(); + + try std.testing.expectEqual(@as(u15, 1228), options.max_string_literal_codepoints); + } +} + +test "parse: unsupported MUI-related options" { + try testParseError(&.{ "foo.exe", "/q", "blah", "/g1", "-G2", "blah", "/fm", "blah", "/g", "blah", "foo.rc" }, + \\: error: the /q option is unsupported + \\ ... /q ... + \\ ~^ + \\: error: the /g1 option is unsupported + \\ ... /g1 ... + \\ ~^~ + \\: error: the -G2 option is unsupported + \\ ... -G2 ... + \\ ~^~ + \\: error: the /fm option is unsupported + \\ ... /fm ... + \\ ~^~ + \\: error: the /g option is unsupported + \\ ... /g ... + \\ ~^ + \\ + ); +} + +test "parse: unsupported LCX/LCE-related options" { + try testParseError(&.{ "foo.exe", "/t", "/tp:", "/tp:blah", "/tm", "/tc", "/tw", "-TEti", "/ta", "/tn", "blah", "foo.rc" }, + \\: error: the /t option is unsupported + \\ ... /t ... + \\ ~^ + \\: error: missing value for /tp: option + \\ ... /tp: ... + \\ ~~~~^ + \\: error: the /tp: option is unsupported + \\ ... /tp: ... + \\ ~^~~ + \\: error: the /tp: option is unsupported + \\ ... /tp:blah ... + \\ ~^~~~~~~ + \\: error: the /tm option is unsupported + \\ ... /tm ... 
test "maybeAppendRC" {
    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();

    var parsed = try testParse(&.{ "foo.exe", "foo" });
    defer parsed.deinit();
    try std.testing.expectEqualStrings("foo", parsed.input_filename);

    // While a file with the verbatim name exists it is found as-is, so .rc
    // should not get appended.
    {
        const created = try scratch.dir.createFile("foo", .{});
        created.close();
    }
    try parsed.maybeAppendRC(scratch.dir);
    try std.testing.expectEqualStrings("foo", parsed.input_filename);

    // Once the file is deleted the verbatim name is no longer found, and since
    // the input filename has no extension, .rc should get appended.
    try scratch.dir.deleteFile("foo");
    try parsed.maybeAppendRC(scratch.dir);
    try std.testing.expectEqualStrings("foo.rc", parsed.input_filename);
}
+ // + // ‰ representations for context: + // Win-1252 89 + // UTF-8 E2 80 B0 + // UTF-16 20 30 + // + // With code page 65001: + // ‰ RCDATA { "‰" L"‰" } + // File encoded as Windows-1252: + // ‰ => U+FFFD as u16 + // "‰" => 0x3F ('?') + // L"‰" => U+FFFD as u16 + // File encoded as UTF-8: + // ‰ => U+2030 as u16 + // "‰" => 0x89 ('‰' encoded as Windows-1252) + // L"‰" => U+2030 as u16 + // + // With code page 1252: + // ‰ RCDATA { "‰" L"‰" } + // File encoded as Windows-1252: + // ‰ => U+2030 as u16 + // "‰" => 0x89 ('‰' encoded as Windows-1252) + // L"‰" => U+2030 as u16 + // File encoded as UTF-8: + // ‰ => 0xE2 as u16, 0x20AC as u16, 0xB0 as u16 + // ^ first byte of utf8 representation + // ^ second byte of UTF-8 representation (0x80), but interpreted as + // Windows-1252 ('€') and then converted to UTF-16 (U+20AC) + // ^ third byte of utf8 representation + // "‰" => 0xE2, 0x80, 0xB0 (the bytes of the UTF-8 representation) + // L"‰" => 0xE2 as u16, 0x20AC as u16, 0xB0 as u16 (see '‰ =>' explanation) + // + // With code page 1252: + // <0x90> RCDATA { "<0x90>" L"<0x90>" } + // File encoded as Windows-1252: + // <0x90> => 0x90 as u16 + // "<0x90>" => 0x90 + // L"<0x90>" => 0x90 as u16 + // File encoded as UTF-8: + // <0x90> => 0xC2 as u16, 0x90 as u16 + // "<0x90>" => 0xC2, 0x90 (the bytes of the UTF-8 representation of U+0090) + // L"<0x90>" => 0xC2 as u16, 0x90 as u16 + // + // Within a raw data block, file encoded as Windows-1252 (Â is <0xC2>): + // "Âa" L"Âa" "\xC2ad" L"\xC2AD" + // With code page 1252: + // C2 61 C2 00 61 00 C2 61 64 AD C2 + // Â^ a^ Â~~~^ a~~~^ .^ a^ d^ ^~~~~\xC2AD + // \xC2~` + // With code page 65001: + // 3F 61 FD FF 61 00 C2 61 64 AD C2 + // ^. a^ ^~~~. a~~~^ ^. a^ d^ ^~~~~\xC2AD + // `. `. `~\xC2 + // `. `.~<0xC2>a is not well-formed UTF-8 (0xC2 expects a continuation byte after it). + // `. Because 'a' is a valid first byte of a UTF-8 sequence, it is not included in the + // `. invalid sequence so only the <0xC2> gets converted to U+FFFD. + // `~Same as ^ but converted to '?' instead.
+// +// Within a raw data block, file encoded as Windows-1252 (ð is <0xF0>, € is <0x80>): +// "ð€a" L"ð€a" +// With code page 1252: +// F0 80 61 F0 00 AC 20 61 00 +// ð^ €^ a^ ð~~~^ €~~~^ a~~~^ +// With code page 65001: +// 3F 61 FD FF 61 00 +// ^. a^ ^~~~. a~~~^ +// `. `. +// `. `.~<0xF0><0x80> is not well-formed UTF-8, and <0x80> is not a valid first byte, so +// `. both bytes are considered an invalid sequence and get converted to '' +// `~Same as ^ but converted to '?' instead. + +/// https://learn.microsoft.com/en-us/windows/win32/intl/code-page-identifiers +pub const CodePage = enum(u16) { + // supported + windows1252 = 1252, // windows-1252 ANSI Latin 1; Western European (Windows) + utf8 = 65001, // utf-8 Unicode (UTF-8) + + // unsupported but valid + ibm037 = 37, // IBM037 IBM EBCDIC US-Canada + ibm437 = 437, // IBM437 OEM United States + ibm500 = 500, // IBM500 IBM EBCDIC International + asmo708 = 708, // ASMO-708 Arabic (ASMO 708) + asmo449plus = 709, // Arabic (ASMO-449+, BCON V4) + transparent_arabic = 710, // Arabic - Transparent Arabic + dos720 = 720, // DOS-720 Arabic (Transparent ASMO); Arabic (DOS) + ibm737 = 737, // ibm737 OEM Greek (formerly 437G); Greek (DOS) + ibm775 = 775, // ibm775 OEM Baltic; Baltic (DOS) + ibm850 = 850, // ibm850 OEM Multilingual Latin 1; Western European (DOS) + ibm852 = 852, // ibm852 OEM Latin 2; Central European (DOS) + ibm855 = 855, // IBM855 OEM Cyrillic (primarily Russian) + ibm857 = 857, // ibm857 OEM Turkish; Turkish (DOS) + ibm00858 = 858, // IBM00858 OEM Multilingual Latin 1 + Euro symbol + ibm860 = 860, // IBM860 OEM Portuguese; Portuguese (DOS) + ibm861 = 861, // ibm861 OEM Icelandic; Icelandic (DOS) + dos862 = 862, // DOS-862 OEM Hebrew; Hebrew (DOS) + ibm863 = 863, // IBM863 OEM French Canadian; French Canadian (DOS) + ibm864 = 864, // IBM864 OEM Arabic; Arabic (864) + ibm865 = 865, // IBM865 OEM Nordic; Nordic (DOS) + cp866 = 866, // cp866 OEM Russian; Cyrillic (DOS) + ibm869 = 869, // ibm869 OEM Modern 
Greek; Greek, Modern (DOS) + ibm870 = 870, // IBM870 IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2 + windows874 = 874, // windows-874 Thai (Windows) + cp875 = 875, // cp875 IBM EBCDIC Greek Modern + shift_jis = 932, // shift_jis ANSI/OEM Japanese; Japanese (Shift-JIS) + gb2312 = 936, // gb2312 ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312) + ks_c_5601_1987 = 949, // ks_c_5601-1987 ANSI/OEM Korean (Unified Hangul Code) + big5 = 950, // big5 ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5) + ibm1026 = 1026, // IBM1026 IBM EBCDIC Turkish (Latin 5) + ibm01047 = 1047, // IBM01047 IBM EBCDIC Latin 1/Open System + ibm01140 = 1140, // IBM01140 IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro) + ibm01141 = 1141, // IBM01141 IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro) + ibm01142 = 1142, // IBM01142 IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro) + ibm01143 = 1143, // IBM01143 IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro) + ibm01144 = 1144, // IBM01144 IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro) + ibm01145 = 1145, // IBM01145 IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro) + ibm01146 = 1146, // IBM01146 IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro) + ibm01147 = 1147, // IBM01147 IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro) + ibm01148 = 1148, // IBM01148 IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro) + ibm01149 = 1149, // IBM01149 IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro) + utf16 = 1200, // utf-16 Unicode UTF-16, little endian byte order (BMP of ISO 10646); available only to managed applications + utf16_fffe = 1201, // unicodeFFFE Unicode UTF-16, big endian byte order; available only to managed applications + windows1250 = 
1250, // windows-1250 ANSI Central European; Central European (Windows) + windows1251 = 1251, // windows-1251 ANSI Cyrillic; Cyrillic (Windows) + windows1253 = 1253, // windows-1253 ANSI Greek; Greek (Windows) + windows1254 = 1254, // windows-1254 ANSI Turkish; Turkish (Windows) + windows1255 = 1255, // windows-1255 ANSI Hebrew; Hebrew (Windows) + windows1256 = 1256, // windows-1256 ANSI Arabic; Arabic (Windows) + windows1257 = 1257, // windows-1257 ANSI Baltic; Baltic (Windows) + windows1258 = 1258, // windows-1258 ANSI/OEM Vietnamese; Vietnamese (Windows) + johab = 1361, // Johab Korean (Johab) + macintosh = 10000, // macintosh MAC Roman; Western European (Mac) + x_mac_japanese = 10001, // x-mac-japanese Japanese (Mac) + x_mac_chinesetrad = 10002, // x-mac-chinesetrad MAC Traditional Chinese (Big5); Chinese Traditional (Mac) + x_mac_korean = 10003, // x-mac-korean Korean (Mac) + x_mac_arabic = 10004, // x-mac-arabic Arabic (Mac) + x_mac_hebrew = 10005, // x-mac-hebrew Hebrew (Mac) + x_mac_greek = 10006, // x-mac-greek Greek (Mac) + x_mac_cyrillic = 10007, // x-mac-cyrillic Cyrillic (Mac) + x_mac_chinesesimp = 10008, // x-mac-chinesesimp MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac) + x_mac_romanian = 10010, // x-mac-romanian Romanian (Mac) + x_mac_ukranian = 10017, // x-mac-ukrainian Ukrainian (Mac) + x_mac_thai = 10021, // x-mac-thai Thai (Mac) + x_mac_ce = 10029, // x-mac-ce MAC Latin 2; Central European (Mac) + x_mac_icelandic = 10079, // x-mac-icelandic Icelandic (Mac) + x_mac_turkish = 10081, // x-mac-turkish Turkish (Mac) + x_mac_croatian = 10082, // x-mac-croatian Croatian (Mac) + utf32 = 12000, // utf-32 Unicode UTF-32, little endian byte order; available only to managed applications + utf32_be = 12001, // utf-32BE Unicode UTF-32, big endian byte order; available only to managed applications + x_chinese_cns = 20000, // x-Chinese_CNS CNS Taiwan; Chinese Traditional (CNS) + x_cp20001 = 20001, // x-cp20001 TCA Taiwan + x_chinese_eten = 20002, // 
x_Chinese-Eten Eten Taiwan; Chinese Traditional (Eten) + x_cp20003 = 20003, // x-cp20003 IBM5550 Taiwan + x_cp20004 = 20004, // x-cp20004 TeleText Taiwan + x_cp20005 = 20005, // x-cp20005 Wang Taiwan + x_ia5 = 20105, // x-IA5 IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5) + x_ia5_german = 20106, // x-IA5-German IA5 German (7-bit) + x_ia5_swedish = 20107, // x-IA5-Swedish IA5 Swedish (7-bit) + x_ia5_norwegian = 20108, // x-IA5-Norwegian IA5 Norwegian (7-bit) + us_ascii = 20127, // us-ascii US-ASCII (7-bit) + x_cp20261 = 20261, // x-cp20261 T.61 + x_cp20269 = 20269, // x-cp20269 ISO 6937 Non-Spacing Accent + ibm273 = 20273, // IBM273 IBM EBCDIC Germany + ibm277 = 20277, // IBM277 IBM EBCDIC Denmark-Norway + ibm278 = 20278, // IBM278 IBM EBCDIC Finland-Sweden + ibm280 = 20280, // IBM280 IBM EBCDIC Italy + ibm284 = 20284, // IBM284 IBM EBCDIC Latin America-Spain + ibm285 = 20285, // IBM285 IBM EBCDIC United Kingdom + ibm290 = 20290, // IBM290 IBM EBCDIC Japanese Katakana Extended + ibm297 = 20297, // IBM297 IBM EBCDIC France + ibm420 = 20420, // IBM420 IBM EBCDIC Arabic + ibm423 = 20423, // IBM423 IBM EBCDIC Greek + ibm424 = 20424, // IBM424 IBM EBCDIC Hebrew + x_ebcdic_korean_extended = 20833, // x-EBCDIC-KoreanExtended IBM EBCDIC Korean Extended + ibm_thai = 20838, // IBM-Thai IBM EBCDIC Thai + koi8_r = 20866, // koi8-r Russian (KOI8-R); Cyrillic (KOI8-R) + ibm871 = 20871, // IBM871 IBM EBCDIC Icelandic + ibm880 = 20880, // IBM880 IBM EBCDIC Cyrillic Russian + ibm905 = 20905, // IBM905 IBM EBCDIC Turkish + ibm00924 = 20924, // IBM00924 IBM EBCDIC Latin 1/Open System (1047 + Euro symbol) + euc_jp_jis = 20932, // EUC-JP Japanese (JIS 0208-1990 and 0212-1990) + x_cp20936 = 20936, // x-cp20936 Simplified Chinese (GB2312); Chinese Simplified (GB2312-80) + x_cp20949 = 20949, // x-cp20949 Korean Wansung + cp1025 = 21025, // cp1025 IBM EBCDIC Cyrillic Serbian-Bulgarian + // = 21027, // (deprecated) + koi8_u = 21866, // koi8-u Ukrainian (KOI8-U); 
Cyrillic (KOI8-U) + iso8859_1 = 28591, // iso-8859-1 ISO 8859-1 Latin 1; Western European (ISO) + iso8859_2 = 28592, // iso-8859-2 ISO 8859-2 Central European; Central European (ISO) + iso8859_3 = 28593, // iso-8859-3 ISO 8859-3 Latin 3 + iso8859_4 = 28594, // iso-8859-4 ISO 8859-4 Baltic + iso8859_5 = 28595, // iso-8859-5 ISO 8859-5 Cyrillic + iso8859_6 = 28596, // iso-8859-6 ISO 8859-6 Arabic + iso8859_7 = 28597, // iso-8859-7 ISO 8859-7 Greek + iso8859_8 = 28598, // iso-8859-8 ISO 8859-8 Hebrew; Hebrew (ISO-Visual) + iso8859_9 = 28599, // iso-8859-9 ISO 8859-9 Turkish + iso8859_13 = 28603, // iso-8859-13 ISO 8859-13 Estonian + iso8859_15 = 28605, // iso-8859-15 ISO 8859-15 Latin 9 + x_europa = 29001, // x-Europa Europa 3 + is8859_8_i = 38598, // iso-8859-8-i ISO 8859-8 Hebrew; Hebrew (ISO-Logical) + iso2022_jp = 50220, // iso-2022-jp ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS) + cs_iso2022_jp = 50221, // csISO2022JP ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana) + iso2022_jp_jis_x = 50222, // iso-2022-jp ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI) + iso2022_kr = 50225, // iso-2022-kr ISO 2022 Korean + x_cp50227 = 50227, // x-cp50227 ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022) + iso2022_chinesetrad = 50229, // ISO 2022 Traditional Chinese + ebcdic_jp_katakana_extended = 50930, // EBCDIC Japanese (Katakana) Extended + ebcdic_us_ca_jp = 50931, // EBCDIC US-Canada and Japanese + ebcdic_kr_extended = 50933, // EBCDIC Korean Extended and Korean + ebcdic_chinesesimp_extended = 50935, // EBCDIC Simplified Chinese Extended and Simplified Chinese + ebcdic_chinesesimp = 50936, // EBCDIC Simplified Chinese + ebcdic_us_ca_chinesetrad = 50937, // EBCDIC US-Canada and Traditional Chinese + ebcdic_jp_latin_extended = 50939, // EBCDIC Japanese (Latin) Extended and Japanese + euc_jp = 51932, // euc-jp EUC Japanese + euc_cn = 51936, // EUC-CN EUC Simplified Chinese; Chinese Simplified 
(EUC) + euc_kr = 51949, // euc-kr EUC Korean + euc_chinesetrad = 51950, // EUC Traditional Chinese + hz_gb2312 = 52936, // hz-gb-2312 HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ) + gb18030 = 54936, // GB18030 Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030) + x_iscii_de = 57002, // x-iscii-de ISCII Devanagari + x_iscii_be = 57003, // x-iscii-be ISCII Bangla + x_iscii_ta = 57004, // x-iscii-ta ISCII Tamil + x_iscii_te = 57005, // x-iscii-te ISCII Telugu + x_iscii_as = 57006, // x-iscii-as ISCII Assamese + x_iscii_or = 57007, // x-iscii-or ISCII Odia + x_iscii_ka = 57008, // x-iscii-ka ISCII Kannada + x_iscii_ma = 57009, // x-iscii-ma ISCII Malayalam + x_iscii_gu = 57010, // x-iscii-gu ISCII Gujarati + x_iscii_pa = 57011, // x-iscii-pa ISCII Punjabi + utf7 = 65000, // utf-7 Unicode (UTF-7) + + pub fn codepointAt(code_page: CodePage, index: usize, bytes: []const u8) ?Codepoint { + if (index >= bytes.len) return null; + switch (code_page) { + .windows1252 => { + // All byte values have a representation, so just convert the byte + return Codepoint{ + .value = windows1252.toCodepoint(bytes[index]), + .byte_len = 1, + }; + }, + .utf8 => { + return Utf8.WellFormedDecoder.decode(bytes[index..]); + }, + else => unreachable, + } + } + + pub fn isSupported(code_page: CodePage) bool { + return switch (code_page) { + .windows1252, .utf8 => true, + else => false, + }; + } + + pub fn getByIdentifier(identifier: u16) !CodePage { + // There's probably a more efficient way to do this (e.g. ComptimeHashMap?) but + // this should be fine, especially since this function likely won't be called much. 
pub const Utf8 = struct {
    /// Implements decoding with rejection of ill-formed UTF-8 sequences based on section
    /// D92 of Chapter 3 of the Unicode standard (Table 3-7 specifically).
    pub const WellFormedDecoder = struct {
        /// Like std.unicode.utf8ByteSequenceLength, but:
        /// - Rejects non-well-formed first bytes, i.e. C0-C1, F5-FF
        /// - Returns an optional value instead of an error union
        ///
        /// Bytes in the continuation range (80-BF) also yield null, since they fall
        /// outside every accepted first-byte range.
        pub fn sequenceLength(first_byte: u8) ?u3 {
            return switch (first_byte) {
                0x00...0x7F => 1,
                0xC2...0xDF => 2,
                0xE0...0xEF => 3,
                0xF0...0xF4 => 4,
                else => null,
            };
        }

        /// Returns true if `byte` is within the UTF-8 continuation byte range (80-BF).
        fn isContinuationByte(byte: u8) bool {
            return switch (byte) {
                0x80...0xBF => true,
                else => false,
            };
        }

        /// Decodes the sequence that starts at `bytes[0]`. `bytes` must not be empty (asserted).
        ///
        /// On success, `value` is the decoded codepoint and `byte_len` is the length of
        /// its encoding. For ill-formed or truncated input, `value` is `Codepoint.invalid`
        /// and `byte_len` is the number of bytes that make up the invalid sequence
        /// (always at least 1), so callers can resume decoding right after it.
        pub fn decode(bytes: []const u8) Codepoint {
            std.debug.assert(bytes.len > 0);
            const first_byte = bytes[0];
            const expected_len = sequenceLength(first_byte) orelse {
                return .{ .value = Codepoint.invalid, .byte_len = 1 };
            };
            if (expected_len == 1) return .{ .value = first_byte, .byte_len = 1 };

            // Keep only the payload bits of the lead byte. The number of payload bits
            // depends on the sequence length; using the 2-byte mask for everything would
            // leave a marker bit set for 4-byte lead bytes (F0-F4) and only produce the
            // right value because the shifts below happen to push that bit out of the u21.
            const payload_mask: u8 = switch (expected_len) {
                2 => 0b00011111,
                3 => 0b00001111,
                4 => 0b00000111,
                else => unreachable, // sequenceLength only returns 1-4, and 1 returned above
            };
            var value: u21 = first_byte & payload_mask;

            var byte_index: u8 = 1;
            while (byte_index < @min(bytes.len, expected_len)) : (byte_index += 1) {
                const byte = bytes[byte_index];
                // See Table 3-7 of D92 in Chapter 3 of the Unicode Standard
                // Only the second byte has lead-byte-specific restrictions.
                const valid: bool = switch (byte_index) {
                    1 => switch (first_byte) {
                        0xE0 => switch (byte) {
                            0xA0...0xBF => true,
                            else => false,
                        },
                        0xED => switch (byte) {
                            0x80...0x9F => true,
                            else => false,
                        },
                        0xF0 => switch (byte) {
                            0x90...0xBF => true,
                            else => false,
                        },
                        0xF4 => switch (byte) {
                            0x80...0x8F => true,
                            else => false,
                        },
                        else => isContinuationByte(byte),
                    },
                    else => isContinuationByte(byte),
                };

                if (!valid) {
                    // Only include the byte in the invalid sequence if it's in the range
                    // of a continuation byte. All other values should not be included in the
                    // invalid sequence.
                    //
                    // Note: This is how the Windows RC compiler handles this, this may not
                    // be the correct-as-according-to-the-Unicode-standard way to do it.
                    const len: usize = if (isContinuationByte(byte)) byte_index + 1 else byte_index;
                    return .{ .value = Codepoint.invalid, .byte_len = len };
                }

                value <<= 6;
                value |= byte & 0b00111111;
            }
            // The input ended before the sequence was complete.
            if (byte_index != expected_len) {
                return .{ .value = Codepoint.invalid, .byte_len = byte_index };
            }
            return .{ .value = value, .byte_len = expected_len };
        }
    };
};
.value = Codepoint.invalid, + .byte_len = 2, + }, CodePage.utf8.codepointAt(0, invalid_utf8).?); + try std.testing.expectEqual(Codepoint{ + .value = Codepoint.invalid, + .byte_len = 1, + }, CodePage.utf8.codepointAt(2, invalid_utf8).?); + try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(3, invalid_utf8)); + } + + { + const invalid_utf8 = "\xD2"; + try std.testing.expectEqual(Codepoint{ + .value = Codepoint.invalid, + .byte_len = 1, + }, CodePage.utf8.codepointAt(0, invalid_utf8).?); + try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(1, invalid_utf8)); + } + + { + const invalid_utf8 = "\xE1\xA0"; + try std.testing.expectEqual(Codepoint{ + .value = Codepoint.invalid, + .byte_len = 2, + }, CodePage.utf8.codepointAt(0, invalid_utf8).?); + try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(2, invalid_utf8)); + } + + { + const invalid_utf8 = "\xC5\xFF"; + try std.testing.expectEqual(Codepoint{ + .value = Codepoint.invalid, + .byte_len = 1, + }, CodePage.utf8.codepointAt(0, invalid_utf8).?); + try std.testing.expectEqual(Codepoint{ + .value = Codepoint.invalid, + .byte_len = 1, + }, CodePage.utf8.codepointAt(1, invalid_utf8).?); + try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(2, invalid_utf8)); + } +} + +test "codepointAt utf8 encoded" { + const utf8_encoded = "²"; + + // with code page utf8 + try std.testing.expectEqual(Codepoint{ + .value = '²', + .byte_len = 2, + }, CodePage.utf8.codepointAt(0, utf8_encoded).?); + try std.testing.expectEqual(@as(?Codepoint, null), CodePage.utf8.codepointAt(2, utf8_encoded)); + + // with code page windows1252 + try std.testing.expectEqual(Codepoint{ + .value = '\xC2', + .byte_len = 1, + }, CodePage.windows1252.codepointAt(0, utf8_encoded).?); + try std.testing.expectEqual(Codepoint{ + .value = '\xB2', + .byte_len = 1, + }, CodePage.windows1252.codepointAt(1, utf8_encoded).?); + try 
test "codepointAt windows1252 encoded" {
    // A single 0xB2 byte: a lone continuation byte when read as UTF-8.
    const windows1252_encoded = "\xB2";

    // with code page utf8
    try std.testing.expectEqual(Codepoint{
        .value = Codepoint.invalid,
        .byte_len = 1,
    }, CodePage.utf8.codepointAt(0, windows1252_encoded).?);
    // The invalid sequence is 1 byte long, so index 1 is the first index past the end.
    // (Probing index 2 would skip over the actual end-of-input boundary.)
    try std.testing.expectEqual(@as(?Codepoint, null), CodePage.utf8.codepointAt(1, windows1252_encoded));

    // with code page windows1252
    try std.testing.expectEqual(Codepoint{
        .value = '\xB2',
        .byte_len = 1,
    }, CodePage.windows1252.codepointAt(0, windows1252_encoded).?);
    try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(1, windows1252_encoded));
}
/// Strips `//` line comments and `/* */` multiline comments from `source`, writing the
/// remaining bytes to `buf` and returning whatever the writer reports as written.
///
/// `buf` must be at least as long as `source`
/// In-place transformation is supported (i.e. `source` and `buf` can be the same slice)
///
/// - Comment markers inside single- or double-quoted strings are copied through untouched.
/// - `\n` and CRLF line endings are written even when they occur inside a comment. A bare
///   `\r` inside a multiline comment is dropped instead, and the affected lines are
///   collapsed in `source_mappings` (if provided).
/// - Nothing is written in place of a removed comment, so `blah/**/blah` becomes `blahblah`.
pub fn removeComments(source: []const u8, buf: []u8, source_mappings: ?*SourceMappings) []u8 {
    std.debug.assert(buf.len >= source.len);
    var result = UncheckedSliceWriter{ .slice = buf };
    const State = enum {
        start,
        forward_slash,
        line_comment,
        multiline_comment,
        multiline_comment_end,
        single_quoted,
        single_quoted_escape,
        double_quoted,
        double_quoted_escape,
    };
    var state: State = .start;
    var index: usize = 0;
    // Index of a '/' that has been seen but not yet written, since it may start a comment.
    var pending_start: ?usize = null;
    var line_handler = LineHandler{ .buffer = source };
    while (index < source.len) : (index += 1) {
        const c = source[index];
        // TODO: Disallow \x1A, \x00, \x7F in comments. At least \x1A and \x00 can definitely
        //       cause errors or parsing weirdness in the Win32 RC compiler. These are disallowed
        //       in the lexer, but comments are stripped before getting to the lexer.
        switch (state) {
            .start => switch (c) {
                '/' => {
                    state = .forward_slash;
                    pending_start = index;
                },
                '\r', '\n' => {
                    _ = line_handler.incrementLineNumber(index);
                    result.write(c);
                },
                else => {
                    switch (c) {
                        '"' => state = .double_quoted,
                        '\'' => state = .single_quoted,
                        else => {},
                    }
                    result.write(c);
                },
            },
            .forward_slash => switch (c) {
                '/' => state = .line_comment,
                '*' => {
                    state = .multiline_comment;
                },
                else => {
                    // Not a comment after all: write the held-back '/' along with this byte.
                    // NOTE(review): a quote character directly after '/' returns to .start
                    // rather than entering a quoted state -- confirm this is intended.
                    _ = line_handler.maybeIncrementLineNumber(index);
                    result.writeSlice(source[pending_start.? .. index + 1]);
                    pending_start = null;
                    state = .start;
                },
            },
            .line_comment => switch (c) {
                '\r', '\n' => {
                    _ = line_handler.incrementLineNumber(index);
                    result.write(c);
                    state = .start;
                },
                else => {},
            },
            .multiline_comment => switch (c) {
                '\r' => handleMultilineCarriageReturn(source, &line_handler, index, &result, source_mappings),
                '\n' => {
                    _ = line_handler.incrementLineNumber(index);
                    result.write(c);
                },
                '*' => state = .multiline_comment_end,
                else => {},
            },
            .multiline_comment_end => switch (c) {
                '\r' => {
                    handleMultilineCarriageReturn(source, &line_handler, index, &result, source_mappings);
                    // We only want to treat this as a newline if it's part of a CRLF pair. If it's
                    // not, then we still want to stay in .multiline_comment_end, so that e.g. `*<\r>/` still
                    // functions as a `*/` comment ending. Kinda crazy, but that's how the Win32 implementation works.
                    if (formsLineEndingPair(source, '\r', index + 1)) {
                        state = .multiline_comment;
                    }
                },
                '\n' => {
                    _ = line_handler.incrementLineNumber(index);
                    result.write(c);
                    state = .multiline_comment;
                },
                '/' => {
                    state = .start;
                },
                // Another '*' can itself be the first half of the `*/` terminator, so stay
                // in this state. Without this, a comment ending in `**/` would fall back to
                // .multiline_comment on the second '*' and then never see its terminator.
                '*' => {},
                else => {
                    state = .multiline_comment;
                },
            },
            .single_quoted => switch (c) {
                // An unterminated string ends at the end of the line.
                '\r', '\n' => {
                    _ = line_handler.incrementLineNumber(index);
                    state = .start;
                    result.write(c);
                },
                '\\' => {
                    state = .single_quoted_escape;
                    result.write(c);
                },
                '\'' => {
                    state = .start;
                    result.write(c);
                },
                else => {
                    result.write(c);
                },
            },
            .single_quoted_escape => switch (c) {
                '\r', '\n' => {
                    _ = line_handler.incrementLineNumber(index);
                    state = .start;
                    result.write(c);
                },
                else => {
                    // The escaped byte (e.g. a quote) cannot end the string.
                    state = .single_quoted;
                    result.write(c);
                },
            },
            .double_quoted => switch (c) {
                // An unterminated string ends at the end of the line.
                '\r', '\n' => {
                    _ = line_handler.incrementLineNumber(index);
                    state = .start;
                    result.write(c);
                },
                '\\' => {
                    state = .double_quoted_escape;
                    result.write(c);
                },
                '"' => {
                    state = .start;
                    result.write(c);
                },
                else => {
                    result.write(c);
                },
            },
            .double_quoted_escape => switch (c) {
                '\r', '\n' => {
                    _ = line_handler.incrementLineNumber(index);
                    state = .start;
                    result.write(c);
                },
                else => {
                    // The escaped byte (e.g. a quote) cannot end the string.
                    state = .double_quoted;
                    result.write(c);
                },
            },
        }
    }
    // NOTE(review): if the input ends while in .forward_slash, the held-back '/' is never
    // written -- confirm whether dropping a trailing '/' is intended.
    return result.getWritten();
}
/// Same as `removeComments`, but the output goes into a buffer allocated with `allocator`
/// instead of a caller-provided one.
///
/// The returned slice has the length of the comment-stripped output. The caller owns it
/// and must free it with the same `allocator`. Fails only if an allocation fails.
pub fn removeCommentsAlloc(allocator: Allocator, source: []const u8, source_mappings: ?*SourceMappings) ![]u8 {
    // The output can never be longer than the input, so `source.len` bytes always suffice.
    const buf = try allocator.alloc(u8, source.len);
    errdefer allocator.free(buf);
    const result = removeComments(source, buf, source_mappings);
    // Resize the allocation to what was actually written. This relies on `removeComments`
    // returning the written bytes as the leading part of `buf`.
    return allocator.realloc(buf, result.len);
}
); + try testRemoveComments( + "blah\r\nblah", + "blah/*some\r\nthing*/blah", + ); + + // handle * correctly + try testRemoveComments( + \\blah + \\ + \\ + , + \\blah/*some + \\thing* + \\/bl*ah*/ + ); +} + +test "comments appended to a line" { + try testRemoveComments( + \\blah + \\blah + , + \\blah // line comment + \\blah + ); + try testRemoveComments( + "blah \r\nblah", + "blah // line comment\r\nblah", + ); +} + +test "remove comments with mappings" { + const allocator = std.testing.allocator; + var mut_source = "blah/*\rcommented line*\r/blah".*; + var mappings = SourceMappings{}; + _ = try mappings.files.put(allocator, "test.rc"); + try mappings.set(allocator, 1, .{ .start_line = 1, .end_line = 1, .filename_offset = 0 }); + try mappings.set(allocator, 2, .{ .start_line = 2, .end_line = 2, .filename_offset = 0 }); + try mappings.set(allocator, 3, .{ .start_line = 3, .end_line = 3, .filename_offset = 0 }); + defer mappings.deinit(allocator); + + var result = removeComments(&mut_source, &mut_source, &mappings); + + try std.testing.expectEqualStrings("blahblah", result); + try std.testing.expectEqual(@as(usize, 1), mappings.mapping.items.len); + try std.testing.expectEqual(@as(usize, 3), mappings.mapping.items[0].end_line); +} + +test "in place" { + var mut_source = "blah /* comment */ blah".*; + var result = removeComments(&mut_source, &mut_source, null); + try std.testing.expectEqualStrings("blah blah", result); +} diff --git a/src/resinator/compile.zig b/src/resinator/compile.zig new file mode 100644 index 000000000000..c35a882ecabe --- /dev/null +++ b/src/resinator/compile.zig @@ -0,0 +1,3356 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const Allocator = std.mem.Allocator; +const Node = @import("ast.zig").Node; +const lex = @import("lex.zig"); +const Parser = @import("parse.zig").Parser; +const Resource = @import("rc.zig").Resource; +const Token = @import("lex.zig").Token; +const literals = @import("literals.zig"); +const Number = 
literals.Number; +const SourceBytes = literals.SourceBytes; +const Diagnostics = @import("errors.zig").Diagnostics; +const ErrorDetails = @import("errors.zig").ErrorDetails; +const MemoryFlags = @import("res.zig").MemoryFlags; +const rc = @import("rc.zig"); +const res = @import("res.zig"); +const ico = @import("ico.zig"); +const ani = @import("ani.zig"); +const bmp = @import("bmp.zig"); +const WORD = std.os.windows.WORD; +const DWORD = std.os.windows.DWORD; +const utils = @import("utils.zig"); +const NameOrOrdinal = res.NameOrOrdinal; +const CodePage = @import("code_pages.zig").CodePage; +const CodePageLookup = @import("ast.zig").CodePageLookup; +const SourceMappings = @import("source_mapping.zig").SourceMappings; +const windows1252 = @import("windows1252.zig"); +const lang = @import("lang.zig"); +const code_pages = @import("code_pages.zig"); +const errors = @import("errors.zig"); + +pub const CompileOptions = struct { + cwd: std.fs.Dir, + diagnostics: *Diagnostics, + source_mappings: ?*SourceMappings = null, + /// List of paths (absolute or relative to `cwd`) for every file that the resources within the .rc file depend on. + /// Items within the list will be allocated using the allocator of the ArrayList and must be + /// freed by the caller. + /// TODO: Maybe a dedicated struct for this purpose so that it's a bit nicer to work with. + dependencies_list: ?*std.ArrayList([]const u8) = null, + default_code_page: CodePage = .windows1252, + ignore_include_env_var: bool = false, + extra_include_paths: []const []const u8 = &.{}, + /// This is just an API convenience to allow separately passing 'system' (i.e. those + /// that would normally be gotten from the INCLUDE env var) include paths. This is mostly + /// intended for use when setting `ignore_include_env_var = true`. When `ignore_include_env_var` + /// is false, `system_include_paths` will be searched before the paths in the INCLUDE env var. 
/// Parses the resource script in `source` and writes the compiled output to `writer`.
///
/// Diagnostics produced along the way are added to `options.diagnostics`.
///
/// The include search directories are collected in this order:
/// 1. the directory of the root file, when `options.source_mappings` is set and the root
///    path has a directory component
/// 2. `options.cwd`
/// 3. `options.extra_include_paths`
/// 4. `options.system_include_paths`
/// 5. every `;`-separated entry of the INCLUDE environment variable, unless
///    `options.ignore_include_env_var` is set
///
/// Include paths (3-5) that cannot be opened are skipped. All directory handles and path
/// copies made here are released before returning, on success and on error alike.
pub fn compile(allocator: Allocator, source: []const u8, writer: anytype, options: CompileOptions) !void {
    var lexer = lex.Lexer.init(source, .{
        .default_code_page = options.default_code_page,
        .source_mappings = options.source_mappings,
        .max_string_literal_codepoints = options.max_string_literal_codepoints,
    });
    var parser = Parser.init(&lexer, .{
        .warn_instead_of_error_on_invalid_code_page = options.warn_instead_of_error_on_invalid_code_page,
    });
    var tree = try parser.parse(allocator, options.diagnostics);
    defer tree.deinit();

    var search_dirs = std.ArrayList(SearchDir).init(allocator);
    defer {
        for (search_dirs.items) |*search_dir| {
            search_dir.deinit(allocator);
        }
        search_dirs.deinit();
    }

    if (options.source_mappings) |source_mappings| {
        const root_path = source_mappings.files.get(source_mappings.root_filename_offset);
        // If dirname returns null, then the root path will be the same as
        // the cwd so we don't need to add it as a distinct search path.
        if (std.fs.path.dirname(root_path)) |root_dir_path| {
            var root_dir = try options.cwd.openDir(root_dir_path, .{});
            errdefer root_dir.close();
            try appendSearchDirDupePath(&search_dirs, allocator, root_dir, root_dir_path);
        }
    }
    {
        // Re-open the passed in cwd since we want to be able to close it (std.fs.cwd() shouldn't be closed)
        // Opening a directory is I/O and can fail even for a valid handle, so the error is
        // returned to the caller rather than asserted away.
        var cwd_dir = try options.cwd.openDir(".", .{});
        // Until the append succeeds, the handle is not owned by `search_dirs`.
        errdefer cwd_dir.close();
        try search_dirs.append(.{ .dir = cwd_dir, .path = null });
    }
    for (options.extra_include_paths) |extra_include_path| {
        var dir = openSearchPathDir(options.cwd, extra_include_path) catch {
            // TODO: maybe a warning that the search path is skipped?
            continue;
        };
        errdefer dir.close();
        try appendSearchDirDupePath(&search_dirs, allocator, dir, extra_include_path);
    }
    for (options.system_include_paths) |system_include_path| {
        var dir = openSearchPathDir(options.cwd, system_include_path) catch {
            // TODO: maybe a warning that the search path is skipped?
            continue;
        };
        errdefer dir.close();
        try appendSearchDirDupePath(&search_dirs, allocator, dir, system_include_path);
    }
    if (!options.ignore_include_env_var) {
        // A missing or undecodable INCLUDE is treated as empty, but running out of memory
        // is not something to paper over.
        const INCLUDE = std.process.getEnvVarOwned(allocator, "INCLUDE") catch |err| switch (err) {
            error.OutOfMemory => return error.OutOfMemory,
            else => "",
        };
        // Freeing the zero-length fallback is a no-op.
        defer allocator.free(INCLUDE);

        // TODO: Should this be platform-specific? How does windres/llvm-rc handle this (if at all)?
        var it = std.mem.tokenize(u8, INCLUDE, ";");
        while (it.next()) |search_path| {
            var dir = openSearchPathDir(options.cwd, search_path) catch continue;
            errdefer dir.close();
            try appendSearchDirDupePath(&search_dirs, allocator, dir, search_path);
        }
    }

    var arena_allocator = std.heap.ArenaAllocator.init(allocator);
    defer arena_allocator.deinit();
    const arena = arena_allocator.allocator();

    var compiler = Compiler{
        .source = source,
        .arena = arena,
        .allocator = allocator,
        .cwd = options.cwd,
        .diagnostics = options.diagnostics,
        .dependencies_list = options.dependencies_list,
        .input_code_pages = &tree.input_code_pages,
        .output_code_pages = &tree.output_code_pages,
        // This is only safe because we know search_dirs won't be modified past this point
        .search_dirs = search_dirs.items,
        .null_terminate_string_table_strings = options.null_terminate_string_table_strings,
        .silent_duplicate_control_ids = options.silent_duplicate_control_ids,
    };
    if (options.default_language_id) |default_language_id| {
        compiler.state.language = res.Language.fromInt(default_language_id);
    }

    try compiler.writeRoot(tree.root(), writer);
}

/// Appends an entry for `dir` to `search_dirs`, storing a copy of `path` made with `allocator`.
/// If this fails, nothing is appended, the copy of `path` is freed, and `dir` is left
/// open for the caller to close.
fn appendSearchDirDupePath(
    search_dirs: *std.ArrayList(SearchDir),
    allocator: Allocator,
    dir: std.fs.Dir,
    path: []const u8,
) Allocator.Error!void {
    const owned_path = try allocator.dupe(u8, path);
    errdefer allocator.free(owned_path);
    try search_dirs.append(.{ .dir = dir, .path = owned_path });
}
+        // now write the FONTDIR (if it has anything in it)
+        try self.state.font_dir.writeResData(self, writer);
+        if (self.state.font_dir.fonts.items.len != 0) {
+            // The Win32 RC compiler may write a different FONTDIR resource than us,
+            // due to it sometimes writing a non-zero-length device name/face name
+            // whereas we *always* write them both as zero-length.
+            //
+            // In practical terms, this doesn't matter, since for various reasons the format
+            // of the FONTDIR cannot be relied on and is seemingly not actually used by anything
+            // anymore. We still want to emit some sort of diagnostic for the purposes of being able
+            // to know that our .RES is intentionally not meant to be byte-for-byte identical with
+            // the rc.exe output.
+            //
+            // By using the hint type here, we allow this diagnostic to be detected in code,
+            // but it will not be printed since the end-user doesn't need to care.
+            try self.addErrorDetails(.{
+                .err = .result_contains_fontdir,
+                .type = .hint,
+                .token = undefined,
+            });
+        }
+        // once we've written everything else out, we can write out the finalized STRINGTABLE resources
+        var string_tables_it = self.state.string_tables.tables.iterator();
+        while (string_tables_it.next()) |string_table_entry| {
+            var string_table_it = string_table_entry.value_ptr.blocks.iterator();
+            while (string_table_it.next()) |entry| {
+                try entry.value_ptr.writeResData(self, string_table_entry.key_ptr.*, entry.key_ptr.*, writer);
+            }
+        }
+    }
+
+    /// Dispatches a single node to the matching `write*` method based on `node.id`.
+    /// Node kinds marked `unreachable` are not expected to be passed here directly.
+    pub fn writeNode(self: *Compiler, node: *Node, writer: anytype) !void {
+        switch (node.id) {
+            .root => unreachable, // writeRoot should be called directly instead
+            .resource_external => try self.writeResourceExternal(@fieldParentPtr(Node.ResourceExternal, "base", node), writer),
+            .resource_raw_data => try self.writeResourceRawData(@fieldParentPtr(Node.ResourceRawData, "base", node), writer),
+            .literal => unreachable, // this is context dependent and should be handled by its parent
+            .binary_expression => unreachable,
.grouped_expression => unreachable, + .not_expression => unreachable, + .invalid => {}, // no-op, currently only used for dangling literals at EOF + .accelerators => try self.writeAccelerators(@fieldParentPtr(Node.Accelerators, "base", node), writer), + .accelerator => unreachable, // handled by writeAccelerators + .dialog => try self.writeDialog(@fieldParentPtr(Node.Dialog, "base", node), writer), + .control_statement => unreachable, + .toolbar => try self.writeToolbar(@fieldParentPtr(Node.Toolbar, "base", node), writer), + .menu => try self.writeMenu(@fieldParentPtr(Node.Menu, "base", node), writer), + .menu_item => unreachable, + .menu_item_separator => unreachable, + .menu_item_ex => unreachable, + .popup => unreachable, + .popup_ex => unreachable, + .version_info => try self.writeVersionInfo(@fieldParentPtr(Node.VersionInfo, "base", node), writer), + .version_statement => unreachable, + .block => unreachable, + .block_value => unreachable, + .block_value_value => unreachable, + .string_table => try self.writeStringTable(@fieldParentPtr(Node.StringTable, "base", node)), + .string_table_string => unreachable, // handled by writeStringTable + .language_statement => self.writeLanguageStatement(@fieldParentPtr(Node.LanguageStatement, "base", node)), + .font_statement => unreachable, + .simple_statement => self.writeTopLevelSimpleStatement(@fieldParentPtr(Node.SimpleStatement, "base", node)), + } + } + + /// Returns the filename encoded as UTF-8 (allocated by self.allocator) + pub fn evaluateFilenameExpression(self: *Compiler, expression_node: *Node) ![]u8 { + switch (expression_node.id) { + .literal => { + const literal_node = expression_node.cast(.literal).?; + switch (literal_node.token.id) { + .literal, .number => { + const slice = literal_node.token.slice(self.source); + const code_page = self.input_code_pages.getForToken(literal_node.token); + var buf = try std.ArrayList(u8).initCapacity(self.allocator, slice.len); + errdefer buf.deinit(); + + var index: usize 
= 0; + while (code_page.codepointAt(index, slice)) |codepoint| : (index += codepoint.byte_len) { + const c = codepoint.value; + if (c == code_pages.Codepoint.invalid) { + try buf.appendSlice("�"); + } else { + // Anything that is not returned as an invalid codepoint must be encodable as UTF-8. + const utf8_len = std.unicode.utf8CodepointSequenceLength(c) catch unreachable; + try buf.ensureUnusedCapacity(utf8_len); + _ = std.unicode.utf8Encode(c, buf.unusedCapacitySlice()) catch unreachable; + buf.items.len += utf8_len; + } + } + + return buf.toOwnedSlice(); + }, + .quoted_ascii_string, .quoted_wide_string => { + const slice = literal_node.token.slice(self.source); + const column = literal_node.token.calculateColumn(self.source, 8, null); + const bytes = SourceBytes{ .slice = slice, .code_page = self.input_code_pages.getForToken(literal_node.token) }; + + var buf = std.ArrayList(u8).init(self.allocator); + errdefer buf.deinit(); + + // Filenames are sort-of parsed as if they were wide strings, but the max escape width of + // hex/octal escapes is still determined by the L prefix. Since we want to end up with + // UTF-8, we can parse either string type directly to UTF-8. 
+ var parser = literals.IterativeStringParser.init(bytes, .{ + .start_column = column, + .diagnostics = .{ .diagnostics = self.diagnostics, .token = literal_node.token }, + }); + + while (try parser.nextUnchecked()) |parsed| { + const c = parsed.codepoint; + if (c == code_pages.Codepoint.invalid) { + try buf.appendSlice("�"); + } else { + var codepoint_buf: [4]u8 = undefined; + // If the codepoint cannot be encoded, we fall back to � + if (std.unicode.utf8Encode(c, &codepoint_buf)) |len| { + try buf.appendSlice(codepoint_buf[0..len]); + } else |_| { + try buf.appendSlice("�"); + } + } + } + + return buf.toOwnedSlice(); + }, + else => { + std.debug.print("unexpected filename token type: {}\n", .{literal_node.token}); + unreachable; // no other token types should be in a filename literal node + }, + } + }, + .binary_expression => { + const binary_expression_node = expression_node.cast(.binary_expression).?; + return self.evaluateFilenameExpression(binary_expression_node.right); + }, + .grouped_expression => { + const grouped_expression_node = expression_node.cast(.grouped_expression).?; + return self.evaluateFilenameExpression(grouped_expression_node.expression); + }, + else => unreachable, + } + } + + /// https://learn.microsoft.com/en-us/windows/win32/menurc/searching-for-files + /// + /// Searches, in this order: + /// Directory of the 'root' .rc file (if different from CWD) + /// CWD + /// extra_include_paths (resolved relative to CWD) + /// system_include_paths (resolve relative to CWD) + /// INCLUDE environment var paths (only if ignore_include_env_var is false; resolved relative to CWD) + /// + /// Note: The CWD being searched *in addition to* the directory of the 'root' .rc file + /// is also how the Win32 RC compiler preprocessor searches for includes, but that + /// differs from how the clang preprocessor searches for includes. + /// + /// Note: This will always return the first matching file that can be opened. 
+    /// This matches the Win32 RC compiler, which will fail with an error if the first
+    /// matching file is invalid. That is, it does not do the `cmd` PATH searching
+    /// thing of continuing to look for matching files until it finds a valid
+    /// one if a matching file is invalid.
+    ///
+    /// On success the caller owns the returned file and must close it. If
+    /// `dependencies_list` is set, the path of the opened file is appended to it
+    /// (allocated with the list's allocator).
+    ///
+    /// Returns the first open error encountered while probing the search directories,
+    /// or `error.FileNotFound` if there were no search directories to probe.
+    fn searchForFile(self: *Compiler, path: []const u8) !std.fs.File {
+        // If the path is absolute, then it is not resolved relative to any search
+        // paths, so there's no point in checking them.
+        //
+        // This behavior was determined/confirmed with the following test:
+        // - A `test.rc` file with the contents `1 RCDATA "/test.bin"`
+        // - A `test.bin` file at `C:\test.bin`
+        // - A `test.bin` file at `inc\test.bin` relative to the .rc file
+        // - Invoking `rc` with `rc /i inc test.rc`
+        //
+        // This results in a .res file with the contents of `C:\test.bin`, not
+        // the contents of `inc\test.bin`. Further, if `C:\test.bin` is deleted,
+        // then it starts failing to find `/test.bin`, meaning that it does not resolve
+        // `/test.bin` relative to include paths and instead only treats it as
+        // an absolute path.
+        if (std.fs.path.isAbsolute(path)) {
+            const file = try utils.openFileNotDir(std.fs.cwd(), path, .{});
+            errdefer file.close();
+
+            if (self.dependencies_list) |dependencies_list| {
+                const duped_path = try dependencies_list.allocator.dupe(u8, path);
+                errdefer dependencies_list.allocator.free(duped_path);
+                try dependencies_list.append(duped_path);
+            }
+
+            // Hand the handle to the caller here. Falling through to the search
+            // directories would leak this handle and could record the dependency twice.
+            return file;
+        }
+
+        // Remember only the first failure so that the reported error corresponds to
+        // the highest-priority search directory.
+        var first_error: ?std.fs.File.OpenError = null;
+        for (self.search_dirs) |search_dir| {
+            if (utils.openFileNotDir(search_dir.dir, path, .{})) |file| {
+                errdefer file.close();
+
+                if (self.dependencies_list) |dependencies_list| {
+                    const searched_file_path = try std.fs.path.join(dependencies_list.allocator, &.{
+                        search_dir.path orelse "", path,
+                    });
+                    errdefer dependencies_list.allocator.free(searched_file_path);
+                    try dependencies_list.append(searched_file_path);
+                }
+
+                return file;
+            } else |err| if (first_error == null) {
+                first_error = err;
+            }
+        }
+        return first_error orelse error.FileNotFound;
+    }
+
+    pub fn writeResourceExternal(self: *Compiler, node: *Node.ResourceExternal, writer: anytype) !void {
+        // Init header with data size zero for now, will need to fill it in later
+        var header = try self.resourceHeader(node.id, node.type, .{});
+        defer header.deinit(self.allocator);
+
+        const maybe_predefined_type = header.predefinedResourceType();
+
+        // DLGINCLUDE has special handling that doesn't actually need the file to exist
+        if (maybe_predefined_type != null and maybe_predefined_type.?
== .DLGINCLUDE) { + const filename_token = node.filename.cast(.literal).?.token; + const parsed_filename = try self.parseQuotedStringAsAsciiString(filename_token); + defer self.allocator.free(parsed_filename); + + header.applyMemoryFlags(node.common_resource_attributes, self.source); + header.data_size = @intCast(parsed_filename.len + 1); + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + try writer.writeAll(parsed_filename); + try writer.writeByte(0); + try writeDataPadding(writer, header.data_size); + return; + } + + const filename_utf8 = try self.evaluateFilenameExpression(node.filename); + defer self.allocator.free(filename_utf8); + + // TODO: More robust checking of the validity of the filename. + // This currently only checks for NUL bytes, but it should probably also check for + // platform-specific invalid characters like '*', '?', '"', '<', '>', '|' (Windows) + // Related: https://github.com/ziglang/zig/pull/14533#issuecomment-1416888193 + if (std.mem.indexOfScalar(u8, filename_utf8, 0) != null) { + return self.addErrorDetailsAndFail(.{ + .err = .invalid_filename, + .token = node.filename.getFirstToken(), + .token_span_end = node.filename.getLastToken(), + .extra = .{ .number = 0 }, + }); + } + + // Allow plain number literals, but complex number expressions are evaluated strangely + // and almost certainly lead to things not intended by the user (e.g. '(1+-1)' evaluates + // to the filename '-1'), so error if the filename node is a grouped/binary expression. + // Note: This is done here instead of during parsing so that we can easily include + // the evaluated filename as part of the error messages. 
+ if (node.filename.id != .literal) { + const filename_string_index = try self.diagnostics.putString(filename_utf8); + try self.addErrorDetails(.{ + .err = .number_expression_as_filename, + .token = node.filename.getFirstToken(), + .token_span_end = node.filename.getLastToken(), + .extra = .{ .number = filename_string_index }, + }); + return self.addErrorDetailsAndFail(.{ + .err = .number_expression_as_filename, + .type = .note, + .token = node.filename.getFirstToken(), + .token_span_end = node.filename.getLastToken(), + .print_source_line = false, + .extra = .{ .number = filename_string_index }, + }); + } + // From here on out, we know that the filename must be comprised of a single token, + // so get it here to simplify future usage. + const filename_token = node.filename.getFirstToken(); + + const file = self.searchForFile(filename_utf8) catch |err| switch (err) { + error.OutOfMemory => |e| return e, + else => |e| { + const filename_string_index = try self.diagnostics.putString(filename_utf8); + return self.addErrorDetailsAndFail(.{ + .err = .file_open_error, + .token = filename_token, + .extra = .{ .file_open_error = .{ + .err = ErrorDetails.FileOpenError.enumFromError(e), + .filename_string_index = filename_string_index, + } }, + }); + }, + }; + defer file.close(); + + if (maybe_predefined_type) |predefined_type| { + switch (predefined_type) { + .GROUP_ICON, .GROUP_CURSOR => { + // Check for animated icon first + if (ani.isAnimatedIcon(file.reader())) { + // Animated icons are just put into the resource unmodified, + // and the resource type changes to ANIICON/ANICURSOR + + const new_predefined_type: res.RT = switch (predefined_type) { + .GROUP_ICON => .ANIICON, + .GROUP_CURSOR => .ANICURSOR, + else => unreachable, + }; + header.type_value.ordinal = @intFromEnum(new_predefined_type); + header.memory_flags = MemoryFlags.defaults(new_predefined_type); + header.applyMemoryFlags(node.common_resource_attributes, self.source); + header.data_size = @intCast(try 
file.getEndPos()); + + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + try file.seekTo(0); + try writeResourceData(writer, file.reader(), header.data_size); + return; + } + + // isAnimatedIcon moved the file cursor so reset to the start + try file.seekTo(0); + + const icon_dir = ico.read(self.allocator, file.reader(), try file.getEndPos()) catch |err| switch (err) { + error.OutOfMemory => |e| return e, + else => |e| { + return self.iconReadError( + e, + filename_utf8, + filename_token, + predefined_type, + ); + }, + }; + defer icon_dir.deinit(); + + // This limit is inherent to the ico format since number of entries is a u16 field. + std.debug.assert(icon_dir.entries.len <= std.math.maxInt(u16)); + + // Note: The Win32 RC compiler will compile the resource as whatever type is + // in the icon_dir regardless of the type of resource specified in the .rc. + // This leads to unusable .res files when the types mismatch, so + // we error instead. + const res_types_match = switch (predefined_type) { + .GROUP_ICON => icon_dir.image_type == .icon, + .GROUP_CURSOR => icon_dir.image_type == .cursor, + else => unreachable, + }; + if (!res_types_match) { + return self.addErrorDetailsAndFail(.{ + .err = .icon_dir_and_resource_type_mismatch, + .token = filename_token, + .extra = .{ .resource = switch (predefined_type) { + .GROUP_ICON => .icon, + .GROUP_CURSOR => .cursor, + else => unreachable, + } }, + }); + } + + // Memory flags affect the RT_ICON and the RT_GROUP_ICON differently + var icon_memory_flags = MemoryFlags.defaults(res.RT.ICON); + applyToMemoryFlags(&icon_memory_flags, node.common_resource_attributes, self.source); + applyToGroupMemoryFlags(&header.memory_flags, node.common_resource_attributes, self.source); + + const first_icon_id = self.state.icon_id; + const entry_type = if (predefined_type == .GROUP_ICON) @intFromEnum(res.RT.ICON) else @intFromEnum(res.RT.CURSOR); + for (icon_dir.entries, 0..) 
|*entry, entry_i_usize| { + // We know that the entry index must fit within a u16, so + // cast it here to simplify usage sites. + const entry_i: u16 = @intCast(entry_i_usize); + var full_data_size = entry.data_size_in_bytes; + if (icon_dir.image_type == .cursor) { + full_data_size = std.math.add(u32, full_data_size, 4) catch { + return self.addErrorDetailsAndFail(.{ + .err = .resource_data_size_exceeds_max, + .token = node.id, + }); + }; + } + + const image_header = ResourceHeader{ + .type_value = .{ .ordinal = entry_type }, + .name_value = .{ .ordinal = self.state.icon_id }, + .data_size = full_data_size, + .memory_flags = icon_memory_flags, + .language = self.state.language, + .version = self.state.version, + .characteristics = self.state.characteristics, + }; + try image_header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + + // From https://learn.microsoft.com/en-us/windows/win32/menurc/localheader: + // > The LOCALHEADER structure is the first data written to the RT_CURSOR + // > resource if a RESDIR structure contains information about a cursor. 
+ // where LOCALHEADER is `struct { WORD xHotSpot; WORD yHotSpot; }` + if (icon_dir.image_type == .cursor) { + try writer.writeIntLittle(u16, entry.type_specific_data.cursor.hotspot_x); + try writer.writeIntLittle(u16, entry.type_specific_data.cursor.hotspot_y); + } + + try file.seekTo(entry.data_offset_from_start_of_file); + const header_bytes = file.reader().readBytesNoEof(16) catch { + return self.iconReadError( + error.UnexpectedEOF, + filename_utf8, + filename_token, + predefined_type, + ); + }; + + const image_format = ico.ImageFormat.detect(&header_bytes); + if (!image_format.validate(&header_bytes)) { + return self.iconReadError( + error.InvalidHeader, + filename_utf8, + filename_token, + predefined_type, + ); + } + switch (image_format) { + .riff => switch (icon_dir.image_type) { + .icon => { + // The Win32 RC compiler treats this as an error, but icon dirs + // with RIFF encoded icons within them work ~okay (they work + // in some places but not others, they may not animate, etc) if they are + // allowed to be compiled. + try self.addErrorDetails(.{ + .err = .rc_would_error_on_icon_dir, + .type = .warning, + .token = filename_token, + .extra = .{ .icon_dir = .{ .icon_type = .icon, .icon_format = .riff, .index = entry_i } }, + }); + try self.addErrorDetails(.{ + .err = .rc_would_error_on_icon_dir, + .type = .note, + .print_source_line = false, + .token = filename_token, + .extra = .{ .icon_dir = .{ .icon_type = .icon, .icon_format = .riff, .index = entry_i } }, + }); + }, + .cursor => { + // The Win32 RC compiler errors in this case too, but we only error + // here because the cursor would fail to be loaded at runtime if we + // compiled it. 
+ return self.addErrorDetailsAndFail(.{ + .err = .format_not_supported_in_icon_dir, + .token = filename_token, + .extra = .{ .icon_dir = .{ .icon_type = .cursor, .icon_format = .riff, .index = entry_i } }, + }); + }, + }, + .png => switch (icon_dir.image_type) { + .icon => { + // PNG always seems to have 1 for color planes no matter what + entry.type_specific_data.icon.color_planes = 1; + // These seem to be the only values of num_colors that + // get treated specially + entry.type_specific_data.icon.bits_per_pixel = switch (entry.num_colors) { + 2 => 1, + 8 => 3, + 16 => 4, + else => entry.type_specific_data.icon.bits_per_pixel, + }; + }, + .cursor => { + // The Win32 RC compiler treats this as an error, but cursor dirs + // with PNG encoded icons within them work fine if they are + // allowed to be compiled. + try self.addErrorDetails(.{ + .err = .rc_would_error_on_icon_dir, + .type = .warning, + .token = filename_token, + .extra = .{ .icon_dir = .{ .icon_type = .cursor, .icon_format = .png, .index = entry_i } }, + }); + }, + }, + .dib => { + const bitmap_header: *const ico.BitmapHeader = @ptrCast(@alignCast(&header_bytes)); + const bitmap_version = ico.BitmapHeader.Version.get(std.mem.littleToNative(u32, bitmap_header.bcSize)); + + // The Win32 RC compiler only allows headers with + // `bcSize == sizeof(BITMAPINFOHEADER)`, but it seems unlikely + // that there's a good reason for that outside of too-old + // bitmap headers. 
+ // TODO: Need to test V4 and V5 bitmaps to check they actually work + if (bitmap_version == .@"win2.0") { + return self.addErrorDetailsAndFail(.{ + .err = .rc_would_error_on_bitmap_version, + .token = filename_token, + .extra = .{ .icon_dir = .{ + .icon_type = if (icon_dir.image_type == .icon) .icon else .cursor, + .icon_format = image_format, + .index = entry_i, + .bitmap_version = bitmap_version, + } }, + }); + } else if (bitmap_version != .@"nt3.1") { + try self.addErrorDetails(.{ + .err = .rc_would_error_on_bitmap_version, + .type = .warning, + .token = filename_token, + .extra = .{ .icon_dir = .{ + .icon_type = if (icon_dir.image_type == .icon) .icon else .cursor, + .icon_format = image_format, + .index = entry_i, + .bitmap_version = bitmap_version, + } }, + }); + } + + switch (icon_dir.image_type) { + .icon => { + // The values in the icon's BITMAPINFOHEADER always take precedence over + // the values in the IconDir, but not in the LOCALHEADER (see above). + entry.type_specific_data.icon.color_planes = std.mem.littleToNative(u16, bitmap_header.bcPlanes); + entry.type_specific_data.icon.bits_per_pixel = std.mem.littleToNative(u16, bitmap_header.bcBitCount); + }, + .cursor => { + // Only cursors get the width/height from BITMAPINFOHEADER (icons don't) + entry.width = @intCast(bitmap_header.bcWidth); + entry.height = @intCast(bitmap_header.bcHeight); + entry.type_specific_data.cursor.hotspot_x = std.mem.littleToNative(u16, bitmap_header.bcPlanes); + entry.type_specific_data.cursor.hotspot_y = std.mem.littleToNative(u16, bitmap_header.bcBitCount); + }, + } + }, + } + + try file.seekTo(entry.data_offset_from_start_of_file); + try writeResourceDataNoPadding(writer, file.reader(), entry.data_size_in_bytes); + try writeDataPadding(writer, full_data_size); + + if (self.state.icon_id == std.math.maxInt(u16)) { + try self.addErrorDetails(.{ + .err = .max_icon_ids_exhausted, + .print_source_line = false, + .token = filename_token, + .extra = .{ .icon_dir = .{ + 
.icon_type = if (icon_dir.image_type == .icon) .icon else .cursor, + .icon_format = image_format, + .index = entry_i, + } }, + }); + return self.addErrorDetailsAndFail(.{ + .err = .max_icon_ids_exhausted, + .type = .note, + .token = filename_token, + .extra = .{ .icon_dir = .{ + .icon_type = if (icon_dir.image_type == .icon) .icon else .cursor, + .icon_format = image_format, + .index = entry_i, + } }, + }); + } + self.state.icon_id += 1; + } + + header.data_size = icon_dir.getResDataSize(); + + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + try icon_dir.writeResData(writer, first_icon_id); + try writeDataPadding(writer, header.data_size); + return; + }, + .RCDATA, .HTML, .MANIFEST, .MESSAGETABLE, .DLGINIT, .PLUGPLAY => { + header.applyMemoryFlags(node.common_resource_attributes, self.source); + }, + .BITMAP => { + header.applyMemoryFlags(node.common_resource_attributes, self.source); + const file_size = try file.getEndPos(); + + const bitmap_info = bmp.read(file.reader(), file_size) catch |err| { + const filename_string_index = try self.diagnostics.putString(filename_utf8); + return self.addErrorDetailsAndFail(.{ + .err = .bmp_read_error, + .token = filename_token, + .extra = .{ .bmp_read_error = .{ + .err = ErrorDetails.BitmapReadError.enumFromError(err), + .filename_string_index = filename_string_index, + } }, + }); + }; + + if (bitmap_info.getActualPaletteByteLen() > bitmap_info.getExpectedPaletteByteLen()) { + const num_ignored_bytes = bitmap_info.getActualPaletteByteLen() - bitmap_info.getExpectedPaletteByteLen(); + var number_as_bytes: [8]u8 = undefined; + std.mem.writeIntNative(u64, &number_as_bytes, num_ignored_bytes); + const value_string_index = try self.diagnostics.putString(&number_as_bytes); + try self.addErrorDetails(.{ + .err = .bmp_ignored_palette_bytes, + .type = .warning, + .token = filename_token, + .extra = .{ .number = value_string_index }, + }); + } else if (bitmap_info.getActualPaletteByteLen() < 
bitmap_info.getExpectedPaletteByteLen()) { + const num_padding_bytes = bitmap_info.getExpectedPaletteByteLen() - bitmap_info.getActualPaletteByteLen(); + + // TODO: Make this configurable (command line option) + const max_missing_bytes = 4096; + if (num_padding_bytes > max_missing_bytes) { + var numbers_as_bytes: [16]u8 = undefined; + std.mem.writeIntNative(u64, numbers_as_bytes[0..8], num_padding_bytes); + std.mem.writeIntNative(u64, numbers_as_bytes[8..16], max_missing_bytes); + const values_string_index = try self.diagnostics.putString(&numbers_as_bytes); + try self.addErrorDetails(.{ + .err = .bmp_too_many_missing_palette_bytes, + .token = filename_token, + .extra = .{ .number = values_string_index }, + }); + return self.addErrorDetailsAndFail(.{ + .err = .bmp_too_many_missing_palette_bytes, + .type = .note, + .print_source_line = false, + .token = filename_token, + }); + } + + var number_as_bytes: [8]u8 = undefined; + std.mem.writeIntNative(u64, &number_as_bytes, num_padding_bytes); + const value_string_index = try self.diagnostics.putString(&number_as_bytes); + try self.addErrorDetails(.{ + .err = .bmp_missing_palette_bytes, + .type = .warning, + .token = filename_token, + .extra = .{ .number = value_string_index }, + }); + const pixel_data_len = bitmap_info.getPixelDataLen(file_size); + if (pixel_data_len > 0) { + const miscompiled_bytes = @min(pixel_data_len, num_padding_bytes); + std.mem.writeIntNative(u64, &number_as_bytes, miscompiled_bytes); + const miscompiled_bytes_string_index = try self.diagnostics.putString(&number_as_bytes); + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_bmp_palette_padding, + .type = .warning, + .token = filename_token, + .extra = .{ .number = miscompiled_bytes_string_index }, + }); + } + } + + // TODO: It might be possible that the calculation done in this function + // could underflow if the underlying file is modified while reading + // it, but need to think about it more to determine if that's a + // real 
possibility + const bmp_bytes_to_write: u32 = @intCast(bitmap_info.getExpectedByteLen(file_size)); + + header.data_size = bmp_bytes_to_write; + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + try file.seekTo(bmp.file_header_len); + const file_reader = file.reader(); + try writeResourceDataNoPadding(writer, file_reader, bitmap_info.dib_header_size); + if (bitmap_info.getBitmasksByteLen() > 0) { + try writeResourceDataNoPadding(writer, file_reader, bitmap_info.getBitmasksByteLen()); + } + if (bitmap_info.getExpectedPaletteByteLen() > 0) { + try writeResourceDataNoPadding(writer, file_reader, @intCast(bitmap_info.getActualPaletteByteLen())); + const padding_bytes = bitmap_info.getMissingPaletteByteLen(); + if (padding_bytes > 0) { + try writer.writeByteNTimes(0, padding_bytes); + } + } + try file.seekTo(bitmap_info.pixel_data_offset); + const pixel_bytes: u32 = @intCast(file_size - bitmap_info.pixel_data_offset); + try writeResourceDataNoPadding(writer, file_reader, pixel_bytes); + try writeDataPadding(writer, bmp_bytes_to_write); + return; + }, + .FONT => { + if (self.state.font_dir.ids.get(header.name_value.ordinal) != null) { + // Add warning and skip this resource + // Note: The Win32 compiler prints this as an error but it doesn't fail the compilation + // and the duplicate resource is skipped. 
+ try self.addErrorDetails(ErrorDetails{ + .err = .font_id_already_defined, + .token = node.id, + .type = .warning, + .extra = .{ .number = header.name_value.ordinal }, + }); + try self.addErrorDetails(ErrorDetails{ + .err = .font_id_already_defined, + .token = self.state.font_dir.ids.get(header.name_value.ordinal).?, + .type = .note, + .extra = .{ .number = header.name_value.ordinal }, + }); + return; + } + header.applyMemoryFlags(node.common_resource_attributes, self.source); + const file_size = try file.getEndPos(); + if (file_size > std.math.maxInt(u32)) { + return self.addErrorDetailsAndFail(.{ + .err = .resource_data_size_exceeds_max, + .token = node.id, + }); + } + + // We now know that the data size will fit in a u32 + header.data_size = @intCast(file_size); + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + + var header_slurping_reader = headerSlurpingReader(148, file.reader()); + try writeResourceData(writer, header_slurping_reader.reader(), header.data_size); + + try self.state.font_dir.add(self.arena, FontDir.Font{ + .id = header.name_value.ordinal, + .header_bytes = header_slurping_reader.slurped_header, + }, node.id); + return; + }, + .ACCELERATOR, + .ANICURSOR, + .ANIICON, + .CURSOR, + .DIALOG, + .DLGINCLUDE, + .FONTDIR, + .ICON, + .MENU, + .STRING, + .TOOLBAR, + .VERSION, + .VXD, + => unreachable, + _ => unreachable, + } + } else { + header.applyMemoryFlags(node.common_resource_attributes, self.source); + } + + // Fallback to just writing out the entire contents of the file + const data_size = try file.getEndPos(); + if (data_size > std.math.maxInt(u32)) { + return self.addErrorDetailsAndFail(.{ + .err = .resource_data_size_exceeds_max, + .token = node.id, + }); + } + // We now know that the data size will fit in a u32 + header.data_size = @intCast(data_size); + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + try writeResourceData(writer, file.reader(), header.data_size); + } + + 
/// Records an `icon_read_error` diagnostic for `filename` at `token` and returns
/// the error produced by `addErrorDetailsAndFail`.
///
/// `filename` is stored in the diagnostics string table via `putString`.
/// `predefined_type` must be `.GROUP_ICON` or `.GROUP_CURSOR`; any other value
/// is `unreachable`.
fn iconReadError(
    self: *Compiler,
    err: ico.ReadError,
    filename: []const u8,
    token: Token,
    predefined_type: res.RT,
) error{ CompileError, OutOfMemory } {
    const filename_string_index = try self.diagnostics.putString(filename);
    return self.addErrorDetailsAndFail(.{
        .err = .icon_read_error,
        .token = token,
        .extra = .{ .icon_read_error = .{
            .err = ErrorDetails.IconReadError.enumFromError(err),
            .icon_type = switch (predefined_type) {
                .GROUP_ICON => .icon,
                .GROUP_CURSOR => .cursor,
                else => unreachable,
            },
            .filename_string_index = filename_string_index,
        } },
    });
}

/// Tag type of `Data`: the kinds of value a raw data expression can produce.
pub const DataType = enum {
    number,
    ascii_string,
    wide_string,
};

/// One evaluated raw data value, as produced by `evaluateDataExpression`.
pub const Data = union(DataType) {
    number: Number,
    ascii_string: []const u8,
    wide_string: [:0]const u16,

    /// Frees the payload of the string variants with `allocator`.
    /// `.number` holds no allocation, so it is a no-op.
    pub fn deinit(self: Data, allocator: Allocator) void {
        switch (self) {
            .wide_string => |wide_string| {
                allocator.free(wide_string);
            },
            .ascii_string => |ascii_string| {
                allocator.free(ascii_string);
            },
            else => {},
        }
    }

    /// Writes the value to `writer`.
    /// Numbers are written little-endian as a DWORD when `is_long` is set and
    /// as a WORD otherwise; strings are written as the bytes of their slice.
    pub fn write(self: Data, writer: anytype) !void {
        switch (self) {
            .number => |number| switch (number.is_long) {
                false => try writer.writeIntLittle(WORD, number.asWord()),
                true => try writer.writeIntLittle(DWORD, number.value),
            },
            .ascii_string => |ascii_string| {
                try writer.writeAll(ascii_string);
            },
            .wide_string => |wide_string| {
                try writer.writeAll(std.mem.sliceAsBytes(wide_string));
            },
        }
    }
};

/// Assumes that the node is a number or number expression
///
/// Recursively evaluates number literals, binary expressions and grouped
/// expressions. Any other node kind is `unreachable`, as is a literal whose
/// token is not a number (checked with `std.debug.assert`).
pub fn evaluateNumberExpression(expression_node: *Node, source: []const u8, code_page_lookup: *const CodePageLookup) Number {
    switch (expression_node.id) {
        .literal => {
            const literal_node = expression_node.cast(.literal).?;
            std.debug.assert(literal_node.token.id == .number);
            const bytes = SourceBytes{
                .slice = literal_node.token.slice(source),
                .code_page = code_page_lookup.getForToken(literal_node.token),
            };
            return literals.parseNumberLiteral(bytes);
        },
        .binary_expression => {
            const binary_expression_node = expression_node.cast(.binary_expression).?;
            const lhs = evaluateNumberExpression(binary_expression_node.left, source, code_page_lookup);
            const rhs = evaluateNumberExpression(binary_expression_node.right, source, code_page_lookup);
            // Only the first byte of the operator token is used to select the operation.
            const operator_char = binary_expression_node.operator.slice(source)[0];
            return lhs.evaluateOperator(operator_char, rhs);
        },
        .grouped_expression => {
            const grouped_expression_node = expression_node.cast(.grouped_expression).?;
            return evaluateNumberExpression(grouped_expression_node.expression, source, code_page_lookup);
        },
        else => unreachable,
    }
}

/// Intermediate result of a flags expression.
/// `not_mask` is an AND-mask contributed by `not_expression` operands; it
/// defaults to all ones, which leaves `value` unchanged when applied.
const FlagsNumber = struct {
    value: u32,
    not_mask: u32 = 0xFFFFFFFF,

    /// Combines two operands with `operator_char` (`-`, `+`, `|` or `&`).
    /// Addition and subtraction wrap on overflow. The resulting `not_mask` is
    /// the AND of both operands' masks.
    pub fn evaluateOperator(lhs: FlagsNumber, operator_char: u8, rhs: FlagsNumber) FlagsNumber {
        const result = switch (operator_char) {
            '-' => lhs.value -% rhs.value,
            '+' => lhs.value +% rhs.value,
            '|' => lhs.value | rhs.value,
            '&' => lhs.value & rhs.value,
            else => unreachable, // invalid operator, this would be a lexer/parser bug
        };
        return .{
            .value = result,
            .not_mask = lhs.not_mask & rhs.not_mask,
        };
    }

    /// Returns `value` with the bits cleared in `not_mask` removed.
    pub fn applyNotMask(self: FlagsNumber) u32 {
        return self.value & self.not_mask;
    }
};

/// Evaluates a flags expression with `default` as the initial value (see
/// `FlagsExpressionContext`) and returns the `value` field of the result.
pub fn evaluateFlagsExpressionWithDefault(default: u32, expression_node: *Node, source: []const u8, code_page_lookup: *const CodePageLookup) u32 {
    var context = FlagsExpressionContext{ .initial_value = default };
    const number = evaluateFlagsExpression(expression_node, source, code_page_lookup, &context);
    return number.value;
}

/// State shared across one `evaluateFlagsExpression` evaluation.
/// `initial_value` is consumed by the first literal or `not_expression` that is
/// evaluated, after which `initial_value_used` is set so it is applied only once.
pub const FlagsExpressionContext = struct {
    initial_value: u32 = 0,
    initial_value_used: bool = false,
};

/// Assumes that the node is a number expression (which can contain not_expressions)
///
/// - A literal is ORed with the context's initial value if that value has not
///   been used yet.
/// - A `not_expression` clears its bits from the initial value if that value has
///   not been used yet; otherwise it yields value 0 with `not_mask` set to the
///   complement of its number.
/// - A binary expression evaluates the left operand before the right one and
///   applies the combined `not_mask` to its result.
pub fn evaluateFlagsExpression(expression_node: *Node, source: []const u8, code_page_lookup: *const CodePageLookup, context: *FlagsExpressionContext) FlagsNumber {
    switch (expression_node.id) {
        .literal => {
            const literal_node = expression_node.cast(.literal).?;
            std.debug.assert(literal_node.token.id == .number);
            const bytes = SourceBytes{
                .slice = literal_node.token.slice(source),
                .code_page = code_page_lookup.getForToken(literal_node.token),
            };
            var value = literals.parseNumberLiteral(bytes).value;
            if (!context.initial_value_used) {
                context.initial_value_used = true;
                value |= context.initial_value;
            }
            return .{ .value = value };
        },
        .binary_expression => {
            const binary_expression_node = expression_node.cast(.binary_expression).?;
            const lhs = evaluateFlagsExpression(binary_expression_node.left, source, code_page_lookup, context);
            const rhs = evaluateFlagsExpression(binary_expression_node.right, source, code_page_lookup, context);
            const operator_char = binary_expression_node.operator.slice(source)[0];
            const result = lhs.evaluateOperator(operator_char, rhs);
            return .{ .value = result.applyNotMask() };
        },
        .grouped_expression => {
            const grouped_expression_node = expression_node.cast(.grouped_expression).?;
            return evaluateFlagsExpression(grouped_expression_node.expression, source, code_page_lookup, context);
        },
        .not_expression => {
            const not_expression = expression_node.cast(.not_expression).?;
            const bytes = SourceBytes{
                .slice = not_expression.number_token.slice(source),
                .code_page = code_page_lookup.getForToken(not_expression.number_token),
            };
            const not_number = literals.parseNumberLiteral(bytes);
            if (!context.initial_value_used) {
                context.initial_value_used = true;
                return .{ .value = context.initial_value & ~not_number.value };
            }
            return .{ .value = 0, .not_mask = ~not_number.value };
        },
        else => unreachable,
    }
}

/// Evaluates one raw data expression into a `Data`.
///
/// Quoted strings are parsed with `self.allocator`; the caller releases the
/// result with `Data.deinit`. Number literals and binary/grouped expressions
/// become `.number`. A `not_expression` is `unreachable` here, and any other
/// node kind panics.
pub fn evaluateDataExpression(self: *Compiler, expression_node: *Node) !Data {
    switch (expression_node.id) {
        .literal => {
            const literal_node = expression_node.cast(.literal).?;
            switch (literal_node.token.id) {
                .number => {
                    const number = evaluateNumberExpression(expression_node, self.source, self.input_code_pages);
                    return .{ .number = number };
                },
                .quoted_ascii_string => {
                    const column = literal_node.token.calculateColumn(self.source, 8, null);
                    const bytes = SourceBytes{
                        .slice = literal_node.token.slice(self.source),
                        .code_page = self.input_code_pages.getForToken(literal_node.token),
                    };
                    const parsed = try literals.parseQuotedAsciiString(self.allocator, bytes, .{
                        .start_column = column,
                        .diagnostics = .{ .diagnostics = self.diagnostics, .token = literal_node.token },
                        .output_code_page = self.output_code_pages.getForToken(literal_node.token),
                    });
                    errdefer self.allocator.free(parsed);
                    return .{ .ascii_string = parsed };
                },
                .quoted_wide_string => {
                    const column = literal_node.token.calculateColumn(self.source, 8, null);
                    const bytes = SourceBytes{
                        .slice = literal_node.token.slice(self.source),
                        .code_page = self.input_code_pages.getForToken(literal_node.token),
                    };
                    const parsed_string = try literals.parseQuotedWideString(self.allocator, bytes, .{
                        .start_column = column,
                        .diagnostics = .{ .diagnostics = self.diagnostics, .token = literal_node.token },
                    });
                    errdefer self.allocator.free(parsed_string);
                    return .{ .wide_string = parsed_string };
                },
                else => {
                    std.debug.print("unexpected token in literal node: {}\n", .{literal_node.token});
                    unreachable; // no other token types should be in a data literal node
                },
            }
        },
        .binary_expression, .grouped_expression => {
            const result = evaluateNumberExpression(expression_node, self.source, self.input_code_pages);
            return .{ .number = result };
        },
        .not_expression => unreachable,
        else => {
            std.debug.print("{}\n", .{expression_node.id});
            @panic("TODO: evaluateDataExpression");
        },
    }
}

/// Compiles a resource whose body is a list of raw data expressions.
///
/// The data is first serialized into a temporary buffer so its size is known
/// before the header is written. If the data would exceed `maxInt(u32)` bytes,
/// a `resource_data_size_exceeds_max` error is reported at `node.id`.
pub fn writeResourceRawData(self: *Compiler, node: *Node.ResourceRawData, writer: anytype) !void {
    var data_buffer = std.ArrayList(u8).init(self.allocator);
    defer data_buffer.deinit();
    // The header's data length field is a u32 so limit the resource's data size so that
    // we know we can always specify the real size.
    var limited_writer = limitedWriter(data_buffer.writer(), std.math.maxInt(u32));
    const data_writer = limited_writer.writer();

    for (node.raw_data) |expression| {
        const data = try self.evaluateDataExpression(expression);
        defer data.deinit(self.allocator);
        data.write(data_writer) catch |err| switch (err) {
            error.NoSpaceLeft => {
                return self.addErrorDetailsAndFail(.{
                    .err = .resource_data_size_exceeds_max,
                    .token = node.id,
                });
            },
            else => |e| return e,
        };
    }

    // This intCast can't fail because the limitedWriter above guarantees that
    // we will never write more than maxInt(u32) bytes.
    const data_len: u32 = @intCast(data_buffer.items.len);
    try self.writeResourceHeader(writer, node.id, node.type, data_len, node.common_resource_attributes, self.state.language);

    var data_fbs = std.io.fixedBufferStream(data_buffer.items);
    try writeResourceData(writer, data_fbs.reader(), data_len);
}

/// Builds a resource header for `id_token`/`type_token` with the given
/// `language` and `data_size`, applies the memory flags from
/// `common_resource_attributes`, and writes the header to `writer`.
pub fn writeResourceHeader(self: *Compiler, writer: anytype, id_token: Token, type_token: Token, data_size: u32, common_resource_attributes: []Token, language: res.Language) !void {
    var header = try self.resourceHeader(id_token, type_token, .{
        .language = language,
        .data_size = data_size,
    });
    defer header.deinit(self.allocator);

    header.applyMemoryFlags(common_resource_attributes, self.source);

    try header.write(writer, .{ .diagnostics = self.diagnostics, .token = id_token });
}

/// Copies at most `data_size` bytes from `data_reader` to `writer` through a
/// 4096-byte static buffer. No alignment padding is written.
pub fn writeResourceDataNoPadding(writer: anytype, data_reader: anytype, data_size: u32) !void {
    var limited_reader = std.io.limitedReader(data_reader, data_size);

    const FifoBuffer = std.fifo.LinearFifo(u8, .{ .Static = 4096 });
    var fifo = FifoBuffer.init();
    try fifo.pump(limited_reader.reader(), writer);
}

/// Writes the resource data followed by the zero padding computed from
/// `data_size` (see `numPaddingBytesNeeded`).
pub fn writeResourceData(writer: anytype, data_reader: anytype, data_size: u32) !void {
    try writeResourceDataNoPadding(writer, data_reader, data_size);
    try writeDataPadding(writer, data_size);
}

/// Writes `numPaddingBytesNeeded(data_size)` zero bytes to `writer`.
pub fn writeDataPadding(writer: anytype, data_size: u32) !void {
    try writer.writeByteNTimes(0, numPaddingBytesNeeded(data_size));
}

/// Returns how many bytes must follow `data_size` bytes to reach the next
/// multiple of 4 (0 when `data_size` is already a multiple of 4).
pub fn numPaddingBytesNeeded(data_size: u32) u2 {
    // Result is guaranteed to be between 0 and 3.
    return @intCast((4 -% data_size) % 4);
}

/// Evaluates the event of an accelerator to its 16-bit key value.
///
/// A number expression is truncated with `asWord`. Otherwise the node must be
/// a string literal (asserted), which is parsed by
/// `res.parseAcceleratorKeyString` with `is_virt` passed through.
pub fn evaluateAcceleratorKeyExpression(self: *Compiler, node: *Node, is_virt: bool) !u16 {
    if (node.isNumberExpression()) {
        return evaluateNumberExpression(node, self.source, self.input_code_pages).asWord();
    } else {
        std.debug.assert(node.isStringLiteral());
        const literal = @fieldParentPtr(Node.Literal, "base", node);
        const bytes = SourceBytes{
            .slice = literal.token.slice(self.source),
            .code_page = self.input_code_pages.getForToken(literal.token),
        };
        const column = literal.token.calculateColumn(self.source, 8, null);
        return res.parseAcceleratorKeyString(bytes, is_virt, .{
            .start_column = column,
            .diagnostics = .{ .diagnostics = self.diagnostics, .token = literal.token },
        });
    }
}

/// Compiles an ACCELERATORS resource.
///
/// The table is serialized into a temporary buffer limited to `maxInt(u32)`
/// bytes, then the resource header (with memory flags and optional statements
/// applied) and the padded data are written to `writer`. Exceeding the limit
/// reports `resource_data_size_exceeds_max` at `node.id`.
pub fn writeAccelerators(self: *Compiler, node: *Node.Accelerators, writer: anytype) !void {
    var data_buffer = std.ArrayList(u8).init(self.allocator);
    defer data_buffer.deinit();

    // The header's data length field is a u32 so limit the resource's data size so that
    // we know we can always specify the real size.
    var limited_writer = limitedWriter(data_buffer.writer(), std.math.maxInt(u32));
    const data_writer = limited_writer.writer();

    self.writeAcceleratorsData(node, data_writer) catch |err| switch (err) {
        error.NoSpaceLeft => {
            return self.addErrorDetailsAndFail(.{
                .err = .resource_data_size_exceeds_max,
                .token = node.id,
            });
        },
        else => |e| return e,
    };

    // This intCast can't fail because the limitedWriter above guarantees that
    // we will never write more than maxInt(u32) bytes.
    const data_size: u32 = @intCast(data_buffer.items.len);
    var header = try self.resourceHeader(node.id, node.type, .{
        .data_size = data_size,
    });
    defer header.deinit(self.allocator);

    header.applyMemoryFlags(node.common_resource_attributes, self.source);
    header.applyOptionalStatements(node.optional_statements, self.source, self.input_code_pages);

    try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id });

    var data_fbs = std.io.fixedBufferStream(data_buffer.items);
    try writeResourceData(writer, data_fbs.reader(), data_size);
}

/// Expects `data_writer` to be a LimitedWriter limited to u32, meaning all writes to
/// the writer within this function could return error.NoSpaceLeft
///
/// Each accelerator is written as 8 bytes: the modifier flags byte, a zero
/// byte, the key as a little-endian u16, the command id as a little-endian
/// u16, and a zero u16. The final entry is flagged with `markLast`.
///
/// Reports `accelerator_type_required` when the event is a number expression
/// without an explicit ASCII/VIRTKEY type, and `invalid_accelerator_key` when
/// the key fails to parse.
pub fn writeAcceleratorsData(self: *Compiler, node: *Node.Accelerators, data_writer: anytype) !void {
    for (node.accelerators, 0..) |accel_node, i| {
        const accelerator = @fieldParentPtr(Node.Accelerator, "base", accel_node);
        var modifiers = res.AcceleratorModifiers{};
        for (accelerator.type_and_options) |type_or_option| {
            const modifier = rc.AcceleratorTypeAndOptions.map.get(type_or_option.slice(self.source)).?;
            modifiers.apply(modifier);
        }
        if (accelerator.event.isNumberExpression() and !modifiers.explicit_ascii_or_virtkey) {
            return self.addErrorDetailsAndFail(.{
                .err = .accelerator_type_required,
                .token = accelerator.event.getFirstToken(),
                .token_span_end = accelerator.event.getLastToken(),
            });
        }
        const key = self.evaluateAcceleratorKeyExpression(accelerator.event, modifiers.isSet(.virtkey)) catch |err| switch (err) {
            error.OutOfMemory => |e| return e,
            else => |e| {
                return self.addErrorDetailsAndFail(.{
                    .err = .invalid_accelerator_key,
                    .token = accelerator.event.getFirstToken(),
                    .token_span_end = accelerator.event.getLastToken(),
                    .extra = .{ .accelerator_error = .{
                        .err = ErrorDetails.AcceleratorError.enumFromError(e),
                    } },
                });
            },
        };
        const cmd_id = evaluateNumberExpression(accelerator.idvalue, self.source, self.input_code_pages);

        if (i == node.accelerators.len - 1) {
            modifiers.markLast();
        }

        try data_writer.writeByte(modifiers.value);
        try data_writer.writeByte(0); // padding
        try data_writer.writeIntLittle(u16, key);
        try data_writer.writeIntLittle(u16, cmd_id.asWord());
        try data_writer.writeIntLittle(u16, 0); // padding
    }
}

/// Values collected from a dialog's optional statements, with the defaults
/// used when a statement is absent.
const DialogOptionalStatementValues = struct {
    style: u32 = res.WS.SYSMENU | res.WS.BORDER | res.WS.POPUP,
    exstyle: u32 = 0,
    class: ?NameOrOrdinal = null,
    menu: ?NameOrOrdinal = null,
    font: ?FontStatementValues = null,
    caption: ?Token = null,
};

/// Compiles a DIALOG or DIALOGEX resource.
///
/// Steps:
/// 1. Collect the optional statements (style, exstyle, caption, class, menu,
///    font). When class or menu is given more than once the last one is used
///    and each earlier one gets a `duplicate_menu_or_class_skipped` warning.
/// 2. Emit warnings/hints for cases where the Win32 RC compiler would produce
///    different output (see the comments below).
/// 3. Serialize the dialog header and every control into a temporary buffer
///    limited to `maxInt(u32)` bytes; exceeding the limit while writing a
///    control reports `resource_data_size_exceeds_max`.
/// 4. Write the resource header followed by the padded data to `writer`.
pub fn writeDialog(self: *Compiler, node: *Node.Dialog, writer: anytype) !void {
    var data_buffer = std.ArrayList(u8).init(self.allocator);
    defer data_buffer.deinit();
    // The header's data length field is a u32 so limit the resource's data size so that
    // we know we can always specify the real size.
    var limited_writer = limitedWriter(data_buffer.writer(), std.math.maxInt(u32));
    const data_writer = limited_writer.writer();

    const resource = Resource.fromString(.{
        .slice = node.type.slice(self.source),
        .code_page = self.input_code_pages.getForToken(node.type),
    });
    std.debug.assert(resource == .dialog or resource == .dialogex);

    var optional_statement_values: DialogOptionalStatementValues = .{};
    defer {
        if (optional_statement_values.class) |class| {
            class.deinit(self.allocator);
        }
        if (optional_statement_values.menu) |menu| {
            menu.deinit(self.allocator);
        }
    }
    var skipped_menu_or_classes = std.ArrayList(*Node.SimpleStatement).init(self.allocator);
    defer skipped_menu_or_classes.deinit();
    // `last_menu` / `last_class` are only read after a MENU / CLASS statement
    // has been seen, which is what the flags and optionals below guard.
    var last_menu: *Node.SimpleStatement = undefined;
    var last_class: *Node.SimpleStatement = undefined;
    var last_menu_would_be_forced_ordinal = false;
    var last_menu_has_digit_as_first_char = false;
    var last_menu_did_uppercase = false;
    var last_class_would_be_forced_ordinal = false;

    for (node.optional_statements) |optional_statement| {
        switch (optional_statement.id) {
            .simple_statement => {
                const simple_statement = @fieldParentPtr(Node.SimpleStatement, "base", optional_statement);
                const statement_identifier = simple_statement.identifier;
                const statement_type = rc.OptionalStatements.dialog_map.get(statement_identifier.slice(self.source)) orelse continue;
                switch (statement_type) {
                    .style, .exstyle => {
                        const style = evaluateFlagsExpressionWithDefault(0, simple_statement.value, self.source, self.input_code_pages);
                        if (statement_type == .style) {
                            optional_statement_values.style = style;
                        } else {
                            optional_statement_values.exstyle = style;
                        }
                    },
                    .caption => {
                        std.debug.assert(simple_statement.value.id == .literal);
                        const literal_node = @fieldParentPtr(Node.Literal, "base", simple_statement.value);
                        optional_statement_values.caption = literal_node.token;
                    },
                    .class => {
                        const is_duplicate = optional_statement_values.class != null;
                        if (is_duplicate) {
                            try skipped_menu_or_classes.append(last_class);
                        }
                        const forced_ordinal = is_duplicate and optional_statement_values.class.? == .ordinal;
                        // In the Win32 RC compiler, if any CLASS values that are interpreted as
                        // an ordinal exist, it affects all future CLASS statements and forces
                        // them to be treated as an ordinal no matter what.
                        if (forced_ordinal) {
                            last_class_would_be_forced_ordinal = true;
                        }
                        // clear out the old one if it exists
                        if (optional_statement_values.class) |prev| {
                            prev.deinit(self.allocator);
                            optional_statement_values.class = null;
                        }

                        if (simple_statement.value.isNumberExpression()) {
                            const class_ordinal = evaluateNumberExpression(simple_statement.value, self.source, self.input_code_pages);
                            optional_statement_values.class = NameOrOrdinal{ .ordinal = class_ordinal.asWord() };
                        } else {
                            std.debug.assert(simple_statement.value.isStringLiteral());
                            const literal_node = @fieldParentPtr(Node.Literal, "base", simple_statement.value);
                            const parsed = try self.parseQuotedStringAsWideString(literal_node.token);
                            optional_statement_values.class = NameOrOrdinal{ .name = parsed };
                        }

                        last_class = simple_statement;
                    },
                    .menu => {
                        const is_duplicate = optional_statement_values.menu != null;
                        if (is_duplicate) {
                            try skipped_menu_or_classes.append(last_menu);
                        }
                        const forced_ordinal = is_duplicate and optional_statement_values.menu.? == .ordinal;
                        // In the Win32 RC compiler, if any MENU values that are interpreted as
                        // an ordinal exist, it affects all future MENU statements and forces
                        // them to be treated as an ordinal no matter what.
                        if (forced_ordinal) {
                            last_menu_would_be_forced_ordinal = true;
                        }
                        // clear out the old one if it exists
                        if (optional_statement_values.menu) |prev| {
                            prev.deinit(self.allocator);
                            optional_statement_values.menu = null;
                        }

                        std.debug.assert(simple_statement.value.id == .literal);
                        const literal_node = @fieldParentPtr(Node.Literal, "base", simple_statement.value);

                        const token_slice = literal_node.token.slice(self.source);
                        const bytes = SourceBytes{
                            .slice = token_slice,
                            .code_page = self.input_code_pages.getForToken(literal_node.token),
                        };
                        optional_statement_values.menu = try NameOrOrdinal.fromString(self.allocator, bytes);

                        if (optional_statement_values.menu.? == .name) {
                            if (NameOrOrdinal.maybeNonAsciiOrdinalFromString(bytes)) |win32_rc_ordinal| {
                                try self.addErrorDetails(.{
                                    .err = .invalid_digit_character_in_ordinal,
                                    .type = .err,
                                    .token = literal_node.token,
                                });
                                return self.addErrorDetailsAndFail(.{
                                    .err = .win32_non_ascii_ordinal,
                                    .type = .note,
                                    .token = literal_node.token,
                                    .print_source_line = false,
                                    .extra = .{ .number = win32_rc_ordinal.ordinal },
                                });
                            }
                        }

                        // Need to keep track of some properties of the value
                        // in order to emit the appropriate warning(s) later on.
                        // See where the warnings are emitted below (outside this loop)
                        // for the full explanation.
                        var did_uppercase = false;
                        var codepoint_i: usize = 0;
                        while (bytes.code_page.codepointAt(codepoint_i, bytes.slice)) |codepoint| : (codepoint_i += codepoint.byte_len) {
                            const c = codepoint.value;
                            switch (c) {
                                'a'...'z' => {
                                    did_uppercase = true;
                                    break;
                                },
                                else => {},
                            }
                        }
                        last_menu_did_uppercase = did_uppercase;
                        last_menu_has_digit_as_first_char = std.ascii.isDigit(token_slice[0]);
                        last_menu = simple_statement;
                    },
                    else => {},
                }
            },
            .font_statement => {
                const font = @fieldParentPtr(Node.FontStatement, "base", optional_statement);
                // A later FONT statement replaces the node but keeps any weight/italic
                // already recorded unless it specifies its own.
                if (optional_statement_values.font != null) {
                    optional_statement_values.font.?.node = font;
                } else {
                    optional_statement_values.font = FontStatementValues{ .node = font };
                }
                if (font.weight) |weight| {
                    const value = evaluateNumberExpression(weight, self.source, self.input_code_pages);
                    optional_statement_values.font.?.weight = value.asWord();
                }
                if (font.italic) |italic| {
                    const value = evaluateNumberExpression(italic, self.source, self.input_code_pages);
                    optional_statement_values.font.?.italic = value.asWord() != 0;
                }
            },
            else => {},
        }
    }

    for (skipped_menu_or_classes.items) |simple_statement| {
        const statement_identifier = simple_statement.identifier;
        const statement_type = rc.OptionalStatements.dialog_map.get(statement_identifier.slice(self.source)) orelse continue;
        try self.addErrorDetails(.{
            .err = .duplicate_menu_or_class_skipped,
            .type = .warning,
            .token = simple_statement.identifier,
            .token_span_start = simple_statement.base.getFirstToken(),
            .token_span_end = simple_statement.base.getLastToken(),
            .extra = .{ .menu_or_class = switch (statement_type) {
                .menu => .menu,
                .class => .class,
                else => unreachable,
            } },
        });
    }
    // The Win32 RC compiler miscompiles the value in the following scenario:
    // Multiple CLASS parameters are specified and any of them are treated as a number, then
    // the last CLASS is always treated as a number no matter what
    if (last_class_would_be_forced_ordinal and optional_statement_values.class.? == .name) {
        const literal_node = @fieldParentPtr(Node.Literal, "base", last_class.value);
        const ordinal_value = res.ForcedOrdinal.fromUtf16Le(optional_statement_values.class.?.name);

        try self.addErrorDetails(.{
            .err = .rc_would_miscompile_dialog_class,
            .type = .warning,
            .token = literal_node.token,
            .extra = .{ .number = ordinal_value },
        });
        try self.addErrorDetails(.{
            .err = .rc_would_miscompile_dialog_class,
            .type = .note,
            .print_source_line = false,
            .token = literal_node.token,
            .extra = .{ .number = ordinal_value },
        });
        try self.addErrorDetails(.{
            .err = .rc_would_miscompile_dialog_menu_or_class_id_forced_ordinal,
            .type = .note,
            .print_source_line = false,
            .token = literal_node.token,
            .extra = .{ .menu_or_class = .class },
        });
    }
    // The Win32 RC compiler miscompiles the id in two different scenarios:
    // 1. The first character of the ID is a digit, in which case it is always treated as a number
    //    no matter what (and therefore does not match how the MENU/MENUEX id is parsed)
    // 2. Multiple MENU parameters are specified and any of them are treated as a number, then
    //    the last MENU is always treated as a number no matter what
    if ((last_menu_would_be_forced_ordinal or last_menu_has_digit_as_first_char) and optional_statement_values.menu.? == .name) {
        const literal_node = @fieldParentPtr(Node.Literal, "base", last_menu.value);
        const token_slice = literal_node.token.slice(self.source);
        const bytes = SourceBytes{
            .slice = token_slice,
            .code_page = self.input_code_pages.getForToken(literal_node.token),
        };
        const ordinal_value = res.ForcedOrdinal.fromBytes(bytes);

        try self.addErrorDetails(.{
            .err = .rc_would_miscompile_dialog_menu_id,
            .type = .warning,
            .token = literal_node.token,
            .extra = .{ .number = ordinal_value },
        });
        try self.addErrorDetails(.{
            .err = .rc_would_miscompile_dialog_menu_id,
            .type = .note,
            .print_source_line = false,
            .token = literal_node.token,
            .extra = .{ .number = ordinal_value },
        });
        if (last_menu_would_be_forced_ordinal) {
            try self.addErrorDetails(.{
                .err = .rc_would_miscompile_dialog_menu_or_class_id_forced_ordinal,
                .type = .note,
                .print_source_line = false,
                .token = literal_node.token,
                .extra = .{ .menu_or_class = .menu },
            });
        } else {
            try self.addErrorDetails(.{
                .err = .rc_would_miscompile_dialog_menu_id_starts_with_digit,
                .type = .note,
                .print_source_line = false,
                .token = literal_node.token,
            });
        }
    }
    // The MENU id parsing uses the exact same logic as the MENU/MENUEX resource id parsing,
    // which means that it will convert ASCII characters to uppercase during the 'name' parsing.
    // This turns out not to matter (`LoadMenu` does a case-insensitive lookup anyway),
    // but it still makes sense to share the uppercasing logic since the MENU parameter
    // here is just a reference to a MENU/MENUEX id within the .exe.
    // So, because this is an intentional but inconsequential-to-the-user difference
    // between resinator and the Win32 RC compiler, we only emit a hint instead of
    // a warning.
    if (last_menu_did_uppercase) {
        const literal_node = @fieldParentPtr(Node.Literal, "base", last_menu.value);
        try self.addErrorDetails(.{
            .err = .dialog_menu_id_was_uppercased,
            .type = .hint,
            .token = literal_node.token,
        });
    }

    const x = evaluateNumberExpression(node.x, self.source, self.input_code_pages);
    const y = evaluateNumberExpression(node.y, self.source, self.input_code_pages);
    const width = evaluateNumberExpression(node.width, self.source, self.input_code_pages);
    const height = evaluateNumberExpression(node.height, self.source, self.input_code_pages);

    // FONT statement requires DS_SETFONT, and if it's not present DS_SETFONT must be unset
    if (optional_statement_values.font) |_| {
        optional_statement_values.style |= res.DS.SETFONT;
    } else {
        optional_statement_values.style &= ~res.DS.SETFONT;
    }
    // CAPTION statement implies WS_CAPTION
    if (optional_statement_values.caption) |_| {
        optional_statement_values.style |= res.WS.CAPTION;
    }

    self.writeDialogHeaderAndStrings(
        node,
        data_writer,
        resource,
        &optional_statement_values,
        x,
        y,
        width,
        height,
    ) catch |err| switch (err) {
        // Dialog header and menu/class/title strings can never exceed u32 bytes
        // on their own, so this error is unreachable.
        error.NoSpaceLeft => unreachable,
        else => |e| return e,
    };

    var controls_by_id = std.AutoHashMap(u32, *const Node.ControlStatement).init(self.allocator);
    // Register cleanup before the first fallible call on the map so that every
    // exit path releases it.
    defer controls_by_id.deinit();
    // Number of controls are guaranteed by the parser to be within maxInt(u16).
    try controls_by_id.ensureTotalCapacity(@as(u16, @intCast(node.controls.len)));

    for (node.controls) |control_node| {
        const control = @fieldParentPtr(Node.ControlStatement, "base", control_node);

        self.writeDialogControl(
            control,
            data_writer,
            resource,
            // We know the data_buffer len is limited to u32 max.
            @intCast(data_buffer.items.len),
            &controls_by_id,
        ) catch |err| switch (err) {
            error.NoSpaceLeft => {
                try self.addErrorDetails(.{
                    .err = .resource_data_size_exceeds_max,
                    .token = node.id,
                });
                return self.addErrorDetailsAndFail(.{
                    .err = .resource_data_size_exceeds_max,
                    .type = .note,
                    .token = control.type,
                });
            },
            else => |e| return e,
        };
    }

    const data_size: u32 = @intCast(data_buffer.items.len);
    var header = try self.resourceHeader(node.id, node.type, .{
        .data_size = data_size,
    });
    defer header.deinit(self.allocator);

    header.applyMemoryFlags(node.common_resource_attributes, self.source);
    header.applyOptionalStatements(node.optional_statements, self.source, self.input_code_pages);

    try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id });

    var data_fbs = std.io.fixedBufferStream(data_buffer.items);
    try writeResourceData(writer, data_fbs.reader(), data_size);
}

/// Writes the dialog template header and its trailing variable-length fields
/// to `data_writer`.
///
/// Layout, all integers little-endian:
/// - DIALOGEX: u16 version (1), u16 signature (0xFFFF), u32 help id (0 when
///   absent), u32 exstyle, u32 style.
/// - DIALOG: u32 style, u32 exstyle.
/// - Then for both: u16 control count, u16 x, y, width, height.
/// - Menu, class and caption, each written as a single zero u16 when absent.
/// - The font, only when a FONT statement was given.
fn writeDialogHeaderAndStrings(
    self: *Compiler,
    node: *Node.Dialog,
    data_writer: anytype,
    resource: Resource,
    optional_statement_values: *const DialogOptionalStatementValues,
    x: Number,
    y: Number,
    width: Number,
    height: Number,
) !void {
    // Header
    if (resource == .dialogex) {
        const help_id: u32 = help_id: {
            if (node.help_id == null) break :help_id 0;
            break :help_id evaluateNumberExpression(node.help_id.?, self.source, self.input_code_pages).value;
        };
        try data_writer.writeIntLittle(u16, 1); // version number, always 1
        try data_writer.writeIntLittle(u16, 0xFFFF); // signature, always 0xFFFF
        try data_writer.writeIntLittle(u32, help_id);
        try data_writer.writeIntLittle(u32, optional_statement_values.exstyle);
        try data_writer.writeIntLittle(u32, optional_statement_values.style);
    } else {
        try data_writer.writeIntLittle(u32, optional_statement_values.style);
        try data_writer.writeIntLittle(u32, optional_statement_values.exstyle);
    }
    // This limit is enforced by the parser, so we know the number of controls
    // is within the range of a u16.
    try data_writer.writeIntLittle(u16, @as(u16, @intCast(node.controls.len)));
    try data_writer.writeIntLittle(u16, x.asWord());
    try data_writer.writeIntLittle(u16, y.asWord());
    try data_writer.writeIntLittle(u16, width.asWord());
    try data_writer.writeIntLittle(u16, height.asWord());

    // Menu
    if (optional_statement_values.menu) |menu| {
        try menu.write(data_writer);
    } else {
        try data_writer.writeIntLittle(u16, 0);
    }
    // Class
    if (optional_statement_values.class) |class| {
        try class.write(data_writer);
    } else {
        try data_writer.writeIntLittle(u16, 0);
    }
    // Caption
    if (optional_statement_values.caption) |caption| {
        const parsed = try self.parseQuotedStringAsWideString(caption);
        defer self.allocator.free(parsed);
        // One element past `parsed.len` is written as well; presumably this is
        // the NUL sentinel of the parsed string (confirm against
        // `parseQuotedStringAsWideString`).
        try data_writer.writeAll(std.mem.sliceAsBytes(parsed[0 .. parsed.len + 1]));
    } else {
        try data_writer.writeIntLittle(u16, 0);
    }
    // Font
    if (optional_statement_values.font) |font| {
        try self.writeDialogFont(resource, font, data_writer);
    }
}

fn writeDialogControl(
    self: *Compiler,
    control: *Node.ControlStatement,
    data_writer: anytype,
    resource: Resource,
    bytes_written_so_far: u32,
    controls_by_id: *std.AutoHashMap(u32, *const Node.ControlStatement),
) !void {
    const control_type = rc.Control.map.get(control.type.slice(self.source)).?;

    // Each control must be at a 4-byte boundary. However, the Windows RC
    // compiler will miscompile controls if their extra data ends on an odd offset.
    // We will avoid the miscompilation and emit a warning.
+ const num_padding = numPaddingBytesNeeded(bytes_written_so_far); + if (num_padding == 1 or num_padding == 3) { + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_control_padding, + .type = .warning, + .token = control.type, + }); + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_control_padding, + .type = .note, + .print_source_line = false, + .token = control.type, + }); + } + try data_writer.writeByteNTimes(0, num_padding); + + var style = if (control.style) |style_expression| + // Certain styles are implied by the control type + evaluateFlagsExpressionWithDefault(res.ControlClass.getImpliedStyle(control_type), style_expression, self.source, self.input_code_pages) + else + res.ControlClass.getImpliedStyle(control_type); + + var exstyle = if (control.exstyle) |exstyle_expression| + evaluateFlagsExpressionWithDefault(0, exstyle_expression, self.source, self.input_code_pages) + else + 0; + + switch (resource) { + .dialog => { + // Note: Reverse order from DIALOGEX + try data_writer.writeIntLittle(u32, style); + try data_writer.writeIntLittle(u32, exstyle); + }, + .dialogex => { + const help_id: u32 = if (control.help_id) |help_id_expression| + evaluateNumberExpression(help_id_expression, self.source, self.input_code_pages).value + else + 0; + try data_writer.writeIntLittle(u32, help_id); + // Note: Reverse order from DIALOG + try data_writer.writeIntLittle(u32, exstyle); + try data_writer.writeIntLittle(u32, style); + }, + else => unreachable, + } + + const control_x = evaluateNumberExpression(control.x, self.source, self.input_code_pages); + const control_y = evaluateNumberExpression(control.y, self.source, self.input_code_pages); + const control_width = evaluateNumberExpression(control.width, self.source, self.input_code_pages); + const control_height = evaluateNumberExpression(control.height, self.source, self.input_code_pages); + + try data_writer.writeIntLittle(u16, control_x.asWord()); + try data_writer.writeIntLittle(u16, 
control_y.asWord()); + try data_writer.writeIntLittle(u16, control_width.asWord()); + try data_writer.writeIntLittle(u16, control_height.asWord()); + + const control_id = evaluateNumberExpression(control.id, self.source, self.input_code_pages); + switch (resource) { + .dialog => try data_writer.writeIntLittle(u16, control_id.asWord()), + .dialogex => try data_writer.writeIntLittle(u32, control_id.value), + else => unreachable, + } + + const control_id_for_map: u32 = switch (resource) { + .dialog => control_id.asWord(), + .dialogex => control_id.value, + else => unreachable, + }; + const result = controls_by_id.getOrPutAssumeCapacity(control_id_for_map); + if (result.found_existing) { + if (!self.silent_duplicate_control_ids) { + try self.addErrorDetails(.{ + .err = .control_id_already_defined, + .type = .warning, + .token = control.id.getFirstToken(), + .token_span_end = control.id.getLastToken(), + .extra = .{ .number = control_id_for_map }, + }); + try self.addErrorDetails(.{ + .err = .control_id_already_defined, + .type = .note, + .token = result.value_ptr.*.id.getFirstToken(), + .token_span_end = result.value_ptr.*.id.getLastToken(), + .extra = .{ .number = control_id_for_map }, + }); + } + } else { + result.value_ptr.* = control; + } + + if (res.ControlClass.fromControl(control_type)) |control_class| { + const ordinal = NameOrOrdinal{ .ordinal = @intFromEnum(control_class) }; + try ordinal.write(data_writer); + } else { + const class_node = control.class.?; + if (class_node.isNumberExpression()) { + const number = evaluateNumberExpression(class_node, self.source, self.input_code_pages); + const ordinal = NameOrOrdinal{ .ordinal = number.asWord() }; + // This is different from how the Windows RC compiles ordinals here, + // but I think that's a miscompilation/bug of the Windows implementation. 
+ // The Windows behavior is (where LSB = least significant byte): + // - If the LSB is 0x00 => 0xFFFF0000 + // - If the LSB is < 0x80 => 0x000000<LSB> + // - If the LSB is >= 0x80 => 0x0000FF<LSB> + // + // Because of this, we emit a warning about the potential miscompilation + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_control_class_ordinal, + .type = .warning, + .token = class_node.getFirstToken(), + .token_span_end = class_node.getLastToken(), + }); + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_control_class_ordinal, + .type = .note, + .print_source_line = false, + .token = class_node.getFirstToken(), + .token_span_end = class_node.getLastToken(), + }); + // And then write out the ordinal using a proper NameOrOrdinal encoding. + try ordinal.write(data_writer); + } else if (class_node.isStringLiteral()) { + const literal_node = @fieldParentPtr(Node.Literal, "base", class_node); + const parsed = try self.parseQuotedStringAsWideString(literal_node.token); + defer self.allocator.free(parsed); + if (rc.ControlClass.fromWideString(parsed)) |control_class| { + const ordinal = NameOrOrdinal{ .ordinal = @intFromEnum(control_class) }; + try ordinal.write(data_writer); + } else { + // NUL acts as a terminator + // TODO: Maybe warn when parsed_terminated.len != parsed.len, since + // it seems unlikely that NUL-termination is something intentional + const parsed_terminated = std.mem.sliceTo(parsed, 0); + const name = NameOrOrdinal{ .name = parsed_terminated }; + try name.write(data_writer); + } + } else { + const literal_node = @fieldParentPtr(Node.Literal, "base", class_node); + const literal_slice = literal_node.token.slice(self.source); + // This succeeding is guaranteed by the parser + const control_class = rc.ControlClass.map.get(literal_slice) orelse unreachable; + const ordinal = NameOrOrdinal{ .ordinal = @intFromEnum(control_class) }; + try ordinal.write(data_writer); + } + } + + if (control.text) |text_token| { + const bytes = SourceBytes{ + 
.slice = text_token.slice(self.source), + .code_page = self.input_code_pages.getForToken(text_token), + }; + if (text_token.isStringLiteral()) { + const text = try self.parseQuotedStringAsWideString(text_token); + defer self.allocator.free(text); + const name = NameOrOrdinal{ .name = text }; + try name.write(data_writer); + } else { + std.debug.assert(text_token.id == .number); + const number = literals.parseNumberLiteral(bytes); + const ordinal = NameOrOrdinal{ .ordinal = number.asWord() }; + try ordinal.write(data_writer); + } + } else { + try NameOrOrdinal.writeEmpty(data_writer); + } + + var extra_data_buf = std.ArrayList(u8).init(self.allocator); + defer extra_data_buf.deinit(); + // The extra data byte length must be able to fit within a u16. + var limited_extra_data_writer = limitedWriter(extra_data_buf.writer(), std.math.maxInt(u16)); + const extra_data_writer = limited_extra_data_writer.writer(); + for (control.extra_data) |data_expression| { + const data = try self.evaluateDataExpression(data_expression); + defer data.deinit(self.allocator); + data.write(extra_data_writer) catch |err| switch (err) { + error.NoSpaceLeft => { + try self.addErrorDetails(.{ + .err = .control_extra_data_size_exceeds_max, + .token = control.type, + }); + return self.addErrorDetailsAndFail(.{ + .err = .control_extra_data_size_exceeds_max, + .type = .note, + .token = data_expression.getFirstToken(), + .token_span_end = data_expression.getLastToken(), + }); + }, + else => |e| return e, + }; + } + // We know the extra_data_buf size fits within a u16. 
+ const extra_data_size: u16 = @intCast(extra_data_buf.items.len); + try data_writer.writeIntLittle(u16, extra_data_size); + try data_writer.writeAll(extra_data_buf.items); + } + + pub fn writeToolbar(self: *Compiler, node: *Node.Toolbar, writer: anytype) !void { + var data_buffer = std.ArrayList(u8).init(self.allocator); + defer data_buffer.deinit(); + const data_writer = data_buffer.writer(); + + const button_width = evaluateNumberExpression(node.button_width, self.source, self.input_code_pages); + const button_height = evaluateNumberExpression(node.button_height, self.source, self.input_code_pages); + + // I'm assuming this is some sort of version + // TODO: Try to find something mentioning this + try data_writer.writeIntLittle(u16, 1); + try data_writer.writeIntLittle(u16, button_width.asWord()); + try data_writer.writeIntLittle(u16, button_height.asWord()); + try data_writer.writeIntLittle(u16, @as(u16, @intCast(node.buttons.len))); + + for (node.buttons) |button_or_sep| { + switch (button_or_sep.id) { + .literal => { // This is always SEPARATOR + std.debug.assert(button_or_sep.cast(.literal).?.token.id == .literal); + try data_writer.writeIntLittle(u16, 0); + }, + .simple_statement => { + const value_node = button_or_sep.cast(.simple_statement).?.value; + const value = evaluateNumberExpression(value_node, self.source, self.input_code_pages); + try data_writer.writeIntLittle(u16, value.asWord()); + }, + else => unreachable, // This is a bug in the parser + } + } + + const data_size: u32 = @intCast(data_buffer.items.len); + var header = try self.resourceHeader(node.id, node.type, .{ + .data_size = data_size, + }); + defer header.deinit(self.allocator); + + header.applyMemoryFlags(node.common_resource_attributes, self.source); + + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + + var data_fbs = std.io.fixedBufferStream(data_buffer.items); + try writeResourceData(writer, data_fbs.reader(), data_size); + } + + /// Weight and 
italic carry over from previous FONT statements within a single resource, + /// so they need to be parsed ahead-of-time and stored + const FontStatementValues = struct { + weight: u16 = 0, + italic: bool = false, + node: *Node.FontStatement, + }; + + pub fn writeDialogFont(self: *Compiler, resource: Resource, values: FontStatementValues, writer: anytype) !void { + const node = values.node; + const point_size = evaluateNumberExpression(node.point_size, self.source, self.input_code_pages); + try writer.writeIntLittle(u16, point_size.asWord()); + + if (resource == .dialogex) { + try writer.writeIntLittle(u16, values.weight); + } + + if (resource == .dialogex) { + try writer.writeIntLittle(u8, @intFromBool(values.italic)); + } + + if (node.char_set) |char_set| { + const value = evaluateNumberExpression(char_set, self.source, self.input_code_pages); + try writer.writeIntLittle(u8, @as(u8, @truncate(value.value))); + } else if (resource == .dialogex) { + try writer.writeIntLittle(u8, 1); // DEFAULT_CHARSET + } + + const typeface = try self.parseQuotedStringAsWideString(node.typeface); + defer self.allocator.free(typeface); + try writer.writeAll(std.mem.sliceAsBytes(typeface[0 .. typeface.len + 1])); + } + + pub fn writeMenu(self: *Compiler, node: *Node.Menu, writer: anytype) !void { + var data_buffer = std.ArrayList(u8).init(self.allocator); + defer data_buffer.deinit(); + // The header's data length field is a u32 so limit the resource's data size so that + // we know we can always specify the real size. 
+ var limited_writer = limitedWriter(data_buffer.writer(), std.math.maxInt(u32)); + const data_writer = limited_writer.writer(); + + const type_bytes = SourceBytes{ + .slice = node.type.slice(self.source), + .code_page = self.input_code_pages.getForToken(node.type), + }; + const resource = Resource.fromString(type_bytes); + std.debug.assert(resource == .menu or resource == .menuex); + + self.writeMenuData(node, data_writer, resource) catch |err| switch (err) { + error.NoSpaceLeft => { + return self.addErrorDetailsAndFail(.{ + .err = .resource_data_size_exceeds_max, + .token = node.id, + }); + }, + else => |e| return e, + }; + + // This intCast can't fail because the limitedWriter above guarantees that + // we will never write more than maxInt(u32) bytes. + const data_size: u32 = @intCast(data_buffer.items.len); + var header = try self.resourceHeader(node.id, node.type, .{ + .data_size = data_size, + }); + defer header.deinit(self.allocator); + + header.applyMemoryFlags(node.common_resource_attributes, self.source); + header.applyOptionalStatements(node.optional_statements, self.source, self.input_code_pages); + + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + + var data_fbs = std.io.fixedBufferStream(data_buffer.items); + try writeResourceData(writer, data_fbs.reader(), data_size); + } + + /// Expects `data_writer` to be a LimitedWriter limited to u32, meaning all writes to + /// the writer within this function could return error.NoSpaceLeft + pub fn writeMenuData(self: *Compiler, node: *Node.Menu, data_writer: anytype, resource: Resource) !void { + // menu header + const version: u16 = if (resource == .menu) 0 else 1; + try data_writer.writeIntLittle(u16, version); + const header_size: u16 = if (resource == .menu) 0 else 4; + try data_writer.writeIntLittle(u16, header_size); // cbHeaderSize + // Note: There can be extra bytes at the end of this header (`rgbExtra`), + // but they are always zero-length for us, so we don't write 
anything + // (the length of the rgbExtra field is inferred from the header_size). + // MENU => rgbExtra: [cbHeaderSize]u8 + // MENUEX => rgbExtra: [cbHeaderSize-4]u8 + + if (resource == .menuex) { + if (node.help_id) |help_id_node| { + const help_id = evaluateNumberExpression(help_id_node, self.source, self.input_code_pages); + try data_writer.writeIntLittle(u32, help_id.value); + } else { + try data_writer.writeIntLittle(u32, 0); + } + } + + for (node.items, 0..) |item, i| { + const is_last = i == node.items.len - 1; + try self.writeMenuItem(item, data_writer, is_last); + } + } + + pub fn writeMenuItem(self: *Compiler, node: *Node, writer: anytype, is_last_of_parent: bool) !void { + switch (node.id) { + .menu_item_separator => { + // This is the 'alternate compatibility form' of the separator, see + // https://devblogs.microsoft.com/oldnewthing/20080710-00/?p=21673 + // + // The 'correct' way is to set the MF_SEPARATOR flag, but the Win32 RC + // compiler still uses this alternate form, so that's what we use too. 
+ var flags = res.MenuItemFlags{}; + if (is_last_of_parent) flags.markLast(); + try writer.writeIntLittle(u16, flags.value); + try writer.writeIntLittle(u16, 0); // id + try writer.writeIntLittle(u16, 0); // null-terminated UTF-16 text + }, + .menu_item => { + const menu_item = @fieldParentPtr(Node.MenuItem, "base", node); + var flags = res.MenuItemFlags{}; + for (menu_item.option_list) |option_token| { + // This failing would be a bug in the parser + const option = rc.MenuItem.Option.map.get(option_token.slice(self.source)) orelse unreachable; + flags.apply(option); + } + if (is_last_of_parent) flags.markLast(); + try writer.writeIntLittle(u16, flags.value); + + var result = evaluateNumberExpression(menu_item.result, self.source, self.input_code_pages); + try writer.writeIntLittle(u16, result.asWord()); + + var text = try self.parseQuotedStringAsWideString(menu_item.text); + defer self.allocator.free(text); + try writer.writeAll(std.mem.sliceAsBytes(text[0 .. text.len + 1])); + }, + .popup => { + const popup = @fieldParentPtr(Node.Popup, "base", node); + var flags = res.MenuItemFlags{ .value = res.MF.POPUP }; + for (popup.option_list) |option_token| { + // This failing would be a bug in the parser + const option = rc.MenuItem.Option.map.get(option_token.slice(self.source)) orelse unreachable; + flags.apply(option); + } + if (is_last_of_parent) flags.markLast(); + try writer.writeIntLittle(u16, flags.value); + + var text = try self.parseQuotedStringAsWideString(popup.text); + defer self.allocator.free(text); + try writer.writeAll(std.mem.sliceAsBytes(text[0 .. text.len + 1])); + + for (popup.items, 0..) 
|item, i| { + const is_last = i == popup.items.len - 1; + try self.writeMenuItem(item, writer, is_last); + } + }, + inline .menu_item_ex, .popup_ex => |node_type| { + const menu_item = @fieldParentPtr(node_type.Type(), "base", node); + + if (menu_item.type) |flags| { + const value = evaluateNumberExpression(flags, self.source, self.input_code_pages); + try writer.writeIntLittle(u32, value.value); + } else { + try writer.writeIntLittle(u32, 0); + } + + if (menu_item.state) |state| { + const value = evaluateNumberExpression(state, self.source, self.input_code_pages); + try writer.writeIntLittle(u32, value.value); + } else { + try writer.writeIntLittle(u32, 0); + } + + if (menu_item.id) |id| { + const value = evaluateNumberExpression(id, self.source, self.input_code_pages); + try writer.writeIntLittle(u32, value.value); + } else { + try writer.writeIntLittle(u32, 0); + } + + var flags: u16 = 0; + if (is_last_of_parent) flags |= comptime @as(u16, @intCast(res.MF.END)); + // This constant doesn't seem to have a named #define, it's different than MF_POPUP + if (node_type == .popup_ex) flags |= 0x01; + try writer.writeIntLittle(u16, flags); + + var text = try self.parseQuotedStringAsWideString(menu_item.text); + defer self.allocator.free(text); + try writer.writeAll(std.mem.sliceAsBytes(text[0 .. text.len + 1])); + + // Only the combination of the flags u16 and the text bytes can cause + // non-DWORD alignment, so we can just use the byte length of those + // two values to realign to DWORD alignment. + const relevant_bytes = 2 + (text.len + 1) * 2; + try writeDataPadding(writer, @intCast(relevant_bytes)); + + if (node_type == .popup_ex) { + if (menu_item.help_id) |help_id_node| { + const help_id = evaluateNumberExpression(help_id_node, self.source, self.input_code_pages); + try writer.writeIntLittle(u32, help_id.value); + } else { + try writer.writeIntLittle(u32, 0); + } + + for (menu_item.items, 0..) 
|item, i| { + const is_last = i == menu_item.items.len - 1; + try self.writeMenuItem(item, writer, is_last); + } + } + }, + else => unreachable, + } + } + + pub fn writeVersionInfo(self: *Compiler, node: *Node.VersionInfo, writer: anytype) !void { + var data_buffer = std.ArrayList(u8).init(self.allocator); + defer data_buffer.deinit(); + // The node's length field (which is inclusive of the length of all of its children) is a u16 + // so limit the node's data size so that we know we can always specify the real size. + var limited_writer = limitedWriter(data_buffer.writer(), std.math.maxInt(u16)); + const data_writer = limited_writer.writer(); + + try data_writer.writeIntLittle(u16, 0); // placeholder size + try data_writer.writeIntLittle(u16, res.FixedFileInfo.byte_len); + try data_writer.writeIntLittle(u16, res.VersionNode.type_binary); + const key_bytes = std.mem.sliceAsBytes(res.FixedFileInfo.key[0 .. res.FixedFileInfo.key.len + 1]); + try data_writer.writeAll(key_bytes); + // The number of bytes written up to this point is always the same, since the name + // of the node is a constant (FixedFileInfo.key). The total number of bytes + // written so far is 38, so we need 2 padding bytes to get back to DWORD alignment + try data_writer.writeIntLittle(u16, 0); + + var fixed_file_info = res.FixedFileInfo{}; + for (node.fixed_info) |fixed_info| { + switch (fixed_info.id) { + .version_statement => { + const version_statement = @fieldParentPtr(Node.VersionStatement, "base", fixed_info); + const version_type = rc.VersionInfo.map.get(version_statement.type.slice(self.source)).?; + + // Ensure that all parts are cleared for each version, to properly account for + // potential duplicate PRODUCTVERSION/FILEVERSION statements + switch (version_type) { + .file_version => @memset(&fixed_file_info.file_version.parts, 0), + .product_version => @memset(&fixed_file_info.product_version.parts, 0), + else => unreachable, + } + + for (version_statement.parts, 0..) 
|part, i| { + const part_value = evaluateNumberExpression(part, self.source, self.input_code_pages); + if (part_value.is_long) { + try self.addErrorDetails(.{ + .err = .rc_would_error_u16_with_l_suffix, + .type = .warning, + .token = part.getFirstToken(), + .token_span_end = part.getLastToken(), + .extra = .{ .statement_with_u16_param = switch (version_type) { + .file_version => .fileversion, + .product_version => .productversion, + else => unreachable, + } }, + }); + try self.addErrorDetails(.{ + .err = .rc_would_error_u16_with_l_suffix, + .print_source_line = false, + .type = .note, + .token = part.getFirstToken(), + .token_span_end = part.getLastToken(), + .extra = .{ .statement_with_u16_param = switch (version_type) { + .file_version => .fileversion, + .product_version => .productversion, + else => unreachable, + } }, + }); + } + switch (version_type) { + .file_version => { + fixed_file_info.file_version.parts[i] = part_value.asWord(); + }, + .product_version => { + fixed_file_info.product_version.parts[i] = part_value.asWord(); + }, + else => unreachable, + } + } + }, + .simple_statement => { + const statement = @fieldParentPtr(Node.SimpleStatement, "base", fixed_info); + const statement_type = rc.VersionInfo.map.get(statement.identifier.slice(self.source)).?; + const value = evaluateNumberExpression(statement.value, self.source, self.input_code_pages); + switch (statement_type) { + .file_flags_mask => fixed_file_info.file_flags_mask = value.value, + .file_flags => fixed_file_info.file_flags = value.value, + .file_os => fixed_file_info.file_os = value.value, + .file_type => fixed_file_info.file_type = value.value, + .file_subtype => fixed_file_info.file_subtype = value.value, + else => unreachable, + } + }, + else => unreachable, + } + } + try fixed_file_info.write(data_writer); + + for (node.block_statements) |statement| { + self.writeVersionNode(statement, data_writer, &data_buffer) catch |err| switch (err) { + error.NoSpaceLeft => { + try 
self.addErrorDetails(.{ + .err = .version_node_size_exceeds_max, + .token = node.id, + }); + return self.addErrorDetailsAndFail(.{ + .err = .version_node_size_exceeds_max, + .type = .note, + .token = statement.getFirstToken(), + .token_span_end = statement.getLastToken(), + }); + }, + else => |e| return e, + }; + } + + // We know that data_buffer.items.len is within the limits of a u16, since we + // limited the writer to maxInt(u16) + const data_size: u16 = @intCast(data_buffer.items.len); + // And now that we know the full size of this node (including its children), set its size + std.mem.writeIntLittle(u16, data_buffer.items[0..2], data_size); + + var header = try self.resourceHeader(node.id, node.versioninfo, .{ + .data_size = data_size, + }); + defer header.deinit(self.allocator); + + header.applyMemoryFlags(node.common_resource_attributes, self.source); + + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + + var data_fbs = std.io.fixedBufferStream(data_buffer.items); + try writeResourceData(writer, data_fbs.reader(), data_size); + } + + /// Expects writer to be a LimitedWriter limited to u16, meaning all writes to + /// the writer within this function could return error.NoSpaceLeft, and that buf.items.len + /// will never be able to exceed maxInt(u16). + pub fn writeVersionNode(self: *Compiler, node: *Node, writer: anytype, buf: *std.ArrayList(u8)) !void { + // We can assume that buf.items.len will never be able to exceed the limits of a u16 + try writeDataPadding(writer, @as(u16, @intCast(buf.items.len))); + + const node_and_children_size_offset = buf.items.len; + try writer.writeIntLittle(u16, 0); // placeholder for size + const data_size_offset = buf.items.len; + try writer.writeIntLittle(u16, 0); // placeholder for data size + const data_type_offset = buf.items.len; + // Data type is string unless the node contains values that are numbers. 
+ try writer.writeIntLittle(u16, res.VersionNode.type_string); + + switch (node.id) { + inline .block, .block_value => |node_type| { + const block_or_value = @fieldParentPtr(node_type.Type(), "base", node); + const parsed_key = try self.parseQuotedStringAsWideString(block_or_value.key); + defer self.allocator.free(parsed_key); + + const parsed_key_to_first_null = std.mem.sliceTo(parsed_key, 0); + try writer.writeAll(std.mem.sliceAsBytes(parsed_key_to_first_null[0 .. parsed_key_to_first_null.len + 1])); + + var has_number_value: bool = false; + for (block_or_value.values) |value_value_node_uncasted| { + const value_value_node = value_value_node_uncasted.cast(.block_value_value).?; + if (value_value_node.expression.isNumberExpression()) { + has_number_value = true; + break; + } + } + // The units used here are dependent on the type. If there are any numbers, then + // this is a byte count. If there are only strings, then this is a count of + // UTF-16 code units. + // + // The Win32 RC compiler miscompiles this count in the case of values that + // have a mix of numbers and strings. This is detected and a warning is emitted + // during parsing, so we can just do the correct thing here. + var values_size: usize = 0; + + try writeDataPadding(writer, @intCast(buf.items.len)); + + for (block_or_value.values, 0..) 
|value_value_node_uncasted, i| { + const value_value_node = value_value_node_uncasted.cast(.block_value_value).?; + const value_node = value_value_node.expression; + if (value_node.isNumberExpression()) { + const number = evaluateNumberExpression(value_node, self.source, self.input_code_pages); + // This is used to write u16 or u32 depending on the number's suffix + const data_wrapper = Data{ .number = number }; + try data_wrapper.write(writer); + // Numbers use byte count + values_size += if (number.is_long) 4 else 2; + } else { + std.debug.assert(value_node.isStringLiteral()); + const literal_node = value_node.cast(.literal).?; + const parsed_value = try self.parseQuotedStringAsWideString(literal_node.token); + defer self.allocator.free(parsed_value); + + const parsed_to_first_null = std.mem.sliceTo(parsed_value, 0); + try writer.writeAll(std.mem.sliceAsBytes(parsed_to_first_null)); + // Strings use UTF-16 code-unit count including the null-terminator, but + // only if there are no number values in the list. + var value_size = parsed_to_first_null.len; + if (has_number_value) value_size *= 2; // 2 bytes per UTF-16 code unit + values_size += value_size; + // The null-terminator is only included if there's a trailing comma + // or this is the last value. If the value evaluates to empty, then + // it never gets a null terminator. If there was an explicit null-terminator + // in the string, we still need to potentially add one since we already + // sliced to the terminator. 
+ const is_last = i == block_or_value.values.len - 1; + const is_empty = parsed_to_first_null.len == 0; + const is_only = block_or_value.values.len == 1; + if ((!is_empty or !is_only) and (is_last or value_value_node.trailing_comma)) { + try writer.writeIntLittle(u16, 0); + values_size += if (has_number_value) 2 else 1; + } + } + } + var data_size_slice = buf.items[data_size_offset..]; + std.mem.writeIntLittle(u16, data_size_slice[0..@sizeOf(u16)], @as(u16, @intCast(values_size))); + + if (has_number_value) { + const data_type_slice = buf.items[data_type_offset..]; + std.mem.writeIntLittle(u16, data_type_slice[0..@sizeOf(u16)], res.VersionNode.type_binary); + } + + if (node_type == .block) { + const block = block_or_value; + for (block.children) |child| { + try self.writeVersionNode(child, writer, buf); + } + } + }, + else => unreachable, + } + + const node_and_children_size = buf.items.len - node_and_children_size_offset; + const node_and_children_size_slice = buf.items[node_and_children_size_offset..]; + std.mem.writeIntLittle(u16, node_and_children_size_slice[0..@sizeOf(u16)], @as(u16, @intCast(node_and_children_size))); + } + + pub fn writeStringTable(self: *Compiler, node: *Node.StringTable) !void { + const language = getLanguageFromOptionalStatements(node.optional_statements, self.source, self.input_code_pages) orelse self.state.language; + + for (node.strings) |string_node| { + const string = @fieldParentPtr(Node.StringTableString, "base", string_node); + const string_id_data = try self.evaluateDataExpression(string.id); + const string_id = string_id_data.number.asWord(); + + self.state.string_tables.set( + self.arena, + language, + string_id, + string.string, + &node.base, + self.source, + self.input_code_pages, + self.state.version, + self.state.characteristics, + ) catch |err| switch (err) { + error.StringAlreadyDefined => { + // It might be nice to have these errors point to the ids rather than the + // string tokens, but that would mean storing the id 
token of each string
+                    // which doesn't seem worth it just for slightly better error messages.
+                    try self.addErrorDetails(ErrorDetails{
+                        .err = .string_already_defined,
+                        .token = string.string,
+                        .extra = .{ .string_and_language = .{ .id = string_id, .language = language } },
+                    });
+                    const existing_def_table = self.state.string_tables.tables.getPtr(language).?;
+                    const existing_definition = existing_def_table.get(string_id).?;
+                    return self.addErrorDetailsAndFail(ErrorDetails{
+                        .err = .string_already_defined,
+                        .type = .note,
+                        .token = existing_definition,
+                        .extra = .{ .string_and_language = .{ .id = string_id, .language = language } },
+                    });
+                },
+                error.OutOfMemory => |e| return e,
+            };
+        }
+    }
+
+    /// Expects this to be a top-level LANGUAGE statement
+    /// Evaluates both ID expressions and stores them, truncated to the width of
+    /// the destination fields, in `self.state.language`.
+    pub fn writeLanguageStatement(self: *Compiler, node: *Node.LanguageStatement) void {
+        const primary = Compiler.evaluateNumberExpression(node.primary_language_id, self.source, self.input_code_pages);
+        const sublanguage = Compiler.evaluateNumberExpression(node.sublanguage_id, self.source, self.input_code_pages);
+        self.state.language.primary_language_id = @truncate(primary.value);
+        self.state.language.sublanguage_id = @truncate(sublanguage.value);
+    }
+
+    /// Expects this to be a top-level VERSION or CHARACTERISTICS statement
+    /// Evaluates the statement's value and stores it in `self.state.version` or
+    /// `self.state.characteristics`. Any other keyword is treated as unreachable.
+    pub fn writeTopLevelSimpleStatement(self: *Compiler, node: *Node.SimpleStatement) void {
+        const value = Compiler.evaluateNumberExpression(node.value, self.source, self.input_code_pages);
+        const statement_type = rc.TopLevelKeywords.map.get(node.identifier.slice(self.source)).?;
+        switch (statement_type) {
+            .characteristics => self.state.characteristics = value.value,
+            .version => self.state.version = value.value,
+            else => unreachable,
+        }
+    }
+
+    /// Optional overrides for `resourceHeader`.
+    /// A null `language` means "use `self.state.language`".
+    pub const ResourceHeaderOptions = struct {
+        language: ?res.Language = null,
+        data_size: DWORD = 0,
+    };
+
+    /// Builds a `ResourceHeader` from the id and type tokens, using the compiler's
+    /// current version and characteristics (and its current language unless
+    /// `options.language` is set).
+    /// If `ResourceHeader.init` reports a non-ASCII ordinal in the type or id, an
+    /// error and an accompanying note are appended to the diagnostics and
+    /// `error.CompileError` is returned. `error.OutOfMemory` is passed through.
+    pub fn resourceHeader(self: *Compiler, id_token: Token, type_token: Token, options: ResourceHeaderOptions) !ResourceHeader {
+        const id_bytes = self.sourceBytesForToken(id_token);
+        const type_bytes = self.sourceBytesForToken(type_token);
+        return ResourceHeader.init(
+            self.allocator,
+            id_bytes,
+            type_bytes,
+            options.data_size,
+            options.language orelse self.state.language,
+            self.state.version,
+            self.state.characteristics,
+        ) catch |err| switch (err) {
+            error.OutOfMemory => |e| return e,
+            error.TypeNonAsciiOrdinal => {
+                // The note reports the ordinal value that maybeNonAsciiOrdinalFromString yields for these bytes.
+                const win32_rc_ordinal = NameOrOrdinal.maybeNonAsciiOrdinalFromString(type_bytes).?;
+                try self.addErrorDetails(.{
+                    .err = .invalid_digit_character_in_ordinal,
+                    .type = .err,
+                    .token = type_token,
+                });
+                return self.addErrorDetailsAndFail(.{
+                    .err = .win32_non_ascii_ordinal,
+                    .type = .note,
+                    .token = type_token,
+                    .print_source_line = false,
+                    .extra = .{ .number = win32_rc_ordinal.ordinal },
+                });
+            },
+            error.IdNonAsciiOrdinal => {
+                const win32_rc_ordinal = NameOrOrdinal.maybeNonAsciiOrdinalFromString(id_bytes).?;
+                try self.addErrorDetails(.{
+                    .err = .invalid_digit_character_in_ordinal,
+                    .type = .err,
+                    .token = id_token,
+                });
+                return self.addErrorDetailsAndFail(.{
+                    .err = .win32_non_ascii_ordinal,
+                    .type = .note,
+                    .token = id_token,
+                    .print_source_line = false,
+                    .extra = .{ .number = win32_rc_ordinal.ordinal },
+                });
+            },
+        };
+    }
+
+    /// The header that precedes each resource's data in the output.
+    /// `name_value` and `type_value` may own allocated memory; release it with `deinit`.
+    pub const ResourceHeader = struct {
+        name_value: NameOrOrdinal,
+        type_value: NameOrOrdinal,
+        language: res.Language,
+        memory_flags: MemoryFlags,
+        data_size: DWORD,
+        version: DWORD,
+        characteristics: DWORD,
+        data_version: DWORD = 0,
+
+        pub const InitError = error{ OutOfMemory, IdNonAsciiOrdinal, TypeNonAsciiOrdinal };
+
+        /// Resolves the type and name from their source bytes and fills in the
+        /// default memory flags for the resulting type.
+        /// A type that maps to a predefined `res.RT` constant becomes that ordinal;
+        /// otherwise it is parsed with `NameOrOrdinal.fromString`.
+        /// Returns `error.TypeNonAsciiOrdinal` / `error.IdNonAsciiOrdinal` when the
+        /// value was parsed as a name but `maybeNonAsciiOrdinalFromString` recognizes
+        /// it as an ordinal. Anything allocated so far is freed on error.
+        pub fn init(allocator: Allocator, id_bytes: SourceBytes, type_bytes: SourceBytes, data_size: DWORD, language: res.Language, version: DWORD, characteristics: DWORD) InitError!ResourceHeader {
+            const type_value = type: {
+                const resource_type = Resource.fromString(type_bytes);
+                if (res.RT.fromResource(resource_type)) |rt_constant| {
+                    break :type NameOrOrdinal{ .ordinal = @intFromEnum(rt_constant) };
+                } else {
+                    break :type try NameOrOrdinal.fromString(allocator, type_bytes);
+                }
+            };
+            errdefer type_value.deinit(allocator);
+            if (type_value == .name) {
+                if (NameOrOrdinal.maybeNonAsciiOrdinalFromString(type_bytes)) |_| {
+                    return error.TypeNonAsciiOrdinal;
+                }
+            }
+
+            const name_value = try NameOrOrdinal.fromString(allocator, id_bytes);
+            errdefer name_value.deinit(allocator);
+            if (name_value == .name) {
+                if (NameOrOrdinal.maybeNonAsciiOrdinalFromString(id_bytes)) |_| {
+                    return error.IdNonAsciiOrdinal;
+                }
+            }
+
+            const predefined_resource_type = type_value.predefinedResourceType();
+
+            return ResourceHeader{
+                .name_value = name_value,
+                .type_value = type_value,
+                .data_size = data_size,
+                .memory_flags = MemoryFlags.defaults(predefined_resource_type),
+                .language = language,
+                .version = version,
+                .characteristics = characteristics,
+            };
+        }
+
+        /// Releases the name and type values using `allocator`.
+        pub fn deinit(self: ResourceHeader, allocator: Allocator) void {
+            self.name_value.deinit(allocator);
+            self.type_value.deinit(allocator);
+        }
+
+        /// Result of `calcSize`: the total header size in bytes and the number of
+        /// zero bytes to write after the name.
+        pub const SizeInfo = struct {
+            bytes: u32,
+            padding_after_name: u2,
+        };
+
+        /// Computes the header size: 8 bytes, plus the byte lengths of the name and
+        /// type values, plus padding, plus 16 bytes for the fields that
+        /// `writeSizeInfo` writes after the padding.
+        /// Returns `error.Overflow` if the total does not fit in a u32.
+        fn calcSize(self: ResourceHeader) error{Overflow}!SizeInfo {
+            var header_size: u32 = 8;
+            header_size = try std.math.add(
+                u32,
+                header_size,
+                std.math.cast(u32, self.name_value.byteLen()) orelse return error.Overflow,
+            );
+            header_size = try std.math.add(
+                u32,
+                header_size,
+                std.math.cast(u32, self.type_value.byteLen()) orelse return error.Overflow,
+            );
+            const padding_after_name = numPaddingBytesNeeded(header_size);
+            header_size = try std.math.add(u32, header_size, padding_after_name);
+            header_size = try std.math.add(u32, header_size, 16);
+            return .{ .bytes = header_size, .padding_after_name = padding_after_name };
+        }
+
+        /// Writes the header, treating a size overflow as unreachable.
+        /// Only use this when the name and type are known to be small.
+        pub fn writeAssertNoOverflow(self: ResourceHeader, writer: anytype) !void {
+            return self.writeSizeInfo(writer, self.calcSize() catch unreachable);
+        }
+
+        /// Writes the header. If the header size overflows, a
+        /// `resource_data_size_exceeds_max` diagnostic is appended for
+        /// `err_ctx.token` and `error.CompileError` is returned.
+        pub fn write(self: ResourceHeader, writer: anytype, err_ctx: errors.DiagnosticsContext) !void {
+            const size_info = self.calcSize() catch {
+                try err_ctx.diagnostics.append(.{
+                    .err = .resource_data_size_exceeds_max,
+                    .token = err_ctx.token,
+                });
+                return error.CompileError;
+            };
+            return self.writeSizeInfo(writer, size_info);
+        }
+
+        /// Serializes the header fields in little-endian order using a
+        /// previously computed `size_info`.
+        fn writeSizeInfo(self: ResourceHeader, writer: anytype, size_info: SizeInfo) !void {
+            try writer.writeIntLittle(DWORD, self.data_size); // DataSize
+            try writer.writeIntLittle(DWORD, size_info.bytes); // HeaderSize
+            try self.type_value.write(writer); // TYPE
+            try self.name_value.write(writer); // NAME
+            try writer.writeByteNTimes(0, size_info.padding_after_name);
+
+            try writer.writeIntLittle(DWORD, self.data_version); // DataVersion
+            try writer.writeIntLittle(WORD, self.memory_flags.value); // MemoryFlags
+            try writer.writeIntLittle(WORD, self.language.asInt()); // LanguageId
+            try writer.writeIntLittle(DWORD, self.version); // Version
+            try writer.writeIntLittle(DWORD, self.characteristics); // Characteristics
+        }
+
+        /// Returns the predefined resource type of `type_value`, if it has one.
+        pub fn predefinedResourceType(self: ResourceHeader) ?res.RT {
+            return self.type_value.predefinedResourceType();
+        }
+
+        /// Applies each common resource attribute token to `memory_flags`, in order.
+        pub fn applyMemoryFlags(self: *ResourceHeader, tokens: []Token, source: []const u8) void {
+            applyToMemoryFlags(&self.memory_flags, tokens, source);
+        }
+
+        /// Applies LANGUAGE / VERSION / CHARACTERISTICS optional statements to this header.
+        pub fn applyOptionalStatements(self: *ResourceHeader, statements: []*Node, source: []const u8, code_page_lookup: *const CodePageLookup) void {
+            applyToOptionalStatements(&self.language, &self.version, &self.characteristics, statements, source, code_page_lookup);
+        }
+    };
+
+    /// Sets the flag for every attribute token, in order.
+    /// Each token must be a known common resource attribute (the lookup is unwrapped with `.?`).
+    fn applyToMemoryFlags(flags: *MemoryFlags, tokens: []Token, source: []const u8) void {
+        for (tokens) |token| {
+            const attribute = rc.CommonResourceAttributes.map.get(token.slice(source)).?;
+            flags.set(attribute);
+        }
+    }
+
+    /// RT_GROUP_ICON and RT_GROUP_CURSOR have their own special rules for memory flags
+    fn applyToGroupMemoryFlags(flags: *MemoryFlags, tokens: []Token, source: []const u8) void {
+        // There's probably a cleaner implementation of this, but this will result in the same
+        // flags as the Win32 RC compiler for all 986,410 K-permutations of memory flags
+        // for an ICON resource.
+        //
+        // This was arrived at by iterating over the permutations and creating a
+        // list where each line looks something like this:
+        //   MOVEABLE PRELOAD -> 0x1050 (MOVEABLE|PRELOAD|DISCARDABLE)
+        //
+        // and then noticing a few things:
+
+        // 1. Any permutation that does not have PRELOAD in it just uses the
+        //    default flags.
+        const initial_flags = flags.*;
+        var flags_set = std.enums.EnumSet(rc.CommonResourceAttributes).initEmpty();
+        for (tokens) |token| {
+            const attribute = rc.CommonResourceAttributes.map.get(token.slice(source)).?;
+            flags_set.insert(attribute);
+        }
+        if (!flags_set.contains(.preload)) return;
+
+        // 2. Any permutation of flags where applying only the PRELOAD and LOADONCALL flags
+        //    results in no actual change by the end will just use the default flags.
+        //    For example, `PRELOAD LOADONCALL` will result in default flags, but
+        //    `LOADONCALL PRELOAD` will have PRELOAD set after they are both applied in order.
+        for (tokens) |token| {
+            const attribute = rc.CommonResourceAttributes.map.get(token.slice(source)).?;
+            switch (attribute) {
+                .preload, .loadoncall => flags.set(attribute),
+                else => {},
+            }
+        }
+        if (flags.value == initial_flags.value) return;
+
+        // 3. If none of DISCARDABLE, SHARED, or PURE is specified, then PRELOAD
+        //    implies `flags &= ~SHARED` and LOADONCALL implies `flags |= SHARED`
+        const shared_set = comptime blk: {
+            var set = std.enums.EnumSet(rc.CommonResourceAttributes).initEmpty();
+            set.insert(.discardable);
+            set.insert(.shared);
+            set.insert(.pure);
+            break :blk set;
+        };
+        const discardable_shared_or_pure_specified = flags_set.intersectWith(shared_set).count() != 0;
+        for (tokens) |token| {
+            const attribute = rc.CommonResourceAttributes.map.get(token.slice(source)).?;
+            flags.setGroup(attribute, !discardable_shared_or_pure_specified);
+        }
+    }
+
+    /// Only handles the 'base' optional statements that are shared between resource types.
+    /// Statements are applied in order, so a later statement of the same kind
+    /// overwrites an earlier one. Nodes of any other kind are ignored.
+    fn applyToOptionalStatements(language: *res.Language, version: *u32, characteristics: *u32, statements: []*Node, source: []const u8, code_page_lookup: *const CodePageLookup) void {
+        for (statements) |node| switch (node.id) {
+            .language_statement => {
+                const language_statement = @fieldParentPtr(Node.LanguageStatement, "base", node);
+                language.* = languageFromLanguageStatement(language_statement, source, code_page_lookup);
+            },
+            .simple_statement => {
+                const simple_statement = @fieldParentPtr(Node.SimpleStatement, "base", node);
+                // Simple statements whose identifier is not in OptionalStatements.map are skipped.
+                const statement_type = rc.OptionalStatements.map.get(simple_statement.identifier.slice(source)) orelse continue;
+                const result = Compiler.evaluateNumberExpression(simple_statement.value, source, code_page_lookup);
+                switch (statement_type) {
+                    .version => version.* = result.value,
+                    .characteristics => characteristics.* = result.value,
+                    else => unreachable, // only VERSION and CHARACTERISTICS should be in an optional statements list
+                }
+            },
+            else => {},
+        };
+    }
+
+    /// Evaluates the primary and sublanguage ID expressions of a LANGUAGE
+    /// statement and returns them, truncated to the width of the `res.Language` fields.
+    pub fn languageFromLanguageStatement(language_statement: *const Node.LanguageStatement, source: []const u8, code_page_lookup: *const CodePageLookup) res.Language {
+        const primary = Compiler.evaluateNumberExpression(language_statement.primary_language_id, source, code_page_lookup);
+        const sublanguage = Compiler.evaluateNumberExpression(language_statement.sublanguage_id, source, code_page_lookup);
+        return .{
+            .primary_language_id = @truncate(primary.value),
+            .sublanguage_id = @truncate(sublanguage.value),
+        };
+    }
+
+    /// Returns the language of the first LANGUAGE statement in `statements`,
+    /// or null if there is none.
+    pub fn getLanguageFromOptionalStatements(statements: []*Node, source: []const u8, code_page_lookup: *const CodePageLookup) ?res.Language {
+        for (statements) |node| switch (node.id) {
+            .language_statement => {
+                const language_statement = @fieldParentPtr(Node.LanguageStatement, "base", node);
+                return languageFromLanguageStatement(language_statement, source, code_page_lookup);
+            },
+            else => continue,
+        };
+        return null;
+    }
+
+    /// Writes a header whose fields are all zero (ordinal 0 name and type, no data).
+    pub fn writeEmptyResource(writer: anytype) !void {
+        const header = ResourceHeader{
+            .name_value = .{ .ordinal = 0 },
+            .type_value = .{ .ordinal = 0 },
+            .language = .{
+                .primary_language_id = 0,
+                .sublanguage_id = 0,
+            },
+            .memory_flags = .{ .value = 0 },
+            .data_size = 0,
+            .version = 0,
+            .characteristics = 0,
+        };
+        try header.writeAssertNoOverflow(writer);
+    }
+
+    /// Pairs the token's source slice with the code page in effect for that token.
+    pub fn sourceBytesForToken(self: *Compiler, token: Token) SourceBytes {
+        return .{
+            .slice = token.slice(self.source),
+            .code_page = self.input_code_pages.getForToken(token),
+        };
+    }
+
+    /// Helper that calls parseQuotedStringAsWideString with the relevant context
+    /// Resulting slice is allocated by `self.allocator`.
+    pub fn parseQuotedStringAsWideString(self: *Compiler, token: Token) ![:0]u16 {
+        return literals.parseQuotedStringAsWideString(
+            self.allocator,
+            self.sourceBytesForToken(token),
+            .{
+                .start_column = token.calculateColumn(self.source, 8, null),
+                .diagnostics = .{ .diagnostics = self.diagnostics, .token = token },
+            },
+        );
+    }
+
+    /// Helper that calls parseQuotedStringAsAsciiString with the relevant context
+    /// Resulting slice is allocated by `self.allocator`.
+    pub fn parseQuotedStringAsAsciiString(self: *Compiler, token: Token) ![]u8 {
+        return literals.parseQuotedStringAsAsciiString(
+            self.allocator,
+            self.sourceBytesForToken(token),
+            .{
+                .start_column = token.calculateColumn(self.source, 8, null),
+                .diagnostics = .{ .diagnostics = self.diagnostics, .token = token },
+            },
+        );
+    }
+
+    /// Appends `details` to the compiler's diagnostics.
+    fn addErrorDetails(self: *Compiler, details: ErrorDetails) Allocator.Error!void {
+        try self.diagnostics.append(details);
+    }
+
+    /// Appends `details` to the compiler's diagnostics and returns
+    /// `error.CompileError` (or `error.OutOfMemory` if the append fails).
+    /// The return type is an error set, so this is meant to be used as
+    /// `return self.addErrorDetailsAndFail(...)`.
+    fn addErrorDetailsAndFail(self: *Compiler, details: ErrorDetails) error{ CompileError, OutOfMemory } {
+        try self.addErrorDetails(details);
+        return error.CompileError;
+    }
+};
+
+pub const OpenSearchPathError = std.fs.Dir.OpenError;
+
+/// Opens `path` relative to `dir` after validating it with `validateSearchPath`.
+/// Caller is responsible for closing the returned directory.
+fn openSearchPathDir(dir: std.fs.Dir, path: []const u8) OpenSearchPathError!std.fs.Dir {
+    // Validate the search path to avoid possible unreachable on invalid paths,
+    // see https://github.com/ziglang/zig/issues/15607 for why this is currently necessary.
+    try validateSearchPath(path);
+    return dir.openDir(path, .{});
+}
+
+/// Very crude attempt at validating a path. This is imperfect
+/// and AFAIK it is effectively impossible to implement perfect path
+/// validation, since it ultimately depends on the underlying filesystem.
+/// Note that this function won't be necessary if/when
+/// https://github.com/ziglang/zig/issues/15607
+/// is accepted/implemented.
+fn validateSearchPath(path: []const u8) error{BadPathName}!void {
+    switch (builtin.os.tag) {
+        .windows => {
+            // This will return error.BadPathName on non-Win32 namespaced paths
+            // (e.g. the NT \??\ prefix, the device \\.\ prefix, etc).
+            // Those path types are something of an unavoidable way to
+            // still hit unreachable during the openDir call.
+            var component_iterator = try std.fs.path.componentIterator(path);
+            while (component_iterator.next()) |component| {
+                // https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file
+                if (std.mem.indexOfAny(u8, component.name, "\x00<>:\"|?*") != null) return error.BadPathName;
+            }
+        },
+        else => {
+            if (std.mem.indexOfScalar(u8, path, 0) != null) return error.BadPathName;
+        },
+    }
+}
+
+/// An open search directory plus, optionally, the path it was opened from.
+pub const SearchDir = struct {
+    dir: std.fs.Dir,
+    path: ?[]const u8,
+
+    /// Closes `dir` and, if `path` is non-null, frees it with `allocator`.
+    pub fn deinit(self: *SearchDir, allocator: Allocator) void {
+        self.dir.close();
+        if (self.path) |path| {
+            allocator.free(path);
+        }
+    }
+};
+
+/// Slurps the first `size` bytes read into `slurped_header`
+pub fn HeaderSlurpingReader(comptime size: usize, comptime ReaderType: anytype) type {
+    return struct {
+        child_reader: ReaderType,
+        /// Total number of bytes read through this reader so far.
+        bytes_read: u64 = 0,
+        /// The first `size` bytes that were read; zero-filled past what has been read.
+        slurped_header: [size]u8 = [_]u8{0x00} ** size,
+
+        pub const Error = ReaderType.Error;
+        pub const Reader = std.io.Reader(*@This(), Error, read);
+
+        /// Reads from the child reader into `buf`, copying into `slurped_header`
+        /// whatever part of the read still falls within the first `size` bytes.
+        pub fn read(self: *@This(), buf: []u8) Error!usize {
+            const amt = try self.child_reader.read(buf);
+            if (self.bytes_read < size) {
+                // `bytes_read` is a u64, which cannot be used directly as a slice
+                // index when usize is narrower than 64 bits. It is known to be
+                // less than `size` (a usize) here, so the cast cannot fail.
+                const start_index: usize = @intCast(self.bytes_read);
+                const bytes_to_add = @min(amt, size - start_index);
+                const end_index = start_index + bytes_to_add;
+                std.mem.copy(u8, self.slurped_header[start_index..end_index], buf[0..bytes_to_add]);
+            }
+            self.bytes_read += amt;
+            return amt;
+        }
+
+        pub fn reader(self: *@This()) Reader {
+            return .{ .context = self };
+        }
+    };
+}
+
+/// Returns an initialised `HeaderSlurpingReader` wrapping `reader`.
+pub fn headerSlurpingReader(comptime size: usize, reader: anytype) HeaderSlurpingReader(size, @TypeOf(reader)) {
+    return .{ .child_reader = reader };
+}
+
+/// Sort of like std.io.LimitedReader, but a Writer.
+/// Returns an error if writing the requested number of bytes
+/// would ever exceed bytes_left, i.e. it does not always
+/// write up to the limit and instead will error if the
+/// limit would be breached if the entire slice was written.
+pub fn LimitedWriter(comptime WriterType: type) type {
+    return struct {
+        inner_writer: WriterType,
+        bytes_left: u64,
+
+        pub const Error = error{NoSpaceLeft} || WriterType.Error;
+        pub const Writer = std.io.Writer(*Self, Error, write);
+
+        const Self = @This();
+
+        /// Fails with `error.NoSpaceLeft` without writing anything if `bytes`
+        /// is longer than `bytes_left`; otherwise forwards to the inner writer
+        /// and deducts the number of bytes it reports as written.
+        pub fn write(self: *Self, bytes: []const u8) Error!usize {
+            if (bytes.len > self.bytes_left) return error.NoSpaceLeft;
+            const amt = try self.inner_writer.write(bytes);
+            self.bytes_left -= amt;
+            return amt;
+        }
+
+        pub fn writer(self: *Self) Writer {
+            return .{ .context = self };
+        }
+    };
+}
+
+/// Returns an initialised `LimitedWriter`
+/// `bytes_left` is a `u64` to be able to take 64 bit file offsets
+pub fn limitedWriter(inner_writer: anytype, bytes_left: u64) LimitedWriter(@TypeOf(inner_writer)) {
+    return .{ .inner_writer = inner_writer, .bytes_left = bytes_left };
+}
+
+test "limitedWriter basic usage" {
+    var buf: [4]u8 = undefined;
+    var fbs = std.io.fixedBufferStream(&buf);
+    var limited_stream = limitedWriter(fbs.writer(), 4);
+    var writer = limited_stream.writer();
+
+    try std.testing.expectEqual(@as(usize, 3), try writer.write("123"));
+    try std.testing.expectEqualSlices(u8, "123", buf[0..3]);
+    try std.testing.expectError(error.NoSpaceLeft, writer.write("45"));
+    try std.testing.expectEqual(@as(usize, 1), try writer.write("4"));
+    try std.testing.expectEqualSlices(u8, "1234", buf[0..4]);
+    try std.testing.expectError(error.NoSpaceLeft, writer.write("5"));
+}
+
+/// Collects the FONT resources seen so far so that a FONTDIR resource can be
+/// written for them with `writeResData`.
+pub const FontDir = struct {
+    fonts: std.ArrayListUnmanaged(Font) = .{},
+    /// To keep track of which ids are set and where they were set from
+    ids: std.AutoHashMapUnmanaged(u16, Token) = .{},
+
+    pub const Font = struct {
+        id: u16,
+        header_bytes: [148]u8,
+    };
+
+    /// Frees both `fonts` and `ids`. `allocator` must be the allocator that
+    /// was passed to `add`.
+    pub fn deinit(self: *FontDir, allocator: Allocator) void {
+        self.fonts.deinit(allocator);
+        self.ids.deinit(allocator);
+    }
+
+    /// Records `font` and the token its id came from.
+    /// `font.id` must not have been added before (`putNoClobber` asserts this).
+    pub fn add(self: *FontDir, allocator: Allocator, font: Font, id_token: Token) !void {
+        try self.ids.putNoClobber(allocator, font.id, id_token);
+        try self.fonts.append(allocator, font);
+    }
+
+    /// Writes the FONTDIR resource (header, data, and padding) for all added
+    /// fonts. Writes nothing if no fonts were added.
+    pub fn writeResData(self: *FontDir, compiler: *Compiler, writer: anytype) !void {
+        if (self.fonts.items.len == 0) return;
+
+        // We know the number of fonts is limited to maxInt(u16) because fonts
+        // must have a valid and unique u16 ordinal ID (trying to specify a FONT
+        // with e.g. id 65537 will wrap around to 1 and be ignored if there's already
+        // a font with that ID in the file).
+        const num_fonts: u16 = @intCast(self.fonts.items.len);
+
+        // u16 count + [(u16 id + 150 bytes) for each font]
+        // Note: This works out to a maximum data_size of 9,961,322.
+        // The multiplication must be done in u32: with a u16 operand the whole
+        // expression would be evaluated as u16 and overflow above 431 fonts.
+        const data_size: u32 = 2 + (2 + 150) * @as(u32, num_fonts);
+
+        var header = Compiler.ResourceHeader{
+            .name_value = try NameOrOrdinal.nameFromString(compiler.allocator, .{ .slice = "FONTDIR", .code_page = .windows1252 }),
+            .type_value = NameOrOrdinal{ .ordinal = @intFromEnum(res.RT.FONTDIR) },
+            .memory_flags = res.MemoryFlags.defaults(res.RT.FONTDIR),
+            .language = compiler.state.language,
+            .version = compiler.state.version,
+            .characteristics = compiler.state.characteristics,
+            .data_size = data_size,
+        };
+        defer header.deinit(compiler.allocator);
+
+        try header.writeAssertNoOverflow(writer);
+        try writer.writeIntLittle(u16, num_fonts);
+        for (self.fonts.items) |font| {
+            // The format of the FONTDIR is a strange beast.
+            // Technically, each FONT is seemingly meant to be written as a
+            // FONTDIRENTRY with two trailing NUL-terminated strings corresponding to
+            // the 'device name' and 'face name' of the .FNT file, but:
+            //
+            // 1. When dealing with .FNT files, the Win32 implementation
+            //    gets the device name and face name from the wrong locations,
+            //    so it's basically never going to write the real device/face name
+            //    strings.
+            // 2. When dealing with files 76-140 bytes long, the Win32 implementation
+            //    can just crash (if there are no NUL bytes in the file).
+            // 3. The 32-bit Win32 rc.exe uses a 148 byte size for the portion of
+            //    the FONTDIRENTRY before the NUL-terminated strings, which
+            //    does not match the documented FONTDIRENTRY size that (presumably)
+            //    this format is meant to be using, so anything iterating the
+            //    FONTDIR according to the available documentation will get bogus results.
+            // 4. The FONT resource can be used for non-.FNT types like TTF and OTF,
+            //    in which case emulating the Win32 behavior of unconditionally
+            //    interpreting the bytes as a .FNT and trying to grab device/face names
+            //    from random bytes in the TTF/OTF file can lead to weird behavior
+            //    and errors in the Win32 implementation (for example, the device/face
+            //    name fields are offsets into the file where the NUL-terminated
+            //    string is located, but the Win32 implementation actually treats
+            //    them as signed so if they are negative then the Win32 implementation
+            //    will error; this happening for TTF fonts would just be a bug
+            //    since the TTF could otherwise be valid)
+            // 5. The FONTDIR resource doesn't actually seem to be used at all by
+            //    anything that I've found, and instead in Windows 3.0 and newer
+            //    it seems like the FONT resources are always just iterated/accessed
+            //    directly without ever looking at the FONTDIR.
+            //
+            // All of these combined means that we:
+            // - Do not need or want to emulate Win32 behavior here
+            // - For maximum simplicity and compatibility, we just write the first
+            //   148 bytes of the file without any interpretation (padded with
+            //   zeroes to get up to 148 bytes if necessary), and then
+            //   unconditionally write two NUL bytes, meaning that we always
+            //   write 'device name' and 'face name' as if they were 0-length
+            //   strings.
+            //
+            // This gives us byte-for-byte .RES compatibility in the common case while
+            // allowing us to avoid any erroneous errors caused by trying to read
+            // the face/device name from a bogus location. Note that the Win32
+            // implementation never actually writes the real device/face name here
+            // anyway (except in the bizarre case that a .FNT file has the proper
+            // device/face name offsets within a reserved section of the .FNT file)
+            // so there's no feasible way that anything can actually think that the
+            // device name/face name in the FONTDIR is reliable.
+
+            // First, the ID is written, though
+            try writer.writeIntLittle(u16, font.id);
+            try writer.writeAll(&font.header_bytes);
+            try writer.writeByteNTimes(0, 2);
+        }
+        try Compiler.writeDataPadding(writer, data_size);
+    }
+};
+
+pub const StringTablesByLanguage = struct {
+    /// String tables for each language are written to the .res file in order depending on
+    /// when the first STRINGTABLE for the language was defined, and all blocks for a given
+    /// language are written contiguously.
+    /// Using an ArrayHashMap here gives us this property for free.
+    tables: std.AutoArrayHashMapUnmanaged(res.Language, StringTable) = .{},
+
+    /// Frees every contained `StringTable` and then the map itself.
+    /// `allocator` must be the allocator that was passed to `set`.
+    pub fn deinit(self: *StringTablesByLanguage, allocator: Allocator) void {
+        for (self.tables.values()) |*table| {
+            table.deinit(allocator);
+        }
+        self.tables.deinit(allocator);
+    }
+
+    /// Sets string `id` in the table for `language`, creating the table on
+    /// first use. See `StringTable.set` for the remaining parameters and errors.
+    pub fn set(
+        self: *StringTablesByLanguage,
+        allocator: Allocator,
+        language: res.Language,
+        id: u16,
+        string_token: Token,
+        node: *Node,
+        source: []const u8,
+        code_page_lookup: *const CodePageLookup,
+        version: u32,
+        characteristics: u32,
+    ) StringTable.SetError!void {
+        var get_or_put_result = try self.tables.getOrPut(allocator, language);
+        if (!get_or_put_result.found_existing) {
+            get_or_put_result.value_ptr.* = StringTable{};
+        }
+        return get_or_put_result.value_ptr.set(allocator, id, string_token, node, source, code_page_lookup, version, characteristics);
+    }
+};
+
+pub const StringTable = struct {
+    /// Blocks are written to the .res file in order depending on when the first string
+    /// was added to the block (i.e.
`STRINGTABLE { 16 "b" 0 "a" }` would then get written
+    /// with block ID 2 (the one with "b") first and block ID 1 (the one with "a") second).
+    /// Using an ArrayHashMap here gives us this property for free.
+    blocks: std.AutoArrayHashMapUnmanaged(u16, Block) = .{},
+
+    /// One block of up to 16 strings. `strings` holds the tokens of the strings
+    /// that are set, ordered by their index within the block; `set_indexes`
+    /// records which of the 16 indexes are set.
+    pub const Block = struct {
+        strings: std.ArrayListUnmanaged(Token) = .{},
+        set_indexes: std.bit_set.IntegerBitSet(16) = .{ .mask = 0 },
+        memory_flags: MemoryFlags = MemoryFlags.defaults(res.RT.STRING),
+        characteristics: u32,
+        version: u32,
+
+        /// Returns the index to insert the string into the `strings` list.
+        /// Returns null if the string should be appended.
+        fn getInsertionIndex(self: *Block, index: u8) ?u8 {
+            std.debug.assert(!self.set_indexes.isSet(index));
+
+            const first_set = self.set_indexes.findFirstSet() orelse return null;
+            if (first_set > index) return 0;
+
+            const last_set = 15 - @clz(self.set_indexes.mask);
+            if (index > last_set) return null;
+
+            var bit = first_set + 1;
+            var insertion_index: u8 = 1;
+            while (bit != index) : (bit += 1) {
+                if (self.set_indexes.isSet(bit)) insertion_index += 1;
+            }
+            return insertion_index;
+        }
+
+        /// Returns the position within `strings` of the string at `string_index`,
+        /// or null if that index is not set in this block.
+        fn getTokenIndex(self: *Block, string_index: u8) ?u8 {
+            // Without this check, a block containing exactly one string would
+            // report that string for every index.
+            if (!self.set_indexes.isSet(string_index)) return null;
+
+            const count = self.strings.items.len;
+            if (count == 0) return null;
+            if (count == 1) return 0;
+
+            const first_set = self.set_indexes.findFirstSet() orelse unreachable;
+            if (first_set == string_index) return 0;
+            const last_set = 15 - @clz(self.set_indexes.mask);
+            if (last_set == string_index) return @intCast(count - 1);
+
+            if (first_set == last_set) return null;
+
+            var bit = first_set + 1;
+            var token_index: u8 = 1;
+            while (bit < last_set) : (bit += 1) {
+                if (!self.set_indexes.isSet(bit)) continue;
+                if (bit == string_index) return token_index;
+                token_index += 1;
+            }
+            return null;
+        }
+
+        /// Debug helper: prints each set index with its position in `strings` and its token.
+        fn dump(self: *Block) void {
+            var bit_it = self.set_indexes.iterator(.{});
+            var string_index: usize = 0;
+            while (bit_it.next()) |bit_index| {
+                const token = self.strings.items[string_index];
+                std.debug.print("{}: [{}] {any}\n", .{ bit_index, string_index, token });
+                string_index += 1;
+            }
+        }
+
+        /// Applies the STRINGTABLE's common resource attributes and its VERSION /
+        /// CHARACTERISTICS optional statements to this block. Any LANGUAGE
+        /// statement is evaluated into a dummy variable and discarded.
+        pub fn applyAttributes(self: *Block, string_table: *Node.StringTable, source: []const u8, code_page_lookup: *const CodePageLookup) void {
+            Compiler.applyToMemoryFlags(&self.memory_flags, string_table.common_resource_attributes, source);
+            var dummy_language: res.Language = undefined;
+            Compiler.applyToOptionalStatements(&dummy_language, &self.version, &self.characteristics, string_table.optional_statements, source, code_page_lookup);
+        }
+
+        /// Returns the part of `str` before the first pair of consecutive zero
+        /// elements, or all of `str` if there is no such pair.
+        fn trimToDoubleNUL(comptime T: type, str: []const T) []const T {
+            var last_was_null = false;
+            for (str, 0..) |c, i| {
+                if (c == 0) {
+                    if (last_was_null) return str[0 .. i - 1];
+                    last_was_null = true;
+                } else {
+                    last_was_null = false;
+                }
+            }
+            return str;
+        }
+
+        test "trimToDoubleNUL" {
+            try std.testing.expectEqualStrings("a\x00b", trimToDoubleNUL(u8, "a\x00b"));
+            try std.testing.expectEqualStrings("a", trimToDoubleNUL(u8, "a\x00\x00b"));
+        }
+
+        /// Writes this block as an RT_STRING resource named `block_id`: the header
+        /// followed by 16 length-prefixed UTF-16 strings (length 0 for unset indexes).
+        pub fn writeResData(self: *Block, compiler: *Compiler, language: res.Language, block_id: u16, writer: anytype) !void {
+            var data_buffer = std.ArrayList(u8).init(compiler.allocator);
+            defer data_buffer.deinit();
+            const data_writer = data_buffer.writer();
+
+            var i: u8 = 0;
+            var string_i: u8 = 0;
+            while (true) : (i += 1) {
+                if (!self.set_indexes.isSet(i)) {
+                    try data_writer.writeIntLittle(u16, 0);
+                    if (i == 15) break else continue;
+                }
+
+                const string_token = self.strings.items[string_i];
+                const slice = string_token.slice(compiler.source);
+                const column = string_token.calculateColumn(compiler.source, 8, null);
+                const code_page = compiler.input_code_pages.getForToken(string_token);
+                const bytes = SourceBytes{ .slice = slice, .code_page = code_page };
+                const utf16_string = try literals.parseQuotedStringAsWideString(compiler.allocator, bytes, .{
+                    .start_column = column,
+                    .diagnostics = .{ .diagnostics = compiler.diagnostics, .token = string_token },
+                });
+                defer compiler.allocator.free(utf16_string);
+
+                const trimmed_string = trim: {
+                    // Two NUL characters in a row act as a terminator
+                    // Note: This is only the case for STRINGTABLE strings
+                    var trimmed = trimToDoubleNUL(u16, utf16_string);
+                    // We also want to trim any trailing NUL characters
+                    break :trim std.mem.trimRight(u16, trimmed, &[_]u16{0});
+                };
+
+                // String literals are limited to maxInt(u15) codepoints, so these UTF-16 encoded
+                // strings are limited to maxInt(u15) * 2 = 65,534 code units (since 2 is the
+                // maximum number of UTF-16 code units per codepoint).
+                // This leaves room for exactly one NUL terminator.
+                var string_len_in_utf16_code_units: u16 = @intCast(trimmed_string.len);
+                // If the option is set, then a NUL terminator is added unconditionally.
+                // We already trimmed any trailing NULs, so we know it will be a new addition to the string.
+                if (compiler.null_terminate_string_table_strings) string_len_in_utf16_code_units += 1;
+                try data_writer.writeIntLittle(u16, string_len_in_utf16_code_units);
+                for (trimmed_string) |wc| {
+                    try data_writer.writeIntLittle(u16, wc);
+                }
+                if (compiler.null_terminate_string_table_strings) {
+                    try data_writer.writeIntLittle(u16, 0);
+                }
+
+                if (i == 15) break;
+                string_i += 1;
+            }
+
+            // This intCast will never be able to fail due to the length constraints on string literals.
+            //
+            // - STRINGTABLE resource definitions can only provide one string literal per index.
+            // - STRINGTABLE strings are limited to maxInt(u16) UTF-16 code units (see 'string_len_in_utf16_code_units'
+            //   above), which means that the maximum number of bytes per string literal is
+            //   2 * maxInt(u16) = 131,070 (since there are 2 bytes per UTF-16 code unit).
+            // - Each Block/RT_STRING resource includes exactly 16 strings and each have a 2 byte
+            //   length field, so the maximum number of total bytes in a RT_STRING resource's data is
+            //   16 * (131,070 + 2) = 2,097,152 which is well within the u32 max.
+            //
+            // Note: The string literal maximum length is enforced by the lexer.
+            const data_size: u32 = @intCast(data_buffer.items.len);
+
+            const header = Compiler.ResourceHeader{
+                .name_value = .{ .ordinal = block_id },
+                .type_value = .{ .ordinal = @intFromEnum(res.RT.STRING) },
+                .memory_flags = self.memory_flags,
+                .language = language,
+                .version = self.version,
+                .characteristics = self.characteristics,
+                .data_size = data_size,
+            };
+            // The only variable parts of the header are name and type, which in this case
+            // we fully control and know are numbers, so they have a fixed size.
+            try header.writeAssertNoOverflow(writer);
+
+            var data_fbs = std.io.fixedBufferStream(data_buffer.items);
+            try Compiler.writeResourceData(writer, data_fbs.reader(), data_size);
+        }
+    };
+
+    /// Frees the string list of every block and then the block map.
+    pub fn deinit(self: *StringTable, allocator: Allocator) void {
+        var it = self.blocks.iterator();
+        while (it.next()) |entry| {
+            entry.value_ptr.strings.deinit(allocator);
+        }
+        self.blocks.deinit(allocator);
+    }
+
+    const SetError = error{StringAlreadyDefined} || Allocator.Error;
+
+    /// Records `string_token` as the string with the given `id`.
+    /// The block is `(id / 16) + 1` and the index within the block is `id & 0xF`.
+    /// A newly created block takes `version` and `characteristics` as its initial
+    /// values and then has the attributes of `node` (which must be a string table
+    /// node) applied. Returns `error.StringAlreadyDefined` if the id is already set.
+    pub fn set(
+        self: *StringTable,
+        allocator: Allocator,
+        id: u16,
+        string_token: Token,
+        node: *Node,
+        source: []const u8,
+        code_page_lookup: *const CodePageLookup,
+        version: u32,
+        characteristics: u32,
+    ) SetError!void {
+        const block_id = (id / 16) + 1;
+        const string_index: u8 = @intCast(id & 0xF);
+
+        var get_or_put_result = try self.blocks.getOrPut(allocator, block_id);
+        if (!get_or_put_result.found_existing) {
+            get_or_put_result.value_ptr.* = Block{ .version = version, .characteristics = characteristics };
+            get_or_put_result.value_ptr.applyAttributes(node.cast(.string_table).?, source, code_page_lookup);
+        } else {
+            if (get_or_put_result.value_ptr.set_indexes.isSet(string_index)) {
+                return error.StringAlreadyDefined;
+            }
+        }
+
+        var block = get_or_put_result.value_ptr;
+        if (block.getInsertionIndex(string_index)) |insertion_index| {
+            try block.strings.insert(allocator, insertion_index, string_token);
+        } else {
+            try block.strings.append(allocator, string_token);
+        }
+        block.set_indexes.set(string_index);
+    }
+
+    /// Returns the token of the string with the given `id`, or null if it is not set.
+    pub fn get(self: *StringTable, id: u16) ?Token {
+        const block_id = (id / 16) + 1;
+        const string_index: u8 = @intCast(id & 0xF);
+
+        const block = self.blocks.getPtr(block_id) orelse return null;
+        const token_index = block.getTokenIndex(string_index) orelse return null;
+        return block.strings.items[token_index];
+    }
+
+    /// Debug helper: prints every block id followed by that block's contents.
+    pub fn dump(self: *StringTable) !void {
+        var it = self.blocks.iterator();
+        while (it.next()) |entry| {
+            std.debug.print("block: {}\n", .{entry.key_ptr.*});
+            entry.value_ptr.dump();
+        }
+    }
+};
+
+test "StringTable" {
+    const S = struct {
+        fn makeDummyToken(id: usize) Token {
+            return Token{
+                .id = .invalid,
+                .start = id,
+                .end = id,
+                .line_number = id,
+            };
+        }
+    };
+    const allocator = std.testing.allocator;
+    var string_table = StringTable{};
+    defer string_table.deinit(allocator);
+
+    var code_page_lookup = CodePageLookup.init(allocator, .windows1252);
+    defer code_page_lookup.deinit();
+
+    var dummy_node = Node.StringTable{
+        .type = S.makeDummyToken(0),
+        .common_resource_attributes = &.{},
+        .optional_statements = &.{},
+        .begin_token = S.makeDummyToken(0),
+        .strings = &.{},
+        .end_token = S.makeDummyToken(0),
+    };
+
+    // randomize an array of ids 0-99
+    var ids = ids: {
+        var buf: [100]u16 = undefined;
+        var i: u16 = 0;
+        while (i < buf.len) : (i += 1) {
+            buf[i] = i;
+        }
+        break :ids buf;
+    };
+    var prng = std.rand.DefaultPrng.init(0);
+    var random = prng.random();
+    random.shuffle(u16, &ids);
+
+    // set each one in the randomized order
+    for (ids) |id| {
+        try string_table.set(allocator, id, S.makeDummyToken(id), &dummy_node.base, "", &code_page_lookup, 0, 0);
+    }
+
+    // make sure each one exists and is the right value when gotten
+    var id: u16 = 0;
+    while (id < 100) : (id += 1) {
+        const dummy = S.makeDummyToken(id);
+        try std.testing.expectError(error.StringAlreadyDefined, string_table.set(allocator, id, dummy, &dummy_node.base, "", &code_page_lookup, 0, 0));
+        try std.testing.expectEqual(dummy, string_table.get(id).?);
+    }
+
+    // make sure non-existent string ids are not found
+    try std.testing.expectEqual(@as(?Token, null), string_table.get(100));
+
+    // a block that contains a single string must not match any other index in that block
+    try string_table.set(allocator, 112, S.makeDummyToken(112), &dummy_node.base, "", &code_page_lookup, 0, 0);
+    try std.testing.expectEqual(S.makeDummyToken(112), string_table.get(112).?);
+    try std.testing.expectEqual(@as(?Token, null), string_table.get(113));
+}
diff --git a/src/resinator/errors.zig b/src/resinator/errors.zig
new file mode 100644
index 000000000000..33cb19682b32
--- /dev/null
+++ b/src/resinator/errors.zig
@@ -0,0 +1,1033 @@
+const std = @import("std");
+const Token = @import("lex.zig").Token;
+const SourceMappings = @import("source_mapping.zig").SourceMappings;
+const utils = @import("utils.zig");
+const rc = @import("rc.zig");
+const res = @import("res.zig");
+const ico = @import("ico.zig");
+const bmp = @import("bmp.zig");
+const parse = @import("parse.zig");
+const CodePage = @import("code_pages.zig").CodePage;
+
+pub const Diagnostics = struct {
+    errors: std.ArrayListUnmanaged(ErrorDetails) = .{},
+    /// Append-only, cannot handle removing strings.
+    /// Expects to own all strings within the list.
+    strings: std.ArrayListUnmanaged([]const u8) = .{},
+    allocator: std.mem.Allocator,
+
+    pub fn init(allocator: std.mem.Allocator) Diagnostics {
+        return .{
+            .allocator = allocator,
+        };
+    }
+
+    /// Frees the error list, every owned string, and the string list.
+    pub fn deinit(self: *Diagnostics) void {
+        self.errors.deinit(self.allocator);
+        for (self.strings.items) |str| {
+            self.allocator.free(str);
+        }
+        self.strings.deinit(self.allocator);
+    }
+
+    /// Appends `error_details` to the list of collected diagnostics.
+    pub fn append(self: *Diagnostics, error_details: ErrorDetails) !void {
+        try self.errors.append(self.allocator, error_details);
+    }
+
+    const SmallestStringIndexType = std.meta.Int(.unsigned, @min(
+        @bitSizeOf(ErrorDetails.FileOpenError.FilenameStringIndex),
+        @min(
+            @bitSizeOf(ErrorDetails.IconReadError.FilenameStringIndex),
+            @bitSizeOf(ErrorDetails.BitmapReadError.FilenameStringIndex),
+        ),
+    ));
+
+    /// Returns the index of the added string as the SmallestStringIndexType
+    /// in order to avoid needing to `@intCast` it at callsites of putString.
+    /// Instead, this function will error if the index would ever exceed the
+    /// smallest FilenameStringIndex of an ErrorDetails type.
+    /// `str` is duplicated; the copy is owned by `self` and freed in `deinit`.
+    pub fn putString(self: *Diagnostics, str: []const u8) !SmallestStringIndexType {
+        if (self.strings.items.len >= std.math.maxInt(SmallestStringIndexType)) {
+            return error.OutOfMemory; // ran out of string indexes
+        }
+        const dupe = try self.allocator.dupe(u8, str);
+        // The copy is not owned by `strings` until the append succeeds.
+        errdefer self.allocator.free(dupe);
+        const index = self.strings.items.len;
+        try self.strings.append(self.allocator, dupe);
+        return @intCast(index);
+    }
+
+    /// Renders every collected diagnostic to stderr while holding the stderr mutex.
+    /// Rendering is best-effort: it stops silently at the first render failure.
+    pub fn renderToStdErr(self: *Diagnostics, cwd: std.fs.Dir, source: []const u8, tty_config: std.io.tty.Config, source_mappings: ?SourceMappings) void {
+        std.debug.getStderrMutex().lock();
+        defer std.debug.getStderrMutex().unlock();
+        const stderr = std.io.getStdErr().writer();
+        for (self.errors.items) |err_details| {
+            renderErrorMessage(self.allocator, stderr, tty_config, cwd, err_details, source, self.strings.items, source_mappings) catch return;
+        }
+    }
+
+    /// Same as `renderToStdErr`, with the tty configuration detected from stderr.
+    pub fn renderToStdErrDetectTTY(self: *Diagnostics, cwd: std.fs.Dir, source: []const u8, source_mappings: ?SourceMappings) void {
+        const tty_config = std.io.tty.detectConfig(std.io.getStdErr());
+        return self.renderToStdErr(cwd, source, tty_config, source_mappings);
+    }
+
+    /// Returns true if any collected diagnostic has the error code `err`.
+    pub fn contains(self: *const Diagnostics, err: ErrorDetails.Error) bool {
+        for (self.errors.items) |details| {
+            if (details.err == err) return true;
+        }
+        return false;
+    }
+
+    /// Returns true if any collected diagnostic has one of the error codes in `errors`.
+    pub fn containsAny(self: *const Diagnostics, errors: []const ErrorDetails.Error) bool {
+        for (self.errors.items) |details| {
+            for (errors) |err| {
+                if (details.err == err) return true;
+            }
+        }
+        return false;
+    }
+};
+
+/// Contains enough context to append errors/warnings/notes etc
+pub const DiagnosticsContext = struct {
+    diagnostics: *Diagnostics,
+    token: Token,
+};
+
+pub const ErrorDetails = struct {
+    err: Error,
+    token: Token,
+    /// If non-null, should be before `token`. If null, `token` is assumed to be the start.
+    token_span_start: ?Token = null,
+    /// If non-null, should be after `token`. If null, `token` is assumed to be the end.
+    token_span_end: ?Token = null,
+    type: Type = .err,
+    print_source_line: bool = true,
+    extra: union {
+        none: void,
+        expected: Token.Id,
+        number: u32,
+        expected_types: ExpectedTypes,
+        resource: rc.Resource,
+        string_and_language: StringAndLanguage,
+        file_open_error: FileOpenError,
+        icon_read_error: IconReadError,
+        icon_dir: IconDirContext,
+        bmp_read_error: BitmapReadError,
+        accelerator_error: AcceleratorError,
+        statement_with_u16_param: StatementWithU16Param,
+        menu_or_class: enum { class, menu },
+    } = .{ .none = {} },
+
+    pub const Type = enum {
+        /// Fatal error, stops compilation
+        err,
+        /// Warning that does not affect compilation result
+        warning,
+        /// A note that typically provides further context for a warning/error
+        note,
+        /// An invisible diagnostic that is not printed to stderr but can
+        /// provide information useful when comparing the behavior of different
+        /// implementations. For example, a hint is emitted when a FONTDIR resource
+        /// was included in the .RES file which is significant because rc.exe
+        /// does something different than us, but ultimately it's not important
+        /// enough to be a warning/note.
+ hint, + }; + + comptime { + // all fields in the extra union should be 32 bits or less + for (std.meta.fields(std.meta.fieldInfo(ErrorDetails, .extra).type)) |field| { + std.debug.assert(@bitSizeOf(field.type) <= 32); + } + } + + pub const StatementWithU16Param = enum(u32) { + fileversion, + productversion, + language, + }; + + pub const StringAndLanguage = packed struct(u32) { + id: u16, + language: res.Language, + }; + + pub const FileOpenError = packed struct(u32) { + err: FileOpenErrorEnum, + filename_string_index: FilenameStringIndex, + + pub const FilenameStringIndex = std.meta.Int(.unsigned, 32 - @bitSizeOf(FileOpenErrorEnum)); + pub const FileOpenErrorEnum = std.meta.FieldEnum(std.fs.File.OpenError); + + pub fn enumFromError(err: std.fs.File.OpenError) FileOpenErrorEnum { + return switch (err) { + inline else => |e| @field(ErrorDetails.FileOpenError.FileOpenErrorEnum, @errorName(e)), + }; + } + }; + + pub const IconReadError = packed struct(u32) { + err: IconReadErrorEnum, + icon_type: enum(u1) { cursor, icon }, + filename_string_index: FilenameStringIndex, + + pub const FilenameStringIndex = std.meta.Int(.unsigned, 32 - @bitSizeOf(IconReadErrorEnum) - 1); + pub const IconReadErrorEnum = std.meta.FieldEnum(ico.ReadError); + + pub fn enumFromError(err: ico.ReadError) IconReadErrorEnum { + return switch (err) { + inline else => |e| @field(ErrorDetails.IconReadError.IconReadErrorEnum, @errorName(e)), + }; + } + }; + + pub const IconDirContext = packed struct(u32) { + icon_type: enum(u1) { cursor, icon }, + icon_format: ico.ImageFormat, + index: u16, + bitmap_version: ico.BitmapHeader.Version = .unknown, + _: Padding = 0, + + pub const Padding = std.meta.Int(.unsigned, 15 - @bitSizeOf(ico.BitmapHeader.Version) - @bitSizeOf(ico.ImageFormat)); + }; + + pub const BitmapReadError = packed struct(u32) { + err: BitmapReadErrorEnum, + filename_string_index: FilenameStringIndex, + + pub const FilenameStringIndex = std.meta.Int(.unsigned, 32 - 
@bitSizeOf(BitmapReadErrorEnum)); + pub const BitmapReadErrorEnum = std.meta.FieldEnum(bmp.ReadError); + + pub fn enumFromError(err: bmp.ReadError) BitmapReadErrorEnum { + return switch (err) { + inline else => |e| @field(ErrorDetails.BitmapReadError.BitmapReadErrorEnum, @errorName(e)), + }; + } + }; + + pub const BitmapUnsupportedDIB = packed struct(u32) { + dib_version: ico.BitmapHeader.Version, + filename_string_index: FilenameStringIndex, + + pub const FilenameStringIndex = std.meta.Int(.unsigned, 32 - @bitSizeOf(ico.BitmapHeader.Version)); + }; + + pub const AcceleratorError = packed struct(u32) { + err: AcceleratorErrorEnum, + _: Padding = 0, + + pub const Padding = std.meta.Int(.unsigned, 32 - @bitSizeOf(AcceleratorErrorEnum)); + pub const AcceleratorErrorEnum = std.meta.FieldEnum(res.ParseAcceleratorKeyStringError); + + pub fn enumFromError(err: res.ParseAcceleratorKeyStringError) AcceleratorErrorEnum { + return switch (err) { + inline else => |e| @field(ErrorDetails.AcceleratorError.AcceleratorErrorEnum, @errorName(e)), + }; + } + }; + + pub const ExpectedTypes = packed struct(u32) { + number: bool = false, + number_expression: bool = false, + string_literal: bool = false, + accelerator_type_or_option: bool = false, + control_class: bool = false, + literal: bool = false, + // Note: This being 0 instead of undefined is arbitrary and something of a workaround, + // see https://github.com/ziglang/zig/issues/15395 + _: u26 = 0, + + pub const strings = std.ComptimeStringMap([]const u8, .{ + .{ "number", "number" }, + .{ "number_expression", "number expression" }, + .{ "string_literal", "quoted string literal" }, + .{ "accelerator_type_or_option", "accelerator type or option [ASCII, VIRTKEY, etc]" }, + .{ "control_class", "control class [BUTTON, EDIT, etc]" }, + .{ "literal", "unquoted literal" }, + }); + + pub fn writeCommaSeparated(self: ExpectedTypes, writer: anytype) !void { + const struct_info = @typeInfo(ExpectedTypes).Struct; + const num_real_fields = 
struct_info.fields.len - 1; + const num_padding_bits = @bitSizeOf(ExpectedTypes) - num_real_fields; + const mask = std.math.maxInt(struct_info.backing_integer.?) >> num_padding_bits; + const relevant_bits_only = @as(struct_info.backing_integer.?, @bitCast(self)) & mask; + const num_set_bits = @popCount(relevant_bits_only); + + var i: usize = 0; + inline for (struct_info.fields) |field_info| { + if (field_info.type != bool) continue; + if (i == num_set_bits) return; + if (@field(self, field_info.name)) { + try writer.writeAll(strings.get(field_info.name).?); + i += 1; + if (num_set_bits > 2 and i != num_set_bits) { + try writer.writeAll(", "); + } else if (i != num_set_bits) { + try writer.writeByte(' '); + } + if (num_set_bits > 1 and i == num_set_bits - 1) { + try writer.writeAll("or "); + } + } + } + } + }; + + pub const Error = enum { + // Lexer + unfinished_string_literal, + string_literal_too_long, + invalid_number_with_exponent, + invalid_digit_character_in_number_literal, + illegal_byte, + illegal_byte_outside_string_literals, + illegal_codepoint_outside_string_literals, + illegal_byte_order_mark, + illegal_private_use_character, + found_c_style_escaped_quote, + code_page_pragma_missing_left_paren, + code_page_pragma_missing_right_paren, + code_page_pragma_invalid_code_page, + code_page_pragma_not_integer, + code_page_pragma_overflow, + code_page_pragma_unsupported_code_page, + + // Parser + unfinished_raw_data_block, + unfinished_string_table_block, + /// `expected` is populated. 
+ expected_token, + /// `expected_types` is populated + expected_something_else, + /// `resource` is populated + resource_type_cant_use_raw_data, + /// `resource` is populated + id_must_be_ordinal, + /// `resource` is populated + name_or_id_not_allowed, + string_resource_as_numeric_type, + ascii_character_not_equivalent_to_virtual_key_code, + empty_menu_not_allowed, + rc_would_miscompile_version_value_padding, + rc_would_miscompile_version_value_byte_count, + code_page_pragma_in_included_file, + nested_resource_level_exceeds_max, + too_many_dialog_controls, + nested_expression_level_exceeds_max, + close_paren_expression, + unary_plus_expression, + rc_could_miscompile_control_params, + + // Compiler + /// `string_and_language` is populated + string_already_defined, + font_id_already_defined, + /// `file_open_error` is populated + file_open_error, + /// `accelerator_error` is populated + invalid_accelerator_key, + accelerator_type_required, + rc_would_miscompile_control_padding, + rc_would_miscompile_control_class_ordinal, + /// `icon_dir` is populated + rc_would_error_on_icon_dir, + /// `icon_dir` is populated + format_not_supported_in_icon_dir, + /// `resource` is populated and contains the expected type + icon_dir_and_resource_type_mismatch, + /// `icon_read_error` is populated + icon_read_error, + /// `icon_dir` is populated + rc_would_error_on_bitmap_version, + /// `icon_dir` is populated + max_icon_ids_exhausted, + /// `bmp_read_error` is populated + bmp_read_error, + /// `number` is populated and contains a string index for which the string contains + /// the bytes of a `u64` (native endian). The `u64` contains the number of ignored bytes. + bmp_ignored_palette_bytes, + /// `number` is populated and contains a string index for which the string contains + /// the bytes of a `u64` (native endian). The `u64` contains the number of missing bytes. 
+ bmp_missing_palette_bytes, + /// `number` is populated and contains a string index for which the string contains + /// the bytes of a `u64` (native endian). The `u64` contains the number of miscompiled bytes. + rc_would_miscompile_bmp_palette_padding, + /// `number` is populated and contains a string index for which the string contains + /// the bytes of two `u64`s (native endian). The first contains the number of missing + /// palette bytes and the second contains the max number of missing palette bytes. + /// If type is `.note`, then `extra` is `none`. + bmp_too_many_missing_palette_bytes, + resource_header_size_exceeds_max, + resource_data_size_exceeds_max, + control_extra_data_size_exceeds_max, + version_node_size_exceeds_max, + fontdir_size_exceeds_max, + /// `number` is populated and contains a string index for the filename + number_expression_as_filename, + /// `number` is populated and contains the control ID that is a duplicate + control_id_already_defined, + /// `number` is populated and contains the disallowed codepoint + invalid_filename, + /// `statement_with_u16_param` is populated + rc_would_error_u16_with_l_suffix, + result_contains_fontdir, + /// `number` is populated and contains the ordinal value that the id would be miscompiled to + rc_would_miscompile_dialog_menu_id, + /// `number` is populated and contains the ordinal value that the value would be miscompiled to + rc_would_miscompile_dialog_class, + /// `menu_or_class` is populated and contains the type of the parameter statement + rc_would_miscompile_dialog_menu_or_class_id_forced_ordinal, + rc_would_miscompile_dialog_menu_id_starts_with_digit, + dialog_menu_id_was_uppercased, + /// `menu_or_class` is populated and contains the type of the parameter statement + duplicate_menu_or_class_skipped, + invalid_digit_character_in_ordinal, + + // Literals + /// `number` is populated + rc_would_miscompile_codepoint_byte_swap, + /// `number` is populated + rc_would_miscompile_codepoint_skip, + 
tab_converted_to_spaces, + + // General (used in various places) + /// `number` is populated and contains the value that the ordinal would have in the Win32 RC compiler implementation + win32_non_ascii_ordinal, + }; + + pub fn render(self: ErrorDetails, writer: anytype, source: []const u8, strings: []const []const u8) !void { + switch (self.err) { + .unfinished_string_literal => { + return writer.print("unfinished string literal at '{s}', expected closing '\"'", .{self.token.nameForErrorDisplay(source)}); + }, + .string_literal_too_long => { + return writer.print("string literal too long (max is currently {} characters)", .{self.extra.number}); + }, + .invalid_number_with_exponent => { + return writer.print("base 10 number literal with exponent is not allowed: {s}", .{self.token.slice(source)}); + }, + .invalid_digit_character_in_number_literal => switch (self.type) { + .err, .warning => return writer.writeAll("non-ASCII digit characters are not allowed in number literals"), + .note => return writer.writeAll("the Win32 RC compiler allows non-ASCII digit characters, but will miscompile them"), + .hint => return, + }, + .illegal_byte => { + return writer.print("character '{s}' is not allowed", .{std.fmt.fmtSliceEscapeUpper(self.token.slice(source))}); + }, + .illegal_byte_outside_string_literals => { + return writer.print("character '{s}' is not allowed outside of string literals", .{std.fmt.fmtSliceEscapeUpper(self.token.slice(source))}); + }, + .illegal_codepoint_outside_string_literals => { + // This is somewhat hacky, but we know that: + // - This error is only possible with codepoints outside of the Windows-1252 character range + // - So, the only supported code page that could generate this error is UTF-8 + // Therefore, we just assume the token bytes are UTF-8 and decode them to get the illegal + // codepoint. 
+ // + // FIXME: Support other code pages if they become relevant + const bytes = self.token.slice(source); + const codepoint = std.unicode.utf8Decode(bytes) catch unreachable; + return writer.print("codepoint <U+{x:0>4}> is not allowed outside of string literals", .{codepoint}); + }, + .illegal_byte_order_mark => { + return writer.writeAll("byte order mark is not allowed"); + }, + .illegal_private_use_character => { + return writer.writeAll("private use character is not allowed"); + }, + .found_c_style_escaped_quote => { + return writer.writeAll("escaping quotes with \\\" is not allowed (use \"\" instead)"); + }, + .code_page_pragma_missing_left_paren => { + return writer.writeAll("expected left parenthesis after 'code_page' in #pragma code_page"); + }, + .code_page_pragma_missing_right_paren => { + return writer.writeAll("expected right parenthesis after '<number>' in #pragma code_page"); + }, + .code_page_pragma_invalid_code_page => { + return writer.writeAll("invalid or unknown code page in #pragma code_page"); + }, + .code_page_pragma_not_integer => { + return writer.writeAll("code page is not a valid integer in #pragma code_page"); + }, + .code_page_pragma_overflow => { + return writer.writeAll("code page too large in #pragma code_page"); + }, + .code_page_pragma_unsupported_code_page => { + // We know that the token slice is a well-formed #pragma code_page(N), so + // we can skip to the first ( and then get the number that follows + const token_slice = self.token.slice(source); + var number_start = std.mem.indexOfScalar(u8, token_slice, '(').? 
+ 1; + while (std.ascii.isWhitespace(token_slice[number_start])) { + number_start += 1; + } + var number_slice = token_slice[number_start..number_start]; + while (std.ascii.isDigit(token_slice[number_start + number_slice.len])) { + number_slice.len += 1; + } + const number = std.fmt.parseUnsigned(u16, number_slice, 10) catch unreachable; + const code_page = CodePage.getByIdentifier(number) catch unreachable; + // TODO: Improve or maybe add a note making it more clear that the code page + // is valid and that the code page is unsupported purely due to a limitation + // in this compiler. + return writer.print("unsupported code page '{s} (id={})' in #pragma code_page", .{ @tagName(code_page), number }); + }, + .unfinished_raw_data_block => { + return writer.print("unfinished raw data block at '{s}', expected closing '}}' or 'END'", .{self.token.nameForErrorDisplay(source)}); + }, + .unfinished_string_table_block => { + return writer.print("unfinished STRINGTABLE block at '{s}', expected closing '}}' or 'END'", .{self.token.nameForErrorDisplay(source)}); + }, + .expected_token => { + return writer.print("expected '{s}', got '{s}'", .{ self.extra.expected.nameForErrorDisplay(), self.token.nameForErrorDisplay(source) }); + }, + .expected_something_else => { + try writer.writeAll("expected "); + try self.extra.expected_types.writeCommaSeparated(writer); + return writer.print("; got '{s}'", .{self.token.nameForErrorDisplay(source)}); + }, + .resource_type_cant_use_raw_data => switch (self.type) { + .err, .warning => try writer.print("expected '<filename>', found '{s}' (resource type '{s}' can't use raw data)", .{ self.token.nameForErrorDisplay(source), self.extra.resource.nameForErrorDisplay() }), + .note => try writer.print("if '{s}' is intended to be a filename, it must be specified as a quoted string literal", .{self.token.nameForErrorDisplay(source)}), + .hint => return, + }, + .id_must_be_ordinal => { + try writer.print("id of resource type '{s}' must be an ordinal (u16), got 
'{s}'", .{ self.extra.resource.nameForErrorDisplay(), self.token.nameForErrorDisplay(source) }); + }, + .name_or_id_not_allowed => { + try writer.print("name or id is not allowed for resource type '{s}'", .{self.extra.resource.nameForErrorDisplay()}); + }, + .string_resource_as_numeric_type => switch (self.type) { + .err, .warning => try writer.writeAll("the number 6 (RT_STRING) cannot be used as a resource type"), + .note => try writer.writeAll("using RT_STRING directly likely results in an invalid .res file, use a STRINGTABLE instead"), + .hint => return, + }, + .ascii_character_not_equivalent_to_virtual_key_code => { + // TODO: Better wording? This is what the Win32 RC compiler emits. + // This occurs when VIRTKEY and a control code is specified ("^c", etc) + try writer.writeAll("ASCII character not equivalent to virtual key code"); + }, + .empty_menu_not_allowed => { + try writer.print("empty menu of type '{s}' not allowed", .{self.token.nameForErrorDisplay(source)}); + }, + .rc_would_miscompile_version_value_padding => switch (self.type) { + .err, .warning => return writer.print("the padding before this quoted string value would be miscompiled by the Win32 RC compiler", .{}), + .note => return writer.print("to avoid the potential miscompilation, consider adding a comma between the key and the quoted string", .{}), + .hint => return, + }, + .rc_would_miscompile_version_value_byte_count => switch (self.type) { + .err, .warning => return writer.print("the byte count of this value would be miscompiled by the Win32 RC compiler", .{}), + .note => return writer.print("to avoid the potential miscompilation, do not mix numbers and strings within a value", .{}), + .hint => return, + }, + .code_page_pragma_in_included_file => { + try writer.print("#pragma code_page is not supported in an included resource file", .{}); + }, + .nested_resource_level_exceeds_max => switch (self.type) { + .err, .warning => { + const max = switch (self.extra.resource) { + .versioninfo => 
parse.max_nested_version_level, + .menu, .menuex => parse.max_nested_menu_level, + else => unreachable, + }; + return writer.print("{s} contains too many nested children (max is {})", .{ self.extra.resource.nameForErrorDisplay(), max }); + }, + .note => return writer.print("max {s} nesting level exceeded here", .{self.extra.resource.nameForErrorDisplay()}), + .hint => return, + }, + .too_many_dialog_controls => switch (self.type) { + .err, .warning => return writer.print("{s} contains too many controls (max is {})", .{ self.extra.resource.nameForErrorDisplay(), std.math.maxInt(u16) }), + .note => return writer.writeAll("maximum number of controls exceeded here"), + .hint => return, + }, + .nested_expression_level_exceeds_max => switch (self.type) { + .err, .warning => return writer.print("expression contains too many syntax levels (max is {})", .{parse.max_nested_expression_level}), + .note => return writer.print("maximum expression level exceeded here", .{}), + .hint => return, + }, + .close_paren_expression => { + try writer.writeAll("the Win32 RC compiler would accept ')' as a valid expression, but it would be skipped over and potentially lead to unexpected outcomes"); + }, + .unary_plus_expression => { + try writer.writeAll("the Win32 RC compiler may accept '+' as a unary operator here, but it is not supported in this implementation; consider omitting the unary +"); + }, + .rc_could_miscompile_control_params => switch (self.type) { + .err, .warning => return writer.print("this token could be erroneously skipped over by the Win32 RC compiler", .{}), + .note => return writer.print("to avoid the potential miscompilation, consider adding a comma after the style parameter", .{}), + .hint => return, + }, + .string_already_defined => switch (self.type) { + // TODO: better printing of language, using constant names from WinNT.h + .err, .warning => return writer.print("string with id {d} (0x{X}) already defined for language {d},{d}", .{ 
self.extra.string_and_language.id, self.extra.string_and_language.id, self.extra.string_and_language.language.primary_language_id, self.extra.string_and_language.language.sublanguage_id }), + .note => return writer.print("previous definition of string with id {d} (0x{X}) here", .{ self.extra.string_and_language.id, self.extra.string_and_language.id }), + .hint => return, + }, + .font_id_already_defined => switch (self.type) { + .err => return writer.print("font with id {d} already defined", .{self.extra.number}), + .warning => return writer.print("skipped duplicate font with id {d}", .{self.extra.number}), + .note => return writer.print("previous definition of font with id {d} here", .{self.extra.number}), + .hint => return, + }, + .file_open_error => { + try writer.print("unable to open file '{s}': {s}", .{ strings[self.extra.file_open_error.filename_string_index], @tagName(self.extra.file_open_error.err) }); + }, + .invalid_accelerator_key => { + try writer.print("invalid accelerator key '{s}': {s}", .{ self.token.nameForErrorDisplay(source), @tagName(self.extra.accelerator_error.err) }); + }, + .accelerator_type_required => { + try writer.print("accelerator type [ASCII or VIRTKEY] required when key is an integer", .{}); + }, + .rc_would_miscompile_control_padding => switch (self.type) { + .err, .warning => return writer.print("the padding before this control would be miscompiled by the Win32 RC compiler (it would insert 2 extra bytes of padding)", .{}), + .note => return writer.print("to avoid the potential miscompilation, consider removing any 'control data' blocks from the controls in this dialog", .{}), + .hint => return, + }, + .rc_would_miscompile_control_class_ordinal => switch (self.type) { + .err, .warning => return writer.print("the control class of this CONTROL would be miscompiled by the Win32 RC compiler", .{}), + .note => return writer.print("to avoid the potential miscompilation, consider specifying the control class using a string (BUTTON, EDIT, 
etc) instead of a number", .{}), + .hint => return, + }, + .rc_would_error_on_icon_dir => switch (self.type) { + .err, .warning => return writer.print("the resource at index {} of this {s} has the format '{s}'; this would be an error in the Win32 RC compiler", .{ self.extra.icon_dir.index, @tagName(self.extra.icon_dir.icon_type), @tagName(self.extra.icon_dir.icon_format) }), + .note => { + // The only note supported is one specific to exactly this combination + if (!(self.extra.icon_dir.icon_type == .icon and self.extra.icon_dir.icon_format == .riff)) unreachable; + try writer.print("animated RIFF icons within resource groups may not be well supported, consider using an animated icon file (.ani) instead", .{}); + }, + .hint => return, + }, + .format_not_supported_in_icon_dir => { + try writer.print("resource with format '{s}' (at index {}) is not allowed in {s} resource groups", .{ @tagName(self.extra.icon_dir.icon_format), self.extra.icon_dir.index, @tagName(self.extra.icon_dir.icon_type) }); + }, + .icon_dir_and_resource_type_mismatch => { + const unexpected_type: rc.Resource = if (self.extra.resource == .icon) .cursor else .icon; + // TODO: Better wording + try writer.print("resource type '{s}' does not match type '{s}' specified in the file", .{ self.extra.resource.nameForErrorDisplay(), unexpected_type.nameForErrorDisplay() }); + }, + .icon_read_error => { + try writer.print("unable to read {s} file '{s}': {s}", .{ @tagName(self.extra.icon_read_error.icon_type), strings[self.extra.icon_read_error.filename_string_index], @tagName(self.extra.icon_read_error.err) }); + }, + .rc_would_error_on_bitmap_version => switch (self.type) { + .err => try writer.print("the DIB at index {} of this {s} is of version '{s}'; this version is no longer allowed and should be upgraded to '{s}'", .{ + self.extra.icon_dir.index, + @tagName(self.extra.icon_dir.icon_type), + self.extra.icon_dir.bitmap_version.nameForErrorDisplay(), + 
ico.BitmapHeader.Version.@"nt3.1".nameForErrorDisplay(), + }), + .warning => try writer.print("the DIB at index {} of this {s} is of version '{s}'; this would be an error in the Win32 RC compiler", .{ + self.extra.icon_dir.index, + @tagName(self.extra.icon_dir.icon_type), + self.extra.icon_dir.bitmap_version.nameForErrorDisplay(), + }), + .note => unreachable, + .hint => return, + }, + .max_icon_ids_exhausted => switch (self.type) { + .err, .warning => try writer.print("maximum global icon/cursor ids exhausted (max is {})", .{std.math.maxInt(u16) - 1}), + .note => try writer.print("maximum icon/cursor id exceeded at index {} of this {s}", .{ self.extra.icon_dir.index, @tagName(self.extra.icon_dir.icon_type) }), + .hint => return, + }, + .bmp_read_error => { + try writer.print("invalid bitmap file '{s}': {s}", .{ strings[self.extra.bmp_read_error.filename_string_index], @tagName(self.extra.bmp_read_error.err) }); + }, + .bmp_ignored_palette_bytes => { + const bytes = strings[self.extra.number]; + const ignored_bytes = std.mem.readIntNative(u64, bytes[0..8]); + try writer.print("bitmap has {d} extra bytes preceding the pixel data which will be ignored", .{ignored_bytes}); + }, + .bmp_missing_palette_bytes => { + const bytes = strings[self.extra.number]; + const missing_bytes = std.mem.readIntNative(u64, bytes[0..8]); + try writer.print("bitmap has {d} missing color palette bytes which will be padded with zeroes", .{missing_bytes}); + }, + .rc_would_miscompile_bmp_palette_padding => { + const bytes = strings[self.extra.number]; + const miscompiled_bytes = std.mem.readIntNative(u64, bytes[0..8]); + try writer.print("the missing color palette bytes would be miscompiled by the Win32 RC compiler (the added padding bytes would include {d} bytes of the pixel data)", .{miscompiled_bytes}); + }, + .bmp_too_many_missing_palette_bytes => switch (self.type) { + .err, .warning => { + const bytes = strings[self.extra.number]; + const missing_bytes = std.mem.readIntNative(u64, 
bytes[0..8]); + const max_missing_bytes = std.mem.readIntNative(u64, bytes[8..16]); + try writer.print("bitmap has {} missing color palette bytes which exceeds the maximum of {}", .{ missing_bytes, max_missing_bytes }); + }, + // TODO: command line option + .note => try writer.writeAll("the maximum number of missing color palette bytes is configurable via <<TODO command line option>>"), + .hint => return, + }, + .resource_header_size_exceeds_max => { + try writer.print("resource's header length exceeds maximum of {} bytes", .{std.math.maxInt(u32)}); + }, + .resource_data_size_exceeds_max => switch (self.type) { + .err, .warning => return writer.print("resource's data length exceeds maximum of {} bytes", .{std.math.maxInt(u32)}), + .note => return writer.print("maximum data length exceeded here", .{}), + .hint => return, + }, + .control_extra_data_size_exceeds_max => switch (self.type) { + .err, .warning => try writer.print("control data length exceeds maximum of {} bytes", .{std.math.maxInt(u16)}), + .note => return writer.print("maximum control data length exceeded here", .{}), + .hint => return, + }, + .version_node_size_exceeds_max => switch (self.type) { + .err, .warning => return writer.print("version node tree size exceeds maximum of {} bytes", .{std.math.maxInt(u16)}), + .note => return writer.print("maximum tree size exceeded while writing this child", .{}), + .hint => return, + }, + .fontdir_size_exceeds_max => switch (self.type) { + .err, .warning => return writer.print("FONTDIR data length exceeds maximum of {} bytes", .{std.math.maxInt(u32)}), + .note => return writer.writeAll("this is likely due to the size of the combined lengths of the device/face names of all FONT resources"), + .hint => return, + }, + .number_expression_as_filename => switch (self.type) { + .err, .warning => return writer.writeAll("filename cannot be specified using a number expression, consider using a quoted string instead"), + .note => return writer.print("the Win32 RC compiler would evaluate this number 
expression as the filename '{s}'", .{strings[self.extra.number]}), + .hint => return, + }, + .control_id_already_defined => switch (self.type) { + .err, .warning => return writer.print("control with id {d} already defined for this dialog", .{self.extra.number}), + .note => return writer.print("previous definition of control with id {d} here", .{self.extra.number}), + .hint => return, + }, + .invalid_filename => { + const disallowed_codepoint = self.extra.number; + if (disallowed_codepoint < 128 and std.ascii.isPrint(@intCast(disallowed_codepoint))) { + try writer.print("evaluated filename contains a disallowed character: '{c}'", .{@as(u8, @intCast(disallowed_codepoint))}); + } else { + try writer.print("evaluated filename contains a disallowed codepoint: <U+{x:0>4}>", .{disallowed_codepoint}); + } + }, + .rc_would_error_u16_with_l_suffix => switch (self.type) { + .err, .warning => return writer.print("this {s} parameter would be an error in the Win32 RC compiler", .{@tagName(self.extra.statement_with_u16_param)}), + .note => return writer.writeAll("to avoid the error, remove any L suffixes from numbers within the parameter"), + .hint => return, + }, + .result_contains_fontdir => return, + .rc_would_miscompile_dialog_menu_id => switch (self.type) { + .err, .warning => return writer.print("the id of this menu would be miscompiled by the Win32 RC compiler", .{}), + .note => return writer.print("the Win32 RC compiler would evaluate the id as the ordinal/number value {d}", .{self.extra.number}), + .hint => return, + }, + .rc_would_miscompile_dialog_class => switch (self.type) { + .err, .warning => return writer.print("this class would be miscompiled by the Win32 RC compiler", .{}), + .note => return writer.print("the Win32 RC compiler would evaluate it as the ordinal/number value {d}", .{self.extra.number}), + .hint => return, + }, + .rc_would_miscompile_dialog_menu_or_class_id_forced_ordinal => switch (self.type) { + .err, .warning => return, + .note => return writer.print("to 
avoid the potential miscompilation, only specify one {s} per dialog resource", .{@tagName(self.extra.menu_or_class)}), + .hint => return, + }, + .rc_would_miscompile_dialog_menu_id_starts_with_digit => switch (self.type) { + .err, .warning => return, + .note => return writer.writeAll("to avoid the potential miscompilation, the first character of the id should not be a digit"), + .hint => return, + }, + .dialog_menu_id_was_uppercased => return, + .duplicate_menu_or_class_skipped => { + return writer.print("this {s} was ignored; when multiple {s} statements are specified, only the last takes precedence", .{ + @tagName(self.extra.menu_or_class), + @tagName(self.extra.menu_or_class), + }); + }, + .invalid_digit_character_in_ordinal => { + return writer.writeAll("non-ASCII digit characters are not allowed in ordinal (number) values"); + }, + .rc_would_miscompile_codepoint_byte_swap => switch (self.type) { + .err, .warning => return writer.print("codepoint U+{X} within a string literal would be miscompiled by the Win32 RC compiler (the bytes of the UTF-16 code unit would be swapped)", .{self.extra.number}), + .note => return writer.print("to avoid the potential miscompilation, an integer escape sequence in a wide string literal could be used instead: L\"\\x{X}\"", .{self.extra.number}), + .hint => return, + }, + .rc_would_miscompile_codepoint_skip => switch (self.type) { + .err, .warning => return writer.print("codepoint U+{X} within a string literal would be miscompiled by the Win32 RC compiler (the codepoint would be missing from the compiled resource)", .{self.extra.number}), + .note => return writer.print("to avoid the potential miscompilation, an integer escape sequence in a wide string literal could be used instead: L\"\\x{X}\"", .{self.extra.number}), + .hint => return, + }, + .tab_converted_to_spaces => switch (self.type) { + .err, .warning => return writer.writeAll("the tab character(s) in this string will be converted into a variable number of spaces 
(determined by the column of the tab character in the .rc file)"), + .note => return writer.writeAll("to include the tab character itself in a string, the escape sequence \\t should be used"), + .hint => return, + }, + .win32_non_ascii_ordinal => switch (self.type) { + .err, .warning => unreachable, + .note => return writer.print("the Win32 RC compiler would accept this as an ordinal but its value would be {}", .{self.extra.number}), + .hint => return, + }, + } + } + + pub const VisualTokenInfo = struct { + before_len: usize, + point_offset: usize, + after_len: usize, + }; + + pub fn visualTokenInfo(self: ErrorDetails, source_line_start: usize, source_line_end: usize) VisualTokenInfo { + // Note: A perfect solution here would involve full grapheme cluster + // awareness, but oh well. This will give incorrect offsets + // if there are any multibyte codepoints within the relevant span, + // and even more inflated for grapheme clusters. + // + // We mitigate this slightly when we know we'll be pointing at + // something that displays as 1 character. + return switch (self.err) { + // These can technically be more than 1 byte depending on encoding, + // but they always refer to one visual character/grapheme. 
.illegal_byte,
            .illegal_byte_outside_string_literals,
            .illegal_codepoint_outside_string_literals,
            .illegal_byte_order_mark,
            .illegal_private_use_character,
            => .{
                .before_len = 0,
                .point_offset = self.token.start - source_line_start,
                .after_len = 0,
            },
            else => .{
                .before_len = before: {
                    const start = @max(source_line_start, if (self.token_span_start) |span_start| span_start.start else self.token.start);
                    break :before self.token.start - start;
                },
                .point_offset = self.token.start - source_line_start,
                .after_len = after: {
                    const end = @min(source_line_end, if (self.token_span_end) |span_end| span_end.end else self.token.end);
                    if (end == self.token.start) break :after 0;
                    break :after end - self.token.start - 1;
                },
            },
        };
    }
};

/// Writes the bold `file:line:column: ` prefix used by both the main message and the
/// 'line originated from' note. When `file` is null, a dimmed placeholder is written
/// in place of the file name. Bold is left enabled on return.
fn writeLocationPrefix(writer: anytype, tty_config: std.io.tty.Config, file: ?[]const u8, line: anytype, column: anytype) !void {
    try tty_config.setColor(writer, .bold);
    if (file) |name| {
        try writer.writeAll(name);
    } else {
        try tty_config.setColor(writer, .dim);
        try writer.writeAll("<after preprocessor>");
        try tty_config.setColor(writer, .reset);
        try tty_config.setColor(writer, .bold);
    }
    try writer.print(":{d}:{d}: ", .{ line, column });
}

/// Renders a single error/warning/note to `writer`: the location prefix, the message,
/// the offending source line with a `~~~^~~~` marker underneath and, when
/// `source_mappings` is provided, a note pointing at the line(s) of the original file
/// that the line came from.
///
/// Details of type `.hint` are never rendered. `allocator` is used for temporary
/// buffers only; nothing allocated here outlives the call.
pub fn renderErrorMessage(allocator: std.mem.Allocator, writer: anytype, tty_config: std.io.tty.Config, cwd: std.fs.Dir, err_details: ErrorDetails, source: []const u8, strings: []const []const u8, source_mappings: ?SourceMappings) !void {
    if (err_details.type == .hint) return;

    const source_line_start = err_details.token.getLineStart(source);
    const column = err_details.token.calculateColumn(source, 1, source_line_start);

    const corresponding_span: ?SourceMappings.SourceSpan = if (source_mappings) |mappings| mappings.get(err_details.token.line_number) else null;
    const corresponding_file: ?[]const u8 = if (source_mappings) |mappings| mappings.files.get(corresponding_span.?.filename_offset) else null;

    const err_line = if (corresponding_span) |span| span.start_line else err_details.token.line_number;

    try writeLocationPrefix(writer, tty_config, corresponding_file, err_line, column);
    switch (err_details.type) {
        .err => {
            try tty_config.setColor(writer, .red);
            try writer.writeAll("error: ");
        },
        .warning => {
            try tty_config.setColor(writer, .yellow);
            try writer.writeAll("warning: ");
        },
        .note => {
            try tty_config.setColor(writer, .cyan);
            try writer.writeAll("note: ");
        },
        // Handled by the early return above
        .hint => unreachable,
    }
    try tty_config.setColor(writer, .reset);
    try tty_config.setColor(writer, .bold);
    try err_details.render(writer, source, strings);
    try writer.writeByte('\n');
    try tty_config.setColor(writer, .reset);

    if (!err_details.print_source_line) {
        try writer.writeByte('\n');
        return;
    }

    const source_line = err_details.token.getLine(source, source_line_start);
    const visual_info = err_details.visualTokenInfo(source_line_start, source_line_start + source_line.len);

    // Need this to determine if the 'line originated from' note is worth printing
    var source_line_for_display_buf = try std.ArrayList(u8).initCapacity(allocator, source_line.len);
    defer source_line_for_display_buf.deinit();
    try writeSourceSlice(source_line_for_display_buf.writer(), source_line);

    // TODO: General handling of long lines, not tied to this specific error
    if (err_details.err == .string_literal_too_long) {
        const before_slice = source_line[0..@min(source_line.len, visual_info.point_offset + 16)];
        try writeSourceSlice(writer, before_slice);
        try tty_config.setColor(writer, .dim);
        try writer.writeAll("<...truncated...>");
        try tty_config.setColor(writer, .reset);
    } else {
        try writer.writeAll(source_line_for_display_buf.items);
    }
    try writer.writeByte('\n');

    // The marker line: spaces up to the span, `~` before the point, `^` at the point,
    // and `~` for the remainder of the span.
    try tty_config.setColor(writer, .green);
    const num_spaces = visual_info.point_offset - visual_info.before_len;
    try writer.writeByteNTimes(' ', num_spaces);
    try writer.writeByteNTimes('~', visual_info.before_len);
    try writer.writeByte('^');
    if (visual_info.after_len > 0) {
        var num_squiggles = visual_info.after_len;
        if (err_details.err == .string_literal_too_long) {
            // The source line was truncated above, so keep the marker within it
            num_squiggles = @min(num_squiggles, 15);
        }
        try writer.writeByteNTimes('~', num_squiggles);
    }
    try writer.writeByte('\n');
    try tty_config.setColor(writer, .reset);

    if (source_mappings) |_| {
        var corresponding_lines = try CorrespondingLines.init(allocator, cwd, err_details, source_line_for_display_buf.items, corresponding_span.?, corresponding_file.?);
        defer corresponding_lines.deinit(allocator);

        if (!corresponding_lines.worth_printing_note) return;

        try writeLocationPrefix(writer, tty_config, corresponding_file, err_line, column);
        try tty_config.setColor(writer, .cyan);
        try writer.writeAll("note: ");
        try tty_config.setColor(writer, .reset);
        try tty_config.setColor(writer, .bold);
        try writer.writeAll("this line originated from line");
        if (corresponding_span.?.start_line != corresponding_span.?.end_line) {
            try writer.print("s {}-{}", .{ corresponding_span.?.start_line, corresponding_span.?.end_line });
        } else {
            try writer.print(" {}", .{corresponding_span.?.start_line});
        }
        try writer.print(" of file '{s}'\n", .{corresponding_file.?});
        try tty_config.setColor(writer, .reset);

        if (!corresponding_lines.worth_printing_lines) return;

        if (corresponding_lines.lines_is_error_message) {
            try tty_config.setColor(writer, .red);
            try writer.writeAll(" | ");
            try tty_config.setColor(writer, .reset);
            try tty_config.setColor(writer, .dim);
            try writer.writeAll(corresponding_lines.lines.items);
            try tty_config.setColor(writer, .reset);
            try writer.writeAll("\n\n");
            return;
        }

        try writer.writeAll(corresponding_lines.lines.items);
        try writer.writeAll("\n\n");
    }
}

/// The line(s) of the original (pre-preprocessing) file that a reported line came from,
/// along with flags describing how much of that is worth showing to the user.
const CorrespondingLines = struct {
    worth_printing_note: bool = true,
    worth_printing_lines: bool = true,
    /// Either the contents of the corresponding line(s), or, if `lines_is_error_message`
    /// is set, a message describing why they could not be read.
    lines: std.ArrayListUnmanaged(u8) = .{},
    lines_is_error_message: bool = false,

    /// Reads lines `corresponding_span.start_line` through `corresponding_span.end_line`
    /// of `corresponding_file` (opened relative to `cwd`) and compares them against
    /// `lines_for_comparison`. Failure to open the file or to find the lines is not an
    /// error: it is reported through `lines`/`lines_is_error_message` instead.
    ///
    /// On success the caller must call `deinit` with the same allocator.
    pub fn init(allocator: std.mem.Allocator, cwd: std.fs.Dir, err_details: ErrorDetails, lines_for_comparison: []const u8, corresponding_span: SourceMappings.SourceSpan, corresponding_file: []const u8) !CorrespondingLines {
        var corresponding_lines = CorrespondingLines{};
        // Anything appended to `lines` must be freed if we bail out with an error,
        // since the caller never gets a value to call `deinit` on in that case.
        errdefer corresponding_lines.deinit(allocator);

        // We don't do line comparison for this error, so only print the note if the
        // line number is different
        if (err_details.err == .string_literal_too_long and err_details.token.line_number == corresponding_span.start_line) {
            corresponding_lines.worth_printing_note = false;
            return corresponding_lines;
        }

        // Don't print the originating line for this error, we know it's really long
        if (err_details.err == .string_literal_too_long) {
            corresponding_lines.worth_printing_lines = false;
            return corresponding_lines;
        }

        const writer = corresponding_lines.lines.writer(allocator);
        if (utils.openFileNotDir(cwd, corresponding_file, .{})) |file| {
            defer file.close();
            var buffered_reader = std.io.bufferedReader(file.reader());
            writeLinesFromStream(writer, buffered_reader.reader(), corresponding_span.start_line, corresponding_span.end_line) catch |err| switch (err) {
                error.LinesNotFound => {
                    corresponding_lines.lines.clearRetainingCapacity();
                    try writer.print("unable to print line(s) from file: {s}", .{@errorName(err)});
                    corresponding_lines.lines_is_error_message = true;
                    return corresponding_lines;
                },
                else => |e| return e,
            };
        } else |err| {
            corresponding_lines.lines.clearRetainingCapacity();
            try writer.print("unable to print line(s) from file: {s}", .{@errorName(err)});
            corresponding_lines.lines_is_error_message = true;
            return corresponding_lines;
        }

        // If the lines are the same as they were before preprocessing, skip printing the note entirely
        if (std.mem.eql(u8, lines_for_comparison, corresponding_lines.lines.items)) {
            corresponding_lines.worth_printing_note = false;
        }
        return corresponding_lines;
    }

    pub fn deinit(self: *CorrespondingLines, allocator: std.mem.Allocator) void {
        self.lines.deinit(allocator);
    }
};

/// Writes every byte of `slice` through `writeSourceByte`.
fn writeSourceSlice(writer: anytype, slice: []const u8) !void {
    for (slice) |c| try writeSourceByte(writer, c);
}

/// Writes one byte of source text in a form suitable for display: most control
/// characters become "�", `\r` is dropped, and tab/vertical tab/form feed become a
/// single space. All other bytes are written unchanged.
inline fn writeSourceByte(writer: anytype, byte: u8) !void {
    switch (byte) {
        '\x00'...'\x08', '\x0E'...'\x1F', '\x7F' => try writer.writeAll("�"),
        // \r is seemingly ignored by the RC compiler so skipping it when printing source lines
        // could help avoid confusing output (e.g. RC\rDATA if printed verbatim would show up
        // in the console as DATA but the compiler reads it as RCDATA)
        //
        // NOTE: This is irrelevant when using the clang preprocessor, because unpaired \r
        //       characters get converted to \n, but may become relevant if another
        //       preprocessor is used instead.
        '\r' => {},
        '\t', '\x0B', '\x0C' => try writer.writeByte(' '),
        else => try writer.writeByte(byte),
    }
}

/// Copies lines `start_line` through `end_line` (1-based, inclusive) from `input` to
/// `writer`, passing each byte through `writeSourceByte`. The newline that terminates
/// `end_line` is not written.
///
/// Returns `error.LinesNotFound` if `input` ends before `end_line` is reached.
pub fn writeLinesFromStream(writer: anytype, input: anytype, start_line: usize, end_line: usize) !void {
    var line_num: usize = 1;
    while (try readByteOrEof(input)) |byte| {
        switch (byte) {
            '\n' => {
                if (line_num == end_line) return;
                if (line_num >= start_line) try writeSourceByte(writer, byte);
                line_num += 1;
            },
            else => {
                if (line_num >= start_line) try writeSourceByte(writer, byte);
            },
        }
    }
    // EOF without a trailing newline still counts as having read `end_line`
    if (line_num != end_line) {
        return error.LinesNotFound;
    }
}

/// Reads a single byte from `reader`, returning null at end of stream instead of
/// `error.EndOfStream`. All other errors are passed through.
pub fn readByteOrEof(reader: anytype) !?u8 {
    return reader.readByte() catch |err| switch (err) {
        error.EndOfStream => return null,
        else => |e| return e,
    };
}
diff --git a/src/resinator/ico.zig b/src/resinator/ico.zig
new file mode 100644
index 000000000000..205f5a0e594a
--- /dev/null
+++ b/src/resinator/ico.zig
@@ -0,0 +1,310 @@
//!
//! https://devblogs.microsoft.com/oldnewthing/20120720-00/?p=7083
//! https://learn.microsoft.com/en-us/previous-versions/ms997538(v=msdn.10)
//! https://learn.microsoft.com/en-us/windows/win32/menurc/newheader
//! https://learn.microsoft.com/en-us/windows/win32/menurc/resdir
//! https://learn.microsoft.com/en-us/windows/win32/menurc/localheader

const std = @import("std");

pub const ReadError = std.mem.Allocator.Error || error{ InvalidHeader, InvalidImageType, ImpossibleDataSize, UnexpectedEOF, ReadError };

/// Parses the icon/cursor directory from `reader`, narrowing whatever errors the
/// reader can produce down to the fixed `ReadError` set:
/// - `error.EndOfStream` becomes `error.UnexpectedEOF`
/// - any other reader-specific error becomes `error.ReadError`
///
/// See `readAnyError` for the meaning of `max_size` and of the remaining errors.
pub fn read(allocator: std.mem.Allocator, reader: anytype, max_size: u64) ReadError!IconDir {
    // Whether the reader's own error set has any members is known at compile time.
    // If it has none, a catch-all `else` prong that maps to ReadError would be an
    // 'unreachable else' compile error, so that case gets its own switch below.
    const ReaderError = @TypeOf(reader).Error;
    const reader_can_fail = @typeInfo(ReaderError).ErrorSet != null and @typeInfo(ReaderError).ErrorSet.?.len != 0;
    if (reader_can_fail) {
        return readAnyError(allocator, reader, max_size) catch |err| switch (err) {
            error.OutOfMemory,
            error.InvalidHeader,
            error.InvalidImageType,
            error.ImpossibleDataSize,
            => |known| return known,
            error.EndOfStream => error.UnexpectedEOF,
            // Everything left over comes from the `reader` itself, so it is
            // collapsed into the generic ReadError
            else => error.ReadError,
        };
    } else {
        return readAnyError(allocator, reader, max_size) catch |err| switch (err) {
            error.EndOfStream => error.UnexpectedEOF,
            else => |known| return known,
        };
    }
}

// TODO: This seems like a somewhat strange pattern, could be a better way
//       to do this. Maybe it makes more sense to handle the translation
//       at the call site instead of having a helper function here.
/// Parses the header and directory entries of an .ico/.cur file from `reader`.
/// Image data is not read; only each entry's offset and size are recorded.
///
/// `max_size` is the upper bound (typically the file size) that each entry's
/// `offset + size` is validated against.
///
/// Errors (in addition to anything `reader` itself returns):
/// - `error.InvalidHeader`: the reserved field is not zero
/// - `error.InvalidImageType`: the type field is neither icon nor cursor
/// - `error.ImpossibleDataSize`: an entry extends past `max_size` or is smaller than 16 bytes
///
/// The returned `IconDir` owns its entries; call `deinit` to free them.
pub fn readAnyError(allocator: std.mem.Allocator, reader: anytype, max_size: u64) !IconDir {
    const reserved = try reader.readIntLittle(u16);
    if (reserved != 0) {
        return error.InvalidHeader;
    }

    const image_type = reader.readEnum(ImageType, .Little) catch |err| switch (err) {
        error.InvalidValue => return error.InvalidImageType,
        else => |e| return e,
    };

    const num_images = try reader.readIntLittle(u16);

    // To avoid over-allocation in the case of a file that says it has way more
    // entries than it actually does, we use an ArrayList with a conservatively
    // limited initial capacity instead of allocating the entire slice at once.
    const initial_capacity = @min(num_images, 8);
    var entries = try std.ArrayList(Entry).initCapacity(allocator, initial_capacity);
    errdefer entries.deinit();

    var i: usize = 0;
    while (i < num_images) : (i += 1) {
        var entry: Entry = undefined;
        entry.width = try reader.readByte();
        entry.height = try reader.readByte();
        entry.num_colors = try reader.readByte();
        entry.reserved = try reader.readByte();
        switch (image_type) {
            .icon => {
                entry.type_specific_data = .{ .icon = .{
                    .color_planes = try reader.readIntLittle(u16),
                    .bits_per_pixel = try reader.readIntLittle(u16),
                } };
            },
            .cursor => {
                entry.type_specific_data = .{ .cursor = .{
                    .hotspot_x = try reader.readIntLittle(u16),
                    .hotspot_y = try reader.readIntLittle(u16),
                } };
            },
        }
        entry.data_size_in_bytes = try reader.readIntLittle(u32);
        entry.data_offset_from_start_of_file = try reader.readIntLittle(u32);
        // Validate that the offset/data size is feasible
        if (@as(u64, entry.data_offset_from_start_of_file) + entry.data_size_in_bytes > max_size) {
            return error.ImpossibleDataSize;
        }
        // and that the data size is large enough for at least the header of an image
        // Note: This avoids needing to deal with a miscompilation from the Win32 RC
        //       compiler when the data size of an image is specified as zero but there
        //       is data to-be-read at the offset. The Win32 RC compiler will output
        //       an ICON/CURSOR resource with a bogus size in its header but with no actual
        //       data bytes in it, leading to an invalid .res. Similarly, if, for example,
        //       there is valid PNG data at the image's offset, but the size is specified
        //       as fewer bytes than the PNG header, then the Win32 RC compiler will still
        //       treat it as a PNG (e.g. unconditionally set num_planes to 1) but the data
        //       of the resource will only be 1 byte so treating it as a PNG doesn't make
        //       sense (especially not when you have to read past the data size to determine
        //       that it's a PNG).
        if (entry.data_size_in_bytes < 16) {
            return error.ImpossibleDataSize;
        }
        try entries.append(entry);
    }

    return .{
        .image_type = image_type,
        .entries = try entries.toOwnedSlice(),
        .allocator = allocator,
    };
}

pub const ImageType = enum(u16) {
    icon = 1,
    cursor = 2,
};

pub const IconDir = struct {
    image_type: ImageType,
    /// Note: entries.len will always fit into a u16, since the field containing the
    /// number of images in an ico file is a u16.
    entries: []Entry,
    allocator: std.mem.Allocator,

    pub fn deinit(self: IconDir) void {
        self.allocator.free(self.entries);
    }

    pub const res_header_byte_len = 6;

    /// Number of bytes that `writeResData` writes.
    pub fn getResDataSize(self: IconDir) u32 {
        // maxInt(u16) * Entry.res_byte_len = 917,490 which is well within the u32 range.
        // Note: self.entries.len is limited to maxInt(u16)
        return @intCast(IconDir.res_header_byte_len + self.entries.len * Entry.res_byte_len);
    }

    /// Writes the directory in its .res form: a 6-byte header followed by one
    /// record per entry. Entries are given consecutive IDs starting at
    /// `first_image_id`.
    pub fn writeResData(self: IconDir, writer: anytype, first_image_id: u16) !void {
        try writer.writeIntLittle(u16, 0);
        try writer.writeIntLittle(u16, @intFromEnum(self.image_type));
        // We know that entries.len must fit into a u16
        try writer.writeIntLittle(u16, @as(u16, @intCast(self.entries.len)));

        for (self.entries, 0..) |entry, i| {
            // The ID is computed per entry rather than incremented after each write, so
            // that an overflow can only happen for an ID that is actually needed (an
            // increment after the last entry would overflow when that entry's ID is
            // maxInt(u16)).
            const image_id = first_image_id + @as(u16, @intCast(i));
            try entry.writeResData(writer, image_id);
        }
    }
};

pub const Entry = struct {
    // Icons are limited to u8 sizes, cursors can have u16,
    // so we store as u16 and truncate when needed.
    width: u16,
    height: u16,
    num_colors: u8,
    /// This should always be zero, but whatever value it is gets
    /// carried over so we need to store it
    reserved: u8,
    type_specific_data: union(ImageType) {
        icon: struct {
            color_planes: u16,
            bits_per_pixel: u16,
        },
        cursor: struct {
            hotspot_x: u16,
            hotspot_y: u16,
        },
    },
    data_size_in_bytes: u32,
    data_offset_from_start_of_file: u32,

    pub const res_byte_len = 14;

    /// Writes this entry in its .res form (`res_byte_len` bytes), ending with `id`.
    /// The layout before the ID depends on whether this is an icon or a cursor.
    pub fn writeResData(self: Entry, writer: anytype, id: u16) !void {
        switch (self.type_specific_data) {
            .icon => |icon_data| {
                try writer.writeIntLittle(u8, @as(u8, @truncate(self.width)));
                try writer.writeIntLittle(u8, @as(u8, @truncate(self.height)));
                try writer.writeIntLittle(u8, self.num_colors);
                try writer.writeIntLittle(u8, self.reserved);
                try writer.writeIntLittle(u16, icon_data.color_planes);
                try writer.writeIntLittle(u16, icon_data.bits_per_pixel);
                try writer.writeIntLittle(u32, self.data_size_in_bytes);
            },
            .cursor => |cursor_data| {
                try writer.writeIntLittle(u16, self.width);
                try writer.writeIntLittle(u16, self.height);
                try writer.writeIntLittle(u16, cursor_data.hotspot_x);
                try writer.writeIntLittle(u16, cursor_data.hotspot_y);
                try writer.writeIntLittle(u32, self.data_size_in_bytes + 4);
            },
        }
        try writer.writeIntLittle(u16, id);
    }
};

test "icon" {
    const data = "\x00\x00\x01\x00\x01\x00\x10\x10\x00\x00\x01\x00\x10\x00\x10\x00\x00\x00\x16\x00\x00\x00" ++ [_]u8{0} ** 16;
    var fbs = std.io.fixedBufferStream(data);
    const icon = try read(std.testing.allocator, fbs.reader(), data.len);
    defer icon.deinit();

    try std.testing.expectEqual(ImageType.icon, icon.image_type);
    try std.testing.expectEqual(@as(usize, 1), icon.entries.len);
}

test "icon too many images" {
    // Note that with verifying that all data sizes are within the file bounds and >= 16,
    // it's not possible to hit EOF when looking for more RESDIR structures, since they are
    // themselves 16 bytes long, so we'll always hit ImpossibleDataSize instead.
    const data = "\x00\x00\x01\x00\x02\x00\x10\x10\x00\x00\x01\x00\x10\x00\x10\x00\x00\x00\x16\x00\x00\x00" ++ [_]u8{0} ** 16;
    var fbs = std.io.fixedBufferStream(data);
    try std.testing.expectError(error.ImpossibleDataSize, read(std.testing.allocator, fbs.reader(), data.len));
}

test "icon data size past EOF" {
    const data = "\x00\x00\x01\x00\x01\x00\x10\x10\x00\x00\x01\x00\x10\x00\x10\x01\x00\x00\x16\x00\x00\x00" ++ [_]u8{0} ** 16;
    var fbs = std.io.fixedBufferStream(data);
    try std.testing.expectError(error.ImpossibleDataSize, read(std.testing.allocator, fbs.reader(), data.len));
}

test "icon data offset past EOF" {
    const data = "\x00\x00\x01\x00\x01\x00\x10\x10\x00\x00\x01\x00\x10\x00\x10\x00\x00\x00\x17\x00\x00\x00" ++ [_]u8{0} ** 16;
    var fbs = std.io.fixedBufferStream(data);
    try std.testing.expectError(error.ImpossibleDataSize, read(std.testing.allocator, fbs.reader(), data.len));
}

test "icon data size too small" {
    const data = "\x00\x00\x01\x00\x01\x00\x10\x10\x00\x00\x01\x00\x10\x00\x0F\x00\x00\x00\x16\x00\x00\x00";
    var fbs = std.io.fixedBufferStream(data);
    try std.testing.expectError(error.ImpossibleDataSize, read(std.testing.allocator, fbs.reader(), data.len));
}

pub const ImageFormat = enum {
    dib,
    png,
    riff,

    const riff_header = std.mem.readIntNative(u32, "RIFF");
    const png_signature = std.mem.readIntNative(u64, "\x89PNG\r\n\x1a\n");
    const ihdr_code = std.mem.readIntNative(u32, "IHDR");
    const acon_form_type = std.mem.readIntNative(u32, "ACON");

    /// Classifies image data by its first bytes: a `RIFF` tag means `.riff`, the PNG
    /// signature means `.png`, and anything else is assumed to be `.dib`.
    pub fn detect(header_bytes: *const [16]u8) ImageFormat {
        if (std.mem.readIntNative(u32, header_bytes[0..4]) == riff_header) return .riff;
        if (std.mem.readIntNative(u64, header_bytes[0..8]) == png_signature) return .png;
        return .dib;
    }

    /// Checks the part of the header that follows the signature: `IHDR` at bytes
    /// 12..16 for `.png`, `ACON` at bytes 8..12 for `.riff`. `.dib` is always accepted.
    pub fn validate(format: ImageFormat, header_bytes: *const [16]u8) bool {
        return switch (format) {
            .png => std.mem.readIntNative(u32, header_bytes[12..16]) == ihdr_code,
            .riff => std.mem.readIntNative(u32, header_bytes[8..12]) == acon_form_type,
            .dib => true,
        };
    }
};

/// Contains only the fields of BITMAPINFOHEADER (WinGDI.h) that are both:
/// - relevant to what we need, and
/// - are shared between all versions of BITMAPINFOHEADER (V4, V5).
pub const BitmapHeader = extern struct {
    bcSize: u32,
    bcWidth: i32,
    bcHeight: i32,
    bcPlanes: u16,
    bcBitCount: u16,

    /// The header version, as determined by the `bcSize` field.
    pub fn version(self: *const BitmapHeader) Version {
        return Version.get(self.bcSize);
    }

    /// https://en.wikipedia.org/wiki/BMP_file_format#DIB_header_(bitmap_information_header)
    pub const Version = enum {
        unknown,
        @"win2.0", // Windows 2.0 or later
        @"nt3.1", // Windows NT, 3.1x or later
        @"nt4.0", // Windows NT 4.0, 95 or later
        @"nt5.0", // Windows NT 5.0, 98 or later

        /// Maps a header size in bytes to the version with exactly that size, or
        /// `.unknown` if no version matches.
        pub fn get(header_size: u32) Version {
            return switch (header_size) {
                len(.@"win2.0") => .@"win2.0",
                len(.@"nt3.1") => .@"nt3.1",
                len(.@"nt4.0") => .@"nt4.0",
                len(.@"nt5.0") => .@"nt5.0",
                else => .unknown,
            };
        }

        /// Header size in bytes of the given version. Must not be called with `.unknown`.
        pub fn len(comptime v: Version) comptime_int {
            return switch (v) {
                .@"win2.0" => 12,
                .@"nt3.1" => 40,
                .@"nt4.0" => 108,
                .@"nt5.0" => 124,
                .unknown => unreachable,
            };
        }

        pub fn nameForErrorDisplay(v: Version) []const u8 {
            return switch (v) {
                .unknown => "unknown",
                .@"win2.0" => "Windows 2.0 (BITMAPCOREHEADER)",
                .@"nt3.1" => "Windows NT, 3.1x (BITMAPINFOHEADER)",
                .@"nt4.0" => "Windows NT 4.0, 95 (BITMAPV4HEADER)",
                .@"nt5.0" => "Windows NT 5.0, 98 (BITMAPV5HEADER)",
            };
        }
    };
};
diff --git a/src/resinator/lang.zig b/src/resinator/lang.zig
new file mode 100644
index 000000000000..d43380fa052b
--- /dev/null
+++ b/src/resinator/lang.zig
@@ -0,0 +1,877 @@
const std = @import("std");

/// This function is specific to how the Win32 RC command line interprets
/// language IDs specified as integers.
/// - Always interpreted as hexadecimal, but explicit 0x prefix is also allowed
/// - Wraps on overflow of u16
/// - Stops parsing on any invalid hexadecimal digits
/// - Errors if a digit is not the first char
/// - `-` (negative) prefix is allowed
/// - Errors if the string is empty
pub fn parseInt(str: []const u8) error{InvalidLanguageId}!u16 {
    // There is no first char to inspect in an empty string, so reject it up front
    // instead of indexing out of bounds below.
    if (str.len == 0) return error.InvalidLanguageId;

    var result: u16 = 0;
    const radix: u8 = 16;
    var buf = str;

    const Prefix = enum { none, minus };
    var prefix: Prefix = .none;
    switch (buf[0]) {
        '-' => {
            prefix = .minus;
            buf = buf[1..];
        },
        else => {},
    }

    // The 0x is only treated as a prefix if something follows it
    if (buf.len > 2 and buf[0] == '0' and buf[1] == 'x') {
        buf = buf[2..];
    }

    for (buf, 0..) |c, i| {
        const digit = switch (c) {
            // On invalid digit for the radix, just stop parsing but don't fail
            'a'...'f', 'A'...'F', '0'...'9' => std.fmt.charToDigit(c, radix) catch break,
            else => {
                // First digit must be valid
                if (i == 0) {
                    return error.InvalidLanguageId;
                }
                break;
            },
        };

        if (result != 0) {
            result *%= radix;
        }
        result +%= digit;
    }

    switch (prefix) {
        .none => {},
        // Two's complement negation, wrapping within u16
        .minus => result = 0 -% result,
    }

    return result;
}

test parseInt {
    try std.testing.expectEqual(@as(u16, 0x16), try parseInt("16"));
    try std.testing.expectEqual(@as(u16, 0x1a), try parseInt("0x1A"));
    try std.testing.expectEqual(@as(u16, 0x1a), try parseInt("0x1Azzzz"));
    try std.testing.expectEqual(@as(u16, 0xffff), try parseInt("-1"));
    try std.testing.expectEqual(@as(u16, 0xffea), try parseInt("-0x16"));
    try std.testing.expectEqual(@as(u16, 0x0), try parseInt("0o100"));
    try std.testing.expectEqual(@as(u16, 0x1), try parseInt("10001"));
    try std.testing.expectError(error.InvalidLanguageId, parseInt(""));
    try std.testing.expectError(error.InvalidLanguageId, parseInt("--1"));
    try std.testing.expectError(error.InvalidLanguageId, parseInt("0xha"));
    try std.testing.expectError(error.InvalidLanguageId, parseInt("¹"));
    try std.testing.expectError(error.InvalidLanguageId, parseInt("~1"));
}

// This function is specific to how the Win32 RC command line interprets
//
/// language tags: invalid tags are rejected, but tags that don't have
/// a specific assigned ID but are otherwise valid enough will get
/// converted to an ID of LOCALE_CUSTOM_UNSPECIFIED.
pub fn tagToInt(tag: []const u8) error{InvalidLanguageTag}!u16 {
    const id = (try tagToId(tag)) orelse return LOCALE_CUSTOM_UNSPECIFIED;
    return @intFromEnum(id);
}

/// Looks up the `LanguageId` assigned to `tag`. Matching ignores ASCII case and
/// treats `-` and `_` as equivalent.
///
/// Returns null if the tag is well-formed but has no assigned ID, and
/// `error.InvalidLanguageTag` if `parse` rejects the tag.
pub fn tagToId(tag: []const u8) error{InvalidLanguageTag}!?LanguageId {
    const parsed = try parse(tag);
    // There are currently no language tags with assigned IDs that have
    // multiple suffixes, so we can skip the lookup.
    if (parsed.multiple_suffixes) return null;
    const longest_known_tag = comptime blk: {
        var len = 0;
        for (@typeInfo(LanguageId).Enum.fields) |field| {
            if (field.name.len > len) len = field.name.len;
        }
        break :blk len;
    };
    // If the tag is longer than the longest tag that has an assigned ID,
    // then we can skip the lookup.
    if (tag.len > longest_known_tag) return null;
    var normalized_buf: [longest_known_tag]u8 = undefined;
    // To allow e.g. `de-de_phoneb` to get looked up as `de-de`, we need to
    // omit the suffix, but only if the tag contains a valid alternate sort order.
    // (the + 1 accounts for the separator before the suffix)
    const tag_to_normalize = if (parsed.isSuffixValidSortOrder()) tag[0 .. tag.len - (parsed.suffix.?.len + 1)] else tag;
    const normalized_tag = normalizeTag(tag_to_normalize, &normalized_buf);
    return std.meta.stringToEnum(LanguageId, normalized_tag) orelse {
        // special case for a tag that has been mapped to the same ID
        // twice.
        if (std.mem.eql(u8, "ff_latn_ng", normalized_tag)) {
            return LanguageId.ff_ng;
        }
        return null;
    };
}

test tagToId {
    try std.testing.expectEqual(LanguageId.ar_ae, (try tagToId("ar-ae")).?);
    try std.testing.expectEqual(LanguageId.ar_ae, (try tagToId("AR_AE")).?);
    try std.testing.expectEqual(LanguageId.ff_ng, (try tagToId("ff-ng")).?);
    // Special case
    try std.testing.expectEqual(LanguageId.ff_ng, (try tagToId("ff-Latn-NG")).?);
}

test "exhaustive tagToId" {
    inline for (@typeInfo(LanguageId).Enum.fields) |field| {
        const id = tagToId(field.name) catch |err| {
            std.debug.print("tag: {s}\n", .{field.name});
            return err;
        };
        try std.testing.expectEqual(@field(LanguageId, field.name), id orelse {
            std.debug.print("tag: {s}, got null\n", .{field.name});
            return error.TestExpectedEqual;
        });
    }
    var buf: [32]u8 = undefined;
    inline for (valid_alternate_sorts) |parsed_sort| {
        var fbs = std.io.fixedBufferStream(&buf);
        const writer = fbs.writer();
        writer.writeAll(parsed_sort.language_code) catch unreachable;
        writer.writeAll("-") catch unreachable;
        writer.writeAll(parsed_sort.country_code.?) catch unreachable;
        writer.writeAll("-") catch unreachable;
        writer.writeAll(parsed_sort.suffix.?) catch unreachable;
        const expected_field_name = comptime field: {
            var name_buf: [5]u8 = undefined;
            std.mem.copy(u8, &name_buf, parsed_sort.language_code);
            name_buf[2] = '_';
            std.mem.copy(u8, name_buf[3..], parsed_sort.country_code.?);
            break :field name_buf;
        };
        const expected = @field(LanguageId, &expected_field_name);
        const id = tagToId(fbs.getWritten()) catch |err| {
            std.debug.print("tag: {s}\n", .{fbs.getWritten()});
            return err;
        };
        try std.testing.expectEqual(expected, id orelse {
            std.debug.print("tag: {s}, expected: {}, got null\n", .{ fbs.getWritten(), expected });
            return error.TestExpectedEqual;
        });
    }
}

/// Writes `tag` into `buf` with `-` replaced by `_` and ASCII letters lowercased,
/// and returns the written portion of `buf`. `buf` must be at least as long as `tag`.
fn normalizeTag(tag: []const u8, buf: []u8) []u8 {
    std.debug.assert(buf.len >= tag.len);
    for (tag, 0..) |c, i| {
        if (c == '-')
            buf[i] = '_'
        else
            buf[i] = std.ascii.toLower(c);
    }
    return buf[0..tag.len];
}

/// https://winprotocoldoc.blob.core.windows.net/productionwindowsarchives/MS-LCID/%5bMS-LCID%5d.pdf#%5B%7B%22num%22%3A72%2C%22gen%22%3A0%7D%2C%7B%22name%22%3A%22XYZ%22%7D%2C69%2C574%2C0%5D
/// "When an LCID is requested for a locale without a
/// permanent LCID assignment, nor a temporary
/// assignment as above, the protocol will respond
/// with LOCALE_CUSTOM_UNSPECIFIED for all such
/// locales. Because this single value is used for
/// numerous possible locale names, it is impossible to
/// round trip this locale, even temporarily.
/// Applications should discard this value as soon as
/// possible and never persist it. If the system is
/// forced to respond to a request for
/// LCID_CUSTOM_UNSPECIFIED, it will fall back to
/// the current user locale. This is often incorrect but
/// may prevent an application or component from
/// failing. As the meaning of this temporary LCID is
/// unstable, it should never be used for interchange
/// or persisted data. This is a 1-to-many relationship
/// that is very unstable."
pub const LOCALE_CUSTOM_UNSPECIFIED = 0x1000;

pub const LANG_ENGLISH = 0x09;
pub const SUBLANG_ENGLISH_US = 0x01;

/// Combines a primary language ID and a sublanguage ID into a language identifier.
/// The primary language occupies the low 10 bits and the sublanguage the high 6 bits,
/// e.g. `MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US)` is `0x0409`.
///
/// https://learn.microsoft.com/en-us/windows/win32/intl/language-identifiers
pub fn MAKELANGID(primary: u10, sublang: u6) u16 {
    return (@as(u16, sublang) << 10) | primary;
}

/// Language tag format expressed as a regular expression (rough approximation):
///
/// [a-zA-Z]{1,3}([-_][a-zA-Z]{4})?([-_][a-zA-Z]{2})?([-_][a-zA-Z0-9]{1,8})?
///     lang    |     script      |      country    |       suffix
///
/// Notes:
/// - If lang code is 1 char, it seems to mean that everything afterwards uses suffix
///   parsing rules (e.g. `a-0` and `a-00000000` are allowed).
/// - There can also be any number of trailing suffix parts as long as they each
///   would be a valid suffix part, e.g. `en-us-blah-blah1-blah2-blah3` is allowed.
/// - When doing lookups, trailing suffix parts are taken into account, e.g.
///   `ca-es-valencia` is not considered equivalent to `ca-es-valencia-blah`.
/// - A suffix is only allowed if:
///   + Lang code is 1 char long, or
///   + A country code is present, or
///   + A script tag is not present and:
///     - the suffix is numeric-only and has a length of 3, or
///     - the lang is `qps` and the suffix is `ploca` or `plocm`
///
/// The slices in the returned `Parsed` point into `lang_tag`; nothing is allocated.
pub fn parse(lang_tag: []const u8) error{InvalidLanguageTag}!Parsed {
    var it = std.mem.splitAny(u8, lang_tag, "-_");
    const lang_code = it.first();
    const is_valid_lang_code = lang_code.len >= 1 and lang_code.len <= 3 and isAllAlphabetic(lang_code);
    if (!is_valid_lang_code) return error.InvalidLanguageTag;
    var parsed = Parsed{
        .language_code = lang_code,
    };
    // The second part could be a script tag, a country code, or a suffix
    if (it.next()) |part_str| {
        // The lang code being length 1 behaves strangely, so fully special case it.
        if (lang_code.len == 1) {
            // This is almost certainly not the 'right' way to do this, but I don't have a method
            // to determine how exactly these language tags are parsed, and it seems like
            // suffix parsing rules apply generally (digits allowed, length of 1 to 8).
            //
            // However, because we want to be able to lookup `x-iv-mathan` normally without
            // `multiple_suffixes` being set to true, we need to make sure to treat two-length
            // alphabetic parts as a country code.
            if (part_str.len == 2 and isAllAlphabetic(part_str)) {
                parsed.country_code = part_str;
            }
            // Everything else, though, we can just throw into the suffix as long as the normal
            // rules apply.
            else if (part_str.len > 0 and part_str.len <= 8 and isAllAlphanumeric(part_str)) {
                parsed.suffix = part_str;
            } else {
                return error.InvalidLanguageTag;
            }
        } else if (part_str.len == 4 and isAllAlphabetic(part_str)) {
            parsed.script_tag = part_str;
        } else if (part_str.len == 2 and isAllAlphabetic(part_str)) {
            parsed.country_code = part_str;
        }
        // Only a 3-len numeric suffix is allowed as the second part of a tag
        else if (part_str.len == 3 and isAllNumeric(part_str)) {
            parsed.suffix = part_str;
        }
        // Special case for qps-ploca and qps-plocm
        else if (std.ascii.eqlIgnoreCase(lang_code, "qps") and
            (std.ascii.eqlIgnoreCase(part_str, "ploca") or
            std.ascii.eqlIgnoreCase(part_str, "plocm")))
        {
            parsed.suffix = part_str;
        } else {
            return error.InvalidLanguageTag;
        }
    } else {
        // If there's no part besides a 1-len lang code, then it is malformed
        if (lang_code.len == 1) return error.InvalidLanguageTag;
        return parsed;
    }
    if (parsed.script_tag != null) {
        if (it.next()) |part_str| {
            if (part_str.len == 2 and isAllAlphabetic(part_str)) {
                parsed.country_code = part_str;
            } else {
                // Suffix is not allowed when a country code is not present.
                return error.InvalidLanguageTag;
            }
        } else {
            return parsed;
        }
    }
    // We've now parsed any potential script tag/country codes, so anything remaining
    // is a suffix
    while (it.next()) |part_str| {
        if (part_str.len == 0 or part_str.len > 8 or !isAllAlphanumeric(part_str)) {
            return error.InvalidLanguageTag;
        }
        if (parsed.suffix == null) {
            parsed.suffix = part_str;
        } else {
            // In theory we could return early here but we still want to validate
            // that each part is a valid suffix all the way to the end, e.g.
            // we should reject `en-us-suffix-a-b-c-!!!` because of the invalid `!!!`
            // suffix part.
            parsed.multiple_suffixes = true;
        }
    }
    return parsed;
}

pub const Parsed = struct {
    language_code: []const u8,
    script_tag: ?[]const u8 = null,
    country_code: ?[]const u8 = null,
    /// Can be a sort order (e.g. phoneb) or something like valencia, 001, etc
    suffix: ?[]const u8 = null,
    /// There can be any number of suffixes, but we don't need to care what their
    /// values are, we just need to know if any exist so that e.g. `ca-es-valencia-blah`
    /// can be seen as different from `ca-es-valencia`. Storing this as a bool
    /// allows us to avoid needing either (a) dynamic allocation or (b) a limit to
    /// the number of suffixes allowed when parsing.
    multiple_suffixes: bool = false,

    /// Returns true if this tag consists of exactly a language, a country and one
    /// suffix, and that combination (compared ignoring ASCII case) is listed in
    /// `valid_alternate_sorts`.
    pub fn isSuffixValidSortOrder(self: Parsed) bool {
        if (self.country_code == null) return false;
        if (self.suffix == null) return false;
        if (self.script_tag != null) return false;
        if (self.multiple_suffixes) return false;
        for (valid_alternate_sorts) |valid_sort| {
            if (std.ascii.eqlIgnoreCase(valid_sort.language_code, self.language_code) and
                std.ascii.eqlIgnoreCase(valid_sort.country_code.?, self.country_code.?) and
                std.ascii.eqlIgnoreCase(valid_sort.suffix.?, self.suffix.?))
            {
                return true;
            }
        }
        return false;
    }
};

/// https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-lcid/70feba9f-294e-491e-b6eb-56532684c37f
/// See the table following this text: "Alternate sorts can be selected by using one of the identifiers from the following table."
const valid_alternate_sorts = [_]Parsed{
    // Note: x-IV-mathan is omitted due to how lookups are implemented.
    //       This table is used to make e.g. `de-de_phoneb` get looked up
    //       as `de-de` (the suffix is omitted for the lookup), but x-iv-mathan
    //       instead needs to be looked up with the suffix included because
    //       `x-iv` is not a tag with an assigned ID.
    .{ .language_code = "de", .country_code = "de", .suffix = "phoneb" },
    .{ .language_code = "hu", .country_code = "hu", .suffix = "tchncl" },
    .{ .language_code = "ka", .country_code = "ge", .suffix = "modern" },
    .{ .language_code = "zh", .country_code = "cn", .suffix = "stroke" },
    .{ .language_code = "zh", .country_code = "sg", .suffix = "stroke" },
    .{ .language_code = "zh", .country_code = "mo", .suffix = "stroke" },
    .{ .language_code = "zh", .country_code = "tw", .suffix = "pronun" },
    .{ .language_code = "zh", .country_code = "tw", .suffix = "radstr" },
    .{ .language_code = "ja", .country_code = "jp", .suffix = "radstr" },
    .{ .language_code = "zh", .country_code = "hk", .suffix = "radstr" },
    .{ .language_code = "zh", .country_code = "mo", .suffix = "radstr" },
    .{ .language_code = "zh", .country_code = "cn", .suffix = "phoneb" },
    .{ .language_code = "zh", .country_code = "sg", .suffix = "phoneb" },
};

test "parse" {
    try std.testing.expectEqualDeep(Parsed{
        .language_code = "en",
    }, try parse("en"));
    try std.testing.expectEqualDeep(Parsed{
        .language_code = "en",
        .country_code = "us",
    }, try parse("en-us"));
    try std.testing.expectEqualDeep(Parsed{
        .language_code = "en",
        .suffix = "123",
    }, try
parse("en-123")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "en", + .suffix = "123", + .multiple_suffixes = true, + }, try parse("en-123-blah")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "en", + .country_code = "us", + .suffix = "123", + .multiple_suffixes = true, + }, try parse("en-us_123-blah")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "eng", + .script_tag = "Latn", + }, try parse("eng-Latn")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "eng", + .script_tag = "Latn", + }, try parse("eng-Latn")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "ff", + .script_tag = "Latn", + .country_code = "NG", + }, try parse("ff-Latn-NG")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "qps", + .suffix = "Plocm", + }, try parse("qps-Plocm")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "qps", + .suffix = "ploca", + }, try parse("qps-ploca")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "x", + .country_code = "IV", + .suffix = "mathan", + }, try parse("x-IV-mathan")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "a", + .suffix = "a", + }, try parse("a-a")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "a", + .suffix = "000", + }, try parse("a-000")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "a", + .suffix = "00000000", + }, try parse("a-00000000")); + // suffix not allowed if script tag is present without country code + try std.testing.expectError(error.InvalidLanguageTag, parse("eng-Latn-suffix")); + // suffix must be 3 numeric digits if neither script tag nor country code is present + try std.testing.expectError(error.InvalidLanguageTag, parse("eng-suffix")); + try std.testing.expectError(error.InvalidLanguageTag, parse("en-plocm")); + // 1-len lang code is not allowed if it's the only part + try std.testing.expectError(error.InvalidLanguageTag, parse("e")); +} + +fn isAllAlphabetic(str: 
[]const u8) bool { + for (str) |c| { + if (!std.ascii.isAlphabetic(c)) return false; + } + return true; +} + +fn isAllAlphanumeric(str: []const u8) bool { + for (str) |c| { + if (!std.ascii.isAlphanumeric(c)) return false; + } + return true; +} + +fn isAllNumeric(str: []const u8) bool { + for (str) |c| { + if (!std.ascii.isDigit(c)) return false; + } + return true; +} + +/// Derived from https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-lcid/70feba9f-294e-491e-b6eb-56532684c37f +/// - Protocol Revision: 15.0 +/// - Language / Language ID / Language Tag table in Appendix A +/// - Removed all rows that have Language ID 0x1000 (LOCALE_CUSTOM_UNSPECIFIED) +/// - Normalized each language tag (lowercased, replaced all `-` with `_`) +/// - There is one special case where two tags are mapped to the same ID, the following +/// has been omitted and must be special cased during lookup to map to the ID ff_ng / 0x0467. +/// ff_latn_ng = 0x0467, // Fulah (Latin), Nigeria +/// - x_iv_mathan has been added which is not in the table but does appear in the Alternate sorts +/// table as 0x007F (LANG_INVARIANT). 
+pub const LanguageId = enum(u16) { + // Language tag = Language ID, // Language, Location (or type) + af = 0x0036, // Afrikaans + af_za = 0x0436, // Afrikaans, South Africa + sq = 0x001C, // Albanian + sq_al = 0x041C, // Albanian, Albania + gsw = 0x0084, // Alsatian + gsw_fr = 0x0484, // Alsatian, France + am = 0x005E, // Amharic + am_et = 0x045E, // Amharic, Ethiopia + ar = 0x0001, // Arabic + ar_dz = 0x1401, // Arabic, Algeria + ar_bh = 0x3C01, // Arabic, Bahrain + ar_eg = 0x0c01, // Arabic, Egypt + ar_iq = 0x0801, // Arabic, Iraq + ar_jo = 0x2C01, // Arabic, Jordan + ar_kw = 0x3401, // Arabic, Kuwait + ar_lb = 0x3001, // Arabic, Lebanon + ar_ly = 0x1001, // Arabic, Libya + ar_ma = 0x1801, // Arabic, Morocco + ar_om = 0x2001, // Arabic, Oman + ar_qa = 0x4001, // Arabic, Qatar + ar_sa = 0x0401, // Arabic, Saudi Arabia + ar_sy = 0x2801, // Arabic, Syria + ar_tn = 0x1C01, // Arabic, Tunisia + ar_ae = 0x3801, // Arabic, U.A.E. + ar_ye = 0x2401, // Arabic, Yemen + hy = 0x002B, // Armenian + hy_am = 0x042B, // Armenian, Armenia + as = 0x004D, // Assamese + as_in = 0x044D, // Assamese, India + az_cyrl = 0x742C, // Azerbaijani (Cyrillic) + az_cyrl_az = 0x082C, // Azerbaijani (Cyrillic), Azerbaijan + az = 0x002C, // Azerbaijani (Latin) + az_latn = 0x782C, // Azerbaijani (Latin) + az_latn_az = 0x042C, // Azerbaijani (Latin), Azerbaijan + bn = 0x0045, // Bangla + bn_bd = 0x0845, // Bangla, Bangladesh + bn_in = 0x0445, // Bangla, India + ba = 0x006D, // Bashkir + ba_ru = 0x046D, // Bashkir, Russia + eu = 0x002D, // Basque + eu_es = 0x042D, // Basque, Spain + be = 0x0023, // Belarusian + be_by = 0x0423, // Belarusian, Belarus + bs_cyrl = 0x641A, // Bosnian (Cyrillic) + bs_cyrl_ba = 0x201A, // Bosnian (Cyrillic), Bosnia and Herzegovina + bs_latn = 0x681A, // Bosnian (Latin) + bs = 0x781A, // Bosnian (Latin) + bs_latn_ba = 0x141A, // Bosnian (Latin), Bosnia and Herzegovina + br = 0x007E, // Breton + br_fr = 0x047E, // Breton, France + bg = 0x0002, // Bulgarian + bg_bg = 
0x0402, // Bulgarian, Bulgaria + my = 0x0055, // Burmese + my_mm = 0x0455, // Burmese, Myanmar + ca = 0x0003, // Catalan + ca_es = 0x0403, // Catalan, Spain + tzm_arab_ma = 0x045F, // Central Atlas Tamazight (Arabic), Morocco + ku = 0x0092, // Central Kurdish + ku_arab = 0x7c92, // Central Kurdish + ku_arab_iq = 0x0492, // Central Kurdish, Iraq + chr = 0x005C, // Cherokee + chr_cher = 0x7c5C, // Cherokee + chr_cher_us = 0x045C, // Cherokee, United States + zh_hans = 0x0004, // Chinese (Simplified) + zh = 0x7804, // Chinese (Simplified) + zh_cn = 0x0804, // Chinese (Simplified), People's Republic of China + zh_sg = 0x1004, // Chinese (Simplified), Singapore + zh_hant = 0x7C04, // Chinese (Traditional) + zh_hk = 0x0C04, // Chinese (Traditional), Hong Kong S.A.R. + zh_mo = 0x1404, // Chinese (Traditional), Macao S.A.R. + zh_tw = 0x0404, // Chinese (Traditional), Taiwan + co = 0x0083, // Corsican + co_fr = 0x0483, // Corsican, France + hr = 0x001A, // Croatian + hr_hr = 0x041A, // Croatian, Croatia + hr_ba = 0x101A, // Croatian (Latin), Bosnia and Herzegovina + cs = 0x0005, // Czech + cs_cz = 0x0405, // Czech, Czech Republic + da = 0x0006, // Danish + da_dk = 0x0406, // Danish, Denmark + prs = 0x008C, // Dari + prs_af = 0x048C, // Dari, Afghanistan + dv = 0x0065, // Divehi + dv_mv = 0x0465, // Divehi, Maldives + nl = 0x0013, // Dutch + nl_be = 0x0813, // Dutch, Belgium + nl_nl = 0x0413, // Dutch, Netherlands + dz_bt = 0x0C51, // Dzongkha, Bhutan + en = 0x0009, // English + en_au = 0x0C09, // English, Australia + en_bz = 0x2809, // English, Belize + en_ca = 0x1009, // English, Canada + en_029 = 0x2409, // English, Caribbean + en_hk = 0x3C09, // English, Hong Kong + en_in = 0x4009, // English, India + en_ie = 0x1809, // English, Ireland + en_jm = 0x2009, // English, Jamaica + en_my = 0x4409, // English, Malaysia + en_nz = 0x1409, // English, New Zealand + en_ph = 0x3409, // English, Republic of the Philippines + en_sg = 0x4809, // English, Singapore + en_za = 0x1C09, // 
English, South Africa + en_tt = 0x2c09, // English, Trinidad and Tobago + en_ae = 0x4C09, // English, United Arab Emirates + en_gb = 0x0809, // English, United Kingdom + en_us = 0x0409, // English, United States + en_zw = 0x3009, // English, Zimbabwe + et = 0x0025, // Estonian + et_ee = 0x0425, // Estonian, Estonia + fo = 0x0038, // Faroese + fo_fo = 0x0438, // Faroese, Faroe Islands + fil = 0x0064, // Filipino + fil_ph = 0x0464, // Filipino, Philippines + fi = 0x000B, // Finnish + fi_fi = 0x040B, // Finnish, Finland + fr = 0x000C, // French + fr_be = 0x080C, // French, Belgium + fr_cm = 0x2c0C, // French, Cameroon + fr_ca = 0x0c0C, // French, Canada + fr_029 = 0x1C0C, // French, Caribbean + fr_cd = 0x240C, // French, Congo, DRC + fr_ci = 0x300C, // French, Côte d'Ivoire + fr_fr = 0x040C, // French, France + fr_ht = 0x3c0C, // French, Haiti + fr_lu = 0x140C, // French, Luxembourg + fr_ml = 0x340C, // French, Mali + fr_ma = 0x380C, // French, Morocco + fr_mc = 0x180C, // French, Principality of Monaco + fr_re = 0x200C, // French, Reunion + fr_sn = 0x280C, // French, Senegal + fr_ch = 0x100C, // French, Switzerland + fy = 0x0062, // Frisian + fy_nl = 0x0462, // Frisian, Netherlands + ff = 0x0067, // Fulah + ff_latn = 0x7C67, // Fulah (Latin) + ff_ng = 0x0467, // Fulah, Nigeria + ff_latn_sn = 0x0867, // Fulah, Senegal + gl = 0x0056, // Galician + gl_es = 0x0456, // Galician, Spain + ka = 0x0037, // Georgian + ka_ge = 0x0437, // Georgian, Georgia + de = 0x0007, // German + de_at = 0x0C07, // German, Austria + de_de = 0x0407, // German, Germany + de_li = 0x1407, // German, Liechtenstein + de_lu = 0x1007, // German, Luxembourg + de_ch = 0x0807, // German, Switzerland + el = 0x0008, // Greek + el_gr = 0x0408, // Greek, Greece + kl = 0x006F, // Greenlandic + kl_gl = 0x046F, // Greenlandic, Greenland + gn = 0x0074, // Guarani + gn_py = 0x0474, // Guarani, Paraguay + gu = 0x0047, // Gujarati + gu_in = 0x0447, // Gujarati, India + ha = 0x0068, // Hausa (Latin) + ha_latn = 
0x7C68, // Hausa (Latin) + ha_latn_ng = 0x0468, // Hausa (Latin), Nigeria + haw = 0x0075, // Hawaiian + haw_us = 0x0475, // Hawaiian, United States + he = 0x000D, // Hebrew + he_il = 0x040D, // Hebrew, Israel + hi = 0x0039, // Hindi + hi_in = 0x0439, // Hindi, India + hu = 0x000E, // Hungarian + hu_hu = 0x040E, // Hungarian, Hungary + is = 0x000F, // Icelandic + is_is = 0x040F, // Icelandic, Iceland + ig = 0x0070, // Igbo + ig_ng = 0x0470, // Igbo, Nigeria + id = 0x0021, // Indonesian + id_id = 0x0421, // Indonesian, Indonesia + iu = 0x005D, // Inuktitut (Latin) + iu_latn = 0x7C5D, // Inuktitut (Latin) + iu_latn_ca = 0x085D, // Inuktitut (Latin), Canada + iu_cans = 0x785D, // Inuktitut (Syllabics) + iu_cans_ca = 0x045d, // Inuktitut (Syllabics), Canada + ga = 0x003C, // Irish + ga_ie = 0x083C, // Irish, Ireland + it = 0x0010, // Italian + it_it = 0x0410, // Italian, Italy + it_ch = 0x0810, // Italian, Switzerland + ja = 0x0011, // Japanese + ja_jp = 0x0411, // Japanese, Japan + kn = 0x004B, // Kannada + kn_in = 0x044B, // Kannada, India + kr_latn_ng = 0x0471, // Kanuri (Latin), Nigeria + ks = 0x0060, // Kashmiri + ks_arab = 0x0460, // Kashmiri, Perso-Arabic + ks_deva_in = 0x0860, // Kashmiri (Devanagari), India + kk = 0x003F, // Kazakh + kk_kz = 0x043F, // Kazakh, Kazakhstan + km = 0x0053, // Khmer + km_kh = 0x0453, // Khmer, Cambodia + quc = 0x0086, // K'iche + quc_latn_gt = 0x0486, // K'iche, Guatemala + rw = 0x0087, // Kinyarwanda + rw_rw = 0x0487, // Kinyarwanda, Rwanda + sw = 0x0041, // Kiswahili + sw_ke = 0x0441, // Kiswahili, Kenya + kok = 0x0057, // Konkani + kok_in = 0x0457, // Konkani, India + ko = 0x0012, // Korean + ko_kr = 0x0412, // Korean, Korea + ky = 0x0040, // Kyrgyz + ky_kg = 0x0440, // Kyrgyz, Kyrgyzstan + lo = 0x0054, // Lao + lo_la = 0x0454, // Lao, Lao P.D.R. 
+ la_va = 0x0476, // Latin, Vatican City + lv = 0x0026, // Latvian + lv_lv = 0x0426, // Latvian, Latvia + lt = 0x0027, // Lithuanian + lt_lt = 0x0427, // Lithuanian, Lithuania + dsb = 0x7C2E, // Lower Sorbian + dsb_de = 0x082E, // Lower Sorbian, Germany + lb = 0x006E, // Luxembourgish + lb_lu = 0x046E, // Luxembourgish, Luxembourg + mk = 0x002F, // Macedonian + mk_mk = 0x042F, // Macedonian, North Macedonia + ms = 0x003E, // Malay + ms_bn = 0x083E, // Malay, Brunei Darussalam + ms_my = 0x043E, // Malay, Malaysia + ml = 0x004C, // Malayalam + ml_in = 0x044C, // Malayalam, India + mt = 0x003A, // Maltese + mt_mt = 0x043A, // Maltese, Malta + mi = 0x0081, // Maori + mi_nz = 0x0481, // Maori, New Zealand + arn = 0x007A, // Mapudungun + arn_cl = 0x047A, // Mapudungun, Chile + mr = 0x004E, // Marathi + mr_in = 0x044E, // Marathi, India + moh = 0x007C, // Mohawk + moh_ca = 0x047C, // Mohawk, Canada + mn = 0x0050, // Mongolian (Cyrillic) + mn_cyrl = 0x7850, // Mongolian (Cyrillic) + mn_mn = 0x0450, // Mongolian (Cyrillic), Mongolia + mn_mong = 0x7C50, // Mongolian (Traditional Mongolian) + mn_mong_cn = 0x0850, // Mongolian (Traditional Mongolian), People's Republic of China + mn_mong_mn = 0x0C50, // Mongolian (Traditional Mongolian), Mongolia + ne = 0x0061, // Nepali + ne_in = 0x0861, // Nepali, India + ne_np = 0x0461, // Nepali, Nepal + no = 0x0014, // Norwegian (Bokmal) + nb = 0x7C14, // Norwegian (Bokmal) + nb_no = 0x0414, // Norwegian (Bokmal), Norway + nn = 0x7814, // Norwegian (Nynorsk) + nn_no = 0x0814, // Norwegian (Nynorsk), Norway + oc = 0x0082, // Occitan + oc_fr = 0x0482, // Occitan, France + @"or" = 0x0048, // Odia + or_in = 0x0448, // Odia, India + om = 0x0072, // Oromo + om_et = 0x0472, // Oromo, Ethiopia + ps = 0x0063, // Pashto + ps_af = 0x0463, // Pashto, Afghanistan + fa = 0x0029, // Persian + fa_ir = 0x0429, // Persian, Iran + pl = 0x0015, // Polish + pl_pl = 0x0415, // Polish, Poland + pt = 0x0016, // Portuguese + pt_br = 0x0416, // Portuguese, Brazil 
+ pt_pt = 0x0816, // Portuguese, Portugal + qps_ploca = 0x05FE, // Pseudo Language, Pseudo locale for east Asian/complex script localization testing + qps_ploc = 0x0501, // Pseudo Language, Pseudo locale used for localization testing + qps_plocm = 0x09FF, // Pseudo Language, Pseudo locale used for localization testing of mirrored locales + pa = 0x0046, // Punjabi + pa_arab = 0x7C46, // Punjabi + pa_in = 0x0446, // Punjabi, India + pa_arab_pk = 0x0846, // Punjabi, Islamic Republic of Pakistan + quz = 0x006B, // Quechua + quz_bo = 0x046B, // Quechua, Bolivia + quz_ec = 0x086B, // Quechua, Ecuador + quz_pe = 0x0C6B, // Quechua, Peru + ro = 0x0018, // Romanian + ro_md = 0x0818, // Romanian, Moldova + ro_ro = 0x0418, // Romanian, Romania + rm = 0x0017, // Romansh + rm_ch = 0x0417, // Romansh, Switzerland + ru = 0x0019, // Russian + ru_md = 0x0819, // Russian, Moldova + ru_ru = 0x0419, // Russian, Russia + sah = 0x0085, // Sakha + sah_ru = 0x0485, // Sakha, Russia + smn = 0x703B, // Sami (Inari) + smn_fi = 0x243B, // Sami (Inari), Finland + smj = 0x7C3B, // Sami (Lule) + smj_no = 0x103B, // Sami (Lule), Norway + smj_se = 0x143B, // Sami (Lule), Sweden + se = 0x003B, // Sami (Northern) + se_fi = 0x0C3B, // Sami (Northern), Finland + se_no = 0x043B, // Sami (Northern), Norway + se_se = 0x083B, // Sami (Northern), Sweden + sms = 0x743B, // Sami (Skolt) + sms_fi = 0x203B, // Sami (Skolt), Finland + sma = 0x783B, // Sami (Southern) + sma_no = 0x183B, // Sami (Southern), Norway + sma_se = 0x1C3B, // Sami (Southern), Sweden + sa = 0x004F, // Sanskrit + sa_in = 0x044F, // Sanskrit, India + gd = 0x0091, // Scottish Gaelic + gd_gb = 0x0491, // Scottish Gaelic, United Kingdom + sr_cyrl = 0x6C1A, // Serbian (Cyrillic) + sr_cyrl_ba = 0x1C1A, // Serbian (Cyrillic), Bosnia and Herzegovina + sr_cyrl_me = 0x301A, // Serbian (Cyrillic), Montenegro + sr_cyrl_rs = 0x281A, // Serbian (Cyrillic), Serbia + sr_cyrl_cs = 0x0C1A, // Serbian (Cyrillic), Serbia and Montenegro (Former) + sr_latn = 
0x701A, // Serbian (Latin) + sr = 0x7C1A, // Serbian (Latin) + sr_latn_ba = 0x181A, // Serbian (Latin), Bosnia and Herzegovina + sr_latn_me = 0x2c1A, // Serbian (Latin), Montenegro + sr_latn_rs = 0x241A, // Serbian (Latin), Serbia + sr_latn_cs = 0x081A, // Serbian (Latin), Serbia and Montenegro (Former) + nso = 0x006C, // Sesotho sa Leboa + nso_za = 0x046C, // Sesotho sa Leboa, South Africa + tn = 0x0032, // Setswana + tn_bw = 0x0832, // Setswana, Botswana + tn_za = 0x0432, // Setswana, South Africa + sd = 0x0059, // Sindhi + sd_arab = 0x7C59, // Sindhi + sd_arab_pk = 0x0859, // Sindhi, Islamic Republic of Pakistan + si = 0x005B, // Sinhala + si_lk = 0x045B, // Sinhala, Sri Lanka + sk = 0x001B, // Slovak + sk_sk = 0x041B, // Slovak, Slovakia + sl = 0x0024, // Slovenian + sl_si = 0x0424, // Slovenian, Slovenia + so = 0x0077, // Somali + so_so = 0x0477, // Somali, Somalia + st = 0x0030, // Sotho + st_za = 0x0430, // Sotho, South Africa + es = 0x000A, // Spanish + es_ar = 0x2C0A, // Spanish, Argentina + es_ve = 0x200A, // Spanish, Bolivarian Republic of Venezuela + es_bo = 0x400A, // Spanish, Bolivia + es_cl = 0x340A, // Spanish, Chile + es_co = 0x240A, // Spanish, Colombia + es_cr = 0x140A, // Spanish, Costa Rica + es_cu = 0x5c0A, // Spanish, Cuba + es_do = 0x1c0A, // Spanish, Dominican Republic + es_ec = 0x300A, // Spanish, Ecuador + es_sv = 0x440A, // Spanish, El Salvador + es_gt = 0x100A, // Spanish, Guatemala + es_hn = 0x480A, // Spanish, Honduras + es_419 = 0x580A, // Spanish, Latin America + es_mx = 0x080A, // Spanish, Mexico + es_ni = 0x4C0A, // Spanish, Nicaragua + es_pa = 0x180A, // Spanish, Panama + es_py = 0x3C0A, // Spanish, Paraguay + es_pe = 0x280A, // Spanish, Peru + es_pr = 0x500A, // Spanish, Puerto Rico + es_es_tradnl = 0x040A, // Spanish, Spain + es_es = 0x0c0A, // Spanish, Spain + es_us = 0x540A, // Spanish, United States + es_uy = 0x380A, // Spanish, Uruguay + sv = 0x001D, // Swedish + sv_fi = 0x081D, // Swedish, Finland + sv_se = 0x041D, // 
Swedish, Sweden + syr = 0x005A, // Syriac + syr_sy = 0x045A, // Syriac, Syria + tg = 0x0028, // Tajik (Cyrillic) + tg_cyrl = 0x7C28, // Tajik (Cyrillic) + tg_cyrl_tj = 0x0428, // Tajik (Cyrillic), Tajikistan + tzm = 0x005F, // Tamazight (Latin) + tzm_latn = 0x7C5F, // Tamazight (Latin) + tzm_latn_dz = 0x085F, // Tamazight (Latin), Algeria + ta = 0x0049, // Tamil + ta_in = 0x0449, // Tamil, India + ta_lk = 0x0849, // Tamil, Sri Lanka + tt = 0x0044, // Tatar + tt_ru = 0x0444, // Tatar, Russia + te = 0x004A, // Telugu + te_in = 0x044A, // Telugu, India + th = 0x001E, // Thai + th_th = 0x041E, // Thai, Thailand + bo = 0x0051, // Tibetan + bo_cn = 0x0451, // Tibetan, People's Republic of China + ti = 0x0073, // Tigrinya + ti_er = 0x0873, // Tigrinya, Eritrea + ti_et = 0x0473, // Tigrinya, Ethiopia + ts = 0x0031, // Tsonga + ts_za = 0x0431, // Tsonga, South Africa + tr = 0x001F, // Turkish + tr_tr = 0x041F, // Turkish, Turkey + tk = 0x0042, // Turkmen + tk_tm = 0x0442, // Turkmen, Turkmenistan + uk = 0x0022, // Ukrainian + uk_ua = 0x0422, // Ukrainian, Ukraine + hsb = 0x002E, // Upper Sorbian + hsb_de = 0x042E, // Upper Sorbian, Germany + ur = 0x0020, // Urdu + ur_in = 0x0820, // Urdu, India + ur_pk = 0x0420, // Urdu, Islamic Republic of Pakistan + ug = 0x0080, // Uyghur + ug_cn = 0x0480, // Uyghur, People's Republic of China + uz_cyrl = 0x7843, // Uzbek (Cyrillic) + uz_cyrl_uz = 0x0843, // Uzbek (Cyrillic), Uzbekistan + uz = 0x0043, // Uzbek (Latin) + uz_latn = 0x7C43, // Uzbek (Latin) + uz_latn_uz = 0x0443, // Uzbek (Latin), Uzbekistan + ca_es_valencia = 0x0803, // Valencian, Spain + ve = 0x0033, // Venda + ve_za = 0x0433, // Venda, South Africa + vi = 0x002A, // Vietnamese + vi_vn = 0x042A, // Vietnamese, Vietnam + cy = 0x0052, // Welsh + cy_gb = 0x0452, // Welsh, United Kingdom + wo = 0x0088, // Wolof + wo_sn = 0x0488, // Wolof, Senegal + xh = 0x0034, // Xhosa + xh_za = 0x0434, // Xhosa, South Africa + ii = 0x0078, // Yi + ii_cn = 0x0478, // Yi, People's Republic of 
China + yi_001 = 0x043D, // Yiddish, World + yo = 0x006A, // Yoruba + yo_ng = 0x046A, // Yoruba, Nigeria + zu = 0x0035, // Zulu + zu_za = 0x0435, // Zulu, South Africa + + /// Special case + x_iv_mathan = 0x007F, // LANG_INVARIANT, "math alphanumeric sorting" +}; diff --git a/src/resinator/lex.zig b/src/resinator/lex.zig new file mode 100644 index 000000000000..98bb416a7be9 --- /dev/null +++ b/src/resinator/lex.zig @@ -0,0 +1,1104 @@ +//! Expects to be run after the C preprocessor and after `removeComments`. +//! This means that the lexer assumes that: +//! - Splices ('\' at the end of a line) have been handled/collapsed. +//! - Preprocessor directives and macros have been expanded (any remaining should be skipped with the exception of `#pragma code_page`). +//! - All comments have been removed. + +const std = @import("std"); +const ErrorDetails = @import("errors.zig").ErrorDetails; +const columnsUntilTabStop = @import("literals.zig").columnsUntilTabStop; +const code_pages = @import("code_pages.zig"); +const CodePage = code_pages.CodePage; +const SourceMappings = @import("source_mapping.zig").SourceMappings; +const isNonAsciiDigit = @import("utils.zig").isNonAsciiDigit; + +const dumpTokensDuringTests = false; + +pub const default_max_string_literal_codepoints = 4097; + +pub const Token = struct { + id: Id, + start: usize, + end: usize, + line_number: usize, + + pub const Id = enum { + literal, + number, + quoted_ascii_string, + quoted_wide_string, + operator, + begin, + end, + comma, + open_paren, + close_paren, + /// This Id is only used for errors, the Lexer will never return one + /// of these from a `next` call. 
+ preprocessor_command, + invalid, + eof, + + pub fn nameForErrorDisplay(self: Id) []const u8 { + return switch (self) { + .literal => "", + .number => "", + .quoted_ascii_string => "", + .quoted_wide_string => "", + .operator => "", + .begin => "<'{' or BEGIN>", + .end => "<'}' or END>", + .comma => ",", + .open_paren => "(", + .close_paren => ")", + .preprocessor_command => "", + .invalid => unreachable, + .eof => "", + }; + } + }; + + pub fn slice(self: Token, buffer: []const u8) []const u8 { + return buffer[self.start..self.end]; + } + + pub fn nameForErrorDisplay(self: Token, buffer: []const u8) []const u8 { + return switch (self.id) { + .eof => self.id.nameForErrorDisplay(), + else => self.slice(buffer), + }; + } + + pub fn calculateColumn(token: Token, source: []const u8, tab_columns: usize, maybe_line_start: ?usize) usize { + const line_start = maybe_line_start orelse token.getLineStart(source); + + var i: usize = line_start; + var column: usize = 0; + while (i < token.start) : (i += 1) { + const c = source[i]; + switch (c) { + '\t' => column += columnsUntilTabStop(column, tab_columns), + else => column += 1, + } + } + return column; + } + + // TODO: This doesn't necessarily match up with how we count line numbers, but where a line starts + // has a knock-on effect on calculateColumn. More testing is needed to determine what needs + // to be changed to make this both (1) match how line numbers are counted and (2) match how + // the Win32 RC compiler counts tab columns. 
+ // + // (the TODO in currentIndexFormsLineEndingPair should be taken into account as well) + pub fn getLineStart(token: Token, source: []const u8) usize { + const line_start = line_start: { + if (token.start != 0) { + // start checking at the byte before the token + var index = token.start - 1; + while (true) { + if (source[index] == '\n') break :line_start @min(source.len - 1, index + 1); + if (index != 0) index -= 1 else break; + } + } + break :line_start 0; + }; + return line_start; + } + + pub fn getLine(token: Token, source: []const u8, maybe_line_start: ?usize) []const u8 { + const line_start = maybe_line_start orelse token.getLineStart(source); + + var line_end = line_start + 1; + while (line_end < source.len and source[line_end] != '\n') : (line_end += 1) {} + while (line_end > 0 and source[line_end - 1] == '\r') : (line_end -= 1) {} + + return source[line_start..line_end]; + } + + pub fn isStringLiteral(token: Token) bool { + return token.id == .quoted_ascii_string or token.id == .quoted_wide_string; + } +}; + +pub const LineHandler = struct { + line_number: usize = 1, + buffer: []const u8, + last_line_ending_index: ?usize = null, + + /// Like incrementLineNumber but checks that the current char is a line ending first. + /// Returns the new line number if it was incremented, null otherwise. + pub fn maybeIncrementLineNumber(self: *LineHandler, cur_index: usize) ?usize { + const c = self.buffer[cur_index]; + if (c == '\r' or c == '\n') { + return self.incrementLineNumber(cur_index); + } + return null; + } + + /// Increments line_number appropriately (handling line ending pairs) + /// and returns the new line number if it was incremented, or null otherwise. 
+ pub fn incrementLineNumber(self: *LineHandler, cur_index: usize) ?usize { + if (self.currentIndexFormsLineEndingPair(cur_index)) { + self.last_line_ending_index = null; + return null; + } else { + self.line_number += 1; + self.last_line_ending_index = cur_index; + return self.line_number; + } + } + + /// \r\n and \n\r pairs are treated as a single line ending (but not \r\r \n\n) + /// expects self.index and last_line_ending_index (if non-null) to contain line endings + /// + /// TODO: This is not really how the Win32 RC compiler handles line endings. Instead, it + /// seems to drop all carriage returns during preprocessing and then replace all + /// remaining line endings with well-formed CRLF pairs (e.g. `abc` becomes `abc`). + /// Handling this the same as the Win32 RC compiler would need control over the preprocessor, + /// since Clang converts unpaired into unpaired . + pub fn currentIndexFormsLineEndingPair(self: *const LineHandler, cur_index: usize) bool { + if (self.last_line_ending_index == null) return false; + + // must immediately precede the current index, we know cur_index must + // be >= 1 since last_line_ending_index is non-null (so if the subtraction + // overflows it is a bug at the callsite of this function). + if (self.last_line_ending_index.? 
!= cur_index - 1) return false; + + const cur_line_ending = self.buffer[cur_index]; + const last_line_ending = self.buffer[self.last_line_ending_index.?]; + + // sanity check + std.debug.assert(cur_line_ending == '\r' or cur_line_ending == '\n'); + std.debug.assert(last_line_ending == '\r' or last_line_ending == '\n'); + + // can't be \n\n or \r\r + if (last_line_ending == cur_line_ending) return false; + + return true; + } +}; + +pub const LexError = error{ + UnfinishedStringLiteral, + StringLiteralTooLong, + InvalidNumberWithExponent, + InvalidDigitCharacterInNumberLiteral, + IllegalByte, + IllegalByteOutsideStringLiterals, + IllegalCodepointOutsideStringLiterals, + IllegalByteOrderMark, + IllegalPrivateUseCharacter, + FoundCStyleEscapedQuote, + CodePagePragmaMissingLeftParen, + CodePagePragmaMissingRightParen, + /// Can be caught and ignored + CodePagePragmaInvalidCodePage, + CodePagePragmaNotInteger, + CodePagePragmaOverflow, + CodePagePragmaUnsupportedCodePage, + /// Can be caught and ignored + CodePagePragmaInIncludedFile, +}; + +pub const Lexer = struct { + const Self = @This(); + + buffer: []const u8, + index: usize, + line_handler: LineHandler, + at_start_of_line: bool = true, + error_context_token: ?Token = null, + current_code_page: CodePage, + default_code_page: CodePage, + source_mappings: ?*SourceMappings, + max_string_literal_codepoints: u15, + /// Needed to determine whether or not the output code page should + /// be set in the parser. 
+ seen_pragma_code_pages: u2 = 0, + + pub const Error = LexError; + + pub const LexerOptions = struct { + default_code_page: CodePage = .windows1252, + source_mappings: ?*SourceMappings = null, + max_string_literal_codepoints: u15 = default_max_string_literal_codepoints, + }; + + pub fn init(buffer: []const u8, options: LexerOptions) Self { + return Self{ + .buffer = buffer, + .index = 0, + .current_code_page = options.default_code_page, + .default_code_page = options.default_code_page, + .source_mappings = options.source_mappings, + .max_string_literal_codepoints = options.max_string_literal_codepoints, + .line_handler = .{ .buffer = buffer }, + }; + } + + pub fn dump(self: *Self, token: *const Token) void { + std.debug.print("{s}:{d}: {s}\n", .{ @tagName(token.id), token.line_number, std.fmt.fmtSliceEscapeLower(token.slice(self.buffer)) }); + } + + pub const LexMethod = enum { + whitespace_delimiter_only, + normal, + normal_expect_operator, + }; + + pub fn next(self: *Self, comptime method: LexMethod) LexError!Token { + switch (method) { + .whitespace_delimiter_only => return self.nextWhitespaceDelimeterOnly(), + .normal => return self.nextNormal(), + .normal_expect_operator => return self.nextNormalWithContext(.expect_operator), + } + } + + const StateWhitespaceDelimiterOnly = enum { + start, + literal, + preprocessor, + semicolon, + }; + + pub fn nextWhitespaceDelimeterOnly(self: *Self) LexError!Token { + const start_index = self.index; + var result = Token{ + .id = .eof, + .start = start_index, + .end = undefined, + .line_number = self.line_handler.line_number, + }; + var state = StateWhitespaceDelimiterOnly.start; + + while (self.current_code_page.codepointAt(self.index, self.buffer)) |codepoint| : (self.index += codepoint.byte_len) { + const c = codepoint.value; + try self.checkForIllegalCodepoint(codepoint, false); + switch (state) { + .start => switch (c) { + '\r', '\n' => { + result.start = self.index + 1; + result.line_number = 
self.incrementLineNumber(); + }, + ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F' => { + result.start = self.index + 1; + }, + // NBSP only counts as whitespace at the start of a line (but + // can be intermixed with other whitespace). Who knows why. + '\xA0' => if (self.at_start_of_line) { + result.start = self.index + codepoint.byte_len; + } else { + state = .literal; + self.at_start_of_line = false; + }, + '#' => { + if (self.at_start_of_line) { + state = .preprocessor; + } else { + state = .literal; + } + self.at_start_of_line = false; + }, + // Semi-colon acts as a line-terminator, but in this lexing mode + // that's only true if it's at the start of a line. + ';' => { + if (self.at_start_of_line) { + state = .semicolon; + } + self.at_start_of_line = false; + }, + else => { + state = .literal; + self.at_start_of_line = false; + }, + }, + .literal => switch (c) { + '\r', '\n', ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F' => { + result.id = .literal; + break; + }, + else => {}, + }, + .preprocessor => switch (c) { + '\r', '\n' => { + try self.evaluatePreprocessorCommand(result.start, self.index); + result.start = self.index + 1; + state = .start; + result.line_number = self.incrementLineNumber(); + }, + else => {}, + }, + .semicolon => switch (c) { + '\r', '\n' => { + result.start = self.index + 1; + state = .start; + result.line_number = self.incrementLineNumber(); + }, + else => {}, + }, + } + } else { // got EOF + switch (state) { + .start, .semicolon => {}, + .literal => { + result.id = .literal; + }, + .preprocessor => { + try self.evaluatePreprocessorCommand(result.start, self.index); + result.start = self.index; + }, + } + } + + result.end = self.index; + return result; + } + + const StateNormal = enum { + start, + literal_or_quoted_wide_string, + quoted_ascii_string, + quoted_wide_string, + quoted_ascii_string_escape, + quoted_wide_string_escape, + quoted_ascii_string_maybe_end, + quoted_wide_string_maybe_end, + literal, + 
number_literal, + preprocessor, + semicolon, + // end + e, + en, + // begin + b, + be, + beg, + begi, + }; + + /// TODO: A not-terrible name + pub fn nextNormal(self: *Self) LexError!Token { + return self.nextNormalWithContext(.any); + } + + pub fn nextNormalWithContext(self: *Self, context: enum { expect_operator, any }) LexError!Token { + const start_index = self.index; + var result = Token{ + .id = .eof, + .start = start_index, + .end = undefined, + .line_number = self.line_handler.line_number, + }; + var state = StateNormal.start; + + // Note: The Windows RC compiler uses a non-standard method of computing + // length for its 'string literal too long' errors; it isn't easily + // explained or intuitive (it's sort-of pre-parsed byte length but with + // a few of exceptions/edge cases). + // + // It also behaves strangely with non-ASCII codepoints, e.g. even though the default + // limit is 4097, you can only have 4094 € codepoints (1 UTF-16 code unit each), + // and 2048 𐐷 codepoints (2 UTF-16 code units each). + // + // TODO: Understand this more, bring it more in line with how the Win32 limits work. + // Alternatively, do something that makes more sense but may be more permissive. + var string_literal_length: usize = 0; + var string_literal_collapsing_whitespace: bool = false; + var still_could_have_exponent: bool = true; + var exponent_index: ?usize = null; + while (self.current_code_page.codepointAt(self.index, self.buffer)) |codepoint| : (self.index += codepoint.byte_len) { + const c = codepoint.value; + const in_string_literal = switch (state) { + .quoted_ascii_string, + .quoted_wide_string, + .quoted_ascii_string_escape, + .quoted_wide_string_escape, + .quoted_ascii_string_maybe_end, + .quoted_wide_string_maybe_end, + => + // If the current line is not the same line as the start of the string literal, + // then we want to treat the current codepoint as 'not in a string literal' + // for the purposes of detecting illegal codepoints. 
This means that we will + // error on illegal-outside-string-literal characters that are outside string + // literals from the perspective of a C preprocessor, but that may be + // inside string literals from the perspective of the RC lexer. For example, + // "hello + // @" + // will be treated as a single string literal by the RC lexer but the Win32 + // preprocessor will consider this an unclosed string literal followed by + // the character @ and ", and will therefore error since the Win32 RC preprocessor + // errors on the @ character outside string literals. + // + // By doing this here, we can effectively emulate the Win32 RC preprocessor behavior + // at lex-time, and avoid the need for a separate step that checks for this edge-case + // specifically. + result.line_number == self.line_handler.line_number, + else => false, + }; + try self.checkForIllegalCodepoint(codepoint, in_string_literal); + switch (state) { + .start => switch (c) { + '\r', '\n' => { + result.start = self.index + 1; + result.line_number = self.incrementLineNumber(); + }, + ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F' => { + result.start = self.index + 1; + }, + // NBSP only counts as whitespace at the start of a line (but + // can be intermixed with other whitespace). Who knows why. 
+ '\xA0' => if (self.at_start_of_line) { + result.start = self.index + codepoint.byte_len; + } else { + state = .literal; + self.at_start_of_line = false; + }, + 'L', 'l' => { + state = .literal_or_quoted_wide_string; + self.at_start_of_line = false; + }, + 'E', 'e' => { + state = .e; + self.at_start_of_line = false; + }, + 'B', 'b' => { + state = .b; + self.at_start_of_line = false; + }, + '"' => { + state = .quoted_ascii_string; + self.at_start_of_line = false; + string_literal_collapsing_whitespace = false; + string_literal_length = 0; + }, + '+', '&', '|' => { + self.index += 1; + result.id = .operator; + self.at_start_of_line = false; + break; + }, + '-' => { + if (context == .expect_operator) { + self.index += 1; + result.id = .operator; + self.at_start_of_line = false; + break; + } else { + state = .number_literal; + still_could_have_exponent = true; + exponent_index = null; + self.at_start_of_line = false; + } + }, + '0'...'9', '~' => { + state = .number_literal; + still_could_have_exponent = true; + exponent_index = null; + self.at_start_of_line = false; + }, + '#' => { + if (self.at_start_of_line) { + state = .preprocessor; + } else { + state = .literal; + } + self.at_start_of_line = false; + }, + ';' => { + state = .semicolon; + self.at_start_of_line = false; + }, + '{', '}' => { + self.index += 1; + result.id = if (c == '{') .begin else .end; + self.at_start_of_line = false; + break; + }, + '(', ')' => { + self.index += 1; + result.id = if (c == '(') .open_paren else .close_paren; + self.at_start_of_line = false; + break; + }, + ',' => { + self.index += 1; + result.id = .comma; + self.at_start_of_line = false; + break; + }, + else => { + if (isNonAsciiDigit(c)) { + self.error_context_token = .{ + .id = .number, + .start = result.start, + .end = self.index + 1, + .line_number = self.line_handler.line_number, + }; + return error.InvalidDigitCharacterInNumberLiteral; + } + state = .literal; + self.at_start_of_line = false; + }, + }, + .preprocessor => 
switch (c) { + '\r', '\n' => { + try self.evaluatePreprocessorCommand(result.start, self.index); + result.start = self.index + 1; + state = .start; + result.line_number = self.incrementLineNumber(); + }, + else => {}, + }, + // Semi-colon acts as a line-terminator--everything is skipped until + // the next line. + .semicolon => switch (c) { + '\r', '\n' => { + result.start = self.index + 1; + state = .start; + result.line_number = self.incrementLineNumber(); + }, + else => {}, + }, + .number_literal => switch (c) { + // zig fmt: off + ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F', + '\r', '\n', '"', ',', '{', '}', '+', '-', '|', '&', '~', '(', ')', + '\'', ';', '=', + => { + // zig fmt: on + result.id = .number; + break; + }, + '0'...'9' => { + if (exponent_index) |exp_i| { + if (self.index - 1 == exp_i) { + // Note: This being an error is a quirk of the preprocessor used by + // the Win32 RC compiler. + self.error_context_token = .{ + .id = .number, + .start = result.start, + .end = self.index + 1, + .line_number = self.line_handler.line_number, + }; + return error.InvalidNumberWithExponent; + } + } + }, + 'e', 'E' => { + if (still_could_have_exponent) { + exponent_index = self.index; + still_could_have_exponent = false; + } + }, + else => { + if (isNonAsciiDigit(c)) { + self.error_context_token = .{ + .id = .number, + .start = result.start, + .end = self.index + 1, + .line_number = self.line_handler.line_number, + }; + return error.InvalidDigitCharacterInNumberLiteral; + } + still_could_have_exponent = false; + }, + }, + .literal_or_quoted_wide_string => switch (c) { + // zig fmt: off + ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F', + '\r', '\n', ',', '{', '}', '+', '-', '|', '&', '~', '(', ')', + '\'', ';', '=', + // zig fmt: on + => { + result.id = .literal; + break; + }, + '"' => { + state = .quoted_wide_string; + string_literal_collapsing_whitespace = false; + string_literal_length = 0; + }, + else => { + state = .literal; + }, + 
}, + .literal => switch (c) { + // zig fmt: off + ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F', + '\r', '\n', '"', ',', '{', '}', '+', '-', '|', '&', '~', '(', ')', + '\'', ';', '=', + => { + // zig fmt: on + result.id = .literal; + break; + }, + else => {}, + }, + .e => switch (c) { + 'N', 'n' => { + state = .en; + }, + else => { + state = .literal; + self.index -= 1; + }, + }, + .en => switch (c) { + 'D', 'd' => { + result.id = .end; + self.index += 1; + break; + }, + else => { + state = .literal; + self.index -= 1; + }, + }, + .b => switch (c) { + 'E', 'e' => { + state = .be; + }, + else => { + state = .literal; + self.index -= 1; + }, + }, + .be => switch (c) { + 'G', 'g' => { + state = .beg; + }, + else => { + state = .literal; + self.index -= 1; + }, + }, + .beg => switch (c) { + 'I', 'i' => { + state = .begi; + }, + else => { + state = .literal; + self.index -= 1; + }, + }, + .begi => switch (c) { + 'N', 'n' => { + result.id = .begin; + self.index += 1; + break; + }, + else => { + state = .literal; + self.index -= 1; + }, + }, + .quoted_ascii_string, .quoted_wide_string => switch (c) { + '"' => { + state = if (state == .quoted_ascii_string) .quoted_ascii_string_maybe_end else .quoted_wide_string_maybe_end; + }, + '\\' => { + state = if (state == .quoted_ascii_string) .quoted_ascii_string_escape else .quoted_wide_string_escape; + }, + '\r' => { + // \r doesn't count towards string literal length + + // Increment line number but don't affect the result token's line number + _ = self.incrementLineNumber(); + }, + '\n' => { + // first \n expands to <\n> + if (!string_literal_collapsing_whitespace) { + string_literal_length += 2; + string_literal_collapsing_whitespace = true; + } + // the rest are collapsed into the <\n> + + // Increment line number but don't affect the result token's line number + _ = self.incrementLineNumber(); + }, + // only \t, space, Vertical Tab, and Form Feed count as whitespace when collapsing + '\t', ' ', '\x0b', '\x0c' 
=> { + if (!string_literal_collapsing_whitespace) { + if (c == '\t') { + // Literal tab characters are counted as the number of space characters + // needed to reach the next 8-column tab stop. + // + // This implemention is ineffecient but hopefully it's enough of an + // edge case that it doesn't matter too much. Literal tab characters in + // string literals being replaced by a variable number of spaces depending + // on which column the tab character is located in the source .rc file seems + // like it has extremely limited use-cases, so it seems unlikely that it's used + // in real .rc files. + var dummy_token = Token{ + .start = self.index, + .end = self.index, + .line_number = self.line_handler.line_number, + .id = .invalid, + }; + dummy_token.start = self.index; + const current_column = dummy_token.calculateColumn(self.buffer, 8, null); + string_literal_length += columnsUntilTabStop(current_column, 8); + } else { + string_literal_length += 1; + } + } + }, + else => { + string_literal_collapsing_whitespace = false; + string_literal_length += 1; + }, + }, + .quoted_ascii_string_escape, .quoted_wide_string_escape => switch (c) { + '"' => { + self.error_context_token = .{ + .id = .invalid, + .start = self.index - 1, + .end = self.index + 1, + .line_number = self.line_handler.line_number, + }; + return error.FoundCStyleEscapedQuote; + }, + else => { + state = if (state == .quoted_ascii_string_escape) .quoted_ascii_string else .quoted_wide_string; + }, + }, + .quoted_ascii_string_maybe_end, .quoted_wide_string_maybe_end => switch (c) { + '"' => { + state = if (state == .quoted_ascii_string_maybe_end) .quoted_ascii_string else .quoted_wide_string; + // Escaped quotes only count as 1 char for string literal length checks, + // so we don't increment string_literal_length here. 
},
                    else => {
                        // Anything other than a second `"` means the previous `"` really
                        // did terminate the string literal.
                        result.id = if (state == .quoted_ascii_string_maybe_end) .quoted_ascii_string else .quoted_wide_string;
                        break;
                    },
                },
            }
        } else { // got EOF
            switch (state) {
                .start, .semicolon => {},
                .literal_or_quoted_wide_string, .literal, .e, .en, .b, .be, .beg, .begi => {
                    result.id = .literal;
                },
                .preprocessor => {
                    try self.evaluatePreprocessorCommand(result.start, self.index);
                    result.start = self.index;
                },
                .number_literal => {
                    result.id = .number;
                },
                .quoted_ascii_string_maybe_end, .quoted_wide_string_maybe_end => {
                    result.id = if (state == .quoted_ascii_string_maybe_end) .quoted_ascii_string else .quoted_wide_string;
                },
                .quoted_ascii_string,
                .quoted_wide_string,
                .quoted_ascii_string_escape,
                .quoted_wide_string_escape,
                => {
                    self.error_context_token = .{
                        .id = .eof,
                        .start = self.index,
                        .end = self.index,
                        .line_number = self.line_handler.line_number,
                    };
                    return LexError.UnfinishedStringLiteral;
                },
            }
        }

        // Finalize the token's span before running any checks on it: the token is
        // copied into `error_context_token` below, and it must not carry the
        // `undefined` value that `end` was initialized with.
        result.end = self.index;

        if (result.id == .quoted_ascii_string or result.id == .quoted_wide_string) {
            if (string_literal_length > self.max_string_literal_codepoints) {
                self.error_context_token = result;
                return LexError.StringLiteralTooLong;
            }
        }

        return result;
    }

    /// Increments line_number appropriately (handling line ending pairs)
    /// and returns the new line number.
    /// Also marks the lexer as being at the start of a line.
    fn incrementLineNumber(self: *Self) usize {
        _ = self.line_handler.incrementLineNumber(self.index);
        self.at_start_of_line = true;
        return self.line_handler.line_number;
    }

    /// Returns an error if `codepoint` is one that the lexer rejects, either
    /// unconditionally or only when `in_string_literal` is false.
    /// When an error is returned, `error_context_token` is set to an `.invalid`
    /// token spanning the bytes of the offending codepoint.
    fn checkForIllegalCodepoint(self: *Self, codepoint: code_pages.Codepoint, in_string_literal: bool) LexError!void {
        const err = switch (codepoint.value) {
            // 0x00 = NUL
            // 0x1A = Substitute (treated as EOF)
            // NOTE: 0x1A gets treated as EOF by the clang preprocessor so after a .rc file
            // is run through the clang preprocessor it will no longer have 0x1A characters in it.
+ // 0x7F = DEL (treated as a context-specific terminator by the Windows RC compiler) + 0x00, 0x1A, 0x7F => error.IllegalByte, + // 0x01...0x03 result in strange 'macro definition too big' errors when used outside of string literals + // 0x04 is valid but behaves strangely (sort of acts as a 'skip the next character' instruction) + 0x01...0x04 => if (!in_string_literal) error.IllegalByteOutsideStringLiterals else return, + // @ and ` both result in error RC2018: unknown character '0x60' (and subsequently + // fatal error RC1116: RC terminating after preprocessor errors) if they are ever used + // outside of string literals. Not exactly sure why this would be the case, though. + // TODO: Make sure there aren't any exceptions + '@', '`' => if (!in_string_literal) error.IllegalByteOutsideStringLiterals else return, + // The Byte Order Mark is mostly skipped over by the Windows RC compiler, but + // there are edge cases where it leads to cryptic 'compiler limit : macro definition too big' + // errors (e.g. a BOM within a number literal). By making this illegal we avoid having to + // deal with a lot of edge cases and remove the potential footgun of the bytes of a BOM + // being 'missing' when included in a string literal (the Windows RC compiler acts as + // if the codepoint was never part of the string literal). + '\u{FEFF}' => error.IllegalByteOrderMark, + // Similar deal with this private use codepoint, it gets skipped/ignored by the + // RC compiler (but without the cryptic errors). Silently dropping bytes still seems like + // enough of a footgun with no real use-cases that it's still worth erroring instead of + // emulating the RC compiler's behavior, though. + '\u{E000}' => error.IllegalPrivateUseCharacter, + // These codepoints lead to strange errors when used outside of string literals, + // and miscompilations when used within string literals. 
// We avoid the miscompilation
            // within string literals and emit a warning, but outside of string literals it makes
            // more sense to just disallow these codepoints.
            0x900, 0xA00, 0xA0D, 0x2000, 0xFFFE, 0xD00 => if (!in_string_literal) error.IllegalCodepointOutsideStringLiterals else return,
            else => return,
        };
        // Only reached for rejected codepoints: record where the problem is so that
        // `getErrorDetails` can report it.
        self.error_context_token = .{
            .id = .invalid,
            .start = self.index,
            .end = self.index + codepoint.byte_len,
            .line_number = self.line_handler.line_number,
        };
        return err;
    }

    /// Evaluates the preprocessor line `self.buffer[start..end]`.
    /// Only `#pragma code_page(...)` has any effect; every other command is
    /// silently ignored. When an error is returned, `error_context_token` is set
    /// to a `.preprocessor_command` token covering `start..end`.
    fn evaluatePreprocessorCommand(self: *Self, start: usize, end: usize) !void {
        const token = Token{
            .id = .preprocessor_command,
            .start = start,
            .end = end,
            .line_number = self.line_handler.line_number,
        };
        const full_command = self.buffer[start..end];
        // `command` is the not-yet-consumed remainder of the line; it is advanced
        // past each recognized piece below.
        var command = full_command;

        // Anything besides exactly this is ignored by the Windows RC implementation
        const expected_directive = "#pragma";
        if (!std.mem.startsWith(u8, command, expected_directive)) return;
        command = command[expected_directive.len..];

        // At least one whitespace character must follow `#pragma`.
        if (command.len == 0 or !std.ascii.isWhitespace(command[0])) return;
        while (command.len > 0 and std.ascii.isWhitespace(command[0])) {
            command = command[1..];
        }

        // Note: CoDe_PaGeZ is also treated as "code_page" by the Windows RC implementation,
        // and it will error with 'Missing left parenthesis in code_page #pragma'
        const expected_extension = "code_page";
        if (!std.ascii.startsWithIgnoreCase(command, expected_extension)) return;
        command = command[expected_extension.len..];

        while (command.len > 0 and std.ascii.isWhitespace(command[0])) {
            command = command[1..];
        }

        if (command.len == 0 or command[0] != '(') {
            self.error_context_token = token;
            return error.CodePagePragmaMissingLeftParen;
        }
        command = command[1..];

        while (command.len > 0 and std.ascii.isWhitespace(command[0])) {
            command = command[1..];
        }

        // `num_str` starts as an empty view at the current position and is grown one
        // byte at a time while `command` is advanced past the same bytes. It views
        // `self.buffer`, which is `[]const u8`, so it has to be a const slice.
        var num_str: []const u8 = command[0..0];
        while (command.len > 0 and (command[0] != ')' and
!std.ascii.isWhitespace(command[0]))) { + command = command[1..]; + num_str.len += 1; + } + + if (num_str.len == 0) { + self.error_context_token = token; + return error.CodePagePragmaNotInteger; + } + + while (command.len > 0 and std.ascii.isWhitespace(command[0])) { + command = command[1..]; + } + + if (command.len == 0 or command[0] != ')') { + self.error_context_token = token; + return error.CodePagePragmaMissingRightParen; + } + + const code_page = code_page: { + if (std.ascii.eqlIgnoreCase("DEFAULT", num_str)) { + break :code_page self.default_code_page; + } + + // The Win32 compiler behaves fairly strangely around maxInt(u32): + // - If the overflowed u32 wraps and becomes a known code page ID, then + // it will error/warn with "Codepage not valid: ignored" (depending on /w) + // - If the overflowed u32 wraps and does not become a known code page ID, + // then it will error with 'constant too big' and 'Codepage not integer' + // + // Instead of that, we just have a separate error specifically for overflow. + const num = parseCodePageNum(num_str) catch |err| switch (err) { + error.InvalidCharacter => { + self.error_context_token = token; + return error.CodePagePragmaNotInteger; + }, + error.Overflow => { + self.error_context_token = token; + return error.CodePagePragmaOverflow; + }, + }; + + // Anything that starts with 0 but does not resolve to 0 is treated as invalid, e.g. 01252 + if (num_str[0] == '0' and num != 0) { + self.error_context_token = token; + return error.CodePagePragmaInvalidCodePage; + } + // Anything that resolves to 0 is treated as 'not an integer' by the Win32 implementation. + else if (num == 0) { + self.error_context_token = token; + return error.CodePagePragmaNotInteger; + } + // Anything above u16 max is not going to be found since our CodePage enum is backed by a u16. 
+ if (num > std.math.maxInt(u16)) { + self.error_context_token = token; + return error.CodePagePragmaInvalidCodePage; + } + + break :code_page code_pages.CodePage.getByIdentifierEnsureSupported(@intCast(num)) catch |err| switch (err) { + error.InvalidCodePage => { + self.error_context_token = token; + return error.CodePagePragmaInvalidCodePage; + }, + error.UnsupportedCodePage => { + self.error_context_token = token; + return error.CodePagePragmaUnsupportedCodePage; + }, + }; + }; + + // https://learn.microsoft.com/en-us/windows/win32/menurc/pragma-directives + // > This pragma is not supported in an included resource file (.rc) + // + // Even though the Win32 behavior is to just ignore such directives silently, + // this is an error in the lexer to allow for emitting warnings/errors when + // such directives are found if that's wanted. The intention is for the lexer + // to still be able to work correctly after this error is returned. + if (self.source_mappings) |source_mappings| { + if (!source_mappings.isRootFile(token.line_number)) { + self.error_context_token = token; + return error.CodePagePragmaInIncludedFile; + } + } + + self.seen_pragma_code_pages +|= 1; + self.current_code_page = code_page; + } + + fn parseCodePageNum(str: []const u8) !u32 { + var x: u32 = 0; + for (str) |c| { + const digit = try std.fmt.charToDigit(c, 10); + if (x != 0) x = try std.math.mul(u32, x, 10); + x = try std.math.add(u32, x, digit); + } + return x; + } + + pub fn getErrorDetails(self: Self, lex_err: LexError) ErrorDetails { + const err = switch (lex_err) { + error.UnfinishedStringLiteral => ErrorDetails.Error.unfinished_string_literal, + error.StringLiteralTooLong => return .{ + .err = .string_literal_too_long, + .token = self.error_context_token.?, + .extra = .{ .number = self.max_string_literal_codepoints }, + }, + error.InvalidNumberWithExponent => ErrorDetails.Error.invalid_number_with_exponent, + error.InvalidDigitCharacterInNumberLiteral => 
ErrorDetails.Error.invalid_digit_character_in_number_literal, + error.IllegalByte => ErrorDetails.Error.illegal_byte, + error.IllegalByteOutsideStringLiterals => ErrorDetails.Error.illegal_byte_outside_string_literals, + error.IllegalCodepointOutsideStringLiterals => ErrorDetails.Error.illegal_codepoint_outside_string_literals, + error.IllegalByteOrderMark => ErrorDetails.Error.illegal_byte_order_mark, + error.IllegalPrivateUseCharacter => ErrorDetails.Error.illegal_private_use_character, + error.FoundCStyleEscapedQuote => ErrorDetails.Error.found_c_style_escaped_quote, + error.CodePagePragmaMissingLeftParen => ErrorDetails.Error.code_page_pragma_missing_left_paren, + error.CodePagePragmaMissingRightParen => ErrorDetails.Error.code_page_pragma_missing_right_paren, + error.CodePagePragmaInvalidCodePage => ErrorDetails.Error.code_page_pragma_invalid_code_page, + error.CodePagePragmaNotInteger => ErrorDetails.Error.code_page_pragma_not_integer, + error.CodePagePragmaOverflow => ErrorDetails.Error.code_page_pragma_overflow, + error.CodePagePragmaUnsupportedCodePage => ErrorDetails.Error.code_page_pragma_unsupported_code_page, + error.CodePagePragmaInIncludedFile => ErrorDetails.Error.code_page_pragma_in_included_file, + }; + return .{ + .err = err, + .token = self.error_context_token.?, + }; + } +}; + +fn testLexNormal(source: []const u8, expected_tokens: []const Token.Id) !void { + var lexer = Lexer.init(source, .{}); + if (dumpTokensDuringTests) std.debug.print("\n----------------------\n{s}\n----------------------\n", .{lexer.buffer}); + for (expected_tokens) |expected_token_id| { + const token = try lexer.nextNormal(); + if (dumpTokensDuringTests) lexer.dump(&token); + try std.testing.expectEqual(expected_token_id, token.id); + } + const last_token = try lexer.nextNormal(); + try std.testing.expectEqual(Token.Id.eof, last_token.id); +} + +fn expectLexError(expected: LexError, actual: anytype) !void { + try std.testing.expectError(expected, actual); + if 
(dumpTokensDuringTests) std.debug.print("{!}\n", .{actual}); +} + +test "normal: numbers" { + try testLexNormal("1", &.{.number}); + try testLexNormal("-1", &.{.number}); + try testLexNormal("- 1", &.{ .number, .number }); + try testLexNormal("-a", &.{.number}); +} + +test "normal: string literals" { + try testLexNormal("\"\"", &.{.quoted_ascii_string}); + // "" is an escaped " + try testLexNormal("\" \"\" \"", &.{.quoted_ascii_string}); +} + +test "superscript chars and code pages" { + const firstToken = struct { + pub fn firstToken(source: []const u8, default_code_page: CodePage, comptime lex_method: Lexer.LexMethod) LexError!Token { + var lexer = Lexer.init(source, .{ .default_code_page = default_code_page }); + return lexer.next(lex_method); + } + }.firstToken; + const utf8_source = "²"; + const windows1252_source = "\xB2"; + + const windows1252_encoded_as_windows1252 = firstToken(windows1252_source, .windows1252, .normal); + try std.testing.expectError(error.InvalidDigitCharacterInNumberLiteral, windows1252_encoded_as_windows1252); + + const utf8_encoded_as_windows1252 = try firstToken(utf8_source, .windows1252, .normal); + try std.testing.expectEqual(Token{ + .id = .literal, + .start = 0, + .end = 2, + .line_number = 1, + }, utf8_encoded_as_windows1252); + + const utf8_encoded_as_utf8 = firstToken(utf8_source, .utf8, .normal); + try std.testing.expectError(error.InvalidDigitCharacterInNumberLiteral, utf8_encoded_as_utf8); + + const windows1252_encoded_as_utf8 = try firstToken(windows1252_source, .utf8, .normal); + try std.testing.expectEqual(Token{ + .id = .literal, + .start = 0, + .end = 1, + .line_number = 1, + }, windows1252_encoded_as_utf8); +} diff --git a/src/resinator/literals.zig b/src/resinator/literals.zig new file mode 100644 index 000000000000..1d5258455bb2 --- /dev/null +++ b/src/resinator/literals.zig @@ -0,0 +1,904 @@ +const std = @import("std"); +const code_pages = @import("code_pages.zig"); +const CodePage = code_pages.CodePage; +const 
windows1252 = @import("windows1252.zig"); +const ErrorDetails = @import("errors.zig").ErrorDetails; +const DiagnosticsContext = @import("errors.zig").DiagnosticsContext; +const Token = @import("lex.zig").Token; + +/// rc is maximally liberal in terms of what it accepts as a number literal +/// for data values. As long as it starts with a number or - or ~, that's good enough. +pub fn isValidNumberDataLiteral(str: []const u8) bool { + if (str.len == 0) return false; + switch (str[0]) { + '~', '-', '0'...'9' => return true, + else => return false, + } +} + +pub const SourceBytes = struct { + slice: []const u8, + code_page: CodePage, +}; + +pub const StringType = enum { ascii, wide }; + +/// Valid escapes: +/// "" -> " +/// \a, \A => 0x08 (not 0x07 like in C) +/// \n => 0x0A +/// \r => 0x0D +/// \t, \T => 0x09 +/// \\ => \ +/// \nnn => byte with numeric value given by nnn interpreted as octal +/// (wraps on overflow, number of digits can be 1-3 for ASCII strings +/// and 1-7 for wide strings) +/// \xhh => byte with numeric value given by hh interpreted as hex +/// (number of digits can be 0-2 for ASCII strings and 0-4 for +/// wide strings) +/// \<\r+> => \ +/// \<[\r\n\t ]+> => +/// +/// Special cases: +/// <\t> => 1-8 spaces, dependent on columns in the source rc file itself +/// <\r> => +/// <\n+><\w+?\n?> => <\n> +/// +/// Special, especially weird case: +/// \"" => " +/// NOTE: This leads to footguns because the preprocessor can start parsing things +/// out-of-sync with the RC compiler, expanding macros within string literals, etc. +/// This parse function handles this case the same as the Windows RC compiler, but +/// \" within a string literal is treated as an error by the lexer, so the relevant +/// branches should never actually be hit during this function. 
+pub const IterativeStringParser = struct { + source: []const u8, + code_page: CodePage, + /// The type of the string inferred by the prefix (L"" or "") + /// This is what matters for things like the maximum digits in an + /// escape sequence, whether or not invalid escape sequences are skipped, etc. + declared_string_type: StringType, + pending_codepoint: ?u21 = null, + num_pending_spaces: u8 = 0, + index: usize = 0, + column: usize = 0, + diagnostics: ?DiagnosticsContext = null, + seen_tab: bool = false, + + const State = enum { + normal, + quote, + newline, + escaped, + escaped_cr, + escaped_newlines, + escaped_octal, + escaped_hex, + }; + + pub fn init(bytes: SourceBytes, options: StringParseOptions) IterativeStringParser { + const declared_string_type: StringType = switch (bytes.slice[0]) { + 'L', 'l' => .wide, + else => .ascii, + }; + var source = bytes.slice[1 .. bytes.slice.len - 1]; // remove "" + var column = options.start_column + 1; // for the removed " + if (declared_string_type == .wide) { + source = source[1..]; // remove L + column += 1; // for the removed L + } + return .{ + .source = source, + .code_page = bytes.code_page, + .declared_string_type = declared_string_type, + .column = column, + .diagnostics = options.diagnostics, + }; + } + + pub const ParsedCodepoint = struct { + codepoint: u21, + from_escaped_integer: bool = false, + }; + + pub fn next(self: *IterativeStringParser) std.mem.Allocator.Error!?ParsedCodepoint { + const result = try self.nextUnchecked(); + if (self.diagnostics != null and result != null and !result.?.from_escaped_integer) { + switch (result.?.codepoint) { + 0x900, 0xA00, 0xA0D, 0x2000, 0xFFFE, 0xD00 => { + const err: ErrorDetails.Error = if (result.?.codepoint == 0xD00) + .rc_would_miscompile_codepoint_skip + else + .rc_would_miscompile_codepoint_byte_swap; + try self.diagnostics.?.diagnostics.append(ErrorDetails{ + .err = err, + .type = .warning, + .token = self.diagnostics.?.token, + .extra = .{ .number = 
result.?.codepoint }, + }); + try self.diagnostics.?.diagnostics.append(ErrorDetails{ + .err = err, + .type = .note, + .token = self.diagnostics.?.token, + .print_source_line = false, + .extra = .{ .number = result.?.codepoint }, + }); + }, + else => {}, + } + } + return result; + } + + pub fn nextUnchecked(self: *IterativeStringParser) std.mem.Allocator.Error!?ParsedCodepoint { + if (self.num_pending_spaces > 0) { + // Ensure that we don't get into this predicament so we can ensure that + // the order of processing any pending stuff doesn't matter + std.debug.assert(self.pending_codepoint == null); + self.num_pending_spaces -= 1; + return .{ .codepoint = ' ' }; + } + if (self.pending_codepoint) |pending_codepoint| { + self.pending_codepoint = null; + return .{ .codepoint = pending_codepoint }; + } + if (self.index >= self.source.len) return null; + + var state: State = .normal; + var string_escape_n: u16 = 0; + var string_escape_i: u8 = 0; + const max_octal_escape_digits: u8 = switch (self.declared_string_type) { + .ascii => 3, + .wide => 7, + }; + const max_hex_escape_digits: u8 = switch (self.declared_string_type) { + .ascii => 2, + .wide => 4, + }; + + while (self.code_page.codepointAt(self.index, self.source)) |codepoint| : (self.index += codepoint.byte_len) { + const c = codepoint.value; + var backtrack = false; + defer { + if (backtrack) { + self.index -= codepoint.byte_len; + } else { + if (c == '\t') { + self.column += columnsUntilTabStop(self.column, 8); + } else { + self.column += codepoint.byte_len; + } + } + } + switch (state) { + .normal => switch (c) { + '\\' => state = .escaped, + '"' => state = .quote, + '\r' => {}, + '\n' => state = .newline, + '\t' => { + // Only warn about a tab getting converted to spaces once per string + if (self.diagnostics != null and !self.seen_tab) { + try self.diagnostics.?.diagnostics.append(ErrorDetails{ + .err = .tab_converted_to_spaces, + .type = .warning, + .token = self.diagnostics.?.token, + }); + try 
self.diagnostics.?.diagnostics.append(ErrorDetails{ + .err = .tab_converted_to_spaces, + .type = .note, + .token = self.diagnostics.?.token, + .print_source_line = false, + }); + self.seen_tab = true; + } + const cols = columnsUntilTabStop(self.column, 8); + self.num_pending_spaces = @intCast(cols - 1); + self.index += codepoint.byte_len; + return .{ .codepoint = ' ' }; + }, + else => { + self.index += codepoint.byte_len; + return .{ .codepoint = c }; + }, + }, + .quote => switch (c) { + '"' => { + // "" => " + self.index += codepoint.byte_len; + return .{ .codepoint = '"' }; + }, + else => unreachable, // this is a bug in the lexer + }, + .newline => switch (c) { + '\r', ' ', '\t', '\n', '\x0b', '\x0c', '\xa0' => {}, + else => { + // backtrack so that we handle the current char properly + backtrack = true; + // + self.index += codepoint.byte_len; + self.pending_codepoint = '\n'; + return .{ .codepoint = ' ' }; + }, + }, + .escaped => switch (c) { + '\r' => state = .escaped_cr, + '\n' => state = .escaped_newlines, + '0'...'7' => { + string_escape_n = std.fmt.charToDigit(@intCast(c), 8) catch unreachable; + string_escape_i = 1; + state = .escaped_octal; + }, + 'x', 'X' => { + string_escape_n = 0; + string_escape_i = 0; + state = .escaped_hex; + }, + else => { + switch (c) { + 'a', 'A' => { + self.index += codepoint.byte_len; + return .{ .codepoint = '\x08' }; + }, // might be a bug in RC, but matches its behavior + 'n' => { + self.index += codepoint.byte_len; + return .{ .codepoint = '\n' }; + }, + 'r' => { + self.index += codepoint.byte_len; + return .{ .codepoint = '\r' }; + }, + 't', 'T' => { + self.index += codepoint.byte_len; + return .{ .codepoint = '\t' }; + }, + '\\' => { + self.index += codepoint.byte_len; + return .{ .codepoint = '\\' }; + }, + '"' => { + // \" is a special case that doesn't get the \ included, + backtrack = true; + }, + else => switch (self.declared_string_type) { + .wide => {}, // invalid escape sequences are skipped in wide strings + 
.ascii => { + // backtrack so that we handle the current char properly + backtrack = true; + self.index += codepoint.byte_len; + return .{ .codepoint = '\\' }; + }, + }, + } + state = .normal; + }, + }, + .escaped_cr => switch (c) { + '\r' => {}, + '\n' => state = .escaped_newlines, + else => { + // backtrack so that we handle the current char properly + backtrack = true; + self.index += codepoint.byte_len; + return .{ .codepoint = '\\' }; + }, + }, + .escaped_newlines => switch (c) { + '\r', '\n', '\t', ' ', '\x0b', '\x0c', '\xa0' => {}, + else => { + // backtrack so that we handle the current char properly + backtrack = true; + state = .normal; + }, + }, + .escaped_octal => switch (c) { + '0'...'7' => { + string_escape_n *%= 8; + string_escape_n +%= std.fmt.charToDigit(@intCast(c), 8) catch unreachable; + string_escape_i += 1; + if (string_escape_i == max_octal_escape_digits) { + const escaped_value = switch (self.declared_string_type) { + .ascii => @as(u8, @truncate(string_escape_n)), + .wide => string_escape_n, + }; + self.index += codepoint.byte_len; + return .{ .codepoint = escaped_value, .from_escaped_integer = true }; + } + }, + else => { + // backtrack so that we handle the current char properly + backtrack = true; + // write out whatever byte we have parsed so far + const escaped_value = switch (self.declared_string_type) { + .ascii => @as(u8, @truncate(string_escape_n)), + .wide => string_escape_n, + }; + self.index += codepoint.byte_len; + return .{ .codepoint = escaped_value, .from_escaped_integer = true }; + }, + }, + .escaped_hex => switch (c) { + '0'...'9', 'a'...'f', 'A'...'F' => { + string_escape_n *= 16; + string_escape_n += std.fmt.charToDigit(@intCast(c), 16) catch unreachable; + string_escape_i += 1; + if (string_escape_i == max_hex_escape_digits) { + const escaped_value = switch (self.declared_string_type) { + .ascii => @as(u8, @truncate(string_escape_n)), + .wide => string_escape_n, + }; + self.index += codepoint.byte_len; + return .{ 
.codepoint = escaped_value, .from_escaped_integer = true }; + } + }, + else => { + // backtrack so that we handle the current char properly + backtrack = true; + // write out whatever byte we have parsed so far + // (even with 0 actual digits, \x alone parses to 0) + const escaped_value = switch (self.declared_string_type) { + .ascii => @as(u8, @truncate(string_escape_n)), + .wide => string_escape_n, + }; + self.index += codepoint.byte_len; + return .{ .codepoint = escaped_value, .from_escaped_integer = true }; + }, + }, + } + } + + switch (state) { + .normal, .escaped_newlines => {}, + .newline => { + // source ended inside a newline run: emit ' ' now and queue '\n' for the next call + self.pending_codepoint = '\n'; + return .{ .codepoint = ' ' }; + }, + .escaped, .escaped_cr => return .{ .codepoint = '\\' }, + .escaped_octal, .escaped_hex => { + const escaped_value = switch (self.declared_string_type) { + .ascii => @as(u8, @truncate(string_escape_n)), + .wide => string_escape_n, + }; + return .{ .codepoint = escaped_value, .from_escaped_integer = true }; + }, + .quote => unreachable, // this is a bug in the lexer + } + + return null; + } +}; + +pub const StringParseOptions = struct { + start_column: usize = 0, + diagnostics: ?DiagnosticsContext = null, + output_code_page: CodePage = .windows1252, +}; + +pub fn parseQuotedString( + comptime literal_type: StringType, // selects the output element type: .ascii => u8, .wide => u16 + allocator: std.mem.Allocator, // allocates the returned slice; the caller owns it + bytes: SourceBytes, // the literal including its quotes (and L prefix, if any) + options: StringParseOptions, +) !(switch (literal_type) { + .ascii => []u8, + .wide => [:0]u16, +}) { + const T = if (literal_type == .ascii) u8 else u16; + std.debug.assert(bytes.slice.len >= 2); // must at least have 2 double quote chars + + var buf = try std.ArrayList(T).initCapacity(allocator, bytes.slice.len); + errdefer buf.deinit(); + + var iterative_parser = IterativeStringParser.init(bytes, options); + + while (try iterative_parser.next()) |parsed| { + const c = parsed.codepoint; + if (parsed.from_escaped_integer) { + try buf.append(std.mem.nativeToLittle(T, @truncate(c))); // a wide-declared escape (up to 0xFFFF) can exceed u8 when literal_type is .ascii, so truncate instead of @intCast; stored little-endian like the other wide appends. NOTE(review): rc.exe parity for the truncated case is unverified + } else { + switch (literal_type) { + .ascii => switch
(options.output_code_page) { + .windows1252 => { + if (windows1252.bestFitFromCodepoint(c)) |best_fit| { + try buf.append(best_fit); + } else if (c < 0x10000 or c == code_pages.Codepoint.invalid) { + try buf.append('?'); + } else { + try buf.appendSlice("??"); + } + }, + .utf8 => { + var codepoint_to_encode = c; + if (c == code_pages.Codepoint.invalid) { + codepoint_to_encode = '�'; + } + var utf8_buf: [4]u8 = undefined; + const utf8_len = std.unicode.utf8Encode(codepoint_to_encode, &utf8_buf) catch unreachable; + try buf.appendSlice(utf8_buf[0..utf8_len]); + }, + else => unreachable, // Unsupported code page + }, + .wide => { + if (c == code_pages.Codepoint.invalid) { + try buf.append(std.mem.nativeToLittle(u16, '�')); + } else if (c < 0x10000) { + const short: u16 = @intCast(c); + try buf.append(std.mem.nativeToLittle(u16, short)); + } else { + const high = @as(u16, @intCast((c - 0x10000) >> 10)) + 0xD800; + try buf.append(std.mem.nativeToLittle(u16, high)); + const low = @as(u16, @intCast(c & 0x3FF)) + 0xDC00; + try buf.append(std.mem.nativeToLittle(u16, low)); + } + }, + } + } + } + + if (literal_type == .wide) { + return buf.toOwnedSliceSentinel(0); + } else { + return buf.toOwnedSlice(); + } +} + +pub fn parseQuotedAsciiString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![]u8 { + std.debug.assert(bytes.slice.len >= 2); // "" + return parseQuotedString(.ascii, allocator, bytes, options); +} + +pub fn parseQuotedWideString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![:0]u16 { + std.debug.assert(bytes.slice.len >= 3); // L"" + return parseQuotedString(.wide, allocator, bytes, options); +} + +pub fn parseQuotedStringAsWideString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![:0]u16 { + std.debug.assert(bytes.slice.len >= 2); // "" + return parseQuotedString(.wide, allocator, bytes, options); +} + +pub fn parseQuotedStringAsAsciiString(allocator: 
std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![]u8 { + std.debug.assert(bytes.slice.len >= 2); // "" + return parseQuotedString(.ascii, allocator, bytes, options); +} + +test "parse quoted ascii string" { + var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + try std.testing.expectEqualSlices(u8, "hello", try parseQuotedAsciiString(arena, .{ + .slice = + \\"hello" + , + .code_page = .windows1252, + }, .{})); + // hex with 0 digits + try std.testing.expectEqualSlices(u8, "\x00", try parseQuotedAsciiString(arena, .{ + .slice = + \\"\x" + , + .code_page = .windows1252, + }, .{})); + // hex max of 2 digits + try std.testing.expectEqualSlices(u8, "\xFFf", try parseQuotedAsciiString(arena, .{ + .slice = + \\"\XfFf" + , + .code_page = .windows1252, + }, .{})); + // octal with invalid octal digit + try std.testing.expectEqualSlices(u8, "\x019", try parseQuotedAsciiString(arena, .{ + .slice = + \\"\19" + , + .code_page = .windows1252, + }, .{})); + // escaped quotes + try std.testing.expectEqualSlices(u8, " \" ", try parseQuotedAsciiString(arena, .{ + .slice = + \\" "" " + , + .code_page = .windows1252, + }, .{})); + // backslash right before escaped quotes + try std.testing.expectEqualSlices(u8, "\"", try parseQuotedAsciiString(arena, .{ + .slice = + \\"\""" + , + .code_page = .windows1252, + }, .{})); + // octal overflow + try std.testing.expectEqualSlices(u8, "\x01", try parseQuotedAsciiString(arena, .{ + .slice = + \\"\401" + , + .code_page = .windows1252, + }, .{})); + // escapes + try std.testing.expectEqualSlices(u8, "\x08\n\r\t\\", try parseQuotedAsciiString(arena, .{ + .slice = + \\"\a\n\r\t\\" + , + .code_page = .windows1252, + }, .{})); + // uppercase escapes + try std.testing.expectEqualSlices(u8, "\x08\\N\\R\t\\", try parseQuotedAsciiString(arena, .{ + .slice = + \\"\A\N\R\T\\" + , + .code_page = .windows1252, + }, .{})); + // 
backslash on its own + try std.testing.expectEqualSlices(u8, "\\", try parseQuotedAsciiString(arena, .{ + .slice = + \\"\" + , + .code_page = .windows1252, + }, .{})); + // unrecognized escapes + try std.testing.expectEqualSlices(u8, "\\b", try parseQuotedAsciiString(arena, .{ + .slice = + \\"\b" + , + .code_page = .windows1252, + }, .{})); + // escaped carriage returns + try std.testing.expectEqualSlices(u8, "\\", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\\\r\r\r\r\r\"", .code_page = .windows1252 }, + .{}, + )); + // escaped newlines + try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\\\n\n\n\n\n\"", .code_page = .windows1252 }, + .{}, + )); + // escaped CRLF pairs + try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\\\r\n\r\n\r\n\r\n\r\n\"", .code_page = .windows1252 }, + .{}, + )); + // escaped newlines with other whitespace + try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\\\n \t\r\n \r\t\n \t\"", .code_page = .windows1252 }, + .{}, + )); + // literal tab characters get converted to spaces (dependent on source file columns) + try std.testing.expectEqualSlices(u8, " ", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\t\"", .code_page = .windows1252 }, + .{}, + )); + try std.testing.expectEqualSlices(u8, "abc ", try parseQuotedAsciiString( + arena, + .{ .slice = "\"abc\t\"", .code_page = .windows1252 }, + .{}, + )); + try std.testing.expectEqualSlices(u8, "abcdefg ", try parseQuotedAsciiString( + arena, + .{ .slice = "\"abcdefg\t\"", .code_page = .windows1252 }, + .{}, + )); + try std.testing.expectEqualSlices(u8, "\\ ", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\\\t\"", .code_page = .windows1252 }, + .{}, + )); + // literal CR's get dropped + try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\r\r\r\r\r\"", .code_page = .windows1252 }, + .{}, + )); 
+ // contiguous newlines and whitespace get collapsed to + try std.testing.expectEqualSlices(u8, " \n", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\n\r\r \r\n \t \"", .code_page = .windows1252 }, + .{}, + )); +} + +test "parse quoted ascii string with utf8 code page" { + var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\"", .code_page = .utf8 }, + .{}, + )); + // Codepoints that don't have a Windows-1252 representation get converted to ? + try std.testing.expectEqualSlices(u8, "?????????", try parseQuotedAsciiString( + arena, + .{ .slice = "\"кириллица\"", .code_page = .utf8 }, + .{}, + )); + // Codepoints that have a best fit mapping get converted accordingly, + // these are box drawing codepoints + try std.testing.expectEqualSlices(u8, "\x2b\x2d\x2b", try parseQuotedAsciiString( + arena, + .{ .slice = "\"┌─┐\"", .code_page = .utf8 }, + .{}, + )); + // Invalid UTF-8 gets converted to ? depending on well-formedness + try std.testing.expectEqualSlices(u8, "????", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 }, + .{}, + )); + // Codepoints that would require a UTF-16 surrogate pair get converted to ?? + try std.testing.expectEqualSlices(u8, "??", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\xF2\xAF\xBA\xB4\"", .code_page = .utf8 }, + .{}, + )); + + // Output code page changes how invalid UTF-8 gets converted, since it + // now encodes the result as UTF-8 so it can write replacement characters. 
+ try std.testing.expectEqualSlices(u8, "����", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 }, + .{ .output_code_page = .utf8 }, + )); + try std.testing.expectEqualSlices(u8, "\xF2\xAF\xBA\xB4", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\xF2\xAF\xBA\xB4\"", .code_page = .utf8 }, + .{ .output_code_page = .utf8 }, + )); +} + +test "parse quoted wide string" { + var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ 'h', 'e', 'l', 'l', 'o' }, try parseQuotedWideString(arena, .{ + .slice = + \\L"hello" + , + .code_page = .windows1252, + }, .{})); + // hex with 0 digits + try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{0x0}, try parseQuotedWideString(arena, .{ + .slice = + \\L"\x" + , + .code_page = .windows1252, + }, .{})); + // hex max of 4 digits + try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ 0xFFFF, 'f' }, try parseQuotedWideString(arena, .{ + .slice = + \\L"\XfFfFf" + , + .code_page = .windows1252, + }, .{})); + // octal max of 7 digits + try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ 0x9493, '3', '3' }, try parseQuotedWideString(arena, .{ + .slice = + \\L"\111222333" + , + .code_page = .windows1252, + }, .{})); + // octal overflow + try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{0xFF01}, try parseQuotedWideString(arena, .{ + .slice = + \\L"\777401" + , + .code_page = .windows1252, + }, .{})); + // literal tab characters get converted to spaces (dependent on source file columns) + try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("abcdefg "), try parseQuotedWideString( + arena, + .{ .slice = "L\"abcdefg\t\"", .code_page = .windows1252 }, + .{}, + )); + // Windows-1252 conversion + try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("ðð€€€"), try 
parseQuotedWideString( + arena, + .{ .slice = "L\"\xf0\xf0\x80\x80\x80\"", .code_page = .windows1252 }, + .{}, + )); + // Invalid escape sequences are skipped + try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral(""), try parseQuotedWideString( + arena, + .{ .slice = "L\"\\H\"", .code_page = .windows1252 }, + .{}, + )); +} + +test "parse quoted wide string with utf8 code page" { + var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{}, try parseQuotedWideString( + arena, + .{ .slice = "L\"\"", .code_page = .utf8 }, + .{}, + )); + try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("кириллица"), try parseQuotedWideString( + arena, + .{ .slice = "L\"кириллица\"", .code_page = .utf8 }, + .{}, + )); + // Invalid UTF-8 gets converted to � depending on well-formedness + try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("����"), try parseQuotedWideString( + arena, + .{ .slice = "L\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 }, + .{}, + )); +} + +test "parse quoted ascii string as wide string" { + var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("кириллица"), try parseQuotedStringAsWideString( + arena, + .{ .slice = "\"кириллица\"", .code_page = .utf8 }, + .{}, + )); + // Whether or not invalid escapes are skipped is still determined by the L prefix + try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("\\H"), try parseQuotedStringAsWideString( + arena, + .{ .slice = "\"\\H\"", .code_page = .windows1252 }, + .{}, + )); + try std.testing.expectEqualSentinel(u16, 0, 
std.unicode.utf8ToUtf16LeStringLiteral(""), try parseQuotedStringAsWideString( + arena, + .{ .slice = "L\"\\H\"", .code_page = .windows1252 }, + .{}, + )); + // Maximum escape sequence value is also determined by the L prefix + try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("\x1234"), try parseQuotedStringAsWideString( + arena, + .{ .slice = "\"\\x1234\"", .code_page = .windows1252 }, + .{}, + )); + try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{0x1234}, try parseQuotedStringAsWideString( + arena, + .{ .slice = "L\"\\x1234\"", .code_page = .windows1252 }, + .{}, + )); +} + +pub fn columnsUntilTabStop(column: usize, tab_columns: usize) usize { + // 0 => 8, 1 => 7, 2 => 6, 3 => 5, 4 => 4 + // 5 => 3, 6 => 2, 7 => 1, 8 => 8 + return tab_columns - (column % tab_columns); +} + +pub const Number = struct { + value: u32, + is_long: bool = false, + + pub fn asWord(self: Number) u16 { + return @truncate(self.value); + } + + pub fn evaluateOperator(lhs: Number, operator_char: u8, rhs: Number) Number { + const result = switch (operator_char) { + '-' => lhs.value -% rhs.value, + '+' => lhs.value +% rhs.value, + '|' => lhs.value | rhs.value, + '&' => lhs.value & rhs.value, + else => unreachable, // invalid operator, this would be a lexer/parser bug + }; + return .{ + .value = result, + .is_long = lhs.is_long or rhs.is_long, + }; + } +}; + +/// Assumes that number literals normally rejected by RC's preprocessor +/// are similarly rejected before being parsed. 
+/// +/// Relevant RC preprocessor errors: +/// RC2021: expected exponent value, not '<character>' +/// example that is rejected: 1e1 +/// example that is accepted: 1ea +/// (this function will parse the two examples above the same) +pub fn parseNumberLiteral(bytes: SourceBytes) Number { + std.debug.assert(bytes.slice.len > 0); + var result = Number{ .value = 0, .is_long = false }; + var radix: u8 = 10; + var buf = bytes.slice; + + const Prefix = enum { none, minus, complement }; + var prefix: Prefix = .none; + switch (buf[0]) { // an optional leading '-' or '~' is stripped here and applied after the digits are parsed + '-' => { + prefix = .minus; + buf = buf[1..]; + }, + '~' => { + prefix = .complement; + buf = buf[1..]; + }, + else => {}, + } + + if (buf.len > 2 and buf[0] == '0') { // a radix prefix is only recognized when at least one character follows it + switch (buf[1]) { + 'o' => { // octal radix prefix is case-sensitive + radix = 8; + buf = buf[2..]; + }, + 'x', 'X' => { + radix = 16; + buf = buf[2..]; + }, + else => {}, + } + } + + var i: usize = 0; + while (bytes.code_page.codepointAt(i, buf)) |codepoint| : (i += codepoint.byte_len) { + const c = codepoint.value; + if (c == 'L' or c == 'l') { // an L suffix ends the literal; anything after it is ignored + result.is_long = true; + break; + } + const digit = switch (c) { + // On invalid digit for the radix, just stop parsing but don't fail + 0x00...0x7F => std.fmt.charToDigit(@intCast(c), radix) catch break, + else => break, + }; + + if (result.value != 0) { // multiplying 0 by the radix is a no-op, so it is skipped + result.value *%= radix; + } + result.value +%= digit; // wrapping arithmetic: overflow wraps instead of trapping + } + + switch (prefix) { + .none => {}, + .minus => result.value = 0 -% result.value, + .complement => result.value = ~result.value, + } + + return result; +} + +test "parse number literal" { + try std.testing.expectEqual(Number{ .value = 0, .is_long = false }, parseNumberLiteral(.{ .slice = "0", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 1, .is_long = false }, parseNumberLiteral(.{ .slice = "1", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 1, .is_long = true }, parseNumberLiteral(.{ .slice = "1L", .code_page = .windows1252 })); + try
std.testing.expectEqual(Number{ .value = 1, .is_long = true }, parseNumberLiteral(.{ .slice = "1l", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 1, .is_long = false }, parseNumberLiteral(.{ .slice = "1garbageL", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 4294967295, .is_long = false }, parseNumberLiteral(.{ .slice = "4294967295", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0, .is_long = false }, parseNumberLiteral(.{ .slice = "4294967296", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 1, .is_long = true }, parseNumberLiteral(.{ .slice = "4294967297L", .code_page = .windows1252 })); + + // can handle any length of number, wraps on overflow appropriately + const big_overflow = parseNumberLiteral(.{ .slice = "1000000000000000000000000000000000000000000000000000000000000000000000000000000090000000001", .code_page = .windows1252 }); + try std.testing.expectEqual(Number{ .value = 4100654081, .is_long = false }, big_overflow); + try std.testing.expectEqual(@as(u16, 1025), big_overflow.asWord()); + + try std.testing.expectEqual(Number{ .value = 0x20, .is_long = false }, parseNumberLiteral(.{ .slice = "0x20", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2AL", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2aL", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2aL", .code_page = .windows1252 })); + + try std.testing.expectEqual(Number{ .value = 0o20, .is_long = false }, parseNumberLiteral(.{ .slice = "0o20", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0o20, .is_long = true }, parseNumberLiteral(.{ .slice = "0o20L", .code_page = .windows1252 })); 
+ try std.testing.expectEqual(Number{ .value = 0o2, .is_long = false }, parseNumberLiteral(.{ .slice = "0o29", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0, .is_long = false }, parseNumberLiteral(.{ .slice = "0O29", .code_page = .windows1252 })); + + try std.testing.expectEqual(Number{ .value = 0xFFFFFFFF, .is_long = false }, parseNumberLiteral(.{ .slice = "-1", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0xFFFFFFFE, .is_long = false }, parseNumberLiteral(.{ .slice = "~1", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0xFFFFFFFF, .is_long = true }, parseNumberLiteral(.{ .slice = "-4294967297L", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0xFFFFFFFE, .is_long = true }, parseNumberLiteral(.{ .slice = "~4294967297L", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0xFFFFFFFD, .is_long = false }, parseNumberLiteral(.{ .slice = "-0X3", .code_page = .windows1252 })); + + // anything after L is ignored + try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2aL5", .code_page = .windows1252 })); +} diff --git a/src/resinator/parse.zig b/src/resinator/parse.zig new file mode 100644 index 000000000000..2e528bea657b --- /dev/null +++ b/src/resinator/parse.zig @@ -0,0 +1,1880 @@ +const std = @import("std"); +const Lexer = @import("lex.zig").Lexer; +const Token = @import("lex.zig").Token; +const Node = @import("ast.zig").Node; +const Tree = @import("ast.zig").Tree; +const CodePageLookup = @import("ast.zig").CodePageLookup; +const Resource = @import("rc.zig").Resource; +const Allocator = std.mem.Allocator; +const ErrorDetails = @import("errors.zig").ErrorDetails; +const Diagnostics = @import("errors.zig").Diagnostics; +const SourceBytes = @import("literals.zig").SourceBytes; +const Compiler = @import("compile.zig").Compiler; +const rc = @import("rc.zig"); +const res = 
@import("res.zig"); + +// TODO: Make these configurable? +pub const max_nested_menu_level: u32 = 512; +pub const max_nested_version_level: u32 = 512; +pub const max_nested_expression_level: u32 = 200; + +pub const Parser = struct { + const Self = @This(); + + lexer: *Lexer, + /// values that need to be initialized per-parse + state: Parser.State = undefined, + options: Parser.Options, + + pub const Error = error{ParseError} || Allocator.Error; + + pub const Options = struct { + warn_instead_of_error_on_invalid_code_page: bool = false, + }; + + pub fn init(lexer: *Lexer, options: Options) Parser { + return Parser{ + .lexer = lexer, + .options = options, + }; + } + + pub const State = struct { + token: Token, + lookahead_lexer: Lexer, + allocator: Allocator, + arena: Allocator, + diagnostics: *Diagnostics, + input_code_page_lookup: CodePageLookup, + output_code_page_lookup: CodePageLookup, + }; + + pub fn parse(self: *Self, allocator: Allocator, diagnostics: *Diagnostics) Error!*Tree { + var arena = std.heap.ArenaAllocator.init(allocator); + errdefer arena.deinit(); + + self.state = Parser.State{ + .token = undefined, + .lookahead_lexer = undefined, + .allocator = allocator, + .arena = arena.allocator(), + .diagnostics = diagnostics, + .input_code_page_lookup = CodePageLookup.init(arena.allocator(), self.lexer.default_code_page), + .output_code_page_lookup = CodePageLookup.init(arena.allocator(), self.lexer.default_code_page), + }; + + const parsed_root = try self.parseRoot(); + + const tree = try self.state.arena.create(Tree); + tree.* = .{ + .node = parsed_root, + .input_code_pages = self.state.input_code_page_lookup, + .output_code_pages = self.state.output_code_page_lookup, + .source = self.lexer.buffer, + .arena = arena.state, + .allocator = allocator, + }; + return tree; + } + + fn parseRoot(self: *Self) Error!*Node { + var statements = std.ArrayList(*Node).init(self.state.allocator); + defer statements.deinit(); + + try self.parseStatements(&statements); + try 
self.check(.eof); + + const node = try self.state.arena.create(Node.Root); + node.* = .{ + .body = try self.state.arena.dupe(*Node, statements.items), + }; + return &node.base; + } + + fn parseStatements(self: *Self, statements: *std.ArrayList(*Node)) Error!void { // appends one node per parsed statement to `statements` until the .eof token is reached + while (true) { + try self.nextToken(.whitespace_delimiter_only); + if (self.state.token.id == .eof) break; + // The Win32 compiler will sometimes try to recover from errors + // and then restart parsing afterwards. We don't ever do this + // because it almost always leads to unhelpful error messages + // (usually it will end up with bogus things like 'file + // not found: {') + const statement = try self.parseStatement(); // never reassigned, so `const` (newer Zig rejects an unmutated `var`) + try statements.append(statement); + } + } + + /// Expects the current token to be the token before possible common resource attributes. + /// After return, the current token will be the token immediately before the end of the + /// common resource attributes (if any). If there are no common resource attributes, the + /// current token is unchanged. + /// The returned slice is allocated by the parser's arena + fn parseCommonResourceAttributes(self: *Self) ![]Token { + var common_resource_attributes = std.ArrayListUnmanaged(Token){}; + while (true) { + const maybe_common_resource_attribute = try self.lookaheadToken(.normal); + if (maybe_common_resource_attribute.id == .literal and rc.CommonResourceAttributes.map.has(maybe_common_resource_attribute.slice(self.lexer.buffer))) { + try common_resource_attributes.append(self.state.arena, maybe_common_resource_attribute); + self.nextToken(.normal) catch unreachable; + } else { + break; + } + } + return common_resource_attributes.toOwnedSlice(self.state.arena); + } + + /// Expects the current token to have already been dealt with, and that the + /// optional statements will potentially start on the next token. + /// After return, the current token will be the token immediately before the end of the + /// optional statements (if any).
/// If there are no optional statements, the current token is unchanged.
/// The returned slice is allocated by the parser's arena
fn parseOptionalStatements(self: *Self, resource: Resource) ![]*Node {
    const arena = self.state.arena;
    var statements = std.ArrayListUnmanaged(*Node){};
    while (true) {
        // Peek first so the current token is left untouched when the next
        // token does not begin an optional statement.
        const peeked = try self.lookaheadToken(.normal);
        if (peeked.id != .literal) break;
        const keyword_text = peeked.slice(self.lexer.buffer);
        const statement_kind = rc.OptionalStatements.map.get(keyword_text) orelse switch (resource) {
            // DIALOG/DIALOGEX are additionally looked up in the dialog-only map
            .dialog, .dialogex => rc.OptionalStatements.dialog_map.get(keyword_text) orelse break,
            else => break,
        };
        // Consume the keyword that was just peeked.
        self.nextToken(.normal) catch unreachable;
        const keyword_token = self.state.token;

        switch (statement_kind) {
            .language => {
                const language_node = try self.parseLanguageStatement();
                try statements.append(arena, language_node);
            },
            // Number only
            .version, .characteristics, .style, .exstyle => {
                // Only STYLE and EXSTYLE values may contain NOT expressions
                const allows_not = switch (statement_kind) {
                    .style, .exstyle => true,
                    else => false,
                };
                const number_value = try self.parseExpression(.{
                    .can_contain_not_expressions = allows_not,
                    .allowed_types = .{ .number = true },
                });
                const simple = try arena.create(Node.SimpleStatement);
                simple.* = .{
                    .identifier = keyword_token,
                    .value = number_value,
                };
                try statements.append(arena, &simple.base);
            },
            // String only
            .caption => {
                try self.nextToken(.normal);
                const caption_token = self.state.token;
                if (!caption_token.isStringLiteral()) {
                    return self.addErrorDetailsAndFail(ErrorDetails{
                        .err = .expected_something_else,
                        .token = caption_token,
                        .extra = .{ .expected_types = .{
                            .string_literal = true,
                        } },
                    });
                }
                // TODO: Wrapping this in a Node.Literal is superfluous but necessary
                //       to put it in a SimpleStatement
                const literal = try arena.create(Node.Literal);
                literal.* = .{
                    .token = caption_token,
                };
                const simple = try arena.create(Node.SimpleStatement);
                simple.* = .{
                    .identifier = keyword_token,
                    .value = &literal.base,
                };
                try statements.append(arena, &simple.base);
            },
            // String or number
            .class => {
                const class_value = try self.parseExpression(.{ .allowed_types = .{ .number = true, .string = true } });
                const simple = try arena.create(Node.SimpleStatement);
                simple.* = .{
                    .identifier = keyword_token,
                    .value = class_value,
                };
                try statements.append(arena, &simple.base);
            },
            // Special case
            .menu => {
                try self.nextToken(.whitespace_delimiter_only);
                try self.check(.literal);
                // TODO: Wrapping this in a Node.Literal is superfluous but necessary
                //       to put it in a SimpleStatement
                const literal = try arena.create(Node.Literal);
                literal.* = .{
                    .token = self.state.token,
                };
                const simple = try arena.create(Node.SimpleStatement);
                simple.* = .{
                    .identifier = keyword_token,
                    .value = &literal.base,
                };
                try statements.append(arena, &simple.base);
            },
            .font => {
                const point_size = try self.parseExpression(.{ .allowed_types = .{ .number = true } });

                // The comma between point_size and typeface is both optional and
                // there can be any number of them
                try self.skipAnyCommas();

                try self.nextToken(.normal);
                const typeface = self.state.token;
                if (!typeface.isStringLiteral()) {
                    return self.addErrorDetailsAndFail(ErrorDetails{
                        .err = .expected_something_else,
                        .token = typeface,
                        .extra = .{ .expected_types = .{
                            .string_literal = true,
                        } },
                    });
                }

                // The remaining parameters are only parsed for DIALOGEX; each one
                // stays null if parsing stops before reaching it.
                var weight: ?*Node = null;
                var italic: ?*Node = null;
                var char_set: ?*Node = null;
                ex_params: {
                    switch (resource) {
                        .dialog => {},
                        .dialogex => {
                            var params = OptionalParamParser{ .parser = self };
                            weight = try params.parse(.{});
                            if (params.finished) break :ex_params;

                            if (!(try self.parseOptionalToken(.comma))) break :ex_params;
                            italic = try self.parseExpression(.{ .allowed_types = .{ .number = true } });

                            char_set = try params.parse(.{});
                        },
                        else => unreachable, // only DIALOG and DIALOGEX have FONT optional-statements
                    }
                }

                const font = try arena.create(Node.FontStatement);
                font.* = .{
                    .identifier = keyword_token,
                    .point_size = point_size,
                    .typeface = typeface,
                    .weight = weight,
                    .italic = italic,
                    .char_set = char_set,
                };
                try statements.append(arena, &font.base);
            },
        }
    }
    return statements.toOwnedSlice(arena);
}

/// Expects the current token to be the first token of the statement.
/// Top-level keywords found in `rc.TopLevelKeywords.map` are handled first; any other
/// literal is treated as the id of a `<id> <type> ...` resource definition, where the
/// token following the id selects which kind of resource gets parsed.
/// Nodes created directly by this function are allocated by the parser's arena.
fn parseStatement(self: *Self) Error!*Node {
    const first_token = self.state.token;
    std.debug.assert(first_token.id == .literal);

    if (rc.TopLevelKeywords.map.get(first_token.slice(self.lexer.buffer))) |keyword| switch (keyword) {
        .language => {
            const language_statement = try self.parseLanguageStatement();
            return language_statement;
        },
        .version, .characteristics => {
            const identifier = self.state.token;
            const value = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
            const node = try self.state.arena.create(Node.SimpleStatement);
            node.* = .{
                .identifier = identifier,
                .value = value,
            };
            return &node.base;
        },
        .stringtable => {
            // common resource attributes must all be contiguous and come before optional-statements
            const common_resource_attributes = try self.parseCommonResourceAttributes();
            const optional_statements = try self.parseOptionalStatements(.stringtable);

            try self.nextToken(.normal);
            const begin_token = self.state.token;
            try self.check(.begin);

            // Scratch list; its contents are copied into the arena once the block is complete.
            var strings = std.ArrayList(*Node).init(self.state.allocator);
            defer strings.deinit();
            while (true) {
                const maybe_end_token = try self.lookaheadToken(.normal);
                switch (maybe_end_token.id) {
                    .end => {
                        self.nextToken(.normal) catch unreachable;
                        break;
                    },
                    .eof => {
                        return self.addErrorDetailsAndFail(ErrorDetails{
                            .err = .unfinished_string_table_block,
                            .token = maybe_end_token,
                        });
                    },
                    else => {},
                }
                const id_expression = try self.parseExpression(.{ .allowed_types = .{ .number = true } });

                const comma_token: ?Token = if (try self.parseOptionalToken(.comma)) self.state.token else null;

                try self.nextToken(.normal);
                if (self.state.token.id != .quoted_ascii_string and self.state.token.id != .quoted_wide_string) {
                    return self.addErrorDetailsAndFail(ErrorDetails{
                        .err = .expected_something_else,
                        .token = self.state.token,
                        .extra = .{ .expected_types = .{ .string_literal = true } },
                    });
                }

                const string_node = try self.state.arena.create(Node.StringTableString);
                string_node.* = .{
                    .id = id_expression,
                    .maybe_comma = comma_token,
                    .string = self.state.token,
                };
                try strings.append(&string_node.base);
            }

            if (strings.items.len == 0) {
                return self.addErrorDetailsAndFail(ErrorDetails{
                    .err = .expected_token, // TODO: probably a more specific error message
                    .token = self.state.token,
                    .extra = .{ .expected = .number },
                });
            }

            const end_token = self.state.token;
            try self.check(.end);

            const node = try self.state.arena.create(Node.StringTable);
            node.* = .{
                .type = first_token,
                .common_resource_attributes = common_resource_attributes,
                .optional_statements = optional_statements,
                .begin_token = begin_token,
                .strings = try self.state.arena.dupe(*Node, strings.items),
                .end_token = end_token,
            };
            return &node.base;
        },
    };

    // The Win32 RC compiler allows for a 'dangling' literal at the end of a file
    // (as long as it's not a valid top-level keyword), and there is actually an
    // .rc file with such a dangling literal in the Windows-classic-samples set
    // of projects. So, we have special compatibility for this particular case.
    const maybe_eof = try self.lookaheadToken(.whitespace_delimiter_only);
    if (maybe_eof.id == .eof) {
        // TODO: emit warning
        // The slice itself is never reassigned (only its elements are written), so it is `const`.
        const context = try self.state.arena.alloc(Token, 2);
        context[0] = first_token;
        context[1] = maybe_eof;
        const invalid_node = try self.state.arena.create(Node.Invalid);
        invalid_node.* = .{
            .context = context,
        };
        return &invalid_node.base;
    }

    const id_token = first_token;
    // Captured before advancing, so that the id is interpreted using the code page
    // that was current when the id token was the current token.
    const id_code_page = self.lexer.current_code_page;
    try self.nextToken(.whitespace_delimiter_only);
    const resource = try self.checkResource();
    const type_token = self.state.token;

    if (resource == .string_num) {
        try self.addErrorDetails(.{
            .err = .string_resource_as_numeric_type,
            .token = type_token,
        });
        return self.addErrorDetailsAndFail(.{
            .err = .string_resource_as_numeric_type,
            .token = type_token,
            .type = .note,
            .print_source_line = false,
        });
    }

    if (resource == .font) {
        const id_bytes = SourceBytes{
            .slice = id_token.slice(self.lexer.buffer),
            .code_page = id_code_page,
        };
        const maybe_ordinal = res.NameOrOrdinal.maybeOrdinalFromString(id_bytes);
        if (maybe_ordinal == null) {
            const would_be_win32_rc_ordinal = res.NameOrOrdinal.maybeNonAsciiOrdinalFromString(id_bytes);
            if (would_be_win32_rc_ordinal) |win32_rc_ordinal| {
                try self.addErrorDetails(ErrorDetails{
                    .err = .id_must_be_ordinal,
                    .token = id_token,
                    .extra = .{ .resource = resource },
                });
                return self.addErrorDetailsAndFail(ErrorDetails{
                    .err = .win32_non_ascii_ordinal,
                    .token = id_token,
                    .type = .note,
                    .print_source_line = false,
                    .extra = .{ .number = win32_rc_ordinal.ordinal },
                });
            } else {
                return self.addErrorDetailsAndFail(ErrorDetails{
                    .err = .id_must_be_ordinal,
                    .token = id_token,
                    .extra = .{ .resource = resource },
                });
            }
        }
    }

    switch (resource) {
        .accelerators => {
            // common resource attributes must all be contiguous and come before optional-statements
            const common_resource_attributes = try self.parseCommonResourceAttributes();
            const optional_statements = try self.parseOptionalStatements(resource);

            try self.nextToken(.normal);
            const begin_token = self.state.token;
            try self.check(.begin);

            var accelerators = std.ArrayListUnmanaged(*Node){};

            while (true) {
                const lookahead = try self.lookaheadToken(.normal);
                switch (lookahead.id) {
                    // An EOF here is reported by the `check(.end)` after the loop.
                    .end, .eof => {
                        self.nextToken(.normal) catch unreachable;
                        break;
                    },
                    else => {},
                }
                const event = try self.parseExpression(.{ .allowed_types = .{ .number = true, .string = true } });

                try self.nextToken(.normal);
                try self.check(.comma);

                const idvalue = try self.parseExpression(.{ .allowed_types = .{ .number = true } });

                var type_and_options = std.ArrayListUnmanaged(Token){};
                while (true) {
                    if (!(try self.parseOptionalToken(.comma))) break;

                    try self.nextToken(.normal);
                    if (!rc.AcceleratorTypeAndOptions.map.has(self.tokenSlice())) {
                        return self.addErrorDetailsAndFail(.{
                            .err = .expected_something_else,
                            .token = self.state.token,
                            .extra = .{ .expected_types = .{
                                .accelerator_type_or_option = true,
                            } },
                        });
                    }
                    try type_and_options.append(self.state.arena, self.state.token);
                }

                const node = try self.state.arena.create(Node.Accelerator);
                node.* = .{
                    .event = event,
                    .idvalue = idvalue,
                    .type_and_options = try type_and_options.toOwnedSlice(self.state.arena),
                };
                try accelerators.append(self.state.arena, &node.base);
            }

            const end_token = self.state.token;
            try self.check(.end);

            const node = try self.state.arena.create(Node.Accelerators);
            node.* = .{
                .id = id_token,
                .type = type_token,
                .common_resource_attributes = common_resource_attributes,
                .optional_statements = optional_statements,
                .begin_token = begin_token,
                .accelerators = try accelerators.toOwnedSlice(self.state.arena),
                .end_token = end_token,
            };
            return &node.base;
        },
        .dialog, .dialogex => {
            // common resource attributes must all be contiguous and come before optional-statements
            const common_resource_attributes = try self.parseCommonResourceAttributes();

            const x = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
            _ = try self.parseOptionalToken(.comma);

            const y = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
            _ = try self.parseOptionalToken(.comma);

            const width = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
            _ = try self.parseOptionalToken(.comma);

            const height = try self.parseExpression(.{ .allowed_types = .{ .number = true } });

            var optional_param_parser = OptionalParamParser{ .parser = self };
            const help_id: ?*Node = try optional_param_parser.parse(.{});

            const optional_statements = try self.parseOptionalStatements(resource);

            try self.nextToken(.normal);
            const begin_token = self.state.token;
            try self.check(.begin);

            // Scratch list; its contents are copied into the arena once the block is complete.
            var controls = std.ArrayListUnmanaged(*Node){};
            defer controls.deinit(self.state.allocator);
            while (try self.parseControlStatement(resource)) |control_node| {
                // The number of controls must fit in a u16 in order for it to
                // be able to be written into the relevant field in the .res data.
                if (controls.items.len >= std.math.maxInt(u16)) {
                    try self.addErrorDetails(.{
                        .err = .too_many_dialog_controls,
                        .token = id_token,
                        .extra = .{ .resource = resource },
                    });
                    return self.addErrorDetailsAndFail(.{
                        .err = .too_many_dialog_controls,
                        .type = .note,
                        .token = control_node.getFirstToken(),
                        .token_span_end = control_node.getLastToken(),
                        .extra = .{ .resource = resource },
                    });
                }

                try controls.append(self.state.allocator, control_node);
            }

            try self.nextToken(.normal);
            const end_token = self.state.token;
            try self.check(.end);

            const node = try self.state.arena.create(Node.Dialog);
            node.* = .{
                .id = id_token,
                .type = type_token,
                .common_resource_attributes = common_resource_attributes,
                .x = x,
                .y = y,
                .width = width,
                .height = height,
                .help_id = help_id,
                .optional_statements = optional_statements,
                .begin_token = begin_token,
                .controls = try self.state.arena.dupe(*Node, controls.items),
                .end_token = end_token,
            };
            return &node.base;
        },
        .toolbar => {
            // common resource attributes must all be contiguous and come before optional-statements
            const common_resource_attributes = try self.parseCommonResourceAttributes();

            const button_width = try self.parseExpression(.{ .allowed_types = .{ .number = true } });

            try self.nextToken(.normal);
            try self.check(.comma);

            const button_height = try self.parseExpression(.{ .allowed_types = .{ .number = true } });

            try self.nextToken(.normal);
            const begin_token = self.state.token;
            try self.check(.begin);

            var buttons = std.ArrayListUnmanaged(*Node){};
            while (try self.parseToolbarButtonStatement()) |button_node| {
                try buttons.append(self.state.arena, button_node);
            }

            try self.nextToken(.normal);
            const end_token = self.state.token;
            try self.check(.end);

            const node = try self.state.arena.create(Node.Toolbar);
            node.* = .{
                .id = id_token,
                .type = type_token,
                .common_resource_attributes = common_resource_attributes,
                .button_width = button_width,
                .button_height = button_height,
                .begin_token = begin_token,
                .buttons = try buttons.toOwnedSlice(self.state.arena),
                .end_token = end_token,
            };
            return &node.base;
        },
        .menu, .menuex => {
            // common resource attributes must all be contiguous and come before optional-statements
            const common_resource_attributes = try self.parseCommonResourceAttributes();
            // help id is optional but must come between common resource attributes and optional-statements
            var help_id: ?*Node = null;
            // Note: No comma is allowed before or after help_id of MENUEX and help_id is not
            //       a possible field of MENU.
            if (resource == .menuex and try self.lookaheadCouldBeNumberExpression(.not_disallowed)) {
                help_id = try self.parseExpression(.{
                    .is_known_to_be_number_expression = true,
                });
            }
            // Pass the actual resource type, like the other arms do. Only DIALOG/DIALOGEX
            // get extra optional statements, so MENU/MENUEX accept the common set.
            const optional_statements = try self.parseOptionalStatements(resource);

            try self.nextToken(.normal);
            const begin_token = self.state.token;
            try self.check(.begin);

            // Scratch list; its contents are copied into the arena once the block is complete.
            var items = std.ArrayListUnmanaged(*Node){};
            defer items.deinit(self.state.allocator);
            while (try self.parseMenuItemStatement(resource, id_token, 1)) |item_node| {
                try items.append(self.state.allocator, item_node);
            }

            try self.nextToken(.normal);
            const end_token = self.state.token;
            try self.check(.end);

            if (items.items.len == 0) {
                return self.addErrorDetailsAndFail(.{
                    .err = .empty_menu_not_allowed,
                    .token = type_token,
                });
            }

            const node = try self.state.arena.create(Node.Menu);
            node.* = .{
                .id = id_token,
                .type = type_token,
                .common_resource_attributes = common_resource_attributes,
                .optional_statements = optional_statements,
                .help_id = help_id,
                .begin_token = begin_token,
                .items = try self.state.arena.dupe(*Node, items.items),
                .end_token = end_token,
            };
            return &node.base;
        },
        .versioninfo => {
            // common resource attributes must all be contiguous and come before optional-statements
            const common_resource_attributes = try self.parseCommonResourceAttributes();

            var fixed_info = std.ArrayListUnmanaged(*Node){};
            while (try self.parseVersionStatement()) |version_statement| {
                try fixed_info.append(self.state.arena, version_statement);
            }

            try self.nextToken(.normal);
            const begin_token = self.state.token;
            try self.check(.begin);

            var block_statements = std.ArrayListUnmanaged(*Node){};
            while (try self.parseVersionBlockOrValue(id_token, 1)) |block_node| {
                try block_statements.append(self.state.arena, block_node);
            }

            try self.nextToken(.normal);
            const end_token = self.state.token;
            try self.check(.end);

            const node = try self.state.arena.create(Node.VersionInfo);
            node.* = .{
                .id = id_token,
                .versioninfo = type_token,
                .common_resource_attributes = common_resource_attributes,
                .fixed_info = try fixed_info.toOwnedSlice(self.state.arena),
                .begin_token = begin_token,
                .block_statements = try block_statements.toOwnedSlice(self.state.arena),
                .end_token = end_token,
            };
            return &node.base;
        },
        .dlginclude => {
            const common_resource_attributes = try self.parseCommonResourceAttributes();

            const filename_expression = try self.parseExpression(.{
                .allowed_types = .{ .string = true },
            });

            const node = try self.state.arena.create(Node.ResourceExternal);
            node.* = .{
                .id = id_token,
                .type = type_token,
                .common_resource_attributes = common_resource_attributes,
                .filename = filename_expression,
            };
            return &node.base;
        },
        .stringtable => {
            // Reaching this arm means STRINGTABLE was preceded by an id token.
            return self.addErrorDetailsAndFail(.{
                .err = .name_or_id_not_allowed,
                .token = id_token,
                .extra = .{ .resource = resource },
            });
        },
        // Just try everything as a 'generic' resource (raw data or external file)
        // TODO: More fine-grained switch cases as necessary
        else => {
            const common_resource_attributes = try self.parseCommonResourceAttributes();

            const maybe_begin = try self.lookaheadToken(.normal);
            if (maybe_begin.id == .begin) {
                self.nextToken(.normal) catch unreachable;

                if (!resource.canUseRawData()) {
                    try self.addErrorDetails(ErrorDetails{
                        .err = .resource_type_cant_use_raw_data,
                        .token = maybe_begin,
                        .extra = .{ .resource = resource },
                    });
                    return self.addErrorDetailsAndFail(ErrorDetails{
                        .err = .resource_type_cant_use_raw_data,
                        .type = .note,
                        .print_source_line = false,
                        .token = maybe_begin,
                    });
                }

                const raw_data = try self.parseRawDataBlock();
                const end_token = self.state.token;

                const node = try self.state.arena.create(Node.ResourceRawData);
                node.* = .{
                    .id = id_token,
                    .type = type_token,
                    .common_resource_attributes = common_resource_attributes,
                    .begin_token = maybe_begin,
                    .raw_data = raw_data,
                    .end_token = end_token,
                };
                return &node.base;
            }

            const filename_expression = try self.parseExpression(.{
                // Don't tell the user that numbers are accepted since we error on
                // number expressions and regular number literals are treated as unquoted
                // literals rather than numbers, so from the users perspective
                // numbers aren't really allowed.
                .expected_types_override = .{
                    .literal = true,
                    .string_literal = true,
                },
            });

            const node = try self.state.arena.create(Node.ResourceExternal);
            node.* = .{
                .id = id_token,
                .type = type_token,
                .common_resource_attributes = common_resource_attributes,
                .filename = filename_expression,
            };
            return &node.base;
        },
    }
}

/// Expects the current token to be a begin token.
/// After return, the current token will be the end token.
/// Commas between values are skipped, but a comma before the first value is an error.
/// The returned slice is allocated by the parser's arena.
fn parseRawDataBlock(self: *Self) Error![]*Node {
    // Scratch list; its contents are copied into the arena on success.
    var expressions = std.ArrayList(*Node).init(self.state.allocator);
    defer expressions.deinit();

    while (true) {
        const peeked = try self.lookaheadToken(.normal);

        if (peeked.id == .end) {
            self.nextToken(.normal) catch unreachable;
            break;
        }
        if (peeked.id == .eof) {
            return self.addErrorDetailsAndFail(ErrorDetails{
                .err = .unfinished_raw_data_block,
                .token = peeked,
            });
        }
        if (peeked.id == .comma) {
            // comma as the first token in a raw data block is an error
            if (expressions.items.len == 0) {
                return self.addErrorDetailsAndFail(ErrorDetails{
                    .err = .expected_something_else,
                    .token = peeked,
                    .extra = .{ .expected_types = .{
                        .number = true,
                        .number_expression = true,
                        .string_literal = true,
                    } },
                });
            }
            // otherwise just skip over commas
            self.nextToken(.normal) catch unreachable;
            continue;
        }

        const expression = try self.parseExpression(.{ .allowed_types = .{ .number = true, .string = true } });
        try expressions.append(expression);

        // Only a number expression needs the trailing close-paren check.
        if (!expression.isNumberExpression()) continue;

        const following = try self.lookaheadToken(.normal);
        if (following.id == .close_paren) {
            // <number expression>) is an error
            return self.addErrorDetailsAndFail(ErrorDetails{
                .err = .expected_token,
                .token = following,
                .extra = .{ .expected = .operator },
            });
        }
    }

    return try self.state.arena.dupe(*Node, expressions.items);
}

/// Expects the current token to be handled, and that the control statement will
/// begin on the next token.
/// After return, the current token will be the token immediately before the end of the
/// control statement (or unchanged if the function returns null).
/// Returns null if the next token is not found in `rc.Control.map`.
/// `resource` decides whether the DIALOGEX-only parameters (help id and the
/// BEGIN/END extra data block) are parsed.
fn parseControlStatement(self: *Self, resource: Resource) Error!?*Node {
    const control_token = try self.lookaheadToken(.normal);
    const control = rc.Control.map.get(control_token.slice(self.lexer.buffer)) orelse return null;
    self.nextToken(.normal) catch unreachable;

    try self.skipAnyCommas();

    var text: ?Token = null;
    if (control.hasTextParam()) {
        try self.nextToken(.normal);
        switch (self.state.token.id) {
            .quoted_ascii_string, .quoted_wide_string, .number => {
                text = self.state.token;
            },
            else => {
                return self.addErrorDetailsAndFail(ErrorDetails{
                    .err = .expected_something_else,
                    .token = self.state.token,
                    .extra = .{ .expected_types = .{
                        .number = true,
                        .string_literal = true,
                    } },
                });
            },
        }
        try self.skipAnyCommas();
    }

    const id = try self.parseExpression(.{ .allowed_types = .{ .number = true } });

    try self.skipAnyCommas();

    var class: ?*Node = null;
    var style: ?*Node = null;
    if (control == .control) {
        class = try self.parseExpression(.{});
        if (class.?.id == .literal) {
            const class_literal = @fieldParentPtr(Node.Literal, "base", class.?);
            const is_invalid_control_class = class_literal.token.id == .literal and !rc.ControlClass.map.has(class_literal.token.slice(self.lexer.buffer));
            if (is_invalid_control_class) {
                return self.addErrorDetailsAndFail(.{
                    .err = .expected_something_else,
                    .token = self.state.token,
                    .extra = .{ .expected_types = .{
                        .control_class = true,
                    } },
                });
            }
        }
        try self.skipAnyCommas();
        style = try self.parseExpression(.{
            .can_contain_not_expressions = true,
            .allowed_types = .{ .number = true },
        });
        // If there is no comma after the style parameter, the Win32 RC compiler
        // could misinterpret the statement and end up skipping over at least one token
        // that should have been interpreted as the next parameter (x). For example:
        //   CONTROL "text", 1, BUTTON, 15 30, 1, 2, 3, 4
        // the `15` is the style parameter, but in the Win32 implementation the `30`
        // is completely ignored (i.e. the `1, 2, 3, 4` are `x`, `y`, `w`, `h`).
        // If a comma is added after the `15`, then `30` gets interpreted (correctly)
        // as the `x` value.
        //
        // Instead of emulating this behavior, we just warn about the potential for
        // weird behavior in the Win32 implementation whenever there isn't a comma after
        // the style parameter.
        const lookahead_token = try self.lookaheadToken(.normal);
        if (lookahead_token.id != .comma and lookahead_token.id != .eof) {
            try self.addErrorDetails(.{
                .err = .rc_could_miscompile_control_params,
                .type = .warning,
                .token = lookahead_token,
            });
            try self.addErrorDetails(.{
                .err = .rc_could_miscompile_control_params,
                .type = .note,
                .token = style.?.getFirstToken(),
                .token_span_end = style.?.getLastToken(),
            });
        }
        try self.skipAnyCommas();
    }

    const x = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
    _ = try self.parseOptionalToken(.comma);
    const y = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
    _ = try self.parseOptionalToken(.comma);
    const width = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
    _ = try self.parseOptionalToken(.comma);
    const height = try self.parseExpression(.{ .allowed_types = .{ .number = true } });

    var optional_param_parser = OptionalParamParser{ .parser = self };
    // For the generic CONTROL statement the style was already parsed above
    // (before x/y/width/height); for all other controls it is an optional trailing parameter.
    if (control != .control) {
        style = try optional_param_parser.parse(.{ .not_expression_allowed = true });
    }

    // Neither of these is reassigned after initialization, so they are `const`.
    const exstyle: ?*Node = try optional_param_parser.parse(.{ .not_expression_allowed = true });
    const help_id: ?*Node = switch (resource) {
        .dialogex => try optional_param_parser.parse(.{}),
        else => null,
    };

    var extra_data: []*Node = &[_]*Node{};
    var extra_data_begin: ?Token = null;
    var extra_data_end: ?Token = null;
    // extra data is DIALOGEX-only
    if (resource == .dialogex and try self.parseOptionalToken(.begin)) {
        extra_data_begin = self.state.token;
        extra_data = try self.parseRawDataBlock();
        extra_data_end = self.state.token;
    }

    const node = try self.state.arena.create(Node.ControlStatement);
    node.* = .{
        .type = control_token,
        .text = text,
        .class = class,
        .id = id,
        .x = x,
        .y = y,
        .width = width,
        .height = height,
        .style = style,
        .exstyle = exstyle,
        .help_id = help_id,
        .extra_data_begin = extra_data_begin,
        .extra_data = extra_data,
        .extra_data_end = extra_data_end,
    };
    return &node.base;
}

/// Parses a single toolbar button statement if the next token is found in
/// `rc.ToolbarButton.map`; otherwise returns null and leaves the current token unchanged.
/// A `.separator` becomes a `Node.Literal` holding the keyword token; a `.button`
/// becomes a `Node.SimpleStatement` whose value is the button's number expression.
fn parseToolbarButtonStatement(self: *Self) Error!?*Node {
    const keyword_token = try self.lookaheadToken(.normal);
    const button_type = rc.ToolbarButton.map.get(keyword_token.slice(self.lexer.buffer)) orelse return null;
    self.nextToken(.normal) catch unreachable;

    switch (button_type) {
        .separator => {
            const node = try self.state.arena.create(Node.Literal);
            node.* = .{
                .token = keyword_token,
            };
            return &node.base;
        },
        .button => {
            const button_id = try self.parseExpression(.{ .allowed_types = .{ .number = true } });

            const node = try self.state.arena.create(Node.SimpleStatement);
            node.* = .{
                .identifier = keyword_token,
                .value = button_id,
            };
            return &node.base;
        },
    }
}

/// Expects the current token to be handled, and that the menuitem/popup statement will
/// begin on the next token.
/// After return, the current token will be the token immediately before the end of the
/// menuitem statement (or unchanged if the function returns null).
/// `top_level_menu_id_token` and `nesting_level` are only used to report an error when
/// the nesting depth exceeds `max_nested_menu_level`.
fn parseMenuItemStatement(self: *Self, resource: Resource, top_level_menu_id_token: Token, nesting_level: u32) Error!?*Node {
    const item_token = try self.lookaheadToken(.normal);
    const item_kind = rc.MenuItem.map.get(item_token.slice(self.lexer.buffer)) orelse return null;
    self.nextToken(.normal) catch unreachable;

    if (nesting_level > max_nested_menu_level) {
        try self.addErrorDetails(.{
            .err = .nested_resource_level_exceeds_max,
            .token = top_level_menu_id_token,
            .extra = .{ .resource = resource },
        });
        return self.addErrorDetailsAndFail(.{
            .err = .nested_resource_level_exceeds_max,
            .type = .note,
            .token = item_token,
            .extra = .{ .resource = resource },
        });
    }

    const arena = self.state.arena;
    switch (resource) {
        .menu => switch (item_kind) {
            .menuitem => {
                try self.nextToken(.normal);
                const label_token = self.state.token;

                if (rc.MenuItem.isSeparator(label_token.slice(self.lexer.buffer))) {
                    // There can be any number of trailing commas after SEPARATOR
                    try self.skipAnyCommas();
                    const separator_node = try arena.create(Node.MenuItemSeparator);
                    separator_node.* = .{
                        .menuitem = item_token,
                        .separator = label_token,
                    };
                    return &separator_node.base;
                }

                if (!label_token.isStringLiteral()) {
                    return self.addErrorDetailsAndFail(ErrorDetails{
                        .err = .expected_something_else,
                        .token = label_token,
                        .extra = .{ .expected_types = .{
                            .string_literal = true,
                        } },
                    });
                }
                try self.skipAnyCommas();

                const result = try self.parseExpression(.{ .allowed_types = .{ .number = true } });

                _ = try self.parseOptionalToken(.comma);

                // Collect every consecutive token that is a known menu item option.
                var option_tokens = std.ArrayListUnmanaged(Token){};
                while (true) {
                    const candidate = try self.lookaheadToken(.normal);
                    const is_option = rc.MenuItem.Option.map.has(candidate.slice(self.lexer.buffer));
                    if (!is_option) break;
                    self.nextToken(.normal) catch unreachable;
                    try option_tokens.append(arena, candidate);
                    try self.skipAnyCommas();
                }

                const item_node = try arena.create(Node.MenuItem);
                item_node.* = .{
                    .menuitem = item_token,
                    .text = label_token,
                    .result = result,
                    .option_list = try option_tokens.toOwnedSlice(arena),
                };
                return &item_node.base;
            },
            .popup => {
                try self.nextToken(.normal);
                const label_token = self.state.token;
                if (!label_token.isStringLiteral()) {
                    return self.addErrorDetailsAndFail(ErrorDetails{
                        .err = .expected_something_else,
                        .token = label_token,
                        .extra = .{ .expected_types = .{
                            .string_literal = true,
                        } },
                    });
                }
                try self.skipAnyCommas();

                // Collect every consecutive token that is a known menu item option.
                var option_tokens = std.ArrayListUnmanaged(Token){};
                while (true) {
                    const candidate = try self.lookaheadToken(.normal);
                    const is_option = rc.MenuItem.Option.map.has(candidate.slice(self.lexer.buffer));
                    if (!is_option) break;
                    self.nextToken(.normal) catch unreachable;
                    try option_tokens.append(arena, candidate);
                    try self.skipAnyCommas();
                }

                try self.nextToken(.normal);
                const begin_token = self.state.token;
                try self.check(.begin);

                var children = std.ArrayListUnmanaged(*Node){};
                while (try self.parseMenuItemStatement(resource, top_level_menu_id_token, nesting_level + 1)) |child| {
                    try children.append(arena, child);
                }

                try self.nextToken(.normal);
                const end_token = self.state.token;
                try self.check(.end);

                if (children.items.len == 0) {
                    return self.addErrorDetailsAndFail(.{
                        .err = .empty_menu_not_allowed,
                        .token = item_token,
                    });
                }

                const popup_node = try arena.create(Node.Popup);
                popup_node.* = .{
                    .popup = item_token,
                    .text = label_token,
                    .option_list = try option_tokens.toOwnedSlice(arena),
                    .begin_token = begin_token,
                    .items = try children.toOwnedSlice(arena),
                    .end_token = end_token,
                };
                return &popup_node.base;
            },
        },
        .menuex => {
            try self.nextToken(.normal);
            const label_token = self.state.token;
            if (!label_token.isStringLiteral()) {
                return self.addErrorDetailsAndFail(ErrorDetails{
                    .err = .expected_something_else,
                    .token = label_token,
                    .extra = .{ .expected_types = .{
                        .string_literal = true,
                    } },
                });
            }

            // id, type and state are parsed for both MENUITEM and POPUP
            var params = OptionalParamParser{ .parser = self };
            const id = try params.parse(.{});
            const item_type = try params.parse(.{});
            const state = try params.parse(.{});

            if (item_kind == .menuitem) {
                // trailing comma is allowed, skip it
                _ = try self.parseOptionalToken(.comma);

                const item_node = try arena.create(Node.MenuItemEx);
                item_node.* = .{
                    .menuitem = item_token,
                    .text = label_token,
                    .id = id,
                    .type = item_type,
                    .state = state,
                };
                return &item_node.base;
            }

            // Only POPUP has a help id parameter
            const help_id = try params.parse(.{});

            // trailing comma is allowed, skip it
            _ = try self.parseOptionalToken(.comma);

            try self.nextToken(.normal);
            const begin_token = self.state.token;
            try self.check(.begin);

            var children = std.ArrayListUnmanaged(*Node){};
            while (try self.parseMenuItemStatement(resource, top_level_menu_id_token, nesting_level + 1)) |child| {
                try children.append(arena, child);
            }

            try self.nextToken(.normal);
            const end_token = self.state.token;
            try self.check(.end);

            if (children.items.len == 0) {
                return self.addErrorDetailsAndFail(.{
                    .err = .empty_menu_not_allowed,
                    .token = item_token,
                });
            }

            const popup_node = try arena.create(Node.PopupEx);
            popup_node.* = .{
                .popup = item_token,
                .text = label_token,
                .id = id,
                .type = item_type,
                .state = state,
                .help_id = help_id,
                .begin_token = begin_token,
                .items = try children.toOwnedSlice(arena),
                .end_token = end_token,
            };
            return &popup_node.base;
        },
        else => unreachable,
    }
    @compileError("unreachable");
}

/// Parses a run of optional, comma-prefixed number parameters.
/// Each call to `parse` first tries to consume a comma; the first time no comma is
/// found, `finished` is set and every later call returns null without consuming anything.
pub const OptionalParamParser = struct {
    finished: bool = false,
    parser: *Self,

    pub const Options = struct {
        not_expression_allowed: bool = false,
    };

    /// Returns the parsed number expression, or null when the parameter is
    /// absent (no comma) or 'empty' (a comma not followed by a number expression).
    pub fn parse(self: *OptionalParamParser, options: OptionalParamParser.Options) Error!?*Node {
        if (self.finished) return null;

        const saw_comma = try self.parser.parseOptionalToken(.comma);
        if (!saw_comma) {
            self.finished = true;
            return null;
        }

        // If the next lookahead token could be part of a number expression,
        // then parse it. Otherwise, treat it as an 'empty' expression and
        // continue parsing, since 'empty' values are allowed.
        const could_be_number = try self.parser.lookaheadCouldBeNumberExpression(switch (options.not_expression_allowed) {
            true => .not_allowed,
            false => .not_disallowed,
        });
        if (!could_be_number) return null;

        return try self.parser.parseExpression(.{
            .allowed_types = .{ .number = true },
            .can_contain_not_expressions = options.not_expression_allowed,
        });
    }
};

/// Expects the current token to be handled, and that the version statement will
/// begin on the next token.
/// After return, the current token will be the token immediately before the end of the
/// version statement (or unchanged if the function returns null).
fn parseVersionStatement(self: *Self) Error!?*Node {
    const type_token = try self.lookaheadToken(.normal);
    const statement_type = rc.VersionInfo.map.get(type_token.slice(self.lexer.buffer)) orelse return null;
    self.nextToken(.normal) catch unreachable;

    switch (statement_type) {
        .file_version, .product_version => {
            // One to four comma-separated number expressions
            var parts = std.BoundedArray(*Node, 4){};
            while (true) {
                const part = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
                parts.appendAssumeCapacity(part);

                if (parts.len == 4) break;
                if (!(try self.parseOptionalToken(.comma))) break;
            }

            const version_node = try self.state.arena.create(Node.VersionStatement);
            version_node.* = .{
                .type = type_token,
                .parts = try self.state.arena.dupe(*Node, parts.slice()),
            };
            return &version_node.base;
        },
        else => {
            const value = try self.parseExpression(.{ .allowed_types = .{ .number = true } });

            const simple = try self.state.arena.create(Node.SimpleStatement);
            simple.* = .{
                .identifier = type_token,
                .value = value,
            };
            return &simple.base;
        },
    }
}

/// Expects the current token to be handled, and
that the version BLOCK/VALUE will + /// begin on the next token. + /// After return, the current token will be the token immediately before the end of the + /// version BLOCK/VALUE (or unchanged if the function returns null). + fn parseVersionBlockOrValue(self: *Self, top_level_version_id_token: Token, nesting_level: u32) Error!?*Node { + const keyword_token = try self.lookaheadToken(.normal); + const keyword = rc.VersionBlock.map.get(keyword_token.slice(self.lexer.buffer)) orelse return null; + self.nextToken(.normal) catch unreachable; + + if (nesting_level > max_nested_version_level) { + try self.addErrorDetails(.{ + .err = .nested_resource_level_exceeds_max, + .token = top_level_version_id_token, + .extra = .{ .resource = .versioninfo }, + }); + return self.addErrorDetailsAndFail(.{ + .err = .nested_resource_level_exceeds_max, + .type = .note, + .token = keyword_token, + .extra = .{ .resource = .versioninfo }, + }); + } + + try self.nextToken(.normal); + const key = self.state.token; + if (!key.isStringLiteral()) { + return self.addErrorDetailsAndFail(.{ + .err = .expected_something_else, + .token = key, + .extra = .{ .expected_types = .{ + .string_literal = true, + } }, + }); + } + // Need to keep track of this to detect a potential miscompilation when + // the comma is omitted and the first value is a quoted string. 
+ const had_comma_before_first_value = try self.parseOptionalToken(.comma); + try self.skipAnyCommas(); + + const values = try self.parseBlockValuesList(had_comma_before_first_value); + + switch (keyword) { + .block => { + try self.nextToken(.normal); + const begin_token = self.state.token; + try self.check(.begin); + + var children = std.ArrayListUnmanaged(*Node){}; + while (try self.parseVersionBlockOrValue(top_level_version_id_token, nesting_level + 1)) |value_node| { + try children.append(self.state.arena, value_node); + } + + try self.nextToken(.normal); + const end_token = self.state.token; + try self.check(.end); + + const node = try self.state.arena.create(Node.Block); + node.* = .{ + .identifier = keyword_token, + .key = key, + .values = values, + .begin_token = begin_token, + .children = try children.toOwnedSlice(self.state.arena), + .end_token = end_token, + }; + return &node.base; + }, + .value => { + const node = try self.state.arena.create(Node.BlockValue); + node.* = .{ + .identifier = keyword_token, + .key = key, + .values = values, + }; + return &node.base; + }, + } + } + + fn parseBlockValuesList(self: *Self, had_comma_before_first_value: bool) Error![]*Node { + var values = std.ArrayListUnmanaged(*Node){}; + var seen_number: bool = false; + var first_string_value: ?*Node = null; + while (true) { + const lookahead_token = try self.lookaheadToken(.normal); + switch (lookahead_token.id) { + .operator, + .number, + .open_paren, + .quoted_ascii_string, + .quoted_wide_string, + => {}, + else => break, + } + const value = try self.parseExpression(.{}); + + if (value.isNumberExpression()) { + seen_number = true; + } else if (first_string_value == null) { + std.debug.assert(value.isStringLiteral()); + first_string_value = value; + } + + const has_trailing_comma = try self.parseOptionalToken(.comma); + try self.skipAnyCommas(); + + const value_value = try self.state.arena.create(Node.BlockValueValue); + value_value.* = .{ + .expression = value, + 
.trailing_comma = has_trailing_comma, + }; + try values.append(self.state.arena, &value_value.base); + } + if (seen_number and first_string_value != null) { + // The Win32 RC compiler does some strange stuff with the data size: + // Strings are counted as UTF-16 code units including the null-terminator + // Numbers are counted as their byte lengths + // So, when both strings and numbers are within a single value, + // it incorrectly sets the value's type as binary, but then gives the + // data length as a mixture of bytes and UTF-16 code units. This means that + // when the length is read, it will be treated as byte length and will + // not read the full value. We don't reproduce this behavior, so we warn + // of the miscompilation here. + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_version_value_byte_count, + .type = .warning, + .token = first_string_value.?.getFirstToken(), + .token_span_start = values.items[0].getFirstToken(), + .token_span_end = values.items[values.items.len - 1].getLastToken(), + }); + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_version_value_byte_count, + .type = .note, + .token = first_string_value.?.getFirstToken(), + .token_span_start = values.items[0].getFirstToken(), + .token_span_end = values.items[values.items.len - 1].getLastToken(), + .print_source_line = false, + }); + } + if (!had_comma_before_first_value and values.items.len > 0 and values.items[0].cast(.block_value_value).?.expression.isStringLiteral()) { + const token = values.items[0].cast(.block_value_value).?.expression.cast(.literal).?.token; + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_version_value_padding, + .type = .warning, + .token = token, + }); + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_version_value_padding, + .type = .note, + .token = token, + .print_source_line = false, + }); + } + return values.toOwnedSlice(self.state.arena); + } + + fn numberExpressionContainsAnyLSuffixes(expression_node: *Node, source: 
[]const u8, code_page_lookup: *const CodePageLookup) bool { + // TODO: This could probably be done without evaluating the whole expression + return Compiler.evaluateNumberExpression(expression_node, source, code_page_lookup).is_long; + } + + /// Expects the current token to be a literal token that contains the string LANGUAGE + fn parseLanguageStatement(self: *Self) Error!*Node { + const language_token = self.state.token; + + const primary_language = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + + try self.nextToken(.normal); + try self.check(.comma); + + const sublanguage = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + + // The Win32 RC compiler errors if either parameter contains any number with an L + // suffix. Instead of that, we want to warn and then let the values get truncated. + // The warning is done here to allow the compiler logic to not have to deal with this. + if (numberExpressionContainsAnyLSuffixes(primary_language, self.lexer.buffer, &self.state.input_code_page_lookup)) { + try self.addErrorDetails(.{ + .err = .rc_would_error_u16_with_l_suffix, + .type = .warning, + .token = primary_language.getFirstToken(), + .token_span_end = primary_language.getLastToken(), + .extra = .{ .statement_with_u16_param = .language }, + }); + try self.addErrorDetails(.{ + .err = .rc_would_error_u16_with_l_suffix, + .print_source_line = false, + .type = .note, + .token = primary_language.getFirstToken(), + .token_span_end = primary_language.getLastToken(), + .extra = .{ .statement_with_u16_param = .language }, + }); + } + if (numberExpressionContainsAnyLSuffixes(sublanguage, self.lexer.buffer, &self.state.input_code_page_lookup)) { + try self.addErrorDetails(.{ + .err = .rc_would_error_u16_with_l_suffix, + .type = .warning, + .token = sublanguage.getFirstToken(), + .token_span_end = sublanguage.getLastToken(), + .extra = .{ .statement_with_u16_param = .language }, + }); + try self.addErrorDetails(.{ + .err = 
.rc_would_error_u16_with_l_suffix, + .print_source_line = false, + .type = .note, + .token = sublanguage.getFirstToken(), + .token_span_end = sublanguage.getLastToken(), + .extra = .{ .statement_with_u16_param = .language }, + }); + } + + const node = try self.state.arena.create(Node.LanguageStatement); + node.* = .{ + .language_token = language_token, + .primary_language_id = primary_language, + .sublanguage_id = sublanguage, + }; + return &node.base; + } + + pub const ParseExpressionOptions = struct { + is_known_to_be_number_expression: bool = false, + can_contain_not_expressions: bool = false, + nesting_context: NestingContext = .{}, + allowed_types: AllowedTypes = .{ .literal = true, .number = true, .string = true }, + expected_types_override: ?ErrorDetails.ExpectedTypes = null, + + pub const AllowedTypes = struct { + literal: bool = false, + number: bool = false, + string: bool = false, + }; + + pub const NestingContext = struct { + first_token: ?Token = null, + last_token: ?Token = null, + level: u32 = 0, + + /// Returns a new NestingContext with values modified appropriately for an increased nesting level + fn incremented(ctx: NestingContext, first_token: Token, most_recent_token: Token) NestingContext { + return .{ + .first_token = ctx.first_token orelse first_token, + .last_token = most_recent_token, + .level = ctx.level + 1, + }; + } + }; + + pub fn toErrorDetails(options: ParseExpressionOptions, token: Token) ErrorDetails { + // TODO: expected_types_override interaction with is_known_to_be_number_expression? 
+ var expected_types = options.expected_types_override orelse ErrorDetails.ExpectedTypes{ + .number = options.allowed_types.number, + .number_expression = options.allowed_types.number, + .string_literal = options.allowed_types.string and !options.is_known_to_be_number_expression, + .literal = options.allowed_types.literal and !options.is_known_to_be_number_expression, + }; + return ErrorDetails{ + .err = .expected_something_else, + .token = token, + .extra = .{ .expected_types = expected_types }, + }; + } + }; + + /// Returns true if the next lookahead token is a number or could be the start of a number expression. + /// Only useful when looking for empty expressions in optional fields. + fn lookaheadCouldBeNumberExpression(self: *Self, not_allowed: enum { not_allowed, not_disallowed }) Error!bool { + var lookahead_token = try self.lookaheadToken(.normal); + switch (lookahead_token.id) { + .literal => if (not_allowed == .not_allowed) { + return std.ascii.eqlIgnoreCase("NOT", lookahead_token.slice(self.lexer.buffer)); + } else return false, + .number => return true, + .open_paren => return true, + .operator => { + // + can be a unary operator, see parseExpression's handling of unary + + const operator_char = lookahead_token.slice(self.lexer.buffer)[0]; + return operator_char == '+'; + }, + else => return false, + } + } + + fn parsePrimary(self: *Self, options: ParseExpressionOptions) Error!*Node { + try self.nextToken(.normal); + const first_token = self.state.token; + var is_close_paren_expression = false; + var is_unary_plus_expression = false; + switch (self.state.token.id) { + .quoted_ascii_string, .quoted_wide_string => { + if (!options.allowed_types.string) return self.addErrorDetailsAndFail(options.toErrorDetails(self.state.token)); + const node = try self.state.arena.create(Node.Literal); + node.* = .{ .token = self.state.token }; + return &node.base; + }, + .literal => { + if (options.can_contain_not_expressions and std.ascii.eqlIgnoreCase("NOT", 
self.state.token.slice(self.lexer.buffer))) { + const not_token = self.state.token; + try self.nextToken(.normal); + try self.check(.number); + if (!options.allowed_types.number) return self.addErrorDetailsAndFail(options.toErrorDetails(self.state.token)); + const node = try self.state.arena.create(Node.NotExpression); + node.* = .{ + .not_token = not_token, + .number_token = self.state.token, + }; + return &node.base; + } + if (!options.allowed_types.literal) return self.addErrorDetailsAndFail(options.toErrorDetails(self.state.token)); + const node = try self.state.arena.create(Node.Literal); + node.* = .{ .token = self.state.token }; + return &node.base; + }, + .number => { + if (!options.allowed_types.number) return self.addErrorDetailsAndFail(options.toErrorDetails(self.state.token)); + const node = try self.state.arena.create(Node.Literal); + node.* = .{ .token = self.state.token }; + return &node.base; + }, + .open_paren => { + const open_paren_token = self.state.token; + + const expression = try self.parseExpression(.{ + .is_known_to_be_number_expression = true, + .can_contain_not_expressions = options.can_contain_not_expressions, + .nesting_context = options.nesting_context.incremented(first_token, open_paren_token), + .allowed_types = .{ .number = true }, + }); + + try self.nextToken(.normal); + // TODO: Add context to error about where the open paren is + try self.check(.close_paren); + + if (!options.allowed_types.number) return self.addErrorDetailsAndFail(options.toErrorDetails(open_paren_token)); + const node = try self.state.arena.create(Node.GroupedExpression); + node.* = .{ + .open_token = open_paren_token, + .expression = expression, + .close_token = self.state.token, + }; + return &node.base; + }, + .close_paren => { + // Note: In the Win32 implementation, a single close paren + // counts as a valid "expression", but only when its the first and + // only token in the expression. 
Such an expression is then treated + // as a 'skip this expression' instruction. For example: + // 1 RCDATA { 1, ), ), ), 2 } + // will be evaluated as if it were `1 RCDATA { 1, 2 }` and only + // 0x0001 and 0x0002 will be written to the .res data. + // + // This behavior is not emulated because it almost certainly has + // no valid use cases and only introduces edge cases that are + // not worth the effort to track down and deal with. Instead, + // we error but also add a note about the Win32 RC behavior if + // this edge case is detected. + if (!options.is_known_to_be_number_expression) { + is_close_paren_expression = true; + } + }, + .operator => { + // In the Win32 implementation, something akin to a unary + + // is allowed but it doesn't behave exactly like a unary +. + // Instead of emulating the Win32 behavior, we instead error + // and add a note about unary plus not being allowed. + // + // This is done because unary + only works in some places, + // and there's no real use-case for it since it's so limited + // in how it can be used (e.g. +1 is accepted but (+1) will error) + // + // Even understanding when unary plus is allowed is difficult, so + // we don't do any fancy detection of when the Win32 RC compiler would + // allow a unary + and instead just output the note in all cases. + // + // Some examples of allowed expressions by the Win32 compiler: + // +1 + // 0|+5 + // +1+2 + // +~-5 + // +(1) + // + // Some examples of disallowed expressions by the Win32 compiler: + // (+1) + // ++5 + // + // TODO: Potentially re-evaluate and support the unary plus in a bug-for-bug + // compatible way. 
+ const operator_char = self.state.token.slice(self.lexer.buffer)[0]; + if (operator_char == '+') { + is_unary_plus_expression = true; + } + }, + else => {}, + } + + try self.addErrorDetails(options.toErrorDetails(self.state.token)); + if (is_close_paren_expression) { + try self.addErrorDetails(ErrorDetails{ + .err = .close_paren_expression, + .type = .note, + .token = self.state.token, + .print_source_line = false, + }); + } + if (is_unary_plus_expression) { + try self.addErrorDetails(ErrorDetails{ + .err = .unary_plus_expression, + .type = .note, + .token = self.state.token, + .print_source_line = false, + }); + } + return error.ParseError; + } + + /// Expects the current token to have already been dealt with, and that the + /// expression will start on the next token. + /// After return, the current token will have been dealt with. + fn parseExpression(self: *Self, options: ParseExpressionOptions) Error!*Node { + if (options.nesting_context.level > max_nested_expression_level) { + try self.addErrorDetails(.{ + .err = .nested_expression_level_exceeds_max, + .token = options.nesting_context.first_token.?, + }); + return self.addErrorDetailsAndFail(.{ + .err = .nested_expression_level_exceeds_max, + .type = .note, + .token = options.nesting_context.last_token.?, + }); + } + var expr: *Node = try self.parsePrimary(options); + const first_token = expr.getFirstToken(); + + // Non-number expressions can't have operators, so we can just return + if (!expr.isNumberExpression()) return expr; + + while (try self.parseOptionalTokenAdvanced(.operator, .normal_expect_operator)) { + const operator = self.state.token; + const rhs_node = try self.parsePrimary(.{ + .is_known_to_be_number_expression = true, + .can_contain_not_expressions = options.can_contain_not_expressions, + .nesting_context = options.nesting_context.incremented(first_token, operator), + .allowed_types = options.allowed_types, + }); + + if (!rhs_node.isNumberExpression()) { + return 
self.addErrorDetailsAndFail(ErrorDetails{ + .err = .expected_something_else, + .token = rhs_node.getFirstToken(), + .token_span_end = rhs_node.getLastToken(), + .extra = .{ .expected_types = .{ + .number = true, + .number_expression = true, + } }, + }); + } + + const node = try self.state.arena.create(Node.BinaryExpression); + node.* = .{ + .left = expr, + .operator = operator, + .right = rhs_node, + }; + expr = &node.base; + } + + return expr; + } + + /// Skips any amount of commas (including zero) + /// In other words, it will skip the regex `,*` + /// Assumes the token(s) should be parsed with `.normal` as the method. + fn skipAnyCommas(self: *Self) !void { + while (try self.parseOptionalToken(.comma)) {} + } + + /// Advances the current token only if the token's id matches the specified `id`. + /// Assumes the token should be parsed with `.normal` as the method. + /// Returns true if the token matched, false otherwise. + fn parseOptionalToken(self: *Self, id: Token.Id) Error!bool { + return self.parseOptionalTokenAdvanced(id, .normal); + } + + /// Advances the current token only if the token's id matches the specified `id`. + /// Returns true if the token matched, false otherwise. 
+ fn parseOptionalTokenAdvanced(self: *Self, id: Token.Id, comptime method: Lexer.LexMethod) Error!bool { + const maybe_token = try self.lookaheadToken(method); + if (maybe_token.id != id) return false; + self.nextToken(method) catch unreachable; + return true; + } + + fn addErrorDetails(self: *Self, details: ErrorDetails) Allocator.Error!void { + try self.state.diagnostics.append(details); + } + + fn addErrorDetailsAndFail(self: *Self, details: ErrorDetails) Error { + try self.addErrorDetails(details); + return error.ParseError; + } + + fn nextToken(self: *Self, comptime method: Lexer.LexMethod) Error!void { + self.state.token = token: while (true) { + const token = self.lexer.next(method) catch |err| switch (err) { + error.CodePagePragmaInIncludedFile => { + // The Win32 RC compiler silently ignores such `#pragma code_point` directives, + // but we want to both ignore them *and* emit a warning + try self.addErrorDetails(.{ + .err = .code_page_pragma_in_included_file, + .type = .warning, + .token = self.lexer.error_context_token.?, + }); + continue; + }, + error.CodePagePragmaInvalidCodePage => { + var details = self.lexer.getErrorDetails(err); + if (!self.options.warn_instead_of_error_on_invalid_code_page) { + return self.addErrorDetailsAndFail(details); + } + details.type = .warning; + try self.addErrorDetails(details); + continue; + }, + error.InvalidDigitCharacterInNumberLiteral => { + const details = self.lexer.getErrorDetails(err); + try self.addErrorDetails(details); + return self.addErrorDetailsAndFail(.{ + .err = details.err, + .type = .note, + .token = details.token, + .print_source_line = false, + }); + }, + else => return self.addErrorDetailsAndFail(self.lexer.getErrorDetails(err)), + }; + break :token token; + }; + // After every token, set the input code page for its line + try self.state.input_code_page_lookup.setForToken(self.state.token, self.lexer.current_code_page); + // But only set the output code page to the current code page if we are past 
the first code_page pragma in the file. + // Otherwise, we want to fill the lookup using the default code page so that lookups still work for lines that + // don't have an explicit output code page set. + const output_code_page = if (self.lexer.seen_pragma_code_pages > 1) self.lexer.current_code_page else self.state.output_code_page_lookup.default_code_page; + try self.state.output_code_page_lookup.setForToken(self.state.token, output_code_page); + } + + fn lookaheadToken(self: *Self, comptime method: Lexer.LexMethod) Error!Token { + self.state.lookahead_lexer = self.lexer.*; + return token: while (true) { + break :token self.state.lookahead_lexer.next(method) catch |err| switch (err) { + // Ignore this error and get the next valid token, we'll deal with this + // properly when getting the token for real + error.CodePagePragmaInIncludedFile => continue, + else => return self.addErrorDetailsAndFail(self.state.lookahead_lexer.getErrorDetails(err)), + }; + }; + } + + fn tokenSlice(self: *Self) []const u8 { + return self.state.token.slice(self.lexer.buffer); + } + + /// Check that the current token is something that can be used as an ID + fn checkId(self: *Self) !void { + switch (self.state.token.id) { + .literal => {}, + else => { + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .expected_token, + .token = self.state.token, + .extra = .{ .expected = .literal }, + }); + }, + } + } + + fn check(self: *Self, expected_token_id: Token.Id) !void { + if (self.state.token.id != expected_token_id) { + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .expected_token, + .token = self.state.token, + .extra = .{ .expected = expected_token_id }, + }); + } + } + + fn checkResource(self: *Self) !Resource { + switch (self.state.token.id) { + .literal => return Resource.fromString(.{ + .slice = self.state.token.slice(self.lexer.buffer), + .code_page = self.lexer.current_code_page, + }), + else => { + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = 
.expected_token, + .token = self.state.token, + .extra = .{ .expected = .literal }, + }); + }, + } + } +}; diff --git a/src/resinator/rc.zig b/src/resinator/rc.zig new file mode 100644 index 000000000000..00cb455058bd --- /dev/null +++ b/src/resinator/rc.zig @@ -0,0 +1,407 @@ +const std = @import("std"); +const utils = @import("utils.zig"); +const res = @import("res.zig"); +const SourceBytes = @import("literals.zig").SourceBytes; + +// https://learn.microsoft.com/en-us/windows/win32/menurc/about-resource-files + +pub const Resource = enum { + accelerators, + bitmap, + cursor, + dialog, + dialogex, + /// As far as I can tell, this is undocumented; the most I could find was this: + /// https://www.betaarchive.com/wiki/index.php/Microsoft_KB_Archive/91697 + dlginclude, + /// Undocumented, basically works exactly like RCDATA + dlginit, + font, + html, + icon, + menu, + menuex, + messagetable, + plugplay, // Obsolete + rcdata, + stringtable, + /// Undocumented + toolbar, + user_defined, + versioninfo, + vxd, // Obsolete + + // Types that are treated as a user-defined type when encountered, but have + // special meaning without the Visual Studio GUI. We match the Win32 RC compiler + // behavior by acting as if these keyword don't exist when compiling the .rc + // (thereby treating them as user-defined). + //textinclude, // A special resource that is interpreted by Visual C++. 
+ //typelib, // A special resource that is used with the /TLBID and /TLBOUT linker options + + // Types that can only be specified by numbers, they don't have keywords + cursor_num, + icon_num, + string_num, + anicursor_num, + aniicon_num, + fontdir_num, + manifest_num, + + const map = std.ComptimeStringMapWithEql(Resource, .{ + .{ "ACCELERATORS", .accelerators }, + .{ "BITMAP", .bitmap }, + .{ "CURSOR", .cursor }, + .{ "DIALOG", .dialog }, + .{ "DIALOGEX", .dialogex }, + .{ "DLGINCLUDE", .dlginclude }, + .{ "DLGINIT", .dlginit }, + .{ "FONT", .font }, + .{ "HTML", .html }, + .{ "ICON", .icon }, + .{ "MENU", .menu }, + .{ "MENUEX", .menuex }, + .{ "MESSAGETABLE", .messagetable }, + .{ "PLUGPLAY", .plugplay }, + .{ "RCDATA", .rcdata }, + .{ "STRINGTABLE", .stringtable }, + .{ "TOOLBAR", .toolbar }, + .{ "VERSIONINFO", .versioninfo }, + .{ "VXD", .vxd }, + }, std.comptime_string_map.eqlAsciiIgnoreCase); + + pub fn fromString(bytes: SourceBytes) Resource { + const maybe_ordinal = res.NameOrOrdinal.maybeOrdinalFromString(bytes); + if (maybe_ordinal) |ordinal| { + if (ordinal.ordinal >= 256) return .user_defined; + return fromRT(@enumFromInt(ordinal.ordinal)); + } + return map.get(bytes.slice) orelse .user_defined; + } + + // TODO: Some comptime validation that RT <-> Resource conversion is synced? 
+ pub fn fromRT(rt: res.RT) Resource { + return switch (rt) { + .ACCELERATOR => .accelerators, + .ANICURSOR => .anicursor_num, + .ANIICON => .aniicon_num, + .BITMAP => .bitmap, + .CURSOR => .cursor_num, + .DIALOG => .dialog, + .DLGINCLUDE => .dlginclude, + .DLGINIT => .dlginit, + .FONT => .font, + .FONTDIR => .fontdir_num, + .GROUP_CURSOR => .cursor, + .GROUP_ICON => .icon, + .HTML => .html, + .ICON => .icon_num, + .MANIFEST => .manifest_num, + .MENU => .menu, + .MESSAGETABLE => .messagetable, + .PLUGPLAY => .plugplay, + .RCDATA => .rcdata, + .STRING => .string_num, + .TOOLBAR => .toolbar, + .VERSION => .versioninfo, + .VXD => .vxd, + _ => .user_defined, + }; + } + + pub fn canUseRawData(resource: Resource) bool { + return switch (resource) { + .user_defined, + .html, + .plugplay, // Obsolete + .rcdata, + .vxd, // Obsolete + .manifest_num, + .dlginit, + => true, + else => false, + }; + } + + pub fn nameForErrorDisplay(resource: Resource) []const u8 { + return switch (resource) { + // zig fmt: off + .accelerators, .bitmap, .cursor, .dialog, .dialogex, .dlginclude, .dlginit, .font, + .html, .icon, .menu, .menuex, .messagetable, .plugplay, .rcdata, .stringtable, + .toolbar, .versioninfo, .vxd => @tagName(resource), + // zig fmt: on + .user_defined => "user-defined", + .cursor_num => std.fmt.comptimePrint("{d} (cursor)", .{@intFromEnum(res.RT.CURSOR)}), + .icon_num => std.fmt.comptimePrint("{d} (icon)", .{@intFromEnum(res.RT.ICON)}), + .string_num => std.fmt.comptimePrint("{d} (string)", .{@intFromEnum(res.RT.STRING)}), + .anicursor_num => std.fmt.comptimePrint("{d} (anicursor)", .{@intFromEnum(res.RT.ANICURSOR)}), + .aniicon_num => std.fmt.comptimePrint("{d} (aniicon)", .{@intFromEnum(res.RT.ANIICON)}), + .fontdir_num => std.fmt.comptimePrint("{d} (fontdir)", .{@intFromEnum(res.RT.FONTDIR)}), + .manifest_num => std.fmt.comptimePrint("{d} (manifest)", .{@intFromEnum(res.RT.MANIFEST)}), + }; + } +}; + +/// 
/// https://learn.microsoft.com/en-us/windows/win32/menurc/stringtable-resource#parameters
/// https://learn.microsoft.com/en-us/windows/win32/menurc/dialog-resource#parameters
/// https://learn.microsoft.com/en-us/windows/win32/menurc/dialogex-resource#parameters
pub const OptionalStatements = enum {
    characteristics,
    language,
    version,

    // DIALOG
    caption,
    class,
    exstyle,
    font,
    menu,
    style,

    /// Keyword lookup (ASCII case-insensitive) for the first three variants.
    pub const map = std.ComptimeStringMapWithEql(OptionalStatements, .{
        .{ "CHARACTERISTICS", .characteristics },
        .{ "LANGUAGE", .language },
        .{ "VERSION", .version },
    }, std.comptime_string_map.eqlAsciiIgnoreCase);

    /// Keyword lookup (ASCII case-insensitive) for the variants listed under
    /// the `// DIALOG` marker above.
    pub const dialog_map = std.ComptimeStringMapWithEql(OptionalStatements, .{
        .{ "CAPTION", .caption },
        .{ "CLASS", .class },
        .{ "EXSTYLE", .exstyle },
        .{ "FONT", .font },
        .{ "MENU", .menu },
        .{ "STYLE", .style },
    }, std.comptime_string_map.eqlAsciiIgnoreCase);
};

/// Control statement keywords.
pub const Control = enum {
    auto3state,
    autocheckbox,
    autoradiobutton,
    checkbox,
    combobox,
    control,
    ctext,
    defpushbutton,
    edittext,
    hedit,
    iedit,
    groupbox,
    icon,
    listbox,
    ltext,
    pushbox,
    pushbutton,
    radiobutton,
    rtext,
    scrollbar,
    state3,
    userbutton,

    /// Keyword lookup (ASCII case-insensitive).
    pub const map = std.ComptimeStringMapWithEql(Control, .{
        .{ "AUTO3STATE", .auto3state },
        .{ "AUTOCHECKBOX", .autocheckbox },
        .{ "AUTORADIOBUTTON", .autoradiobutton },
        .{ "CHECKBOX", .checkbox },
        .{ "COMBOBOX", .combobox },
        .{ "CONTROL", .control },
        .{ "CTEXT", .ctext },
        .{ "DEFPUSHBUTTON", .defpushbutton },
        .{ "EDITTEXT", .edittext },
        .{ "HEDIT", .hedit },
        .{ "IEDIT", .iedit },
        .{ "GROUPBOX", .groupbox },
        .{ "ICON", .icon },
        .{ "LISTBOX", .listbox },
        .{ "LTEXT", .ltext },
        .{ "PUSHBOX", .pushbox },
        .{ "PUSHBUTTON", .pushbutton },
        .{ "RADIOBUTTON", .radiobutton },
        .{ "RTEXT", .rtext },
        .{ "SCROLLBAR", .scrollbar },
        .{ "STATE3", .state3 },
        .{ "USERBUTTON", .userbutton },
    }, std.comptime_string_map.eqlAsciiIgnoreCase);

    /// Returns false for scrollbar, listbox, iedit, hedit, edittext and
    /// combobox; true for every other control.
    pub fn hasTextParam(control: Control) bool {
        switch (control) {
            .scrollbar, .listbox, .iedit, .hedit, .edittext, .combobox => return false,
            else => return true,
        }
    }
};

pub const ControlClass = struct {
    /// Class-name lookup (ASCII case-insensitive) for UTF-8/ASCII input.
    pub const map = std.ComptimeStringMapWithEql(res.ControlClass, .{
        .{ "BUTTON", .button },
        .{ "EDIT", .edit },
        .{ "STATIC", .static },
        .{ "LISTBOX", .listbox },
        .{ "SCROLLBAR", .scrollbar },
        .{ "COMBOBOX", .combobox },
    }, std.comptime_string_map.eqlAsciiIgnoreCase);

    /// Like `map.get` but works on WTF16 strings, for use with parsed
    /// string literals ("BUTTON", or even "\x42UTTON")
    pub fn fromWideString(str: []const u16) ?res.ControlClass {
        const utf16Literal = std.unicode.utf8ToUtf16LeStringLiteral;
        return if (ascii.eqlIgnoreCaseW(str, utf16Literal("BUTTON")))
            .button
        else if (ascii.eqlIgnoreCaseW(str, utf16Literal("EDIT")))
            .edit
        else if (ascii.eqlIgnoreCaseW(str, utf16Literal("STATIC")))
            .static
        else if (ascii.eqlIgnoreCaseW(str, utf16Literal("LISTBOX")))
            .listbox
        else if (ascii.eqlIgnoreCaseW(str, utf16Literal("SCROLLBAR")))
            .scrollbar
        else if (ascii.eqlIgnoreCaseW(str, utf16Literal("COMBOBOX")))
            .combobox
        else
            null;
    }
};

const ascii = struct {
    /// Compares ASCII values case-insensitively, non-ASCII values are compared directly.
    ///
    /// Returns false when the lengths differ. Case folding is applied only when
    /// BOTH code units are ASCII; otherwise the code units must match exactly.
    pub fn eqlIgnoreCaseW(a: []const u16, b: []const u16) bool {
        if (a.len != b.len) return false;
        for (a, b) |a_c, b_c| {
            // Both sides must fit in a u8 before narrowing: casting a code unit
            // above 255 with @intCast is illegal behavior. An ASCII unit can
            // never equal a non-ASCII one, so the direct comparison below gives
            // the right answer for the mixed case as well.
            if (a_c < 128 and b_c < 128) {
                if (std.ascii.toLower(@intCast(a_c)) != std.ascii.toLower(@intCast(b_c))) return false;
            } else if (a_c != b_c) {
                return false;
            }
        }
        return true;
    }
};

pub const MenuItem = enum {
    menuitem,
    popup,

    /// Keyword lookup (ASCII case-insensitive).
    pub const map = std.ComptimeStringMapWithEql(MenuItem, .{
        .{ "MENUITEM", .menuitem },
        .{ "POPUP", .popup },
    }, std.comptime_string_map.eqlAsciiIgnoreCase);

    /// Returns true if `bytes` is the word SEPARATOR in any ASCII casing.
    pub fn isSeparator(bytes: []const u8) bool {
        return std.ascii.eqlIgnoreCase(bytes, "SEPARATOR");
    }

    pub const Option = enum {
        checked,
        grayed,
        help,
        inactive,
        menubarbreak,
        menubreak,

        /// Keyword lookup (ASCII case-insensitive).
        pub const map = std.ComptimeStringMapWithEql(Option, .{
            .{ "CHECKED", .checked },
            .{ "GRAYED", .grayed },
            .{ "HELP", .help },
            .{ "INACTIVE", .inactive },
            .{ "MENUBARBREAK", .menubarbreak },
            .{ "MENUBREAK", .menubreak },
        }, std.comptime_string_map.eqlAsciiIgnoreCase);
    };
};

pub const ToolbarButton = enum {
    button,
    separator,

    /// Keyword lookup (ASCII case-insensitive).
    pub const map = std.ComptimeStringMapWithEql(ToolbarButton, .{
        .{ "BUTTON", .button },
        .{ "SEPARATOR", .separator },
    }, std.comptime_string_map.eqlAsciiIgnoreCase);
};

pub const VersionInfo = enum {
    file_version,
    product_version,
    file_flags_mask,
    file_flags,
    file_os,
    file_type,
    file_subtype,

    /// Keyword lookup (ASCII case-insensitive).
    pub const map = std.ComptimeStringMapWithEql(VersionInfo, .{
        .{ "FILEVERSION", .file_version },
        .{ "PRODUCTVERSION", .product_version },
        .{ "FILEFLAGSMASK", .file_flags_mask },
        .{ "FILEFLAGS", .file_flags },
        .{ "FILEOS", .file_os },
        .{ "FILETYPE", .file_type },
        .{ "FILESUBTYPE", .file_subtype },
    }, std.comptime_string_map.eqlAsciiIgnoreCase);
};

pub const VersionBlock = enum {
    block,
    value,

    /// Keyword lookup (ASCII case-insensitive).
    pub const map = std.ComptimeStringMapWithEql(VersionBlock, .{
        .{ "BLOCK", .block },
        .{ "VALUE", .value },
    }, std.comptime_string_map.eqlAsciiIgnoreCase);
};

/// Keywords that can be the first token in a statement and (if so) dictate how the rest
/// of the statement is parsed.
+pub const TopLevelKeywords = enum { + language, + version, + characteristics, + stringtable, + + pub const map = std.ComptimeStringMapWithEql(TopLevelKeywords, .{ + .{ "LANGUAGE", .language }, + .{ "VERSION", .version }, + .{ "CHARACTERISTICS", .characteristics }, + .{ "STRINGTABLE", .stringtable }, + }, std.comptime_string_map.eqlAsciiIgnoreCase); +}; + +pub const CommonResourceAttributes = enum { + preload, + loadoncall, + fixed, + moveable, + discardable, + pure, + impure, + shared, + nonshared, + + pub const map = std.ComptimeStringMapWithEql(CommonResourceAttributes, .{ + .{ "PRELOAD", .preload }, + .{ "LOADONCALL", .loadoncall }, + .{ "FIXED", .fixed }, + .{ "MOVEABLE", .moveable }, + .{ "DISCARDABLE", .discardable }, + .{ "PURE", .pure }, + .{ "IMPURE", .impure }, + .{ "SHARED", .shared }, + .{ "NONSHARED", .nonshared }, + }, std.comptime_string_map.eqlAsciiIgnoreCase); +}; + +pub const AcceleratorTypeAndOptions = enum { + virtkey, + ascii, + noinvert, + alt, + shift, + control, + + pub const map = std.ComptimeStringMapWithEql(AcceleratorTypeAndOptions, .{ + .{ "VIRTKEY", .virtkey }, + .{ "ASCII", .ascii }, + .{ "NOINVERT", .noinvert }, + .{ "ALT", .alt }, + .{ "SHIFT", .shift }, + .{ "CONTROL", .control }, + }, std.comptime_string_map.eqlAsciiIgnoreCase); +}; diff --git a/src/resinator/res.zig b/src/resinator/res.zig new file mode 100644 index 000000000000..48edeeccbcf8 --- /dev/null +++ b/src/resinator/res.zig @@ -0,0 +1,1108 @@ +const std = @import("std"); +const rc = @import("rc.zig"); +const Resource = rc.Resource; +const CommonResourceAttributes = rc.CommonResourceAttributes; +const Allocator = std.mem.Allocator; +const windows1252 = @import("windows1252.zig"); +const CodePage = @import("code_pages.zig").CodePage; +const literals = @import("literals.zig"); +const SourceBytes = literals.SourceBytes; +const Codepoint = @import("code_pages.zig").Codepoint; +const lang = @import("lang.zig"); +const isNonAsciiDigit = @import("utils.zig").isNonAsciiDigit; + 
/// Predefined resource type IDs.
/// The enum is non-exhaustive: any other `u8` value is representable but unnamed.
/// https://learn.microsoft.com/en-us/windows/win32/menurc/resource-types
pub const RT = enum(u8) {
    ACCELERATOR = 9,
    ANICURSOR = 21,
    ANIICON = 22,
    BITMAP = 2,
    CURSOR = 1,
    DIALOG = 5,
    DLGINCLUDE = 17,
    DLGINIT = 240,
    FONT = 8,
    FONTDIR = 7,
    GROUP_CURSOR = 1 + 11, // CURSOR + 11
    GROUP_ICON = 3 + 11, // ICON + 11
    HTML = 23,
    ICON = 3,
    MANIFEST = 24,
    MENU = 4,
    MESSAGETABLE = 11,
    PLUGPLAY = 19,
    RCDATA = 10,
    STRING = 6,
    TOOLBAR = 241,
    VERSION = 16,
    VXD = 20,
    _,

    /// Returns null if the resource type is user-defined
    /// Asserts that the resource is not `stringtable`
    ///
    /// `dialog`/`dialogex` both map to `DIALOG` and `menu`/`menuex` both map
    /// to `MENU`; `cursor`/`icon` map to the `GROUP_` IDs while the `_num`
    /// variants map to the plain IDs.
    pub fn fromResource(resource: Resource) ?RT {
        return switch (resource) {
            .accelerators => .ACCELERATOR,
            .bitmap => .BITMAP,
            .cursor => .GROUP_CURSOR,
            .dialog => .DIALOG,
            .dialogex => .DIALOG,
            .dlginclude => .DLGINCLUDE,
            .dlginit => .DLGINIT,
            .font => .FONT,
            .html => .HTML,
            .icon => .GROUP_ICON,
            .menu => .MENU,
            .menuex => .MENU,
            .messagetable => .MESSAGETABLE,
            .plugplay => .PLUGPLAY,
            .rcdata => .RCDATA,
            .stringtable => unreachable,
            .toolbar => .TOOLBAR,
            .user_defined => null,
            .versioninfo => .VERSION,
            .vxd => .VXD,

            .cursor_num => .CURSOR,
            .icon_num => .ICON,
            .string_num => .STRING,
            .anicursor_num => .ANICURSOR,
            .aniicon_num => .ANIICON,
            .fontdir_num => .FONTDIR,
            .manifest_num => .MANIFEST,
        };
    }
};

/// 16-bit memory flags of a resource, stored as a raw bitmask in `value`.
/// https://learn.microsoft.com/en-us/windows/win32/menurc/common-resource-attributes
/// https://learn.microsoft.com/en-us/windows/win32/menurc/resourceheader
pub const MemoryFlags = packed struct(u16) {
    value: u16,

    pub const MOVEABLE: u16 = 0x10;
    // TODO: SHARED and PURE seem to be the same thing? Testing seems to confirm this but
    // would like to find mention of it somewhere.
    pub const SHARED: u16 = 0x20;
    pub const PURE: u16 = 0x20;
    pub const PRELOAD: u16 = 0x40;
    pub const DISCARDABLE: u16 = 0x1000;

    /// Returns the default flags for the given predefined resource type;
    /// `null` and unnamed type IDs both get `MOVEABLE | SHARED`.
    ///
    /// Note: The defaults can have combinations that are not possible to specify within
    /// an .rc file, as the .rc attributes imply other values (i.e. specifying
    /// DISCARDABLE always implies MOVEABLE and PURE/SHARED, and yet RT_ICON
    /// has a default of only MOVEABLE | DISCARDABLE).
    pub fn defaults(predefined_resource_type: ?RT) MemoryFlags {
        if (predefined_resource_type == null) {
            return MemoryFlags{ .value = MOVEABLE | SHARED };
        } else {
            return switch (predefined_resource_type.?) {
                // zig fmt: off
                .RCDATA, .BITMAP, .HTML, .MANIFEST,
                .ACCELERATOR, .VERSION, .MESSAGETABLE,
                .DLGINIT, .TOOLBAR, .PLUGPLAY,
                .VXD, => MemoryFlags{ .value = MOVEABLE | SHARED },

                .GROUP_ICON, .GROUP_CURSOR,
                .STRING, .FONT, .DIALOG, .MENU,
                .DLGINCLUDE, => MemoryFlags{ .value = MOVEABLE | SHARED | DISCARDABLE },

                .ICON, .CURSOR, .ANIICON, .ANICURSOR => MemoryFlags{ .value = MOVEABLE | DISCARDABLE },
                .FONTDIR => MemoryFlags{ .value = MOVEABLE | PRELOAD },
                // zig fmt: on
                // Same as predefined_resource_type == null
                _ => return MemoryFlags{ .value = MOVEABLE | SHARED },
            };
        }
    }

    /// Applies a single attribute keyword to the flags.
    /// Several attributes touch more than their own bit: `fixed`, `nonshared`
    /// and `impure` also clear DISCARDABLE, and `discardable` also sets
    /// MOVEABLE and PURE.
    pub fn set(self: *MemoryFlags, attribute: CommonResourceAttributes) void {
        switch (attribute) {
            .preload => self.value |= PRELOAD,
            .loadoncall => self.value &= ~PRELOAD,
            .moveable => self.value |= MOVEABLE,
            .fixed => self.value &= ~(MOVEABLE | DISCARDABLE),
            .shared => self.value |= SHARED,
            .nonshared => self.value &= ~(SHARED | DISCARDABLE),
            .pure => self.value |= PURE,
            .impure => self.value &= ~(PURE | DISCARDABLE),
            .discardable => self.value |= DISCARDABLE | MOVEABLE | PURE,
        }
    }

    /// Variant of `set` that differs only for `preload`/`loadoncall`: when
    /// `implied_shared_or_pure` is true, `preload` additionally clears SHARED
    /// and `loadoncall` additionally sets it. All other attributes defer to `set`.
    /// NOTE(review): presumably used for the GROUP_ICON/GROUP_CURSOR resources -- confirm with callers.
    pub fn setGroup(self: *MemoryFlags, attribute: CommonResourceAttributes, implied_shared_or_pure: bool) void {
        switch (attribute) {
            .preload => {
                self.value |= PRELOAD;
                if (implied_shared_or_pure) self.value &= ~SHARED;
            },
            .loadoncall => {
                self.value &= ~PRELOAD;
                if (implied_shared_or_pure) self.value |= SHARED;
            },
            else => self.set(attribute),
        }
    }
};

/// A 16-bit language identifier split into a 10-bit primary language ID and a
/// 6-bit sublanguage ID.
/// https://learn.microsoft.com/en-us/windows/win32/intl/language-identifiers
pub const Language = packed struct(u16) {
    // Note: This is the default no matter what locale the current system is set to,
    //       e.g. even if the system's locale is en-GB, en-US will still be the
    //       default language for resources in the Win32 rc compiler.
    primary_language_id: u10 = lang.LANG_ENGLISH,
    sublanguage_id: u6 = lang.SUBLANG_ENGLISH_US,

    /// Default language ID as a u16
    pub const default: u16 = (Language{}).asInt();

    /// Reinterprets the bits of `int` as a `Language`.
    pub fn fromInt(int: u16) Language {
        return @bitCast(int);
    }

    /// Reinterprets the bits of this `Language` as a `u16`.
    pub fn asInt(self: Language) u16 {
        return @bitCast(self);
    }
};

/// Ordinal values of the predefined control classes.
/// https://learn.microsoft.com/en-us/windows/win32/api/winuser/ns-winuser-dlgitemtemplate#remarks
pub const ControlClass = enum(u16) {
    button = 0x80,
    edit = 0x81,
    static = 0x82,
    listbox = 0x83,
    scrollbar = 0x84,
    combobox = 0x85,

    /// Returns the class implied by a control keyword, or null for the
    /// generic `control` keyword.
    pub fn fromControl(control: rc.Control) ?ControlClass {
        return switch (control) {
            // zig fmt: off
            .auto3state, .autocheckbox, .autoradiobutton,
            .checkbox, .defpushbutton, .groupbox, .pushbox,
            .pushbutton, .radiobutton, .state3, .userbutton => .button,
            // zig fmt: on
            .combobox => .combobox,
            .control => null,
            .ctext, .icon, .ltext, .rtext => .static,
            .edittext, .hedit, .iedit => .edit,
            .listbox => .listbox,
            .scrollbar => .scrollbar,
        };
    }

    /// Returns the style bits implied by a control keyword. Every control
    /// starts from `WS.CHILD | WS.VISIBLE`; `combobox`, `control` and
    /// `scrollbar` add nothing on top of that.
    pub fn getImpliedStyle(control: rc.Control) u32 {
        var style = WS.CHILD | WS.VISIBLE;
        switch (control) {
            .auto3state => style |= BS.AUTO3STATE | WS.TABSTOP,
            .autocheckbox => style |= BS.AUTOCHECKBOX | WS.TABSTOP,
            .autoradiobutton => style |= BS.AUTORADIOBUTTON,
            .checkbox => style |= BS.CHECKBOX | WS.TABSTOP,
            .combobox => {},
            .control => {},
            .ctext => style |= SS.CENTER | WS.GROUP,
            .defpushbutton => style |= BS.DEFPUSHBUTTON | WS.TABSTOP,
            .edittext, .hedit, .iedit => style |= WS.TABSTOP | WS.BORDER,
            .groupbox => style |= BS.GROUPBOX,
            .icon => style |= SS.ICON,
            .listbox => style |= LBS.NOTIFY | WS.BORDER,
            .ltext => style |= WS.GROUP,
            .pushbox => style |= BS.PUSHBOX | WS.TABSTOP,
            .pushbutton => style |= WS.TABSTOP,
            .radiobutton => style |= BS.RADIOBUTTON,
            .rtext => style |= SS.RIGHT | WS.GROUP,
            .scrollbar => {},
            .state3 => style |= BS.@"3STATE" | WS.TABSTOP,
            .userbutton => style |= BS.USERBUTTON | WS.TABSTOP,
        }
        return style;
    }
};

/// Either a 0-terminated UTF-16 name or a 16-bit ordinal.
pub const NameOrOrdinal = union(enum) {
    name: [:0]const u16,
    ordinal: u16,

    /// Frees the name (if any) with `allocator`; ordinals own no memory.
    pub fn deinit(self: NameOrOrdinal, allocator: Allocator) void {
        switch (self) {
            .name => |name| {
                allocator.free(name);
            },
            .ordinal => {},
        }
    }

    /// Returns the full length of the amount of bytes that would be written by `write`
    /// (e.g. for an ordinal it will return the length including the 0xFFFF indicator)
    pub fn byteLen(self: NameOrOrdinal) usize {
        switch (self) {
            .name => |name| {
                // + 1 for 0-terminated
                return (name.len + 1) * @sizeOf(u16);
            },
            .ordinal => return 4,
        }
    }

    /// Writes the value to `writer` as little-endian u16s: a name is written
    /// with its 0 terminator, an ordinal is written as 0xFFFF followed by the
    /// ordinal itself.
    pub fn write(self: NameOrOrdinal, writer: anytype) !void {
        switch (self) {
            .name => |name| {
                // the slice is widened by one to include the sentinel
                for (name[0 .. name.len + 1]) |code_unit| {
                    try writer.writeIntLittle(u16, code_unit);
                }
            },
            .ordinal => |ordinal| {
                try writer.writeIntLittle(u16, 0xffff);
                try writer.writeIntLittle(u16, ordinal);
            },
        }
    }

    /// Writes a single zero u16 (the encoding of an empty name).
    pub fn writeEmpty(writer: anytype) !void {
        try writer.writeIntLittle(u16, 0);
    }

    /// Parses `bytes` as an ordinal if `maybeOrdinalFromString` accepts it,
    /// otherwise as a name. A returned name is allocated with `allocator`.
    pub fn fromString(allocator: Allocator, bytes: SourceBytes) !NameOrOrdinal {
        if (maybeOrdinalFromString(bytes)) |ordinal| {
            return ordinal;
        }
        return nameFromString(allocator, bytes);
    }

    /// Converts `bytes` (decoded with `bytes.code_page`) into an uppercased
    /// UTF-16 name of at most 256 code units. Invalid codepoints become
    /// U+FFFD. The returned name is allocated with `allocator`.
    pub fn nameFromString(allocator: Allocator, bytes: SourceBytes) !NameOrOrdinal {
        // Names have a limit of 256 UTF-16 code units + null terminator
        var buf = try std.ArrayList(u16).initCapacity(allocator, @min(257, bytes.slice.len));
        errdefer buf.deinit();

        var i: usize = 0;
        while (bytes.code_page.codepointAt(i, bytes.slice)) |codepoint| : (i += codepoint.byte_len) {
            if (buf.items.len == 256) break;

            const c = codepoint.value;
            if (c == Codepoint.invalid) {
                try buf.append(std.mem.nativeToLittle(u16, '�'));
            } else if (c < 0x7F) {
                // ASCII chars in names are always converted to uppercase
                // NOTE(review): unlike the other branches this one does not go
                // through `nativeToLittle`; identical on little-endian hosts --
                // confirm the intended behavior on big-endian hosts.
                try buf.append(std.ascii.toUpper(@intCast(c)));
            } else if (c < 0x10000) {
                const short: u16 = @intCast(c);
                try buf.append(std.mem.nativeToLittle(u16, short));
            } else {
                // codepoints >= 0x10000 are split into a UTF-16 surrogate pair
                const high = @as(u16, @intCast((c - 0x10000) >> 10)) + 0xD800;
                try buf.append(std.mem.nativeToLittle(u16, high));

                // Note: This can cut-off in the middle of a UTF-16 surrogate pair,
                //       i.e. it can make the string end with an unpaired high surrogate
                if (buf.items.len == 256) break;

                const low = @as(u16, @intCast(c & 0x3FF)) + 0xDC00;
                try buf.append(std.mem.nativeToLittle(u16, low));
            }
        }

        return NameOrOrdinal{ .name = try buf.toOwnedSliceSentinel(0) };
    }

    /// Returns `null` if the bytes do not form a valid number.
    /// Does not allow non-ASCII digits (which the Win32 RC compiler does allow
    /// in base 10 numbers, see `maybeNonAsciiOrdinalFromString`).
+ pub fn maybeOrdinalFromString(bytes: SourceBytes) ?NameOrOrdinal { + var buf = bytes.slice; + var radix: u8 = 10; + if (buf.len > 2 and buf[0] == '0') { + switch (buf[1]) { + '0'...'9' => {}, + 'x', 'X' => { + radix = 16; + buf = buf[2..]; + // only the first 4 hex digits matter, anything else is ignored + // i.e. 0x12345 is treated as if it were 0x1234 + buf.len = @min(buf.len, 4); + }, + else => return null, + } + } + + var i: usize = 0; + var result: u16 = 0; + while (bytes.code_page.codepointAt(i, buf)) |codepoint| : (i += codepoint.byte_len) { + const c = codepoint.value; + const digit: u8 = switch (c) { + 0x00...0x7F => std.fmt.charToDigit(@intCast(c), radix) catch switch (radix) { + 10 => return null, + // non-hex-digits are treated as a terminator rather than invalidating + // the number (note: if there are no valid hex digits then the result + // will be zero which is not treated as a valid number) + 16 => break, + else => unreachable, + }, + else => if (radix == 10) return null else break, + }; + + if (result != 0) { + result *%= radix; + } + result +%= digit; + } + + // Anything that resolves to zero is not interpretted as a number + if (result == 0) return null; + return NameOrOrdinal{ .ordinal = result }; + } + + /// The Win32 RC compiler uses `iswdigit` for digit detection for base 10 + /// numbers, which means that non-ASCII digits are 'accepted' but handled + /// in a totally unintuitive manner, leading to arbitrary results. + /// + /// This function will return the value that such an ordinal 'would' have + /// if it was run through the Win32 RC compiler. This allows us to disallow + /// non-ASCII digits in number literals but still detect when the Win32 + /// RC compiler would have allowed them, so that a proper warning/error + /// can be emitted. 
+ pub fn maybeNonAsciiOrdinalFromString(bytes: SourceBytes) ?NameOrOrdinal { + var buf = bytes.slice; + const radix = 10; + if (buf.len > 2 and buf[0] == '0') { + switch (buf[1]) { + // We only care about base 10 numbers here + 'x', 'X' => return null, + else => {}, + } + } + + var i: usize = 0; + var result: u16 = 0; + while (bytes.code_page.codepointAt(i, buf)) |codepoint| : (i += codepoint.byte_len) { + const c = codepoint.value; + const digit: u16 = digit: { + const is_digit = (c >= '0' and c <= '9') or isNonAsciiDigit(c); + if (!is_digit) return null; + break :digit @intCast(c - '0'); + }; + + if (result != 0) { + result *%= radix; + } + result +%= digit; + } + + // Anything that resolves to zero is not interpretted as a number + if (result == 0) return null; + return NameOrOrdinal{ .ordinal = result }; + } + + pub fn predefinedResourceType(self: NameOrOrdinal) ?RT { + switch (self) { + .ordinal => |ordinal| { + if (ordinal >= 256) return null; + switch (@as(RT, @enumFromInt(ordinal))) { + .ACCELERATOR, + .ANICURSOR, + .ANIICON, + .BITMAP, + .CURSOR, + .DIALOG, + .DLGINCLUDE, + .DLGINIT, + .FONT, + .FONTDIR, + .GROUP_CURSOR, + .GROUP_ICON, + .HTML, + .ICON, + .MANIFEST, + .MENU, + .MESSAGETABLE, + .PLUGPLAY, + .RCDATA, + .STRING, + .TOOLBAR, + .VERSION, + .VXD, + => |rt| return rt, + _ => return null, + } + }, + .name => return null, + } + } +}; + +fn expectNameOrOrdinal(expected: NameOrOrdinal, actual: NameOrOrdinal) !void { + switch (expected) { + .name => { + if (actual != .name) return error.TestExpectedEqual; + try std.testing.expectEqualSlices(u16, expected.name, actual.name); + }, + .ordinal => { + if (actual != .ordinal) return error.TestExpectedEqual; + try std.testing.expectEqual(expected.ordinal, actual.ordinal); + }, + } +} + +test "NameOrOrdinal" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + const allocator = arena.allocator(); + + // zero is treated as a string + try expectNameOrOrdinal( + 
NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("0") }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "0", .code_page = .windows1252 }), + ); + // any non-digit byte invalidates the number + try expectNameOrOrdinal( + NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("1A") }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "1a", .code_page = .windows1252 }), + ); + try expectNameOrOrdinal( + NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("1ÿ") }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "1\xff", .code_page = .windows1252 }), + ); + try expectNameOrOrdinal( + NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("1€") }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "1€", .code_page = .utf8 }), + ); + try expectNameOrOrdinal( + NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("1�") }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "1\x80", .code_page = .utf8 }), + ); + // same with overflow that resolves to 0 + try expectNameOrOrdinal( + NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("65536") }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "65536", .code_page = .windows1252 }), + ); + // hex zero is also treated as a string + try expectNameOrOrdinal( + NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("0X0") }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "0x0", .code_page = .windows1252 }), + ); + // hex numbers work + try expectNameOrOrdinal( + NameOrOrdinal{ .ordinal = 0x100 }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "0x100", .code_page = .windows1252 }), + ); + // only the first 4 hex digits matter + try expectNameOrOrdinal( + NameOrOrdinal{ .ordinal = 0x1234 }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "0X12345", .code_page = .windows1252 }), + ); + // octal is not supported so it gets treated as a string + try expectNameOrOrdinal( + NameOrOrdinal{ .name = 
std.unicode.utf8ToUtf16LeStringLiteral("0O1234") }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "0o1234", .code_page = .windows1252 }), + ); + // overflow wraps + try expectNameOrOrdinal( + NameOrOrdinal{ .ordinal = @truncate(65635) }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "65635", .code_page = .windows1252 }), + ); + // non-hex-digits in a hex literal are treated as a terminator + try expectNameOrOrdinal( + NameOrOrdinal{ .ordinal = 0x4 }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "0x4n", .code_page = .windows1252 }), + ); + try expectNameOrOrdinal( + NameOrOrdinal{ .ordinal = 0xFA }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "0xFAZ92348", .code_page = .windows1252 }), + ); + // 0 at the start is allowed + try expectNameOrOrdinal( + NameOrOrdinal{ .ordinal = 50 }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "050", .code_page = .windows1252 }), + ); + // limit of 256 UTF-16 code units, can cut off between a surrogate pair + { + var expected = blk: { + // the input before the 𐐷 character, but uppercased + var expected_u8_bytes = "00614982008907933748980730280674788429543776231864944218790698304852300002973622122844631429099469274282385299397783838528QFFL7SHNSIETG0QKLR1UYPBTUV1PMFQRRA0VJDG354GQEDJMUPGPP1W1EXVNTZVEIZ6K3IPQM1AWGEYALMEODYVEZGOD3MFMGEY8FNR4JUETTB1PZDEWSNDRGZUA8SNXP3NGO"; + var buf: [256:0]u16 = undefined; + for (expected_u8_bytes, 0..) 
|byte, i| { + buf[i] = byte; + } + // surrogate pair that is now orphaned + buf[255] = 0xD801; + break :blk buf; + }; + try expectNameOrOrdinal( + NameOrOrdinal{ .name = &expected }, + try NameOrOrdinal.fromString(allocator, .{ + .slice = "00614982008907933748980730280674788429543776231864944218790698304852300002973622122844631429099469274282385299397783838528qffL7ShnSIETg0qkLr1UYpbtuv1PMFQRRa0VjDG354GQedJmUPgpp1w1ExVnTzVEiz6K3iPqM1AWGeYALmeODyvEZGOD3MfmGey8fnR4jUeTtB1PzdeWsNDrGzuA8Snxp3NGO𐐷", + .code_page = .utf8, + }), + ); + } +} + +test "NameOrOrdinal code page awareness" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + const allocator = arena.allocator(); + + try expectNameOrOrdinal( + NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("��𐐷") }, + try NameOrOrdinal.fromString(allocator, .{ + .slice = "\xF0\x80\x80𐐷", + .code_page = .utf8, + }), + ); + try expectNameOrOrdinal( + // The UTF-8 representation of 𐐷 is 0xF0 0x90 0x90 0xB7. In order to provide valid + // UTF-8 to utf8ToUtf16LeStringLiteral, it uses the UTF-8 representation of the codepoint + // which is 0xC2 0x90. 
The code units in the expected UTF-16 string are: + // { 0x00F0, 0x20AC, 0x20AC, 0x00F0, 0x0090, 0x0090, 0x00B7 } + NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("ð€€ð\xC2\x90\xC2\x90·") }, + try NameOrOrdinal.fromString(allocator, .{ + .slice = "\xF0\x80\x80𐐷", + .code_page = .windows1252, + }), + ); +} + +/// https://learn.microsoft.com/en-us/windows/win32/api/winuser/ns-winuser-accel#members +/// https://devblogs.microsoft.com/oldnewthing/20070316-00/?p=27593 +pub const AcceleratorModifiers = struct { + value: u8 = 0, + explicit_ascii_or_virtkey: bool = false, + + pub const ASCII = 0; + pub const VIRTKEY = 1; + pub const NOINVERT = 1 << 1; + pub const SHIFT = 1 << 2; + pub const CONTROL = 1 << 3; + pub const ALT = 1 << 4; + /// Marker for the last accelerator in an accelerator table + pub const last_accelerator_in_table = 1 << 7; + + pub fn apply(self: *AcceleratorModifiers, modifier: rc.AcceleratorTypeAndOptions) void { + if (modifier == .ascii or modifier == .virtkey) self.explicit_ascii_or_virtkey = true; + self.value |= modifierValue(modifier); + } + + pub fn isSet(self: AcceleratorModifiers, modifier: rc.AcceleratorTypeAndOptions) bool { + // ASCII is set whenever VIRTKEY is not + if (modifier == .ascii) return self.value & modifierValue(.virtkey) == 0; + return self.value & modifierValue(modifier) != 0; + } + + fn modifierValue(modifier: rc.AcceleratorTypeAndOptions) u8 { + return switch (modifier) { + .ascii => ASCII, + .virtkey => VIRTKEY, + .noinvert => NOINVERT, + .shift => SHIFT, + .control => CONTROL, + .alt => ALT, + }; + } + + pub fn markLast(self: *AcceleratorModifiers) void { + self.value |= last_accelerator_in_table; + } +}; + +const AcceleratorKeyCodepointTranslator = struct { + string_type: literals.StringType, + + pub fn translate(self: @This(), maybe_parsed: ?literals.IterativeStringParser.ParsedCodepoint) ?u21 { + const parsed = maybe_parsed orelse return null; + if (parsed.codepoint == Codepoint.invalid) return 0xFFFD; + if 
(parsed.from_escaped_integer and self.string_type == .ascii) { + return windows1252.toCodepoint(@intCast(parsed.codepoint)); + } + return parsed.codepoint; + } +}; + +pub const ParseAcceleratorKeyStringError = error{ EmptyAccelerator, AcceleratorTooLong, InvalidControlCharacter, ControlCharacterOutOfRange }; + +/// Expects bytes to be the full bytes of a string literal token (e.g. including the "" or L""). +pub fn parseAcceleratorKeyString(bytes: SourceBytes, is_virt: bool, options: literals.StringParseOptions) (ParseAcceleratorKeyStringError || Allocator.Error)!u16 { + if (bytes.slice.len == 0) { + return error.EmptyAccelerator; + } + + var parser = literals.IterativeStringParser.init(bytes, options); + var translator = AcceleratorKeyCodepointTranslator{ .string_type = parser.declared_string_type }; + + const first_codepoint = translator.translate(try parser.next()) orelse return error.EmptyAccelerator; + // 0 is treated as a terminator, so this is equivalent to an empty string + if (first_codepoint == 0) return error.EmptyAccelerator; + + if (first_codepoint == '^') { + // Note: Emitting this warning unconditonally whenever ^ is the first character + // matches the Win32 RC behavior, but it's questionable whether or not + // the warning should be emitted for ^^ since that results in the ASCII + // character ^ being written to the .res. + if (is_virt and options.diagnostics != null) { + try options.diagnostics.?.diagnostics.append(.{ + .err = .ascii_character_not_equivalent_to_virtual_key_code, + .type = .warning, + .token = options.diagnostics.?.token, + }); + } + + const c = translator.translate(try parser.next()) orelse return error.InvalidControlCharacter; + switch (c) { + '^' => return '^', // special case + 'a'...'z', 'A'...'Z' => return std.ascii.toUpper(@intCast(c)) - 0x40, + // Note: The Windows RC compiler allows more than just A-Z, but what it allows + // seems to be tied to some sort of Unicode-aware 'is character' function or something. 
+ // The full list of codepoints that trigger an out-of-range error can be found here: + // https://gist.github.com/squeek502/2e9d0a4728a83eed074ad9785a209fd0 + // For codepoints >= 0x80 that don't trigger the error, the Windows RC compiler takes the + // codepoint and does the `- 0x40` transformation as if it were A-Z which couldn't lead + // to anything useable, so there's no point in emulating that behavior--erroring for + // all non-[a-zA-Z] makes much more sense and is what was probably intended by the + // Windows RC compiler. + else => return error.ControlCharacterOutOfRange, + } + @compileError("this should be unreachable"); + } + + const second_codepoint = translator.translate(try parser.next()); + + var result: u32 = initial_value: { + if (first_codepoint >= 0x10000) { + if (second_codepoint != null and second_codepoint.? != 0) return error.AcceleratorTooLong; + // No idea why it works this way, but this seems to match the Windows RC + // behavior for codepoints >= 0x10000 + const low = @as(u16, @intCast(first_codepoint & 0x3FF)) + 0xDC00; + const extra = (first_codepoint - 0x10000) / 0x400; + break :initial_value low + extra * 0x100; + } + break :initial_value first_codepoint; + }; + + // 0 is treated as a terminator + if (second_codepoint != null and second_codepoint.? == 0) return @truncate(result); + + const third_codepoint = translator.translate(try parser.next()); + // 0 is treated as a terminator, so a 0 in the third position is fine but + // anything else is too many codepoints for an accelerator + if (third_codepoint != null and third_codepoint.? 
!= 0) return error.AcceleratorTooLong; + + if (second_codepoint) |c| { + if (c >= 0x10000) return error.AcceleratorTooLong; + result <<= 8; + result += c; + } else if (is_virt) { + switch (result) { + 'a'...'z' => result -= 0x20, // toUpper + else => {}, + } + } + return @truncate(result); +} + +test "accelerator keys" { + try std.testing.expectEqual(@as(u16, 1), try parseAcceleratorKeyString( + .{ .slice = "\"^a\"", .code_page = .windows1252 }, + false, + .{}, + )); + try std.testing.expectEqual(@as(u16, 1), try parseAcceleratorKeyString( + .{ .slice = "\"^A\"", .code_page = .windows1252 }, + false, + .{}, + )); + try std.testing.expectEqual(@as(u16, 26), try parseAcceleratorKeyString( + .{ .slice = "\"^Z\"", .code_page = .windows1252 }, + false, + .{}, + )); + try std.testing.expectEqual(@as(u16, '^'), try parseAcceleratorKeyString( + .{ .slice = "\"^^\"", .code_page = .windows1252 }, + false, + .{}, + )); + + try std.testing.expectEqual(@as(u16, 'a'), try parseAcceleratorKeyString( + .{ .slice = "\"a\"", .code_page = .windows1252 }, + false, + .{}, + )); + try std.testing.expectEqual(@as(u16, 0x6162), try parseAcceleratorKeyString( + .{ .slice = "\"ab\"", .code_page = .windows1252 }, + false, + .{}, + )); + + try std.testing.expectEqual(@as(u16, 'C'), try parseAcceleratorKeyString( + .{ .slice = "\"c\"", .code_page = .windows1252 }, + true, + .{}, + )); + try std.testing.expectEqual(@as(u16, 0x6363), try parseAcceleratorKeyString( + .{ .slice = "\"cc\"", .code_page = .windows1252 }, + true, + .{}, + )); + + // \x00 or any escape that evaluates to zero acts as a terminator, everything past it + // is ignored + try std.testing.expectEqual(@as(u16, 'a'), try parseAcceleratorKeyString( + .{ .slice = "\"a\\0bcdef\"", .code_page = .windows1252 }, + false, + .{}, + )); + + // \x80 is € in Windows-1252, which is Unicode codepoint 20AC + try std.testing.expectEqual(@as(u16, 0x20AC), try parseAcceleratorKeyString( + .{ .slice = "\"\x80\"", .code_page = .windows1252 }, + 
false, + .{}, + )); + // This depends on the code page, though, with codepage 65001, \x80 + // on its own is invalid UTF-8 so it gets converted to the replacement character + try std.testing.expectEqual(@as(u16, 0xFFFD), try parseAcceleratorKeyString( + .{ .slice = "\"\x80\"", .code_page = .utf8 }, + false, + .{}, + )); + try std.testing.expectEqual(@as(u16, 0xCCAC), try parseAcceleratorKeyString( + .{ .slice = "\"\x80\x80\"", .code_page = .windows1252 }, + false, + .{}, + )); + // This also behaves the same with escaped characters + try std.testing.expectEqual(@as(u16, 0x20AC), try parseAcceleratorKeyString( + .{ .slice = "\"\\x80\"", .code_page = .windows1252 }, + false, + .{}, + )); + // Even with utf8 code page + try std.testing.expectEqual(@as(u16, 0x20AC), try parseAcceleratorKeyString( + .{ .slice = "\"\\x80\"", .code_page = .utf8 }, + false, + .{}, + )); + try std.testing.expectEqual(@as(u16, 0xCCAC), try parseAcceleratorKeyString( + .{ .slice = "\"\\x80\\x80\"", .code_page = .windows1252 }, + false, + .{}, + )); + // Wide string with the actual characters behaves like the ASCII string version + try std.testing.expectEqual(@as(u16, 0xCCAC), try parseAcceleratorKeyString( + .{ .slice = "L\"\x80\x80\"", .code_page = .windows1252 }, + false, + .{}, + )); + // But wide string with escapes behaves differently + try std.testing.expectEqual(@as(u16, 0x8080), try parseAcceleratorKeyString( + .{ .slice = "L\"\\x80\\x80\"", .code_page = .windows1252 }, + false, + .{}, + )); + // and invalid escapes within wide strings get skipped + try std.testing.expectEqual(@as(u16, 'z'), try parseAcceleratorKeyString( + .{ .slice = "L\"\\Hz\"", .code_page = .windows1252 }, + false, + .{}, + )); + + // any non-A-Z codepoints are illegal + try std.testing.expectError(error.ControlCharacterOutOfRange, parseAcceleratorKeyString( + .{ .slice = "\"^\x83\"", .code_page = .windows1252 }, + false, + .{}, + )); + try std.testing.expectError(error.ControlCharacterOutOfRange, 
parseAcceleratorKeyString( + .{ .slice = "\"^1\"", .code_page = .windows1252 }, + false, + .{}, + )); + try std.testing.expectError(error.InvalidControlCharacter, parseAcceleratorKeyString( + .{ .slice = "\"^\"", .code_page = .windows1252 }, + false, + .{}, + )); + try std.testing.expectError(error.EmptyAccelerator, parseAcceleratorKeyString( + .{ .slice = "\"\"", .code_page = .windows1252 }, + false, + .{}, + )); + try std.testing.expectError(error.AcceleratorTooLong, parseAcceleratorKeyString( + .{ .slice = "\"hello\"", .code_page = .windows1252 }, + false, + .{}, + )); + try std.testing.expectError(error.ControlCharacterOutOfRange, parseAcceleratorKeyString( + .{ .slice = "\"^\x80\"", .code_page = .windows1252 }, + false, + .{}, + )); + + // Invalid UTF-8 gets converted to 0xFFFD, multiple invalids get shifted and added together + // The behavior is the same for ascii and wide strings + try std.testing.expectEqual(@as(u16, 0xFCFD), try parseAcceleratorKeyString( + .{ .slice = "\"\x80\x80\"", .code_page = .utf8 }, + false, + .{}, + )); + try std.testing.expectEqual(@as(u16, 0xFCFD), try parseAcceleratorKeyString( + .{ .slice = "L\"\x80\x80\"", .code_page = .utf8 }, + false, + .{}, + )); + + // Codepoints >= 0x10000 + try std.testing.expectEqual(@as(u16, 0xDD00), try parseAcceleratorKeyString( + .{ .slice = "\"\xF0\x90\x84\x80\"", .code_page = .utf8 }, + false, + .{}, + )); + try std.testing.expectEqual(@as(u16, 0xDD00), try parseAcceleratorKeyString( + .{ .slice = "L\"\xF0\x90\x84\x80\"", .code_page = .utf8 }, + false, + .{}, + )); + try std.testing.expectEqual(@as(u16, 0x9C01), try parseAcceleratorKeyString( + .{ .slice = "\"\xF4\x80\x80\x81\"", .code_page = .utf8 }, + false, + .{}, + )); + // anything before or after a codepoint >= 0x10000 causes an error + try std.testing.expectError(error.AcceleratorTooLong, parseAcceleratorKeyString( + .{ .slice = "\"a\xF0\x90\x80\x80\"", .code_page = .utf8 }, + false, + .{}, + )); + try 
/// Computes the 16-bit ordinal obtained by treating every code unit of the
/// input as if it were a decimal digit (`unit - '0'`), with no validation.
/// All arithmetic wraps, so arbitrary characters still yield a value.
pub const ForcedOrdinal = struct {
    /// Accumulates an ordinal from code-page-encoded source bytes.
    ///
    /// Codepoints in the range 0x10000...0x10FFFF are split into their
    /// UTF-16 surrogate halves and each half contributes one "digit".
    /// `Codepoint.invalid` contributes 0xFFFD. The accumulator is truncated
    /// to 16 bits on return.
    pub fn fromBytes(bytes: SourceBytes) u16 {
        var ordinal: u21 = 0;
        var offset: usize = 0;
        while (bytes.code_page.codepointAt(offset, bytes.slice)) |cp| : (offset += cp.byte_len) {
            // Multiplying a zero accumulator by 10 is a no-op, so the
            // multiply can be applied unconditionally.
            switch (cp.value) {
                0x10000...0x10FFFF => {
                    const high_surrogate = @as(u16, @intCast((cp.value - 0x10000) >> 10)) + 0xD800;
                    const low_surrogate = @as(u16, @intCast(cp.value & 0x3FF)) + 0xDC00;
                    ordinal = ordinal *% 10 +% (high_surrogate -% '0');
                    ordinal = ordinal *% 10 +% (low_surrogate -% '0');
                },
                Codepoint.invalid => ordinal = ordinal *% 10 +% (0xFFFD - '0'),
                else => ordinal = ordinal *% 10 +% (cp.value -% '0'),
            }
        }
        return @truncate(ordinal);
    }

    /// Same accumulation as `fromBytes`, but over UTF-16 code units; every
    /// code unit contributes one wrapping "digit".
    pub fn fromUtf16Le(utf16: [:0]const u16) u16 {
        var ordinal: u16 = 0;
        var idx: usize = 0;
        while (idx < utf16.len) : (idx += 1) {
            ordinal = ordinal *% 10 +% (utf16[idx] -% '0');
        }
        return ordinal;
    }
};
/// In-memory form of the VS_FIXEDFILEINFO structure.
/// https://learn.microsoft.com/en-us/windows/win32/api/verrsrc/ns-verrsrc-vs_fixedfileinfo
pub const FixedFileInfo = struct {
    file_version: Version = .{},
    product_version: Version = .{},
    file_flags_mask: u32 = 0,
    file_flags: u32 = 0,
    file_os: u32 = 0,
    file_type: u32 = 0,
    file_subtype: u32 = 0,
    file_date: Version = .{}, // TODO: I think this is always all zeroes?

    pub const signature = 0xFEEF04BD;
    // Note: This corresponds to a version of 1.0
    pub const version = 0x00010000;

    /// Serialized size in bytes: the 13 u32 values emitted by `write`.
    pub const byte_len = 0x34;
    pub const key = std.unicode.utf8ToUtf16LeStringLiteral("VS_VERSION_INFO");

    /// A four-part version number, stored most significant part first.
    pub const Version = struct {
        parts: [4]u16 = [_]u16{0} ** 4,

        /// Packs `parts[0]` (high 16 bits) and `parts[1]` (low 16 bits).
        pub fn mostSignificantCombinedParts(self: Version) u32 {
            const high: u32 = self.parts[0];
            const low: u32 = self.parts[1];
            return (high << 16) | low;
        }

        /// Packs `parts[2]` (high 16 bits) and `parts[3]` (low 16 bits).
        pub fn leastSignificantCombinedParts(self: Version) u32 {
            const high: u32 = self.parts[2];
            const low: u32 = self.parts[3];
            return (high << 16) | low;
        }
    };

    /// Writes the structure to `writer` as consecutive little-endian u32
    /// values, in the field order listed below. Any writer error is
    /// propagated immediately, leaving the earlier values written.
    pub fn write(self: FixedFileInfo, writer: anytype) !void {
        const dwords = [_]u32{
            signature,
            version,
            self.file_version.mostSignificantCombinedParts(),
            self.file_version.leastSignificantCombinedParts(),
            self.product_version.mostSignificantCombinedParts(),
            self.product_version.leastSignificantCombinedParts(),
            self.file_flags_mask,
            self.file_flags,
            self.file_os,
            self.file_type,
            self.file_subtype,
            self.file_date.mostSignificantCombinedParts(),
            self.file_date.leastSignificantCombinedParts(),
        };
        for (dwords) |dword| {
            try writer.writeIntLittle(u32, dword);
        }
    }
};
/// Window Styles from WinUser.h
pub const WS = struct {
    // Individual style bits, from the most significant bit downwards.
    // OVERLAPPED is the absence of any bit.
    pub const OVERLAPPED: u32 = 0x0000_0000;
    pub const POPUP: u32 = 0x8000_0000;
    pub const CHILD: u32 = 0x4000_0000;
    pub const MINIMIZE: u32 = 0x2000_0000;
    pub const VISIBLE: u32 = 0x1000_0000;
    pub const DISABLED: u32 = 0x0800_0000;
    pub const CLIPSIBLINGS: u32 = 0x0400_0000;
    pub const CLIPCHILDREN: u32 = 0x0200_0000;
    pub const MAXIMIZE: u32 = 0x0100_0000;
    pub const BORDER: u32 = 0x0080_0000;
    pub const DLGFRAME: u32 = 0x0040_0000;
    pub const VSCROLL: u32 = 0x0020_0000;
    pub const HSCROLL: u32 = 0x0010_0000;
    pub const SYSMENU: u32 = 0x0008_0000;
    pub const THICKFRAME: u32 = 0x0004_0000;
    pub const GROUP: u32 = 0x0002_0000;
    pub const TABSTOP: u32 = 0x0001_0000;

    // These two share their values with GROUP and TABSTOP respectively.
    pub const MINIMIZEBOX: u32 = 0x0002_0000;
    pub const MAXIMIZEBOX: u32 = 0x0001_0000;

    // Combination of BORDER and DLGFRAME.
    pub const CAPTION: u32 = BORDER | DLGFRAME;

    // Alternate names for styles defined above.
    pub const TILED: u32 = OVERLAPPED;
    pub const ICONIC: u32 = MINIMIZE;
    pub const SIZEBOX: u32 = THICKFRAME;
    pub const TILEDWINDOW: u32 = OVERLAPPEDWINDOW;

    // Common Window Styles
    pub const OVERLAPPEDWINDOW: u32 = OVERLAPPED | CAPTION | SYSMENU | THICKFRAME | MINIMIZEBOX | MAXIMIZEBOX;
    pub const POPUPWINDOW: u32 = POPUP | BORDER | SYSMENU;
    pub const CHILDWINDOW: u32 = CHILD;
};
struct { + result: []u8, + mappings: SourceMappings, +}; + +const CurrentMapping = struct { + line_num: usize = 1, + filename: std.ArrayListUnmanaged(u8) = .{}, + pending: bool = true, + ignore_contents: bool = false, +}; + +pub const ParseAndRemoveLineCommandsOptions = struct { + initial_filename: ?[]const u8 = null, +}; + +/// Parses and removes #line commands as well as all source code that is within a file +/// with .c or .h extensions. +/// +/// > RC treats files with the .c and .h extensions in a special manner. It +/// > assumes that a file with one of these extensions does not contain +/// > resources. If a file has the .c or .h file name extension, RC ignores all +/// > lines in the file except the preprocessor directives. Therefore, to +/// > include a file that contains resources in another resource script, give +/// > the file to be included an extension other than .c or .h. +/// from https://learn.microsoft.com/en-us/windows/win32/menurc/preprocessor-directives +/// +/// Returns a slice of `buf` with the aforementioned stuff removed as well as a mapping +/// between the lines and their corresponding lines in their original files. +/// +/// `buf` must be at least as long as `source` +/// In-place transformation is supported (i.e. `source` and `buf` can be the same slice) +/// +/// If `options.initial_filename` is provided, that filename is guaranteed to be +/// within the `mappings.files` table and `root_filename_offset` will be set appropriately. 
+pub fn parseAndRemoveLineCommands(allocator: Allocator, source: []const u8, buf: []u8, options: ParseAndRemoveLineCommandsOptions) !ParseLineCommandsResult { + var parse_result = ParseLineCommandsResult{ + .result = undefined, + .mappings = .{}, + }; + errdefer parse_result.mappings.deinit(allocator); + + var current_mapping: CurrentMapping = .{}; + defer current_mapping.filename.deinit(allocator); + + if (options.initial_filename) |initial_filename| { + try current_mapping.filename.appendSlice(allocator, initial_filename); + parse_result.mappings.root_filename_offset = try parse_result.mappings.files.put(allocator, initial_filename); + } + + std.debug.assert(buf.len >= source.len); + var result = UncheckedSliceWriter{ .slice = buf }; + const State = enum { + line_start, + preprocessor, + non_preprocessor, + }; + var state: State = .line_start; + var index: usize = 0; + var pending_start: ?usize = null; + var preprocessor_start: usize = 0; + var line_number: usize = 1; + while (index < source.len) : (index += 1) { + const c = source[index]; + switch (state) { + .line_start => switch (c) { + '#' => { + preprocessor_start = index; + state = .preprocessor; + if (pending_start == null) { + pending_start = index; + } + }, + '\r', '\n' => { + const is_crlf = formsLineEndingPair(source, c, index + 1); + try handleLineEnd(allocator, line_number, &parse_result.mappings, ¤t_mapping); + if (!current_mapping.ignore_contents) { + result.write(c); + if (is_crlf) result.write(source[index + 1]); + line_number += 1; + } + if (is_crlf) index += 1; + pending_start = null; + }, + ' ', '\t', '\x0b', '\x0c' => { + if (pending_start == null) { + pending_start = index; + } + }, + else => { + state = .non_preprocessor; + if (pending_start != null) { + if (!current_mapping.ignore_contents) { + result.writeSlice(source[pending_start.? .. 
index + 1]); + } + pending_start = null; + continue; + } + if (!current_mapping.ignore_contents) { + result.write(c); + } + }, + }, + .preprocessor => switch (c) { + '\r', '\n' => { + // Now that we have the full line we can decide what to do with it + const preprocessor_str = source[preprocessor_start..index]; + const is_crlf = formsLineEndingPair(source, c, index + 1); + if (std.mem.startsWith(u8, preprocessor_str, "#line")) { + try handleLineCommand(allocator, preprocessor_str, ¤t_mapping); + } else { + try handleLineEnd(allocator, line_number, &parse_result.mappings, ¤t_mapping); + if (!current_mapping.ignore_contents) { + const line_ending_len: usize = if (is_crlf) 2 else 1; + result.writeSlice(source[pending_start.? .. index + line_ending_len]); + line_number += 1; + } + } + if (is_crlf) index += 1; + state = .line_start; + pending_start = null; + }, + else => {}, + }, + .non_preprocessor => switch (c) { + '\r', '\n' => { + const is_crlf = formsLineEndingPair(source, c, index + 1); + try handleLineEnd(allocator, line_number, &parse_result.mappings, ¤t_mapping); + if (!current_mapping.ignore_contents) { + result.write(c); + if (is_crlf) result.write(source[index + 1]); + line_number += 1; + } + if (is_crlf) index += 1; + state = .line_start; + pending_start = null; + }, + else => { + if (!current_mapping.ignore_contents) { + result.write(c); + } + }, + }, + } + } else { + switch (state) { + .line_start => {}, + .non_preprocessor => { + try handleLineEnd(allocator, line_number, &parse_result.mappings, ¤t_mapping); + }, + .preprocessor => { + // Now that we have the full line we can decide what to do with it + const preprocessor_str = source[preprocessor_start..index]; + if (std.mem.startsWith(u8, preprocessor_str, "#line")) { + try handleLineCommand(allocator, preprocessor_str, ¤t_mapping); + } else { + try handleLineEnd(allocator, line_number, &parse_result.mappings, ¤t_mapping); + if (!current_mapping.ignore_contents) { + 
result.writeSlice(source[pending_start.?..index]); + } + } + }, + } + } + + parse_result.result = result.getWritten(); + + // Remove whitespace from the end of the result. This avoids issues when the + // preprocessor adds a newline to the end of the file, since then the + // post-preprocessed source could have more lines than the corresponding input source and + // the inserted line can't be mapped to any lines in the original file. + // There's no way that whitespace at the end of a file can affect the parsing + // of the RC script so this is okay to do unconditionally. + // TODO: There might be a better way around this + while (parse_result.result.len > 0 and std.ascii.isWhitespace(parse_result.result[parse_result.result.len - 1])) { + parse_result.result.len -= 1; + } + + // If there have been no line mappings at all, then we're dealing with an empty file. + // In this case, we want to fake a line mapping just so that we return something + // that is useable in the same way that a non-empty mapping would be. 
/// Reports whether the byte at `next_index` completes a two-byte line ending
/// together with `line_ending`, i.e. it is a `\r` or `\n` that differs from
/// `line_ending`. Returns false when `next_index` is past the end of `source`.
///
/// Note: This should function the same as lex.LineHandler.currentIndexFormsLineEndingPair
pub fn formsLineEndingPair(source: []const u8, line_ending: u8, next_index: usize) bool {
    if (next_index >= source.len) return false;

    return switch (source[next_index]) {
        // \n\n and \r\r are two separate line endings, not a pair
        '\r', '\n' => |following| following != line_ending,
        else => false,
    };
}
/// Allocating convenience wrapper around `parseAndRemoveLineCommands`.
///
/// Allocates a scratch buffer of `source.len` bytes, runs the transformation
/// into it, and shrinks the buffer to the length of the result.
///
/// On success the caller owns both parts of the returned value: `result`
/// must be freed with `allocator`, and `mappings` must be released with
/// `mappings.deinit(allocator)`. On error nothing is leaked.
pub fn parseAndRemoveLineCommandsAlloc(allocator: Allocator, source: []const u8, options: ParseAndRemoveLineCommandsOptions) !ParseLineCommandsResult {
    const buf = try allocator.alloc(u8, source.len);
    errdefer allocator.free(buf);

    var result = try parseAndRemoveLineCommands(allocator, source, buf, options);
    // From here on the mappings are owned by this function; without this,
    // a failing `realloc` below would free `buf` but leak the mappings.
    errdefer result.mappings.deinit(allocator);

    // `result.result` is a prefix of `buf`, so shrinking `buf` to its length
    // keeps exactly the written bytes.
    result.result = try allocator.realloc(buf, result.result.len);
    return result;
}
/// Decodes the body of a C-style string literal into an owned byte string.
///
/// Caveats:
/// - `str` must not include the surrounding double quotes
/// - `str` cannot contain newlines or carriage returns
/// - Hex (`\x`) and octal escapes are limited to values that fit in a u8
/// - `\u` requires exactly 4 hex digits and `\U` exactly 8
/// - No handling/support for L, u, or U prefixed strings
/// - Nothing else is assumed about the validity of `str`; any problem
///   (unescaped double quote, unknown or incomplete escape, out-of-range
///   value) yields error.InvalidString
///
/// `\u`/`\U` escapes are emitted as UTF-8; all other bytes are copied or
/// decoded one-to-one. The caller owns the returned slice.
fn parseFilename(allocator: Allocator, str: []const u8) error{ OutOfMemory, InvalidString }![]u8 {
    // No escape sequence decodes to more bytes than it occupies in the
    // input, so `str.len` bounds the output and appends cannot overflow.
    var decoded = try std.ArrayList(u8).initCapacity(allocator, str.len);
    errdefer decoded.deinit();

    var pos: usize = 0;
    while (pos < str.len) {
        const byte = str[pos];
        pos += 1;

        if (byte == '"') return error.InvalidString;
        if (byte != '\\') {
            decoded.appendAssumeCapacity(byte);
            continue;
        }

        // A lone trailing backslash is not a valid escape.
        if (pos == str.len) return error.InvalidString;
        const selector = str[pos];
        pos += 1;

        switch (selector) {
            '\'', '"', '\\', '?' => decoded.appendAssumeCapacity(selector),
            'n' => decoded.appendAssumeCapacity('\n'),
            'r' => decoded.appendAssumeCapacity('\r'),
            't' => decoded.appendAssumeCapacity('\t'),
            'a' => decoded.appendAssumeCapacity('\x07'),
            'b' => decoded.appendAssumeCapacity('\x08'),
            'e' => decoded.appendAssumeCapacity('\x1b'), // non-standard
            'f' => decoded.appendAssumeCapacity('\x0c'),
            'v' => decoded.appendAssumeCapacity('\x0b'),
            'x' => {
                // One or more hex digits; consumes digits greedily and fails
                // as soon as the value no longer fits in a u8.
                const digits_start = pos;
                var value: u8 = 0;
                while (pos < str.len) : (pos += 1) {
                    const digit = std.fmt.charToDigit(str[pos], 16) catch break;
                    value = std.math.mul(u8, value, 16) catch return error.InvalidString;
                    value = std.math.add(u8, value, digit) catch return error.InvalidString;
                }
                if (pos == digits_start) return error.InvalidString;
                decoded.appendAssumeCapacity(value);
            },
            '0'...'7' => {
                // One to three octal digits, the first being `selector`.
                var value: u8 = selector - '0';
                var digit_count: usize = 1;
                while (digit_count < 3 and pos < str.len) : ({
                    pos += 1;
                    digit_count += 1;
                }) {
                    const digit = std.fmt.charToDigit(str[pos], 8) catch break;
                    value = std.math.mul(u8, value, 8) catch return error.InvalidString;
                    value = std.math.add(u8, value, digit) catch return error.InvalidString;
                }
                decoded.appendAssumeCapacity(value);
            },
            'u', 'U' => {
                // Requires a fixed number of valid hex digits
                const digits_needed: usize = if (selector == 'u') 4 else 8;
                if (str.len - pos < digits_needed) return error.InvalidString;

                var codepoint: u21 = 0;
                for (str[pos..][0..digits_needed]) |hex| {
                    const digit = std.fmt.charToDigit(hex, 16) catch return error.InvalidString;
                    codepoint = std.math.mul(u21, codepoint, 16) catch return error.InvalidString;
                    codepoint = std.math.add(u21, codepoint, digit) catch return error.InvalidString;
                }
                pos += digits_needed;

                var utf8_buf: [4]u8 = undefined;
                const utf8_len = std.unicode.utf8Encode(codepoint, &utf8_buf) catch return error.InvalidString;
                decoded.appendSliceAssumeCapacity(utf8_buf[0..utf8_len]);
            },
            else => return error.InvalidString,
        }
    }

    return decoded.toOwnedSlice();
}
/// Maps each line of the post-processed source back to a span of lines in
/// the file it originally came from.
pub const SourceMappings = struct {
    /// line number -> span where the index is (line number - 1)
    mapping: std.ArrayListUnmanaged(SourceSpan) = .{},
    files: StringTable = .{},
    /// The default assumes that the first filename added is the root file.
    /// The value should be set to the correct offset if that assumption does not hold.
    root_filename_offset: u32 = 0,

    pub const SourceSpan = struct {
        start_line: usize,
        end_line: usize,
        /// Offset of the filename within `SourceMappings.files`.
        filename_offset: u32,
    };

    /// Releases the file table and the line mapping.
    pub fn deinit(self: *SourceMappings, allocator: Allocator) void {
        self.files.deinit(allocator);
        self.mapping.deinit(allocator);
    }

    /// Stores `span` as the mapping for `line_num`, growing the mapping if
    /// `line_num` is past its current end. Existing entries for other lines
    /// are never discarded.
    ///
    /// Note: `line_num` is 1-indexed
    pub fn set(self: *SourceMappings, allocator: Allocator, line_num: usize, span: SourceSpan) !void {
        const ptr = try self.expandAndGet(allocator, line_num);
        ptr.* = span;
    }

    /// Returns true if `line_num` is a valid 1-indexed line of the mapping.
    pub fn has(self: *SourceMappings, line_num: usize) bool {
        // Line numbers are 1-indexed, so 0 never refers to a mapped line.
        return line_num != 0 and line_num <= self.mapping.items.len;
    }

    /// Note: `line_num` is 1-indexed
    pub fn get(self: SourceMappings, line_num: usize) SourceSpan {
        return self.mapping.items[line_num - 1];
    }

    /// Note: `line_num` is 1-indexed
    pub fn getPtr(self: SourceMappings, line_num: usize) *SourceSpan {
        return &self.mapping.items[line_num - 1];
    }

    /// Expands the number of lines in the mapping to include the requested
    /// line number (if necessary) and returns a pointer to the value at that
    /// line number. Entries created by the expansion are undefined until
    /// they are written.
    ///
    /// Note: `line_num` is 1-indexed
    pub fn expandAndGet(self: *SourceMappings, allocator: Allocator, line_num: usize) !*SourceSpan {
        // `resize` sets the length unconditionally, so only call it when
        // growing; otherwise asking for an earlier line would truncate the
        // mapping and drop every later line.
        if (line_num > self.mapping.items.len) {
            try self.mapping.resize(allocator, line_num);
        }
        return &self.mapping.items[line_num - 1];
    }

    /// Merges the `num_following_lines_to_collapse` lines after `line_num`
    /// into `line_num`: its `end_line` becomes the `end_line` of the last
    /// merged line, and all later entries shift down to close the gap.
    ///
    /// Note: `line_num` is 1-indexed
    pub fn collapse(self: *SourceMappings, line_num: usize, num_following_lines_to_collapse: usize) void {
        std.debug.assert(num_following_lines_to_collapse > 0);

        const span_to_collapse_into = self.getPtr(line_num);
        const last_collapsed_span = self.get(line_num + num_following_lines_to_collapse);
        span_to_collapse_into.end_line = last_collapsed_span.end_line;

        const after_collapsed_start = line_num + num_following_lines_to_collapse;
        const new_num_lines = self.mapping.items.len - num_following_lines_to_collapse;
        std.mem.copy(SourceSpan, self.mapping.items[line_num..new_num_lines], self.mapping.items[after_collapsed_start..]);

        self.mapping.items.len = new_num_lines;
    }

    /// Returns true if the line is from the main/root file (i.e. not a file that has been
    /// `#include`d).
    pub fn isRootFile(self: *SourceMappings, line_num: usize) bool {
        return self.get(line_num).filename_offset == self.root_filename_offset;
    }
};
self.data.appendSliceAssumeCapacity(value); + self.data.appendAssumeCapacity(0); + + result.key_ptr.* = offset; + + return offset; + } + + pub fn get(self: StringTable, offset: u32) []const u8 { + std.debug.assert(offset < self.data.items.len); + return std.mem.sliceTo(@as([*:0]const u8, @ptrCast(self.data.items.ptr + offset)), 0); + } + + pub fn getOffset(self: *StringTable, value: []const u8) ?u32 { + return self.map.getKeyAdapted( + value, + std.hash_map.StringIndexAdapter{ .bytes = &self.data }, + ); + } +}; + +const ExpectedSourceSpan = struct { + start_line: usize, + end_line: usize, + filename: []const u8, +}; + +fn testParseAndRemoveLineCommands( + expected: []const u8, + comptime expected_spans: []const ExpectedSourceSpan, + source: []const u8, + options: ParseAndRemoveLineCommandsOptions, +) !void { + var results = try parseAndRemoveLineCommandsAlloc(std.testing.allocator, source, options); + defer std.testing.allocator.free(results.result); + defer results.mappings.deinit(std.testing.allocator); + + try std.testing.expectEqualStrings(expected, results.result); + + expectEqualMappings(expected_spans, results.mappings) catch |err| { + std.debug.print("\nexpected mappings:\n", .{}); + for (expected_spans, 0..) |span, i| { + const line_num = i + 1; + std.debug.print("{}: {s}:{}-{}\n", .{ line_num, span.filename, span.start_line, span.end_line }); + } + std.debug.print("\nactual mappings:\n", .{}); + for (results.mappings.mapping.items, 0..) |span, i| { + const line_num = i + 1; + const filename = results.mappings.files.get(span.filename_offset); + std.debug.print("{}: {s}:{}-{}\n", .{ line_num, filename, span.start_line, span.end_line }); + } + std.debug.print("\n", .{}); + return err; + }; +} + +fn expectEqualMappings(expected_spans: []const ExpectedSourceSpan, mappings: SourceMappings) !void { + try std.testing.expectEqual(expected_spans.len, mappings.mapping.items.len); + for (expected_spans, 0..) 
|expected_span, i| { + const line_num = i + 1; + const span = mappings.get(line_num); + const filename = mappings.files.get(span.filename_offset); + try std.testing.expectEqual(expected_span.start_line, span.start_line); + try std.testing.expectEqual(expected_span.end_line, span.end_line); + try std.testing.expectEqualStrings(expected_span.filename, filename); + } +} + +test "basic" { + try testParseAndRemoveLineCommands("", &[_]ExpectedSourceSpan{ + .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" }, + }, "#line 1 \"blah.rc\"", .{}); +} + +test "only removes line commands" { + try testParseAndRemoveLineCommands( + \\#pragma code_page(65001) + , &[_]ExpectedSourceSpan{ + .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" }, + }, + \\#line 1 "blah.rc" + \\#pragma code_page(65001) + , .{}); +} + +test "whitespace and line endings" { + try testParseAndRemoveLineCommands("", &[_]ExpectedSourceSpan{ + .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" }, + }, "#line \t 1 \t \"blah.rc\"\r\n", .{}); +} + +test "example" { + try testParseAndRemoveLineCommands( + \\ + \\included RCDATA {"hello"} + , &[_]ExpectedSourceSpan{ + .{ .start_line = 1, .end_line = 1, .filename = "./included.rc" }, + .{ .start_line = 2, .end_line = 2, .filename = "./included.rc" }, + }, + \\#line 1 "rcdata.rc" + \\#line 1 "" + \\#line 1 "" + \\#line 355 "" + \\#line 1 "" + \\#line 1 "" + \\#line 1 "rcdata.rc" + \\#line 1 "./header.h" + \\ + \\ + \\2 RCDATA {"blah"} + \\ + \\ + \\#line 1 "./included.rc" + \\ + \\included RCDATA {"hello"} + \\#line 7 "./header.h" + \\#line 1 "rcdata.rc" + , .{}); +} + +test "CRLF and other line endings" { + try testParseAndRemoveLineCommands( + "hello\r\n#pragma code_page(65001)\r\nworld", + &[_]ExpectedSourceSpan{ + .{ .start_line = 1, .end_line = 1, .filename = "crlf.rc" }, + .{ .start_line = 2, .end_line = 2, .filename = "crlf.rc" }, + .{ .start_line = 3, .end_line = 3, .filename = "crlf.rc" }, + }, + "#line 1 \"crlf.rc\"\r\n#line 1 \"\"\r#line 
1 \"crlf.rc\"\n\rhello\r\n#pragma code_page(65001)\r\nworld\r\n", + .{}, + ); +} + +test "no line commands" { + try testParseAndRemoveLineCommands( + \\1 RCDATA {"blah"} + \\2 RCDATA {"blah"} + , &[_]ExpectedSourceSpan{ + .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" }, + .{ .start_line = 2, .end_line = 2, .filename = "blah.rc" }, + }, + \\1 RCDATA {"blah"} + \\2 RCDATA {"blah"} + , .{ .initial_filename = "blah.rc" }); +} + +test "in place" { + var mut_source = "#line 1 \"blah.rc\"".*; + var result = try parseAndRemoveLineCommands(std.testing.allocator, &mut_source, &mut_source, .{}); + defer result.mappings.deinit(std.testing.allocator); + try std.testing.expectEqualStrings("", result.result); +} diff --git a/src/resinator/utils.zig b/src/resinator/utils.zig new file mode 100644 index 000000000000..a29f068aeaf8 --- /dev/null +++ b/src/resinator/utils.zig @@ -0,0 +1,83 @@ +const std = @import("std"); +const builtin = @import("builtin"); + +/// Like std.io.FixedBufferStream but does no bounds checking +pub const UncheckedSliceWriter = struct { + const Self = @This(); + + pos: usize = 0, + slice: []u8, + + pub fn write(self: *Self, char: u8) void { + self.slice[self.pos] = char; + self.pos += 1; + } + + pub fn writeSlice(self: *Self, slice: []const u8) void { + for (slice) |c| { + self.write(c); + } + } + + pub fn getWritten(self: Self) []u8 { + return self.slice[0..self.pos]; + } +}; + +/// Cross-platform 'std.fs.Dir.openFile' wrapper that will always return IsDir if +/// a directory is attempted to be opened. +/// TODO: Remove once https://github.com/ziglang/zig/issues/5732 is addressed. 
+pub fn openFileNotDir(cwd: std.fs.Dir, path: []const u8, flags: std.fs.File.OpenFlags) std.fs.File.OpenError!std.fs.File { + const file = try cwd.openFile(path, flags); + errdefer file.close(); + // https://github.com/ziglang/zig/issues/5732 + if (builtin.os.tag != .windows) { + const stat = try file.stat(); + + if (stat.kind == .directory) + return error.IsDir; + } + return file; +} + +/// Emulates the Windows implementation of `iswdigit`, but only returns true +/// for the non-ASCII digits that `iswdigit` on Windows would return true for. +pub fn isNonAsciiDigit(c: u21) bool { + return switch (c) { + '²', + '³', + '¹', + '\u{660}'...'\u{669}', + '\u{6F0}'...'\u{6F9}', + '\u{7C0}'...'\u{7C9}', + '\u{966}'...'\u{96F}', + '\u{9E6}'...'\u{9EF}', + '\u{A66}'...'\u{A6F}', + '\u{AE6}'...'\u{AEF}', + '\u{B66}'...'\u{B6F}', + '\u{BE6}'...'\u{BEF}', + '\u{C66}'...'\u{C6F}', + '\u{CE6}'...'\u{CEF}', + '\u{D66}'...'\u{D6F}', + '\u{E50}'...'\u{E59}', + '\u{ED0}'...'\u{ED9}', + '\u{F20}'...'\u{F29}', + '\u{1040}'...'\u{1049}', + '\u{1090}'...'\u{1099}', + '\u{17E0}'...'\u{17E9}', + '\u{1810}'...'\u{1819}', + '\u{1946}'...'\u{194F}', + '\u{19D0}'...'\u{19D9}', + '\u{1B50}'...'\u{1B59}', + '\u{1BB0}'...'\u{1BB9}', + '\u{1C40}'...'\u{1C49}', + '\u{1C50}'...'\u{1C59}', + '\u{A620}'...'\u{A629}', + '\u{A8D0}'...'\u{A8D9}', + '\u{A900}'...'\u{A909}', + '\u{AA50}'...'\u{AA59}', + '\u{FF10}'...'\u{FF19}', + => true, + else => false, + }; +} diff --git a/src/resinator/windows1252.zig b/src/resinator/windows1252.zig new file mode 100644 index 000000000000..81e4dfa4549e --- /dev/null +++ b/src/resinator/windows1252.zig @@ -0,0 +1,588 @@ +const std = @import("std"); + +/// Reads Windows-1252 bytes from `reader` until end of stream and writes them to `writer` as UTF-8. +/// Returns the number of bytes written to `writer`. +pub fn windows1252ToUtf8Stream(writer: anytype, reader: anytype) !usize { + var bytes_written: usize = 0; + // toCodepoint returns a u16, which needs at most 3 bytes in UTF-8. + var utf8_buf: [3]u8 = undefined; + while (true) { + const c = reader.readByte() catch |err| switch (err) { + error.EndOfStream => return bytes_written, + else => |e| return e, + }; + const codepoint = toCodepoint(c); 
+ if (codepoint <= 0x7F) { + try writer.writeByte(c); + bytes_written += 1; + } else { + const utf8_len = std.unicode.utf8Encode(codepoint, &utf8_buf) catch unreachable; + try writer.writeAll(utf8_buf[0..utf8_len]); + bytes_written += utf8_len; + } + } +} + +/// Converts `win1252_str` to UTF-16, producing one code unit per input byte. +/// Caller owns the returned 0-terminated slice and must free it with `allocator`. +pub fn windows1252ToUtf16AllocZ(allocator: std.mem.Allocator, win1252_str: []const u8) ![:0]u16 { + // Guaranteed to need exactly the same number of code units as Windows-1252 bytes + const utf16_slice = try allocator.allocSentinel(u16, win1252_str.len, 0); + errdefer allocator.free(utf16_slice); + for (win1252_str, 0..) |c, i| { + utf16_slice[i] = toCodepoint(c); + } + return utf16_slice; +} + +/// https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/bestfit1252.txt +pub fn toCodepoint(c: u8) u16 { + return switch (c) { + 0x80 => 0x20ac, // Euro Sign + 0x82 => 0x201a, // Single Low-9 Quotation Mark + 0x83 => 0x0192, // Latin Small Letter F With Hook + 0x84 => 0x201e, // Double Low-9 Quotation Mark + 0x85 => 0x2026, // Horizontal Ellipsis + 0x86 => 0x2020, // Dagger + 0x87 => 0x2021, // Double Dagger + 0x88 => 0x02c6, // Modifier Letter Circumflex Accent + 0x89 => 0x2030, // Per Mille Sign + 0x8a => 0x0160, // Latin Capital Letter S With Caron + 0x8b => 0x2039, // Single Left-Pointing Angle Quotation Mark + 0x8c => 0x0152, // Latin Capital Ligature Oe + 0x8e => 0x017d, // Latin Capital Letter Z With Caron + 0x91 => 0x2018, // Left Single Quotation Mark + 0x92 => 0x2019, // Right Single Quotation Mark + 0x93 => 0x201c, // Left Double Quotation Mark + 0x94 => 0x201d, // Right Double Quotation Mark + 0x95 => 0x2022, // Bullet + 0x96 => 0x2013, // En Dash + 0x97 => 0x2014, // Em Dash + 0x98 => 0x02dc, // Small Tilde + 0x99 => 0x2122, // Trade Mark Sign + 0x9a => 0x0161, // Latin Small Letter S With Caron + 0x9b => 0x203a, // Single Right-Pointing Angle Quotation Mark + 0x9c => 0x0153, // Latin Small Ligature Oe + 0x9e => 0x017e, // Latin 
Small Letter Z With Caron + 0x9f => 0x0178, // Latin Capital Letter Y With Diaeresis + else => c, + }; +} + +/// https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/bestfit1252.txt +/// Plus some mappings found empirically by iterating all codepoints: +/// 0x2007 => 0xA0, // Figure Space +/// 0x2008 => ' ', // Punctuation Space +/// 0x2009 => ' ', // Thin Space +/// 0x200A => ' ', // Hair Space +/// 0x2012 => '-', // Figure Dash +/// 0x2015 => '-', // Horizontal Bar +/// 0x201B => '\'', // Single High-reversed-9 Quotation Mark +/// 0x201F => '"', // Double High-reversed-9 Quotation Mark +/// 0x202F => 0xA0, // Narrow No-Break Space +/// 0x2033 => '"', // Double Prime +/// 0x2036 => '"', // Reversed Double Prime +pub fn bestFitFromCodepoint(codepoint: u21) ?u8 { + return switch (codepoint) { + 0x00...0x7F, + 0x81, + 0x8D, + 0x8F, + 0x90, + 0x9D, + 0xA0...0xFF, + => @intCast(codepoint), + 0x0100 => 0x41, // Latin Capital Letter A With Macron + 0x0101 => 0x61, // Latin Small Letter A With Macron + 0x0102 => 0x41, // Latin Capital Letter A With Breve + 0x0103 => 0x61, // Latin Small Letter A With Breve + 0x0104 => 0x41, // Latin Capital Letter A With Ogonek + 0x0105 => 0x61, // Latin Small Letter A With Ogonek + 0x0106 => 0x43, // Latin Capital Letter C With Acute + 0x0107 => 0x63, // Latin Small Letter C With Acute + 0x0108 => 0x43, // Latin Capital Letter C With Circumflex + 0x0109 => 0x63, // Latin Small Letter C With Circumflex + 0x010a => 0x43, // Latin Capital Letter C With Dot Above + 0x010b => 0x63, // Latin Small Letter C With Dot Above + 0x010c => 0x43, // Latin Capital Letter C With Caron + 0x010d => 0x63, // Latin Small Letter C With Caron + 0x010e => 0x44, // Latin Capital Letter D With Caron + 0x010f => 0x64, // Latin Small Letter D With Caron + 0x0110 => 0xd0, // Latin Capital Letter D With Stroke + 0x0111 => 0x64, // Latin Small Letter D With Stroke + 0x0112 => 0x45, // Latin Capital Letter E With Macron + 0x0113 => 0x65, // Latin 
Small Letter E With Macron + 0x0114 => 0x45, // Latin Capital Letter E With Breve + 0x0115 => 0x65, // Latin Small Letter E With Breve + 0x0116 => 0x45, // Latin Capital Letter E With Dot Above + 0x0117 => 0x65, // Latin Small Letter E With Dot Above + 0x0118 => 0x45, // Latin Capital Letter E With Ogonek + 0x0119 => 0x65, // Latin Small Letter E With Ogonek + 0x011a => 0x45, // Latin Capital Letter E With Caron + 0x011b => 0x65, // Latin Small Letter E With Caron + 0x011c => 0x47, // Latin Capital Letter G With Circumflex + 0x011d => 0x67, // Latin Small Letter G With Circumflex + 0x011e => 0x47, // Latin Capital Letter G With Breve + 0x011f => 0x67, // Latin Small Letter G With Breve + 0x0120 => 0x47, // Latin Capital Letter G With Dot Above + 0x0121 => 0x67, // Latin Small Letter G With Dot Above + 0x0122 => 0x47, // Latin Capital Letter G With Cedilla + 0x0123 => 0x67, // Latin Small Letter G With Cedilla + 0x0124 => 0x48, // Latin Capital Letter H With Circumflex + 0x0125 => 0x68, // Latin Small Letter H With Circumflex + 0x0126 => 0x48, // Latin Capital Letter H With Stroke + 0x0127 => 0x68, // Latin Small Letter H With Stroke + 0x0128 => 0x49, // Latin Capital Letter I With Tilde + 0x0129 => 0x69, // Latin Small Letter I With Tilde + 0x012a => 0x49, // Latin Capital Letter I With Macron + 0x012b => 0x69, // Latin Small Letter I With Macron + 0x012c => 0x49, // Latin Capital Letter I With Breve + 0x012d => 0x69, // Latin Small Letter I With Breve + 0x012e => 0x49, // Latin Capital Letter I With Ogonek + 0x012f => 0x69, // Latin Small Letter I With Ogonek + 0x0130 => 0x49, // Latin Capital Letter I With Dot Above + 0x0131 => 0x69, // Latin Small Letter Dotless I + 0x0134 => 0x4a, // Latin Capital Letter J With Circumflex + 0x0135 => 0x6a, // Latin Small Letter J With Circumflex + 0x0136 => 0x4b, // Latin Capital Letter K With Cedilla + 0x0137 => 0x6b, // Latin Small Letter K With Cedilla + 0x0139 => 0x4c, // Latin Capital Letter L With Acute + 0x013a => 0x6c, 
// Latin Small Letter L With Acute + 0x013b => 0x4c, // Latin Capital Letter L With Cedilla + 0x013c => 0x6c, // Latin Small Letter L With Cedilla + 0x013d => 0x4c, // Latin Capital Letter L With Caron + 0x013e => 0x6c, // Latin Small Letter L With Caron + 0x0141 => 0x4c, // Latin Capital Letter L With Stroke + 0x0142 => 0x6c, // Latin Small Letter L With Stroke + 0x0143 => 0x4e, // Latin Capital Letter N With Acute + 0x0144 => 0x6e, // Latin Small Letter N With Acute + 0x0145 => 0x4e, // Latin Capital Letter N With Cedilla + 0x0146 => 0x6e, // Latin Small Letter N With Cedilla + 0x0147 => 0x4e, // Latin Capital Letter N With Caron + 0x0148 => 0x6e, // Latin Small Letter N With Caron + 0x014c => 0x4f, // Latin Capital Letter O With Macron + 0x014d => 0x6f, // Latin Small Letter O With Macron + 0x014e => 0x4f, // Latin Capital Letter O With Breve + 0x014f => 0x6f, // Latin Small Letter O With Breve + 0x0150 => 0x4f, // Latin Capital Letter O With Double Acute + 0x0151 => 0x6f, // Latin Small Letter O With Double Acute + 0x0152 => 0x8c, // Latin Capital Ligature Oe + 0x0153 => 0x9c, // Latin Small Ligature Oe + 0x0154 => 0x52, // Latin Capital Letter R With Acute + 0x0155 => 0x72, // Latin Small Letter R With Acute + 0x0156 => 0x52, // Latin Capital Letter R With Cedilla + 0x0157 => 0x72, // Latin Small Letter R With Cedilla + 0x0158 => 0x52, // Latin Capital Letter R With Caron + 0x0159 => 0x72, // Latin Small Letter R With Caron + 0x015a => 0x53, // Latin Capital Letter S With Acute + 0x015b => 0x73, // Latin Small Letter S With Acute + 0x015c => 0x53, // Latin Capital Letter S With Circumflex + 0x015d => 0x73, // Latin Small Letter S With Circumflex + 0x015e => 0x53, // Latin Capital Letter S With Cedilla + 0x015f => 0x73, // Latin Small Letter S With Cedilla + 0x0160 => 0x8a, // Latin Capital Letter S With Caron + 0x0161 => 0x9a, // Latin Small Letter S With Caron + 0x0162 => 0x54, // Latin Capital Letter T With Cedilla + 0x0163 => 0x74, // Latin Small Letter T 
With Cedilla + 0x0164 => 0x54, // Latin Capital Letter T With Caron + 0x0165 => 0x74, // Latin Small Letter T With Caron + 0x0166 => 0x54, // Latin Capital Letter T With Stroke + 0x0167 => 0x74, // Latin Small Letter T With Stroke + 0x0168 => 0x55, // Latin Capital Letter U With Tilde + 0x0169 => 0x75, // Latin Small Letter U With Tilde + 0x016a => 0x55, // Latin Capital Letter U With Macron + 0x016b => 0x75, // Latin Small Letter U With Macron + 0x016c => 0x55, // Latin Capital Letter U With Breve + 0x016d => 0x75, // Latin Small Letter U With Breve + 0x016e => 0x55, // Latin Capital Letter U With Ring Above + 0x016f => 0x75, // Latin Small Letter U With Ring Above + 0x0170 => 0x55, // Latin Capital Letter U With Double Acute + 0x0171 => 0x75, // Latin Small Letter U With Double Acute + 0x0172 => 0x55, // Latin Capital Letter U With Ogonek + 0x0173 => 0x75, // Latin Small Letter U With Ogonek + 0x0174 => 0x57, // Latin Capital Letter W With Circumflex + 0x0175 => 0x77, // Latin Small Letter W With Circumflex + 0x0176 => 0x59, // Latin Capital Letter Y With Circumflex + 0x0177 => 0x79, // Latin Small Letter Y With Circumflex + 0x0178 => 0x9f, // Latin Capital Letter Y With Diaeresis + 0x0179 => 0x5a, // Latin Capital Letter Z With Acute + 0x017a => 0x7a, // Latin Small Letter Z With Acute + 0x017b => 0x5a, // Latin Capital Letter Z With Dot Above + 0x017c => 0x7a, // Latin Small Letter Z With Dot Above + 0x017d => 0x8e, // Latin Capital Letter Z With Caron + 0x017e => 0x9e, // Latin Small Letter Z With Caron + 0x0180 => 0x62, // Latin Small Letter B With Stroke + 0x0189 => 0xd0, // Latin Capital Letter African D + 0x0191 => 0x83, // Latin Capital Letter F With Hook + 0x0192 => 0x83, // Latin Small Letter F With Hook + 0x0197 => 0x49, // Latin Capital Letter I With Stroke + 0x019a => 0x6c, // Latin Small Letter L With Bar + 0x019f => 0x4f, // Latin Capital Letter O With Middle Tilde + 0x01a0 => 0x4f, // Latin Capital Letter O With Horn + 0x01a1 => 0x6f, // Latin 
Small Letter O With Horn + 0x01ab => 0x74, // Latin Small Letter T With Palatal Hook + 0x01ae => 0x54, // Latin Capital Letter T With Retroflex Hook + 0x01af => 0x55, // Latin Capital Letter U With Horn + 0x01b0 => 0x75, // Latin Small Letter U With Horn + 0x01b6 => 0x7a, // Latin Small Letter Z With Stroke + 0x01c0 => 0x7c, // Latin Letter Dental Click + 0x01c3 => 0x21, // Latin Letter Retroflex Click + 0x01cd => 0x41, // Latin Capital Letter A With Caron + 0x01ce => 0x61, // Latin Small Letter A With Caron + 0x01cf => 0x49, // Latin Capital Letter I With Caron + 0x01d0 => 0x69, // Latin Small Letter I With Caron + 0x01d1 => 0x4f, // Latin Capital Letter O With Caron + 0x01d2 => 0x6f, // Latin Small Letter O With Caron + 0x01d3 => 0x55, // Latin Capital Letter U With Caron + 0x01d4 => 0x75, // Latin Small Letter U With Caron + 0x01d5 => 0x55, // Latin Capital Letter U With Diaeresis And Macron + 0x01d6 => 0x75, // Latin Small Letter U With Diaeresis And Macron + 0x01d7 => 0x55, // Latin Capital Letter U With Diaeresis And Acute + 0x01d8 => 0x75, // Latin Small Letter U With Diaeresis And Acute + 0x01d9 => 0x55, // Latin Capital Letter U With Diaeresis And Caron + 0x01da => 0x75, // Latin Small Letter U With Diaeresis And Caron + 0x01db => 0x55, // Latin Capital Letter U With Diaeresis And Grave + 0x01dc => 0x75, // Latin Small Letter U With Diaeresis And Grave + 0x01de => 0x41, // Latin Capital Letter A With Diaeresis And Macron + 0x01df => 0x61, // Latin Small Letter A With Diaeresis And Macron + 0x01e4 => 0x47, // Latin Capital Letter G With Stroke + 0x01e5 => 0x67, // Latin Small Letter G With Stroke + 0x01e6 => 0x47, // Latin Capital Letter G With Caron + 0x01e7 => 0x67, // Latin Small Letter G With Caron + 0x01e8 => 0x4b, // Latin Capital Letter K With Caron + 0x01e9 => 0x6b, // Latin Small Letter K With Caron + 0x01ea => 0x4f, // Latin Capital Letter O With Ogonek + 0x01eb => 0x6f, // Latin Small Letter O With Ogonek + 0x01ec => 0x4f, // Latin Capital Letter 
O With Ogonek And Macron + 0x01ed => 0x6f, // Latin Small Letter O With Ogonek And Macron + 0x01f0 => 0x6a, // Latin Small Letter J With Caron + 0x0261 => 0x67, // Latin Small Letter Script G + 0x02b9 => 0x27, // Modifier Letter Prime + 0x02ba => 0x22, // Modifier Letter Double Prime + 0x02bc => 0x27, // Modifier Letter Apostrophe + 0x02c4 => 0x5e, // Modifier Letter Up Arrowhead + 0x02c6 => 0x88, // Modifier Letter Circumflex Accent + 0x02c8 => 0x27, // Modifier Letter Vertical Line + 0x02c9 => 0xaf, // Modifier Letter Macron + 0x02ca => 0xb4, // Modifier Letter Acute Accent + 0x02cb => 0x60, // Modifier Letter Grave Accent + 0x02cd => 0x5f, // Modifier Letter Low Macron + 0x02da => 0xb0, // Ring Above + 0x02dc => 0x98, // Small Tilde + 0x0300 => 0x60, // Combining Grave Accent + 0x0301 => 0xb4, // Combining Acute Accent + 0x0302 => 0x5e, // Combining Circumflex Accent + 0x0303 => 0x7e, // Combining Tilde + 0x0304 => 0xaf, // Combining Macron + 0x0305 => 0xaf, // Combining Overline + 0x0308 => 0xa8, // Combining Diaeresis + 0x030a => 0xb0, // Combining Ring Above + 0x030e => 0x22, // Combining Double Vertical Line Above + 0x0327 => 0xb8, // Combining Cedilla + 0x0331 => 0x5f, // Combining Macron Below + 0x0332 => 0x5f, // Combining Low Line + 0x037e => 0x3b, // Greek Question Mark + 0x0393 => 0x47, // Greek Capital Letter Gamma + 0x0398 => 0x54, // Greek Capital Letter Theta + 0x03a3 => 0x53, // Greek Capital Letter Sigma + 0x03a6 => 0x46, // Greek Capital Letter Phi + 0x03a9 => 0x4f, // Greek Capital Letter Omega + 0x03b1 => 0x61, // Greek Small Letter Alpha + 0x03b2 => 0xdf, // Greek Small Letter Beta + 0x03b4 => 0x64, // Greek Small Letter Delta + 0x03b5 => 0x65, // Greek Small Letter Epsilon + 0x03bc => 0xb5, // Greek Small Letter Mu + 0x03c0 => 0x70, // Greek Small Letter Pi + 0x03c3 => 0x73, // Greek Small Letter Sigma + 0x03c4 => 0x74, // Greek Small Letter Tau + 0x03c6 => 0x66, // Greek Small Letter Phi + 0x04bb => 0x68, // Cyrillic Small Letter Shha + 
0x0589 => 0x3a, // Armenian Full Stop + 0x066a => 0x25, // Arabic Percent Sign + 0x2000 => 0x20, // En Quad + 0x2001 => 0x20, // Em Quad + 0x2002 => 0x20, // En Space + 0x2003 => 0x20, // Em Space + 0x2004 => 0x20, // Three-Per-Em Space + 0x2005 => 0x20, // Four-Per-Em Space + 0x2006 => 0x20, // Six-Per-Em Space + 0x2010 => 0x2d, // Hyphen + 0x2011 => 0x2d, // Non-Breaking Hyphen + 0x2013 => 0x96, // En Dash + 0x2014 => 0x97, // Em Dash + 0x2017 => 0x3d, // Double Low Line + 0x2018 => 0x91, // Left Single Quotation Mark + 0x2019 => 0x92, // Right Single Quotation Mark + 0x201a => 0x82, // Single Low-9 Quotation Mark + 0x201c => 0x93, // Left Double Quotation Mark + 0x201d => 0x94, // Right Double Quotation Mark + 0x201e => 0x84, // Double Low-9 Quotation Mark + 0x2020 => 0x86, // Dagger + 0x2021 => 0x87, // Double Dagger + 0x2022 => 0x95, // Bullet + 0x2024 => 0xb7, // One Dot Leader + 0x2026 => 0x85, // Horizontal Ellipsis + 0x2030 => 0x89, // Per Mille Sign + 0x2032 => 0x27, // Prime + 0x2035 => 0x60, // Reversed Prime + 0x2039 => 0x8b, // Single Left-Pointing Angle Quotation Mark + 0x203a => 0x9b, // Single Right-Pointing Angle Quotation Mark + 0x2044 => 0x2f, // Fraction Slash + 0x2070 => 0xb0, // Superscript Zero + 0x2074 => 0x34, // Superscript Four + 0x2075 => 0x35, // Superscript Five + 0x2076 => 0x36, // Superscript Six + 0x2077 => 0x37, // Superscript Seven + 0x2078 => 0x38, // Superscript Eight + 0x207f => 0x6e, // Superscript Latin Small Letter N + 0x2080 => 0x30, // Subscript Zero + 0x2081 => 0x31, // Subscript One + 0x2082 => 0x32, // Subscript Two + 0x2083 => 0x33, // Subscript Three + 0x2084 => 0x34, // Subscript Four + 0x2085 => 0x35, // Subscript Five + 0x2086 => 0x36, // Subscript Six + 0x2087 => 0x37, // Subscript Seven + 0x2088 => 0x38, // Subscript Eight + 0x2089 => 0x39, // Subscript Nine + 0x20ac => 0x80, // Euro Sign + 0x20a1 => 0xa2, // Colon Sign + 0x20a4 => 0xa3, // Lira Sign + 0x20a7 => 0x50, // Peseta Sign + 0x2102 => 0x43, // 
Double-Struck Capital C + 0x2107 => 0x45, // Euler Constant + 0x210a => 0x67, // Script Small G + 0x210b => 0x48, // Script Capital H + 0x210c => 0x48, // Black-Letter Capital H + 0x210d => 0x48, // Double-Struck Capital H + 0x210e => 0x68, // Planck Constant + 0x2110 => 0x49, // Script Capital I + 0x2111 => 0x49, // Black-Letter Capital I + 0x2112 => 0x4c, // Script Capital L + 0x2113 => 0x6c, // Script Small L + 0x2115 => 0x4e, // Double-Struck Capital N + 0x2118 => 0x50, // Script Capital P + 0x2119 => 0x50, // Double-Struck Capital P + 0x211a => 0x51, // Double-Struck Capital Q + 0x211b => 0x52, // Script Capital R + 0x211c => 0x52, // Black-Letter Capital R + 0x211d => 0x52, // Double-Struck Capital R + 0x2122 => 0x99, // Trade Mark Sign + 0x2124 => 0x5a, // Double-Struck Capital Z + 0x2128 => 0x5a, // Black-Letter Capital Z + 0x212a => 0x4b, // Kelvin Sign + 0x212b => 0xc5, // Angstrom Sign + 0x212c => 0x42, // Script Capital B + 0x212d => 0x43, // Black-Letter Capital C + 0x212e => 0x65, // Estimated Symbol + 0x212f => 0x65, // Script Small E + 0x2130 => 0x45, // Script Capital E + 0x2131 => 0x46, // Script Capital F + 0x2133 => 0x4d, // Script Capital M + 0x2134 => 0x6f, // Script Small O + 0x2205 => 0xd8, // Empty Set + 0x2212 => 0x2d, // Minus Sign + 0x2213 => 0xb1, // Minus-Or-Plus Sign + 0x2215 => 0x2f, // Division Slash + 0x2216 => 0x5c, // Set Minus + 0x2217 => 0x2a, // Asterisk Operator + 0x2218 => 0xb0, // Ring Operator + 0x2219 => 0xb7, // Bullet Operator + 0x221a => 0x76, // Square Root + 0x221e => 0x38, // Infinity + 0x2223 => 0x7c, // Divides + 0x2229 => 0x6e, // Intersection + 0x2236 => 0x3a, // Ratio + 0x223c => 0x7e, // Tilde Operator + 0x2248 => 0x98, // Almost Equal To + 0x2261 => 0x3d, // Identical To + 0x2264 => 0x3d, // Less-Than Or Equal To + 0x2265 => 0x3d, // Greater-Than Or Equal To + 0x226a => 0xab, // Much Less-Than + 0x226b => 0xbb, // Much Greater-Than + 0x22c5 => 0xb7, // Dot Operator + 0x2302 => 0xa6, // House + 0x2303 => 0x5e, 
// Up Arrowhead + 0x2310 => 0xac, // Reversed Not Sign + 0x2320 => 0x28, // Top Half Integral + 0x2321 => 0x29, // Bottom Half Integral + 0x2329 => 0x3c, // Left-Pointing Angle Bracket + 0x232a => 0x3e, // Right-Pointing Angle Bracket + 0x2500 => 0x2d, // Box Drawings Light Horizontal + 0x2502 => 0xa6, // Box Drawings Light Vertical + 0x250c => 0x2b, // Box Drawings Light Down And Right + 0x2510 => 0x2b, // Box Drawings Light Down And Left + 0x2514 => 0x2b, // Box Drawings Light Up And Right + 0x2518 => 0x2b, // Box Drawings Light Up And Left + 0x251c => 0x2b, // Box Drawings Light Vertical And Right + 0x2524 => 0xa6, // Box Drawings Light Vertical And Left + 0x252c => 0x2d, // Box Drawings Light Down And Horizontal + 0x2534 => 0x2d, // Box Drawings Light Up And Horizontal + 0x253c => 0x2b, // Box Drawings Light Vertical And Horizontal + 0x2550 => 0x2d, // Box Drawings Double Horizontal + 0x2551 => 0xa6, // Box Drawings Double Vertical + 0x2552 => 0x2b, // Box Drawings Down Single And Right Double + 0x2553 => 0x2b, // Box Drawings Down Double And Right Single + 0x2554 => 0x2b, // Box Drawings Double Down And Right + 0x2555 => 0x2b, // Box Drawings Down Single And Left Double + 0x2556 => 0x2b, // Box Drawings Down Double And Left Single + 0x2557 => 0x2b, // Box Drawings Double Down And Left + 0x2558 => 0x2b, // Box Drawings Up Single And Right Double + 0x2559 => 0x2b, // Box Drawings Up Double And Right Single + 0x255a => 0x2b, // Box Drawings Double Up And Right + 0x255b => 0x2b, // Box Drawings Up Single And Left Double + 0x255c => 0x2b, // Box Drawings Up Double And Left Single + 0x255d => 0x2b, // Box Drawings Double Up And Left + 0x255e => 0xa6, // Box Drawings Vertical Single And Right Double + 0x255f => 0xa6, // Box Drawings Vertical Double And Right Single + 0x2560 => 0xa6, // Box Drawings Double Vertical And Right + 0x2561 => 0xa6, // Box Drawings Vertical Single And Left Double + 0x2562 => 0xa6, // Box Drawings Vertical Double And Left Single + 0x2563 => 
0xa6, // Box Drawings Double Vertical And Left + 0x2564 => 0x2d, // Box Drawings Down Single And Horizontal Double + 0x2565 => 0x2d, // Box Drawings Down Double And Horizontal Single + 0x2566 => 0x2d, // Box Drawings Double Down And Horizontal + 0x2567 => 0x2d, // Box Drawings Up Single And Horizontal Double + 0x2568 => 0x2d, // Box Drawings Up Double And Horizontal Single + 0x2569 => 0x2d, // Box Drawings Double Up And Horizontal + 0x256a => 0x2b, // Box Drawings Vertical Single And Horizontal Double + 0x256b => 0x2b, // Box Drawings Vertical Double And Horizontal Single + 0x256c => 0x2b, // Box Drawings Double Vertical And Horizontal + 0x2580 => 0xaf, // Upper Half Block + 0x2584 => 0x5f, // Lower Half Block + 0x2588 => 0xa6, // Full Block + 0x258c => 0xa6, // Left Half Block + 0x2590 => 0xa6, // Right Half Block + 0x2591 => 0xa6, // Light Shade + 0x2592 => 0xa6, // Medium Shade + 0x2593 => 0xa6, // Dark Shade + 0x25a0 => 0xa6, // Black Square + 0x263c => 0xa4, // White Sun With Rays + 0x2758 => 0x7c, // Light Vertical Bar + 0x3000 => 0x20, // Ideographic Space + 0x3008 => 0x3c, // Left Angle Bracket + 0x3009 => 0x3e, // Right Angle Bracket + 0x300a => 0xab, // Left Double Angle Bracket + 0x300b => 0xbb, // Right Double Angle Bracket + 0x301a => 0x5b, // Left White Square Bracket + 0x301b => 0x5d, // Right White Square Bracket + 0x30fb => 0xb7, // Katakana Middle Dot + 0xff01 => 0x21, // Fullwidth Exclamation Mark + 0xff02 => 0x22, // Fullwidth Quotation Mark + 0xff03 => 0x23, // Fullwidth Number Sign + 0xff04 => 0x24, // Fullwidth Dollar Sign + 0xff05 => 0x25, // Fullwidth Percent Sign + 0xff06 => 0x26, // Fullwidth Ampersand + 0xff07 => 0x27, // Fullwidth Apostrophe + 0xff08 => 0x28, // Fullwidth Left Parenthesis + 0xff09 => 0x29, // Fullwidth Right Parenthesis + 0xff0a => 0x2a, // Fullwidth Asterisk + 0xff0b => 0x2b, // Fullwidth Plus Sign + 0xff0c => 0x2c, // Fullwidth Comma + 0xff0d => 0x2d, // Fullwidth Hyphen-Minus + 0xff0e => 0x2e, // Fullwidth Full Stop 
+ 0xff0f => 0x2f, // Fullwidth Solidus + 0xff10 => 0x30, // Fullwidth Digit Zero + 0xff11 => 0x31, // Fullwidth Digit One + 0xff12 => 0x32, // Fullwidth Digit Two + 0xff13 => 0x33, // Fullwidth Digit Three + 0xff14 => 0x34, // Fullwidth Digit Four + 0xff15 => 0x35, // Fullwidth Digit Five + 0xff16 => 0x36, // Fullwidth Digit Six + 0xff17 => 0x37, // Fullwidth Digit Seven + 0xff18 => 0x38, // Fullwidth Digit Eight + 0xff19 => 0x39, // Fullwidth Digit Nine + 0xff1a => 0x3a, // Fullwidth Colon + 0xff1b => 0x3b, // Fullwidth Semicolon + 0xff1c => 0x3c, // Fullwidth Less-Than Sign + 0xff1d => 0x3d, // Fullwidth Equals Sign + 0xff1e => 0x3e, // Fullwidth Greater-Than Sign + 0xff1f => 0x3f, // Fullwidth Question Mark + 0xff20 => 0x40, // Fullwidth Commercial At + 0xff21 => 0x41, // Fullwidth Latin Capital Letter A + 0xff22 => 0x42, // Fullwidth Latin Capital Letter B + 0xff23 => 0x43, // Fullwidth Latin Capital Letter C + 0xff24 => 0x44, // Fullwidth Latin Capital Letter D + 0xff25 => 0x45, // Fullwidth Latin Capital Letter E + 0xff26 => 0x46, // Fullwidth Latin Capital Letter F + 0xff27 => 0x47, // Fullwidth Latin Capital Letter G + 0xff28 => 0x48, // Fullwidth Latin Capital Letter H + 0xff29 => 0x49, // Fullwidth Latin Capital Letter I + 0xff2a => 0x4a, // Fullwidth Latin Capital Letter J + 0xff2b => 0x4b, // Fullwidth Latin Capital Letter K + 0xff2c => 0x4c, // Fullwidth Latin Capital Letter L + 0xff2d => 0x4d, // Fullwidth Latin Capital Letter M + 0xff2e => 0x4e, // Fullwidth Latin Capital Letter N + 0xff2f => 0x4f, // Fullwidth Latin Capital Letter O + 0xff30 => 0x50, // Fullwidth Latin Capital Letter P + 0xff31 => 0x51, // Fullwidth Latin Capital Letter Q + 0xff32 => 0x52, // Fullwidth Latin Capital Letter R + 0xff33 => 0x53, // Fullwidth Latin Capital Letter S + 0xff34 => 0x54, // Fullwidth Latin Capital Letter T + 0xff35 => 0x55, // Fullwidth Latin Capital Letter U + 0xff36 => 0x56, // Fullwidth Latin Capital Letter V + 0xff37 => 0x57, // Fullwidth Latin Capital 
Letter W + 0xff38 => 0x58, // Fullwidth Latin Capital Letter X + 0xff39 => 0x59, // Fullwidth Latin Capital Letter Y + 0xff3a => 0x5a, // Fullwidth Latin Capital Letter Z + 0xff3b => 0x5b, // Fullwidth Left Square Bracket + 0xff3c => 0x5c, // Fullwidth Reverse Solidus + 0xff3d => 0x5d, // Fullwidth Right Square Bracket + 0xff3e => 0x5e, // Fullwidth Circumflex Accent + 0xff3f => 0x5f, // Fullwidth Low Line + 0xff40 => 0x60, // Fullwidth Grave Accent + 0xff41 => 0x61, // Fullwidth Latin Small Letter A + 0xff42 => 0x62, // Fullwidth Latin Small Letter B + 0xff43 => 0x63, // Fullwidth Latin Small Letter C + 0xff44 => 0x64, // Fullwidth Latin Small Letter D + 0xff45 => 0x65, // Fullwidth Latin Small Letter E + 0xff46 => 0x66, // Fullwidth Latin Small Letter F + 0xff47 => 0x67, // Fullwidth Latin Small Letter G + 0xff48 => 0x68, // Fullwidth Latin Small Letter H + 0xff49 => 0x69, // Fullwidth Latin Small Letter I + 0xff4a => 0x6a, // Fullwidth Latin Small Letter J + 0xff4b => 0x6b, // Fullwidth Latin Small Letter K + 0xff4c => 0x6c, // Fullwidth Latin Small Letter L + 0xff4d => 0x6d, // Fullwidth Latin Small Letter M + 0xff4e => 0x6e, // Fullwidth Latin Small Letter N + 0xff4f => 0x6f, // Fullwidth Latin Small Letter O + 0xff50 => 0x70, // Fullwidth Latin Small Letter P + 0xff51 => 0x71, // Fullwidth Latin Small Letter Q + 0xff52 => 0x72, // Fullwidth Latin Small Letter R + 0xff53 => 0x73, // Fullwidth Latin Small Letter S + 0xff54 => 0x74, // Fullwidth Latin Small Letter T + 0xff55 => 0x75, // Fullwidth Latin Small Letter U + 0xff56 => 0x76, // Fullwidth Latin Small Letter V + 0xff57 => 0x77, // Fullwidth Latin Small Letter W + 0xff58 => 0x78, // Fullwidth Latin Small Letter X + 0xff59 => 0x79, // Fullwidth Latin Small Letter Y + 0xff5a => 0x7a, // Fullwidth Latin Small Letter Z + 0xff5b => 0x7b, // Fullwidth Left Curly Bracket + 0xff5c => 0x7c, // Fullwidth Vertical Line + 0xff5d => 0x7d, // Fullwidth Right Curly Bracket + 0xff5e => 0x7e, // Fullwidth Tilde + // Not 
in the best fit mapping, but RC uses these mappings too + 0x2007 => 0xA0, // Figure Space + 0x2008 => ' ', // Punctuation Space + 0x2009 => ' ', // Thin Space + 0x200A => ' ', // Hair Space + 0x2012 => '-', // Figure Dash + 0x2015 => '-', // Horizontal Bar + 0x201B => '\'', // Single High-reversed-9 Quotation Mark + 0x201F => '"', // Double High-reversed-9 Quotation Mark + 0x202F => 0xA0, // Narrow No-Break Space + 0x2033 => '"', // Double Prime + 0x2036 => '"', // Reversed Double Prime + else => null, + }; +} + +test "windows-1252 to utf8" { + var buf = std.ArrayList(u8).init(std.testing.allocator); + defer buf.deinit(); + + const input_windows1252 = "\x81pqrstuvwxyz{|}~\x80\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8e\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9e\x9f\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + // Each input byte maps to exactly one codepoint below; \xd7 and \xd8 are U+00D7 (×) and U+00D8 (Ø). + const expected_utf8 = "\xc2\x81pqrstuvwxyz{|}~€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"; + + var fbs = std.io.fixedBufferStream(input_windows1252); + const bytes_written = try windows1252ToUtf8Stream(buf.writer(), fbs.reader()); + + try std.testing.expectEqualStrings(expected_utf8, buf.items); + try std.testing.expectEqual(expected_utf8.len, bytes_written); +} diff --git a/test/standalone.zig b/test/standalone.zig index 22b9dfba49be..87022f8bfcce 100644 --- a/test/standalone.zig +++ b/test/standalone.zig @@ -194,6 +194,10 @@ pub const build_cases = [_]BuildCase{ .build_root = "test/standalone/load_dynamic_library", .import = @import("standalone/load_dynamic_library/build.zig"), }, + .{ + .build_root = "test/standalone/windows_resources", + 
.import = @import("standalone/windows_resources/build.zig"), + }, .{ .build_root = "test/standalone/windows_spawn", .import = @import("standalone/windows_spawn/build.zig"), diff --git a/test/standalone/windows_resources/build.zig b/test/standalone/windows_resources/build.zig new file mode 100644 index 000000000000..9476fa7839db --- /dev/null +++ b/test/standalone/windows_resources/build.zig @@ -0,0 +1,40 @@ +const std = @import("std"); + +pub fn build(b: *std.Build) void { + const test_step = b.step("test", "Test it"); + b.default_step = test_step; + + const native_target: std.zig.CrossTarget = .{}; + const cross_target = .{ + .cpu_arch = .x86_64, + .os_tag = .windows, + .abi = .gnu, + }; + + add(b, native_target, .any, test_step); + add(b, cross_target, .any, test_step); + + add(b, native_target, .gnu, test_step); + add(b, cross_target, .gnu, test_step); +} + +fn add(b: *std.Build, target: std.zig.CrossTarget, rc_includes: enum { any, gnu }, test_step: *std.Build.Step) void { + const exe = b.addExecutable(.{ + .name = "zig_resource_test", + .root_source_file = .{ .path = "main.zig" }, + .target = target, + .optimize = .Debug, + }); + exe.addWin32ResourceFile(.{ + .file = .{ .path = "res/zig.rc" }, + .flags = &.{"/c65001"}, // UTF-8 code page + }); + exe.rc_includes = switch (rc_includes) { + .any => .any, + .gnu => .gnu, + }; + + _ = exe.getEmittedBin(); + + test_step.dependOn(&exe.step); +} diff --git a/test/standalone/windows_resources/main.zig b/test/standalone/windows_resources/main.zig new file mode 100644 index 000000000000..f92e18124bb8 --- /dev/null +++ b/test/standalone/windows_resources/main.zig @@ -0,0 +1,5 @@ +const std = @import("std"); + +pub fn main() !void { + std.debug.print("All your {s} are belong to us.\n", .{"codebase"}); +} diff --git a/test/standalone/windows_resources/res/hello.bin b/test/standalone/windows_resources/res/hello.bin new file mode 100644 index 000000000000..dda6eb4b7b86 --- /dev/null +++ 
b/test/standalone/windows_resources/res/hello.bin @@ -0,0 +1 @@ +abcdefg \ No newline at end of file diff --git a/test/standalone/windows_resources/res/sub/sub.rc b/test/standalone/windows_resources/res/sub/sub.rc new file mode 100644 index 000000000000..b15ce306043b --- /dev/null +++ b/test/standalone/windows_resources/res/sub/sub.rc @@ -0,0 +1 @@ +2 RCDATA hello.bin diff --git a/test/standalone/windows_resources/res/zig.ico b/test/standalone/windows_resources/res/zig.ico new file mode 100644 index 000000000000..64610cc33226 Binary files /dev/null and b/test/standalone/windows_resources/res/zig.ico differ diff --git a/test/standalone/windows_resources/res/zig.rc b/test/standalone/windows_resources/res/zig.rc new file mode 100644 index 000000000000..88503a0f6ad7 --- /dev/null +++ b/test/standalone/windows_resources/res/zig.rc @@ -0,0 +1,40 @@ +#define ICO_ID 1 + +// Nothing from windows.h is used in this .rc file, +// but it's common to include it within a .rc file +// so this makes sure that it can be found on +// all platforms. +#include "windows.h" + +ICO_ID ICON "zig.ico" + +1 VERSIONINFO + FILEVERSION 1L,0,0,2 + PRODUCTVERSION 1,0,0,1 + FILEFLAGSMASK 0x3fL + FILEFLAGS 0x1L + FILEOS 0x4L + FILETYPE 0x1L + FILESUBTYPE 0x0L +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904e4" + BEGIN + VALUE "CompanyName", "Zig" + VALUE "FileDescription", "My cool zig program" + VALUE "FileVersion", "1.0.0.1" + VALUE "InternalName", "zig-ico.exe" + VALUE "LegalCopyright", "(c) no one" + VALUE "OriginalFilename", "zig-ico.exe" + VALUE "ProductName", "Zig but with an icon" + VALUE "ProductVersion", "1.0.0.1" + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x409, 1252 + END +END + +#include "sub/sub.rc"