Skip to content

Commit 3031d00

Browse files
committed
std.zig.system: use both PATH and hardcoded locations to find env
Should help systems that have the main `env` binary in a different location than the hardcoded `/usr/bin/env` **during build** (not necessarily always), like Nix/Guix, Termux, Gentoo Prefix etc. Related: https://www.github.com/ziglang/zig/issues/12156 https://www.github.com/ziglang/zig/issues/14146 https://www.github.com/ziglang/zig/issues/14577 https://www.github.com/ziglang/zig/issues/15898 This is the second attempt, after https://www.github.com/ziglang/zig/pull/21540 was reverted. In this version relative paths are handled too. Signed-off-by: Eric Joldasov <[email protected]>
1 parent 8453fb0 commit 3031d00

File tree

1 file changed

+173
-108
lines changed

1 file changed

+173
-108
lines changed

lib/std/zig/system.zig

Lines changed: 173 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -995,6 +995,140 @@ fn glibcVerFromSoFile(file: fs.File) !std.SemanticVersion {
995995
return max_ver;
996996
}
997997

998+
/// Opens the file located at `start_path` (interpreted relative to `cwd`) and
/// guesses whether it is a script or an ELF file.
///
/// * If the file starts with the ELF magic sequence, it is considered an ELF file
///   and its open handle is returned. The caller owns the handle and must close it.
/// * If the file starts with a "shebang line" ("#!"), it is considered a script and
///   the same logic is re-run on the interpreter referenced after "#!". This repeats
///   while the interpreter is itself a script, up to a fixed number of hops so that
///   a cycle of scripts cannot hang the caller.
/// * Anything else (unreadable file, too short file, unknown format, over-long path,
///   too many hops) yields `error.UnableToFindElfFile`.
fn resolveElfFileRecursively(cwd: fs.Dir, start_path: []const u8) error{UnableToFindElfFile}!fs.File {
    // Safety bound on the number of files inspected. Real interpreter chains are
    // only a few levels deep; this exists to break cycles such as a script that
    // names itself as its own interpreter.
    const max_hops: usize = 16;

    // Callers may hand us a path that does not fit (for example one built from a
    // very long PATH entry), so this must be a checked append, not an assertion.
    var current_path: std.BoundedArray(u8, fs.max_path_bytes) = .{};
    current_path.appendSlice(start_path) catch return error.UnableToFindElfFile;

    // Needed for storing `fs.path.resolve` result.
    var buf: [fs.max_path_bytes + 1]u8 = undefined;
    var fbs: std.heap.FixedBufferAllocator = .init(&buf);
    const allocator = fbs.allocator();

    // Needed for storing file content.
    // According to `man 2 execve`:
    //
    // The kernel imposes a maximum length on the text
    // that follows the "#!" characters at the start of a script;
    // characters beyond the limit are ignored.
    // Before Linux 5.1, the limit is 127 characters.
    // Since Linux 5.1, the limit is 255 characters.
    //
    // Tests show that bash and zsh consider 255 as total limit,
    // *including* "#!" characters and ignoring newline.
    // For safety, we set max length as 255 + \n (1).
    var buffer: [255 + 1]u8 = undefined;

    var hops: usize = 0;
    while (hops < max_hops) : ({
        hops += 1;
        fbs.reset();
    }) {
        const file = cwd.openFile(current_path.constSlice(), .{}) catch |err| switch (err) {
            error.NoSpaceLeft => unreachable,
            error.PathAlreadyExists => unreachable,
            error.SharingViolation => unreachable,
            error.InvalidUtf8 => unreachable, // WASI only
            error.InvalidWtf8 => unreachable, // Windows only
            error.BadPathName => unreachable,
            error.PipeBusy => unreachable,
            error.FileLocksNotSupported => unreachable,
            error.WouldBlock => unreachable,
            error.FileBusy => unreachable, // opened without write permissions
            error.AntivirusInterference => unreachable, // Windows-only error

            // Reachable: the path may come from PATH and can contain an over-long
            // component or be as long as the OS path limit.
            error.NameTooLong,

            error.IsDir,
            error.NotDir,

            error.AccessDenied,
            error.DeviceBusy,
            error.FileTooBig,
            error.SymLinkLoop,
            error.ProcessFdQuotaExceeded,
            error.SystemFdQuotaExceeded,
            error.SystemResources,

            error.FileNotFound,
            error.NetworkNotFound,
            error.NoDevice,
            error.Unexpected,
            => return error.UnableToFindElfFile,
        };
        // The handle is handed to the caller only when an ELF file is found;
        // on every other exit from this iteration it is closed here.
        var is_elf_file = false;
        defer if (!is_elf_file) file.close();

        // Shortest working interpreter path is "#!i" (3)
        // (interpreter is "i", a relative path).
        // ELF magic number length is 4.
        const len = preadAtLeast(file, &buffer, 0, 4) catch |err| switch (err) {
            error.UnexpectedEndOfFile => preadAtLeast(file, &buffer, 0, 3) catch
                // If file is shorter than that, it is definitely not ELF file
                // nor file with "shebang" line.
                return error.UnableToFindElfFile,
            else => return error.UnableToFindElfFile,
        };
        const content = buffer[0..len];

        // `len` may be 3 after the fallback read above, so guard the 4-byte slice.
        if (len >= 4 and mem.eql(u8, content[0..4], std.elf.MAGIC)) {
            // It is very likely ELF file!
            is_elf_file = true;
            return file;
        }

        if (!mem.eql(u8, content[0..2], "#!")) {
            // Not an ELF file, not a script with "shebang line", invalid duck.
            return error.UnableToFindElfFile;
        }

        // We detected shebang, now parse entire line.
        const interpreter_path = interpreter_path: {
            // Trim leading "#!" and separate line from others.
            const first_line = content[2 .. mem.indexOfScalar(u8, content, '\n') orelse content.len];

            // Trim leading spaces and tabs.
            const trimmed_line = mem.trimLeft(u8, first_line, &.{ ' ', '\t' });

            // This line can have:
            // * Interpreter path only,
            // * Interpreter path and arguments, all separated by space, tab or NUL character.
            // And optionally newline at the end.
            const path_maybe_args = mem.trimRight(u8, trimmed_line, "\n");

            // Separate path and args.
            const path_end = mem.indexOfAny(u8, path_maybe_args, &.{ ' ', '\t', 0 }) orelse path_maybe_args.len;

            break :interpreter_path path_maybe_args[0..path_end];
        };
        // "#!" followed by nothing usable names no interpreter at all.
        if (interpreter_path.len == 0) return error.UnableToFindElfFile;

        // We want these scenarios to work without using `realpath`:
        // * Interpreter is absolute/relative path and real file.
        // * Interpreter is absolute/relative path and absolute/relative symlink.
        var readlink_buffer: [fs.max_path_bytes]u8 = undefined;
        const next_path: []const u8 = if (cwd.readLink(interpreter_path, &readlink_buffer)) |link_target|
            // A symlink target is interpreted relative to the directory that
            // contains the link (an absolute target overrides it).
            // `dirname` can return `null` in two situations:
            // * When path is '/': impossible since it always contain file path.
            // * When path is "some_current_dir_file": use ".".
            fs.path.resolve(allocator, &.{
                fs.path.dirname(interpreter_path) orelse ".",
                link_target,
            }) catch return error.UnableToFindElfFile
        else |err| switch (err) {
            // A regular file is used as written. Resolving it against its own
            // directory would duplicate the directory part of a relative path
            // ("bin/sh" would become "bin/bin/sh").
            error.NotLink => interpreter_path,
            else => return error.UnableToFindElfFile,
        };

        // `next_path` points into `buffer` or into `fbs` memory, both of which are
        // reused on the next iteration, so copy it out first.
        current_path.clear();
        current_path.appendSlice(next_path) catch return error.UnableToFindElfFile;
    }

    // Too many nested interpreters (most likely a cycle).
    return error.UnableToFindElfFile;
}
1131+
9981132
/// In the past, this function attempted to use the executable's own binary if it was dynamically
9991133
/// linked to answer both the C ABI question and the dynamic linker question. However, this
10001134
/// could be problematic on a system that uses a RUNPATH for the compiler binary, locking
@@ -1003,11 +1137,14 @@ fn glibcVerFromSoFile(file: fs.File) !std.SemanticVersion {
10031137
/// the dynamic linker will match that of the compiler binary. Executables with these versions
10041138
/// mismatching will fail to run.
10051139
///
1006-
/// Therefore, this function works the same regardless of whether the compiler binary is
1007-
/// dynamically or statically linked. It inspects `/usr/bin/env` as an ELF file to find the
1008-
/// answer to these questions, or if there is a shebang line, then it chases the referenced
1009-
/// file recursively. If that does not provide the answer, then the function falls back to
1010-
/// defaults.
1140+
/// Therefore, this function now does not inspect the executable's own binary.
1141+
/// Instead, it tries to find `env` program in PATH or in hardcoded location, and uses it
1142+
/// to find suitable ELF file. If `env` program is an executable, work is done and function starts to
1143+
/// inspect inner structure of a file. But if `env` is a script or other non-ELF file, it uses
1144+
/// interpreter path instead and tries to search ELF file again, going recursively in case interpreter
1145+
/// is also a script/non-ELF file.
1146+
///
1147+
/// If nothing was found, then the function falls back to defaults.
10111148
fn detectAbiAndDynamicLinker(
10121149
cpu: Target.Cpu,
10131150
os: Target.Os,
@@ -1075,113 +1212,44 @@ fn detectAbiAndDynamicLinker(
10751212

10761213
const ld_info_list = ld_info_list_buffer[0..ld_info_list_len];
10771214

1078-
// Best case scenario: the executable is dynamically linked, and we can iterate
1079-
// over our own shared objects and find a dynamic linker.
1080-
const elf_file = elf_file: {
1081-
// This block looks for a shebang line in /usr/bin/env,
1082-
// if it finds one, then instead of using /usr/bin/env as the ELF file to examine, it uses the file it references instead,
1083-
// doing the same logic recursively in case it finds another shebang line.
1215+
const cwd = fs.cwd();
1216+
1217+
// Algorithm is:
1218+
// 1a) try_path: If PATH is non-empty and `env` file was found in one of the directories, use that.
1219+
// 1b) try_path: If `env` was not found or PATH is empty, try hardcoded path below.
1220+
// 2a) try_hardcoded: If `env` was found in hardcoded location, use that.
1221+
// 2b) try_hardcoded: If `env` was not found, fall back to default ABI and dynamic linker.
1222+
// Source: https://github.com/ziglang/zig/issues/14146#issuecomment-2308984936
1223+
const elf_file = (try_path: {
1224+
const PATH = std.posix.getenv("PATH") orelse break :try_path null;
1225+
var it = mem.tokenizeScalar(u8, PATH, fs.path.delimiter);
1226+
1227+
var buf: [fs.max_path_bytes + 1]u8 = undefined;
1228+
var fbs: std.heap.FixedBufferAllocator = .init(&buf);
1229+
const allocator = fbs.allocator();
1230+
1231+
while (it.next()) |path| : (fbs.reset()) {
1232+
const start_path = fs.path.join(allocator, &.{ path, "env" }) catch |err| switch (err) {
1233+
error.OutOfMemory => continue,
1234+
};
10841235

1085-
var file_name: []const u8 = switch (os.tag) {
1236+
break :try_path resolveElfFileRecursively(cwd, start_path) catch |err| switch (err) {
1237+
error.UnableToFindElfFile => continue,
1238+
};
1239+
} else break :try_path null;
1240+
} orelse try_hardcoded: {
1241+
const hardcoded_file_name = switch (os.tag) {
10861242
// Since /usr/bin/env is hard-coded into the shebang line of many portable scripts, it's a
10871243
// reasonably reliable path to start with.
10881244
else => "/usr/bin/env",
10891245
// Haiku does not have a /usr root directory.
10901246
.haiku => "/bin/env",
10911247
};
10921248

1093-
// According to `man 2 execve`:
1094-
//
1095-
// The kernel imposes a maximum length on the text
1096-
// that follows the "#!" characters at the start of a script;
1097-
// characters beyond the limit are ignored.
1098-
// Before Linux 5.1, the limit is 127 characters.
1099-
// Since Linux 5.1, the limit is 255 characters.
1100-
//
1101-
// Tests show that bash and zsh consider 255 as total limit,
1102-
// *including* "#!" characters and ignoring newline.
1103-
// For safety, we set max length as 255 + \n (1).
1104-
var buffer: [255 + 1]u8 = undefined;
1105-
while (true) {
1106-
// Interpreter path can be relative on Linux, but
1107-
// for simplicity we are asserting it is an absolute path.
1108-
const file = fs.openFileAbsolute(file_name, .{}) catch |err| switch (err) {
1109-
error.NoSpaceLeft => unreachable,
1110-
error.NameTooLong => unreachable,
1111-
error.PathAlreadyExists => unreachable,
1112-
error.SharingViolation => unreachable,
1113-
error.InvalidUtf8 => unreachable, // WASI only
1114-
error.InvalidWtf8 => unreachable, // Windows only
1115-
error.BadPathName => unreachable,
1116-
error.PipeBusy => unreachable,
1117-
error.FileLocksNotSupported => unreachable,
1118-
error.WouldBlock => unreachable,
1119-
error.FileBusy => unreachable, // opened without write permissions
1120-
error.AntivirusInterference => unreachable, // Windows-only error
1121-
1122-
error.IsDir,
1123-
error.NotDir,
1124-
error.AccessDenied,
1125-
error.NoDevice,
1126-
error.FileNotFound,
1127-
error.NetworkNotFound,
1128-
error.FileTooBig,
1129-
error.Unexpected,
1130-
=> |e| {
1131-
std.log.warn("Encountered error: {s}, falling back to default ABI and dynamic linker.", .{@errorName(e)});
1132-
return defaultAbiAndDynamicLinker(cpu, os, query);
1133-
},
1134-
1135-
else => |e| return e,
1136-
};
1137-
var is_elf_file = false;
1138-
defer if (is_elf_file == false) file.close();
1139-
1140-
// Shortest working interpreter path is "#!/i" (4)
1141-
// (interpreter is "/i", assuming all paths are absolute, like in above comment).
1142-
// ELF magic number length is also 4.
1143-
//
1144-
// If file is shorter than that, it is definitely not ELF file
1145-
// nor file with "shebang" line.
1146-
const min_len: usize = 4;
1147-
1148-
const len = preadAtLeast(file, &buffer, 0, min_len) catch |err| switch (err) {
1149-
error.UnexpectedEndOfFile,
1150-
error.UnableToReadElfFile,
1151-
error.ProcessNotFound,
1152-
=> return defaultAbiAndDynamicLinker(cpu, os, query),
1153-
1154-
else => |e| return e,
1155-
};
1156-
const content = buffer[0..len];
1157-
1158-
if (mem.eql(u8, content[0..4], std.elf.MAGIC)) {
1159-
// It is very likely ELF file!
1160-
is_elf_file = true;
1161-
break :elf_file file;
1162-
} else if (mem.eql(u8, content[0..2], "#!")) {
1163-
// We detected shebang, now parse entire line.
1164-
1165-
// Trim leading "#!", spaces and tabs.
1166-
const trimmed_line = mem.trimLeft(u8, content[2..], &.{ ' ', '\t' });
1167-
1168-
// This line can have:
1169-
// * Interpreter path only,
1170-
// * Interpreter path and arguments, all separated by space, tab or NUL character.
1171-
// And optionally newline at the end.
1172-
const path_maybe_args = mem.trimRight(u8, trimmed_line, "\n");
1173-
1174-
// Separate path and args.
1175-
const path_end = mem.indexOfAny(u8, path_maybe_args, &.{ ' ', '\t', 0 }) orelse path_maybe_args.len;
1176-
1177-
file_name = path_maybe_args[0..path_end];
1178-
continue;
1179-
} else {
1180-
// Not a ELF file, not a shell script with "shebang line", invalid duck.
1181-
return defaultAbiAndDynamicLinker(cpu, os, query);
1182-
}
1183-
}
1184-
};
1249+
break :try_hardcoded resolveElfFileRecursively(cwd, hardcoded_file_name) catch |err| switch (err) {
1250+
error.UnableToFindElfFile => null,
1251+
};
1252+
}) orelse return defaultAbiAndDynamicLinker(cpu, os, query);
11851253
defer elf_file.close();
11861254

11871255
// TODO: inline this function and combine the buffer we already read above to find
@@ -1205,10 +1273,7 @@ fn detectAbiAndDynamicLinker(
12051273
error.UnexpectedEndOfFile,
12061274
error.NameTooLong,
12071275
// Finally, we fall back on the standard path.
1208-
=> |e| {
1209-
std.log.warn("Encountered error: {s}, falling back to default ABI and dynamic linker.", .{@errorName(e)});
1210-
return defaultAbiAndDynamicLinker(cpu, os, query);
1211-
},
1276+
=> defaultAbiAndDynamicLinker(cpu, os, query),
12121277
};
12131278
}
12141279

0 commit comments

Comments
 (0)