Skip to content

Commit 564a861

Browse files
committed
std.zig.system: use both PATH and hardcoded locations to find env
Should help systems that have the main `env` binary in a different location than the hardcoded `/usr/bin/env` **during build** (not necessarily always), like Nix/Guix, Termux, Gentoo Prefix etc. Related: https://www.github.com/ziglang/zig/issues/12156 https://www.github.com/ziglang/zig/issues/14146 https://www.github.com/ziglang/zig/issues/14577 https://www.github.com/ziglang/zig/issues/15898 This is the second attempt, after https://www.github.com/ziglang/zig/pull/21540 was reverted. In this version relative paths are handled too. Signed-off-by: Eric Joldasov <[email protected]>
1 parent b5a5260 commit 564a861

File tree

1 file changed

+174
-109
lines changed

1 file changed

+174
-109
lines changed

lib/std/zig/system.zig

Lines changed: 174 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -1013,6 +1013,141 @@ fn glibcVerFromSoFile(file: fs.File) !std.SemanticVersion {
10131013
return max_ver;
10141014
}
10151015

1016+
/// Tries to open the file located at `start_path` (opened via `cwd`), and then guesses
/// whether it is a script or an ELF file.
///
/// If a "shebang line" is found, the file is considered a script, and the logic is re-run
/// using the interpreter referenced after the "#!" characters. If the interpreter is itself
/// a script, the chase continues until a non-script file is found.
///
/// If the ELF magic sequence is found, the file is considered an ELF file and is returned
/// still open; the caller owns the returned handle and must close it.
///
/// Returns `error.UnableToFindElfFile` when a file in the chain cannot be opened or read,
/// is neither a script nor an ELF file, when a path does not fit into the internal buffers,
/// or when the chain of interpreters is too long (e.g. a script that names itself).
fn resolveElfFileRecursively(cwd: fs.Dir, start_path: []const u8) error{UnableToFindElfFile}!fs.File {
    // Upper bound on how many interpreters are chased before giving up. It guarantees
    // termination when scripts reference each other in a cycle. `man 2 execve` documents
    // that the kernel also refuses overly deep script recursion (ELOOP); this cap is
    // deliberately more generous than any chain expected in practice.
    const max_interpreter_hops = 32;

    // `start_path` comes from the caller (possibly built from PATH), so its length is
    // checked instead of assumed.
    var current_path: std.BoundedArray(u8, fs.max_path_bytes) = .{};
    current_path.appendSlice(start_path) catch return error.UnableToFindElfFile;

    // Needed for storing `fs.path.resolve` result.
    var buf: [fs.max_path_bytes + 1]u8 = undefined;
    var fbs: std.heap.FixedBufferAllocator = .init(&buf);
    const allocator = fbs.allocator();

    // Needed for storing file content.
    // According to `man 2 execve`:
    //
    // The kernel imposes a maximum length on the text
    // that follows the "#!" characters at the start of a script;
    // characters beyond the limit are ignored.
    // Before Linux 5.1, the limit is 127 characters.
    // Since Linux 5.1, the limit is 255 characters.
    //
    // Tests show that bash and zsh consider 255 as total limit,
    // *including* "#!" characters and ignoring newline.
    // For safety, we set max length as 255 + \n (1).
    var buffer: [255 + 1]u8 = undefined;

    var hops: usize = 0;
    while (hops <= max_interpreter_hops) : ({
        hops += 1;
        fbs.reset();
    }) {
        const file = cwd.openFile(current_path.constSlice(), .{}) catch |err| switch (err) {
            error.NoSpaceLeft => unreachable,
            error.PathAlreadyExists => unreachable,
            error.SharingViolation => unreachable,
            error.InvalidUtf8 => unreachable, // WASI only
            error.InvalidWtf8 => unreachable, // Windows only
            error.BadPathName => unreachable,
            error.PipeBusy => unreachable,
            error.FileLocksNotSupported => unreachable,
            error.WouldBlock => unreachable,
            error.FileBusy => unreachable, // opened without write permissions
            error.AntivirusInterference => unreachable, // Windows-only error

            // The path originates from PATH entries or from shebang text, so an
            // over-long path (or path component) is ordinary bad input, not a bug.
            error.NameTooLong,

            error.IsDir,
            error.NotDir,

            error.AccessDenied,
            error.PermissionDenied,
            error.DeviceBusy,
            error.FileTooBig,
            error.SymLinkLoop,
            error.ProcessFdQuotaExceeded,
            error.SystemFdQuotaExceeded,
            error.SystemResources,

            error.FileNotFound,
            error.NetworkNotFound,
            error.NoDevice,
            error.Unexpected,
            => return error.UnableToFindElfFile,
        };
        // The handle is handed to the caller only when it is an ELF file;
        // on every other exit from this iteration it is closed here.
        var is_elf_file = false;
        defer if (!is_elf_file) file.close();

        // Shortest working shebang is "#!i" (3 bytes, a one-character interpreter path).
        // ELF magic number length is 4.
        const len = preadAtLeast(file, &buffer, 0, 4) catch |err| switch (err) {
            error.UnexpectedEndOfFile => preadAtLeast(file, &buffer, 0, 3) catch
                // If file is shorter than that, it is definitely not an ELF file
                // nor a file with a "shebang" line.
                return error.UnableToFindElfFile,
            else => return error.UnableToFindElfFile,
        };
        const content = buffer[0..len];

        if (len >= 4 and mem.eql(u8, content[0..4], std.elf.MAGIC)) {
            // It is very likely an ELF file!
            is_elf_file = true;
            return file;
        } else if (mem.eql(u8, content[0..2], "#!")) {
            // We detected shebang, now parse the first line.
            const interpreter_path = interpreter_path: {
                // Separate the first line from the rest; the newline itself is excluded.
                const line_end = mem.indexOfScalar(u8, content, '\n') orelse content.len;

                // Drop the leading "#!" and any spaces and tabs that follow it.
                const line = mem.trimLeft(u8, content[2..line_end], &.{ ' ', '\t' });

                // This line can have:
                // * Interpreter path only,
                // * Interpreter path and arguments, all separated by space, tab or NUL character.
                const path_end = mem.indexOfAny(u8, line, &.{ ' ', '\t', 0 }) orelse line.len;

                break :interpreter_path line[0..path_end];
            };

            // We want these scenarios to work without using `realpath`:
            // * Interpreter is absolute/relative path and real file.
            // * Interpreter is absolute/relative path and absolute/relative symlink.
            var readlink_buffer: [fs.max_path_bytes]u8 = undefined;
            const resolve_result = if (cwd.readLink(interpreter_path, &readlink_buffer)) |link_target|
                // A relative symlink target is relative to the directory holding the link.
                // `dirname` can return `null` in two situations:
                // * When path is '/': not a symlink, so this branch is not taken.
                // * When path is "some_current_dir_file": use ".".
                fs.path.resolve(allocator, &.{
                    fs.path.dirname(interpreter_path) orelse ".",
                    link_target,
                })
            else |err| switch (err) {
                // Not a symlink: the path already names the file. It must NOT be joined
                // with its own dirname, otherwise "dir/file" would become "dir/dir/file".
                error.NotLink => fs.path.resolve(allocator, &.{interpreter_path}),
                else => return error.UnableToFindElfFile,
            };
            const next_path = resolve_result catch return error.UnableToFindElfFile;

            // `buf` is one byte larger than `current_path`, so check instead of assuming.
            current_path.clear();
            current_path.appendSlice(next_path) catch return error.UnableToFindElfFile;
        } else {
            // Not an ELF file, not a script with "shebang line", invalid duck.
            return error.UnableToFindElfFile;
        }
    }

    // Too many nested interpreters (most likely a cycle).
    return error.UnableToFindElfFile;
}
1150+
10161151
/// In the past, this function attempted to use the executable's own binary if it was dynamically
10171152
/// linked to answer both the C ABI question and the dynamic linker question. However, this
10181153
/// could be problematic on a system that uses a RUNPATH for the compiler binary, locking
@@ -1021,11 +1156,14 @@ fn glibcVerFromSoFile(file: fs.File) !std.SemanticVersion {
10211156
/// the dynamic linker will match that of the compiler binary. Executables with these versions
10221157
/// mismatching will fail to run.
10231158
///
1024-
/// Therefore, this function works the same regardless of whether the compiler binary is
1025-
/// dynamically or statically linked. It inspects `/usr/bin/env` as an ELF file to find the
1026-
/// answer to these questions, or if there is a shebang line, then it chases the referenced
1027-
/// file recursively. If that does not provide the answer, then the function falls back to
1028-
/// defaults.
1159+
/// Therefore, this function now does not inspect the executable's own binary.
1160+
/// Instead, it tries to find `env` program in PATH or in hardcoded location, and uses it
1161+
/// to find suitable ELF file. If `env` program is an executable, work is done and function starts to
1162+
/// inspect inner structure of a file. But if `env` is a script or other non-ELF file, it uses
1163+
/// interpreter path instead and tries to search ELF file again, going recursively in case interpreter
1164+
/// is also a script/non-ELF file.
1165+
///
1166+
/// If nothing was found, then the function falls back to defaults.
10291167
fn detectAbiAndDynamicLinker(
10301168
cpu: Target.Cpu,
10311169
os: Target.Os,
@@ -1093,114 +1231,44 @@ fn detectAbiAndDynamicLinker(
10931231

10941232
const ld_info_list = ld_info_list_buffer[0..ld_info_list_len];
10951233

1096-
// Best case scenario: the executable is dynamically linked, and we can iterate
1097-
// over our own shared objects and find a dynamic linker.
1098-
const elf_file = elf_file: {
1099-
// This block looks for a shebang line in /usr/bin/env,
1100-
// if it finds one, then instead of using /usr/bin/env as the ELF file to examine, it uses the file it references instead,
1101-
// doing the same logic recursively in case it finds another shebang line.
1234+
const cwd = fs.cwd();
1235+
1236+
// Algorithm is:
1237+
// 1a) try_path: If PATH is non-empty and `env` file was found in one of the directories, use that.
1238+
// 1b) try_path: If `env` was not found or PATH is empty, try hardcoded path below.
1239+
// 2a) try_hardcoded: If `env` was found in hardcoded location, use that.
1240+
// 2b) try_hardcoded: If `env` was not found, fall back to default ABI and dynamic linker.
1241+
// Source: https://github.com/ziglang/zig/issues/14146#issuecomment-2308984936
1242+
const elf_file = (try_path: {
1243+
const PATH = std.posix.getenv("PATH") orelse break :try_path null;
1244+
var it = mem.tokenizeScalar(u8, PATH, fs.path.delimiter);
1245+
1246+
var buf: [fs.max_path_bytes + 1]u8 = undefined;
1247+
var fbs: std.heap.FixedBufferAllocator = .init(&buf);
1248+
const allocator = fbs.allocator();
1249+
1250+
while (it.next()) |path| : (fbs.reset()) {
1251+
const start_path = fs.path.join(allocator, &.{ path, "env" }) catch |err| switch (err) {
1252+
error.OutOfMemory => continue,
1253+
};
11021254

1103-
var file_name: []const u8 = switch (os.tag) {
1255+
break :try_path resolveElfFileRecursively(cwd, start_path) catch |err| switch (err) {
1256+
error.UnableToFindElfFile => continue,
1257+
};
1258+
} else break :try_path null;
1259+
} orelse try_hardcoded: {
1260+
const hardcoded_file_name = switch (os.tag) {
11041261
// Since /usr/bin/env is hard-coded into the shebang line of many portable scripts, it's a
11051262
// reasonably reliable path to start with.
11061263
else => "/usr/bin/env",
11071264
// Haiku does not have a /usr root directory.
11081265
.haiku => "/bin/env",
11091266
};
11101267

1111-
// According to `man 2 execve`:
1112-
//
1113-
// The kernel imposes a maximum length on the text
1114-
// that follows the "#!" characters at the start of a script;
1115-
// characters beyond the limit are ignored.
1116-
// Before Linux 5.1, the limit is 127 characters.
1117-
// Since Linux 5.1, the limit is 255 characters.
1118-
//
1119-
// Tests show that bash and zsh consider 255 as total limit,
1120-
// *including* "#!" characters and ignoring newline.
1121-
// For safety, we set max length as 255 + \n (1).
1122-
var buffer: [255 + 1]u8 = undefined;
1123-
while (true) {
1124-
// Interpreter path can be relative on Linux, but
1125-
// for simplicity we are asserting it is an absolute path.
1126-
const file = fs.openFileAbsolute(file_name, .{}) catch |err| switch (err) {
1127-
error.NoSpaceLeft => unreachable,
1128-
error.NameTooLong => unreachable,
1129-
error.PathAlreadyExists => unreachable,
1130-
error.SharingViolation => unreachable,
1131-
error.InvalidUtf8 => unreachable, // WASI only
1132-
error.InvalidWtf8 => unreachable, // Windows only
1133-
error.BadPathName => unreachable,
1134-
error.PipeBusy => unreachable,
1135-
error.FileLocksNotSupported => unreachable,
1136-
error.WouldBlock => unreachable,
1137-
error.FileBusy => unreachable, // opened without write permissions
1138-
error.AntivirusInterference => unreachable, // Windows-only error
1139-
1140-
error.IsDir,
1141-
error.NotDir,
1142-
error.AccessDenied,
1143-
error.PermissionDenied,
1144-
error.NoDevice,
1145-
error.FileNotFound,
1146-
error.NetworkNotFound,
1147-
error.FileTooBig,
1148-
error.Unexpected,
1149-
=> |e| {
1150-
std.log.warn("Encountered error: {s}, falling back to default ABI and dynamic linker.", .{@errorName(e)});
1151-
return defaultAbiAndDynamicLinker(cpu, os, query);
1152-
},
1153-
1154-
else => |e| return e,
1155-
};
1156-
var is_elf_file = false;
1157-
defer if (is_elf_file == false) file.close();
1158-
1159-
// Shortest working interpreter path is "#!/i" (4)
1160-
// (interpreter is "/i", assuming all paths are absolute, like in above comment).
1161-
// ELF magic number length is also 4.
1162-
//
1163-
// If file is shorter than that, it is definitely not ELF file
1164-
// nor file with "shebang" line.
1165-
const min_len: usize = 4;
1166-
1167-
const len = preadAtLeast(file, &buffer, 0, min_len) catch |err| switch (err) {
1168-
error.UnexpectedEndOfFile,
1169-
error.UnableToReadElfFile,
1170-
error.ProcessNotFound,
1171-
=> return defaultAbiAndDynamicLinker(cpu, os, query),
1172-
1173-
else => |e| return e,
1174-
};
1175-
const content = buffer[0..len];
1176-
1177-
if (mem.eql(u8, content[0..4], std.elf.MAGIC)) {
1178-
// It is very likely ELF file!
1179-
is_elf_file = true;
1180-
break :elf_file file;
1181-
} else if (mem.eql(u8, content[0..2], "#!")) {
1182-
// We detected shebang, now parse entire line.
1183-
1184-
// Trim leading "#!", spaces and tabs.
1185-
const trimmed_line = mem.trimLeft(u8, content[2..], &.{ ' ', '\t' });
1186-
1187-
// This line can have:
1188-
// * Interpreter path only,
1189-
// * Interpreter path and arguments, all separated by space, tab or NUL character.
1190-
// And optionally newline at the end.
1191-
const path_maybe_args = mem.trimRight(u8, trimmed_line, "\n");
1192-
1193-
// Separate path and args.
1194-
const path_end = mem.indexOfAny(u8, path_maybe_args, &.{ ' ', '\t', 0 }) orelse path_maybe_args.len;
1195-
1196-
file_name = path_maybe_args[0..path_end];
1197-
continue;
1198-
} else {
1199-
// Not a ELF file, not a shell script with "shebang line", invalid duck.
1200-
return defaultAbiAndDynamicLinker(cpu, os, query);
1201-
}
1202-
}
1203-
};
1268+
break :try_hardcoded resolveElfFileRecursively(cwd, hardcoded_file_name) catch |err| switch (err) {
1269+
error.UnableToFindElfFile => null,
1270+
};
1271+
}) orelse return defaultAbiAndDynamicLinker(cpu, os, query);
12041272
defer elf_file.close();
12051273

12061274
// TODO: inline this function and combine the buffer we already read above to find
@@ -1225,10 +1293,7 @@ fn detectAbiAndDynamicLinker(
12251293
error.NameTooLong,
12261294
error.StaticElfFile,
12271295
// Finally, we fall back on the standard path.
1228-
=> |e| {
1229-
std.log.warn("Encountered error: {s}, falling back to default ABI and dynamic linker.", .{@errorName(e)});
1230-
return defaultAbiAndDynamicLinker(cpu, os, query);
1231-
},
1296+
=> defaultAbiAndDynamicLinker(cpu, os, query),
12321297
};
12331298
}
12341299

0 commit comments

Comments
 (0)