Skip to content

Commit bd985a9

Browse files
committed
std.zig.system: use both PATH and hardcoded locations to find env
This should help systems whose main `env` binary lives in a different location than the hardcoded `/usr/bin/env` **during build** (not necessarily always), such as Nix/Guix, Termux, Gentoo Prefix, etc. Related: https://www.github.com/ziglang/zig/issues/12156 https://www.github.com/ziglang/zig/issues/14146 https://www.github.com/ziglang/zig/issues/14577 https://www.github.com/ziglang/zig/issues/15898 Source for the logic: https://www.github.com/ziglang/zig/issues/14146#issuecomment-2308984936 This is the second attempt, after https://www.github.com/ziglang/zig/pull/21540 was reverted. In this version, relative paths are handled too. Signed-off-by: Eric Joldasov <[email protected]>
1 parent 8453fb0 commit bd985a9

File tree

1 file changed

+149
-108
lines changed

1 file changed

+149
-108
lines changed

lib/std/zig/system.zig

Lines changed: 149 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -995,6 +995,116 @@ fn glibcVerFromSoFile(file: fs.File) !std.SemanticVersion {
995995
return max_ver;
996996
}
997997

998+
/// Opens the file at `start_path` through `cwd` and guesses whether it is a script
/// or an ELF file.
///
/// If the file starts with a "shebang line", it is considered a script and the logic is
/// re-run on the interpreter named after the "#!" characters. The interpreter path is
/// resolved against the directory of the script that names it. If the interpreter is
/// itself a script, the chase continues until a non-script file is found.
///
/// If the file starts with the ELF magic sequence, it is considered an ELF file and is
/// returned still open; closing it is the caller's responsibility.
///
/// Every failure (the file cannot be opened or read, it is neither a script nor an ELF
/// file, the shebang names no interpreter, or the resolved path does not fit the path
/// buffer) is reported as `error.UnableToFindElfFile`.
fn resolveElfFileRecursively(cwd: fs.Dir, start_path: []const u8) error{UnableToFindElfFile}!fs.File {
    var current_path = start_path;

    // Backing storage for `fs.path.resolve` results. It is not reset between
    // iterations because `current_path` (and the directory slice derived from it)
    // may still point into it while the next path is being resolved.
    var buf: [fs.max_path_bytes + 1]u8 = undefined;
    var fbs: std.heap.FixedBufferAllocator = .init(&buf);
    const allocator = fbs.allocator();

    // Storage for the beginning of the file.
    // According to `man 2 execve`:
    //
    // The kernel imposes a maximum length on the text
    // that follows the "#!" characters at the start of a script;
    // characters beyond the limit are ignored.
    // Before Linux 5.1, the limit is 127 characters.
    // Since Linux 5.1, the limit is 255 characters.
    //
    // Tests show that bash and zsh consider 255 as total limit,
    // *including* "#!" characters and ignoring newline.
    // For safety, we set max length as 255 + \n (1).
    var buffer: [255 + 1]u8 = undefined;

    while (true) {
        const file = cwd.openFile(current_path, .{}) catch |err| switch (err) {
            error.NoSpaceLeft => unreachable,
            error.NameTooLong => unreachable,
            error.PathAlreadyExists => unreachable,
            error.SharingViolation => unreachable,
            error.InvalidUtf8 => unreachable, // WASI only
            error.InvalidWtf8 => unreachable, // Windows only
            error.BadPathName => unreachable,
            error.PipeBusy => unreachable,
            error.FileLocksNotSupported => unreachable,
            error.WouldBlock => unreachable,
            error.FileBusy => unreachable, // opened without write permissions
            error.AntivirusInterference => unreachable, // Windows-only error

            error.IsDir,
            error.NotDir,

            error.AccessDenied,
            error.DeviceBusy,
            error.FileTooBig,
            error.SymLinkLoop,
            error.ProcessFdQuotaExceeded,
            error.SystemFdQuotaExceeded,
            error.SystemResources,

            error.FileNotFound,
            error.NetworkNotFound,
            error.NoDevice,
            error.Unexpected,
            => return error.UnableToFindElfFile,
        };
        // The handle is kept open only when it is handed back to the caller.
        var is_elf_file = false;
        defer if (!is_elf_file) file.close();

        // Shortest working interpreter path is "#!i" (3)
        // (interpreter is "i", relative to file itself).
        // ELF magic number length is 4.
        const len = preadAtLeast(file, &buffer, 0, 4) catch |err| switch (err) {
            error.UnexpectedEndOfFile => preadAtLeast(file, &buffer, 0, 3) catch
                // If file is shorter than that, it is definitely not ELF file
                // nor file with "shebang" line.
                return error.UnableToFindElfFile,
            else => return error.UnableToFindElfFile,
        };
        const content = buffer[0..len];

        // `len` may be 3 after the fallback read above, so guard the 4-byte slice.
        if (len >= 4 and mem.eql(u8, content[0..4], std.elf.MAGIC)) {
            // It is very likely ELF file!
            is_elf_file = true;
            return file;
        }

        if (!mem.eql(u8, content[0..2], "#!")) {
            // Not a ELF file, not a shell script with "shebang line", invalid duck.
            return error.UnableToFindElfFile;
        }

        // We detected shebang, now parse its line only. The buffer normally holds
        // more than the first line of the script, so cut at the first newline;
        // otherwise text from following lines would leak into the interpreter path.
        // `content` starts with "#!", hence `line_end` is always >= 2.
        const line_end = mem.indexOfScalar(u8, content, '\n') orelse content.len;

        // Trim leading "#!", spaces and tabs.
        const path_maybe_args = mem.trimLeft(u8, content[2..line_end], &.{ ' ', '\t' });

        // This line can have:
        // * Interpreter path only,
        // * Interpreter path and arguments, all separated by space, tab or NUL character.
        const path_end = mem.indexOfAny(u8, path_maybe_args, &.{ ' ', '\t', 0 }) orelse path_maybe_args.len;
        const interpreter_path = path_maybe_args[0..path_end];

        // "#!" followed by nothing usable names no interpreter at all.
        if (interpreter_path.len == 0) return error.UnableToFindElfFile;

        // `dirname` can return `null` in two situations:
        // * When path is '/': impossible since it always contain file path.
        // * When path is "some_current_dir_file": use ".".
        const current_dir = fs.path.dirname(current_path) orelse ".";
        current_path = fs.path.resolve(allocator, &.{ current_dir, interpreter_path }) catch
            return error.UnableToFindElfFile;
    }
}
1107+
9981108
/// In the past, this function attempted to use the executable's own binary if it was dynamically
9991109
/// linked to answer both the C ABI question and the dynamic linker question. However, this
10001110
/// could be problematic on a system that uses a RUNPATH for the compiler binary, locking
@@ -1003,11 +1113,14 @@ fn glibcVerFromSoFile(file: fs.File) !std.SemanticVersion {
10031113
/// the dynamic linker will match that of the compiler binary. Executables with these versions
10041114
/// mismatching will fail to run.
10051115
///
1006-
/// Therefore, this function works the same regardless of whether the compiler binary is
1007-
/// dynamically or statically linked. It inspects `/usr/bin/env` as an ELF file to find the
1008-
/// answer to these questions, or if there is a shebang line, then it chases the referenced
1009-
/// file recursively. If that does not provide the answer, then the function falls back to
1010-
/// defaults.
1116+
/// Therefore, this function now does not inspect the executable's own binary.
1117+
/// Instead, it tries to find `env` program in PATH or in hardcoded location, and uses it
1118+
/// to find suitable ELF file. If `env` program is an executable, work is done and function starts to
1119+
/// inspect inner structure of a file. But if `env` is a script or other non-ELF file, it uses
1120+
/// interpreter path instead and tries to search ELF file again, going recursively in case interpreter
1121+
/// is also a script/non-ELF file.
1122+
///
1123+
/// If nothing was found, then the function falls back to defaults.
10111124
fn detectAbiAndDynamicLinker(
10121125
cpu: Target.Cpu,
10131126
os: Target.Os,
@@ -1075,113 +1188,44 @@ fn detectAbiAndDynamicLinker(
10751188

10761189
const ld_info_list = ld_info_list_buffer[0..ld_info_list_len];
10771190

1078-
// Best case scenario: the executable is dynamically linked, and we can iterate
1079-
// over our own shared objects and find a dynamic linker.
1080-
const elf_file = elf_file: {
1081-
// This block looks for a shebang line in /usr/bin/env,
1082-
// if it finds one, then instead of using /usr/bin/env as the ELF file to examine, it uses the file it references instead,
1083-
// doing the same logic recursively in case it finds another shebang line.
1191+
const cwd = fs.cwd();
1192+
1193+
// Algorithm is:
1194+
// 1a) try_path: If PATH is non-empty and `env` file was found in one of the directories, use that.
1195+
// 1b) try_path: If `env` was not found or PATH is empty, try hardcoded path below.
1196+
// 2a) try_hardcoded: If `env` was found in hardcoded location, use that.
1197+
// 2b) try_hardcoded: If `env` was not found, fall back to default ABI and dynamic linker.
1198+
// Source: https://github.com/ziglang/zig/issues/14146#issuecomment-2308984936
1199+
const elf_file = (try_path: {
1200+
const PATH = std.posix.getenv("PATH") orelse break :try_path null;
1201+
var it = mem.tokenizeScalar(u8, PATH, fs.path.delimiter);
1202+
1203+
var buf: [fs.max_path_bytes + 1]u8 = undefined;
1204+
var fbs: std.heap.FixedBufferAllocator = .init(&buf);
1205+
const allocator = fbs.allocator();
1206+
1207+
while (it.next()) |path| : (fbs.reset()) {
1208+
const start_path = fs.path.join(allocator, &.{ path, "env" }) catch |err| switch (err) {
1209+
error.OutOfMemory => continue,
1210+
};
10841211

1085-
var file_name: []const u8 = switch (os.tag) {
1212+
break :try_path resolveElfFileRecursively(cwd, start_path) catch |err| switch (err) {
1213+
error.UnableToFindElfFile => continue,
1214+
};
1215+
} else break :try_path null;
1216+
} orelse try_hardcoded: {
1217+
const hardcoded_file_name = switch (os.tag) {
10861218
// Since /usr/bin/env is hard-coded into the shebang line of many portable scripts, it's a
10871219
// reasonably reliable path to start with.
10881220
else => "/usr/bin/env",
10891221
// Haiku does not have a /usr root directory.
10901222
.haiku => "/bin/env",
10911223
};
10921224

1093-
// According to `man 2 execve`:
1094-
//
1095-
// The kernel imposes a maximum length on the text
1096-
// that follows the "#!" characters at the start of a script;
1097-
// characters beyond the limit are ignored.
1098-
// Before Linux 5.1, the limit is 127 characters.
1099-
// Since Linux 5.1, the limit is 255 characters.
1100-
//
1101-
// Tests show that bash and zsh consider 255 as total limit,
1102-
// *including* "#!" characters and ignoring newline.
1103-
// For safety, we set max length as 255 + \n (1).
1104-
var buffer: [255 + 1]u8 = undefined;
1105-
while (true) {
1106-
// Interpreter path can be relative on Linux, but
1107-
// for simplicity we are asserting it is an absolute path.
1108-
const file = fs.openFileAbsolute(file_name, .{}) catch |err| switch (err) {
1109-
error.NoSpaceLeft => unreachable,
1110-
error.NameTooLong => unreachable,
1111-
error.PathAlreadyExists => unreachable,
1112-
error.SharingViolation => unreachable,
1113-
error.InvalidUtf8 => unreachable, // WASI only
1114-
error.InvalidWtf8 => unreachable, // Windows only
1115-
error.BadPathName => unreachable,
1116-
error.PipeBusy => unreachable,
1117-
error.FileLocksNotSupported => unreachable,
1118-
error.WouldBlock => unreachable,
1119-
error.FileBusy => unreachable, // opened without write permissions
1120-
error.AntivirusInterference => unreachable, // Windows-only error
1121-
1122-
error.IsDir,
1123-
error.NotDir,
1124-
error.AccessDenied,
1125-
error.NoDevice,
1126-
error.FileNotFound,
1127-
error.NetworkNotFound,
1128-
error.FileTooBig,
1129-
error.Unexpected,
1130-
=> |e| {
1131-
std.log.warn("Encountered error: {s}, falling back to default ABI and dynamic linker.", .{@errorName(e)});
1132-
return defaultAbiAndDynamicLinker(cpu, os, query);
1133-
},
1134-
1135-
else => |e| return e,
1136-
};
1137-
var is_elf_file = false;
1138-
defer if (is_elf_file == false) file.close();
1139-
1140-
// Shortest working interpreter path is "#!/i" (4)
1141-
// (interpreter is "/i", assuming all paths are absolute, like in above comment).
1142-
// ELF magic number length is also 4.
1143-
//
1144-
// If file is shorter than that, it is definitely not ELF file
1145-
// nor file with "shebang" line.
1146-
const min_len: usize = 4;
1147-
1148-
const len = preadAtLeast(file, &buffer, 0, min_len) catch |err| switch (err) {
1149-
error.UnexpectedEndOfFile,
1150-
error.UnableToReadElfFile,
1151-
error.ProcessNotFound,
1152-
=> return defaultAbiAndDynamicLinker(cpu, os, query),
1153-
1154-
else => |e| return e,
1155-
};
1156-
const content = buffer[0..len];
1157-
1158-
if (mem.eql(u8, content[0..4], std.elf.MAGIC)) {
1159-
// It is very likely ELF file!
1160-
is_elf_file = true;
1161-
break :elf_file file;
1162-
} else if (mem.eql(u8, content[0..2], "#!")) {
1163-
// We detected shebang, now parse entire line.
1164-
1165-
// Trim leading "#!", spaces and tabs.
1166-
const trimmed_line = mem.trimLeft(u8, content[2..], &.{ ' ', '\t' });
1167-
1168-
// This line can have:
1169-
// * Interpreter path only,
1170-
// * Interpreter path and arguments, all separated by space, tab or NUL character.
1171-
// And optionally newline at the end.
1172-
const path_maybe_args = mem.trimRight(u8, trimmed_line, "\n");
1173-
1174-
// Separate path and args.
1175-
const path_end = mem.indexOfAny(u8, path_maybe_args, &.{ ' ', '\t', 0 }) orelse path_maybe_args.len;
1176-
1177-
file_name = path_maybe_args[0..path_end];
1178-
continue;
1179-
} else {
1180-
// Not a ELF file, not a shell script with "shebang line", invalid duck.
1181-
return defaultAbiAndDynamicLinker(cpu, os, query);
1182-
}
1183-
}
1184-
};
1225+
break :try_hardcoded resolveElfFileRecursively(cwd, hardcoded_file_name) catch |err| switch (err) {
1226+
error.UnableToFindElfFile => null,
1227+
};
1228+
}) orelse return defaultAbiAndDynamicLinker(cpu, os, query);
11851229
defer elf_file.close();
11861230

11871231
// TODO: inline this function and combine the buffer we already read above to find
@@ -1205,10 +1249,7 @@ fn detectAbiAndDynamicLinker(
12051249
error.UnexpectedEndOfFile,
12061250
error.NameTooLong,
12071251
// Finally, we fall back on the standard path.
1208-
=> |e| {
1209-
std.log.warn("Encountered error: {s}, falling back to default ABI and dynamic linker.", .{@errorName(e)});
1210-
return defaultAbiAndDynamicLinker(cpu, os, query);
1211-
},
1252+
=> defaultAbiAndDynamicLinker(cpu, os, query),
12121253
};
12131254
}
12141255

0 commit comments

Comments
 (0)