Skip to content

Commit 0ec6b2d

Browse files
committed
compiler: simplify generic functions, fix issues with inline calls
The original motivation here was to fix regressions caused by #22414. However, while working on this, I ended up discussing a language simplification with Andrew, which changes things a little from how they worked before #22414. The main user-facing change here is that any reference to a prior function parameter, even if potentially comptime-known at the usage site or even not analyzed, now makes a function generic. This applies even if the parameter being referenced is not a `comptime` parameter, since it could still be populated when performing an inline call. This is a breaking language change. The detection of this is done in AstGen; when evaluating a parameter type or return type, we track whether it referenced any prior parameter, and if so, we mark this type as being "generic" in ZIR. This will cause Sema to not evaluate it until the time of instantiation or inline call. A lovely consequence of this from an implementation perspective is that it eliminates the need for most of the "generic poison" system. In particular, `error.GenericPoison` is now completely unnecessary, because we identify generic expressions earlier in the pipeline; this simplifies the compiler and avoids redundant work. This also entirely eliminates the concept of the "generic poison value". The only remnant of this system is the "generic poison type" (`Type.generic_poison` and `InternPool.Index.generic_poison_type`). This type is used in two places: * During semantic analysis, to represent an unknown result type. * When storing generic function types, to represent a generic parameter/return type. It's possible that these use cases should instead use `.none`, but I leave that investigation to a future adventurer. One last thing. Prior to #22414, inline calls were a little inefficient, because they re-evaluated even non-generic parameter types whenever they were called. Changing this behavior is what ultimately led to #22538. Well, because the new logic will mark a type expression as generic if there is any change its resolved type could differ in an inline call, this redundant work is unnecessary! So, this is another way in which the new design reduces redundant work and complexity. Resolves: #22494 Resolves: #22532 Resolves: #22538
1 parent 216e0f3 commit 0ec6b2d

23 files changed

+265
-378
lines changed

lib/std/zig/AstGen.zig

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,8 @@ fn setExtra(astgen: *AstGen, index: usize, extra: anytype) void {
107107
Zir.Inst.SwitchBlock.Bits,
108108
Zir.Inst.SwitchBlockErrUnion.Bits,
109109
Zir.Inst.FuncFancy.Bits,
110+
Zir.Inst.Param.Type,
111+
Zir.Inst.Func.RetTy,
110112
=> @bitCast(@field(extra, field.name)),
111113

112114
else => @compileError("bad field type"),
@@ -1384,7 +1386,7 @@ fn fnProtoExprInner(
13841386
const tag: Zir.Inst.Tag = if (is_comptime) .param_comptime else .param;
13851387
// We pass `prev_param_insts` as `&.{}` here because a function prototype can't refer to previous
13861388
// arguments (we haven't set up scopes here).
1387-
const param_inst = try block_scope.addParam(&param_gz, &.{}, tag, name_token, param_name);
1389+
const param_inst = try block_scope.addParam(&param_gz, &.{}, false, tag, name_token, param_name);
13881390
assert(param_inst_expected == param_inst);
13891391
}
13901392
}
@@ -1416,6 +1418,7 @@ fn fnProtoExprInner(
14161418

14171419
.ret_param_refs = &.{},
14181420
.param_insts = &.{},
1421+
.ret_ty_is_generic = false,
14191422

14201423
.param_block = block_inst,
14211424
.body_gz = null,
@@ -4336,6 +4339,9 @@ fn fnDeclInner(
43364339
// Note that the capacity here may not be sufficient, as this does not include `anytype` parameters.
43374340
var param_insts: std.ArrayListUnmanaged(Zir.Inst.Index) = try .initCapacity(astgen.arena, fn_proto.ast.params.len);
43384341

4342+
// We use this as `is_used_or_discarded` to figure out if parameters / return types are generic.
4343+
var any_param_used = false;
4344+
43394345
var noalias_bits: u32 = 0;
43404346
var params_scope = scope;
43414347
const is_var_args = is_var_args: {
@@ -4409,16 +4415,18 @@ fn fnDeclInner(
44094415
} else param: {
44104416
const param_type_node = param.type_expr;
44114417
assert(param_type_node != 0);
4418+
any_param_used = false; // we will check this later
44124419
var param_gz = decl_gz.makeSubBlock(scope);
44134420
defer param_gz.unstack();
44144421
const param_type = try fullBodyExpr(&param_gz, params_scope, coerced_type_ri, param_type_node, .normal);
44154422
const param_inst_expected: Zir.Inst.Index = @enumFromInt(astgen.instructions.len + 1);
44164423
_ = try param_gz.addBreakWithSrcNode(.break_inline, param_inst_expected, param_type, param_type_node);
4424+
const param_type_is_generic = any_param_used;
44174425

44184426
const main_tokens = tree.nodes.items(.main_token);
44194427
const name_token = param.name_token orelse main_tokens[param_type_node];
44204428
const tag: Zir.Inst.Tag = if (is_comptime) .param_comptime else .param;
4421-
const param_inst = try decl_gz.addParam(&param_gz, param_insts.items, tag, name_token, param_name);
4429+
const param_inst = try decl_gz.addParam(&param_gz, param_insts.items, param_type_is_generic, tag, name_token, param_name);
44224430
assert(param_inst_expected == param_inst);
44234431
break :param param_inst.toRef();
44244432
};
@@ -4433,6 +4441,7 @@ fn fnDeclInner(
44334441
.inst = param_inst,
44344442
.token_src = param.name_token.?,
44354443
.id_cat = .@"function parameter",
4444+
.is_used_or_discarded = &any_param_used,
44364445
};
44374446
params_scope = &sub_scope.base;
44384447
try param_insts.append(astgen.arena, param_inst.toIndex().?);
@@ -4446,6 +4455,7 @@ fn fnDeclInner(
44464455

44474456
var ret_gz = decl_gz.makeSubBlock(params_scope);
44484457
defer ret_gz.unstack();
4458+
any_param_used = false; // we will check this later
44494459
const ret_ref: Zir.Inst.Ref = inst: {
44504460
// Parameters are in scope for the return type, so we use `params_scope` here.
44514461
// The calling convention will not have parameters in scope, so we'll just use `scope`.
@@ -4459,6 +4469,7 @@ fn fnDeclInner(
44594469
break :inst inst;
44604470
};
44614471
const ret_body_param_refs = try astgen.fetchRemoveRefEntries(param_insts.items);
4472+
const ret_ty_is_generic = any_param_used;
44624473

44634474
// We're jumping back in source, so restore the cursor.
44644475
astgen.restoreSourceCursor(saved_cursor);
@@ -4556,6 +4567,7 @@ fn fnDeclInner(
45564567
.ret_ref = ret_ref,
45574568
.ret_gz = &ret_gz,
45584569
.ret_param_refs = ret_body_param_refs,
4570+
.ret_ty_is_generic = ret_ty_is_generic,
45594571
.lbrace_line = lbrace_line,
45604572
.lbrace_column = lbrace_column,
45614573
.param_block = decl_inst,
@@ -5028,6 +5040,7 @@ fn testDecl(
50285040

50295041
.ret_param_refs = &.{},
50305042
.param_insts = &.{},
5043+
.ret_ty_is_generic = false,
50315044

50325045
.lbrace_line = lbrace_line,
50335046
.lbrace_column = lbrace_column,
@@ -8546,6 +8559,8 @@ fn localVarRef(
85468559
local_val.used = ident_token;
85478560
}
85488561

8562+
if (local_val.is_used_or_discarded) |ptr| ptr.* = true;
8563+
85498564
const value_inst = if (num_namespaces_out != 0) try tunnelThroughClosure(
85508565
gz,
85518566
ident,
@@ -11876,6 +11891,7 @@ const Scope = struct {
1187611891
/// Track the identifier where it is discarded, like this `_ = foo;`.
1187711892
/// 0 means never discarded.
1187811893
discarded: Ast.TokenIndex = 0,
11894+
is_used_or_discarded: ?*bool = null,
1187911895
/// String table index.
1188011896
name: Zir.NullTerminatedString,
1188111897
id_cat: IdCat,
@@ -12223,6 +12239,7 @@ const GenZir = struct {
1222312239

1222412240
ret_param_refs: []Zir.Inst.Index,
1222512241
param_insts: []Zir.Inst.Index, // refs to params in `body_gz` should still be in `astgen.ref_table`
12242+
ret_ty_is_generic: bool,
1222612243

1222712244
cc_ref: Zir.Inst.Ref,
1222812245
ret_ref: Zir.Inst.Ref,
@@ -12322,6 +12339,8 @@ const GenZir = struct {
1232212339

1232312340
.has_cc_body = cc_body.len != 0,
1232412341
.has_ret_ty_body = ret_body.len != 0,
12342+
12343+
.ret_ty_is_generic = args.ret_ty_is_generic,
1232512344
},
1232612345
});
1232712346

@@ -12372,7 +12391,10 @@ const GenZir = struct {
1237212391

1237312392
const payload_index = astgen.addExtraAssumeCapacity(Zir.Inst.Func{
1237412393
.param_block = args.param_block,
12375-
.ret_body_len = ret_body_len,
12394+
.ret_ty = .{
12395+
.body_len = @intCast(ret_body_len),
12396+
.is_generic = args.ret_ty_is_generic,
12397+
},
1237612398
.body_len = body_len,
1237712399
});
1237812400
const zir_datas = astgen.instructions.items(.data);
@@ -12535,6 +12557,7 @@ const GenZir = struct {
1253512557
/// Previous parameters, which might be referenced in `param_gz` (the new parameter type).
1253612558
/// `ref`s of these instructions will be put into this param's type body, and removed from `AstGen.ref_table`.
1253712559
prev_param_insts: []const Zir.Inst.Index,
12560+
ty_is_generic: bool,
1253812561
tag: Zir.Inst.Tag,
1253912562
/// Absolute token index. This function does the conversion to Decl offset.
1254012563
abs_tok_index: Ast.TokenIndex,
@@ -12548,7 +12571,10 @@ const GenZir = struct {
1254812571

1254912572
const payload_index = gz.astgen.addExtraAssumeCapacity(Zir.Inst.Param{
1255012573
.name = name,
12551-
.body_len = @intCast(body_len),
12574+
.type = .{
12575+
.body_len = @intCast(body_len),
12576+
.is_generic = ty_is_generic,
12577+
},
1255212578
});
1255312579
gz.astgen.appendBodyWithFixupsExtraRefsArrayList(&gz.astgen.extra, param_body, prev_param_insts);
1255412580
param_gz.unstack();

lib/std/zig/Zir.zig

Lines changed: 38 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,8 @@ pub fn extraData(code: Zir, comptime T: type, index: usize) ExtraData(T) {
8989
Inst.SwitchBlockErrUnion.Bits,
9090
Inst.FuncFancy.Bits,
9191
Inst.Declaration.Flags,
92+
Inst.Param.Type,
93+
Inst.Func.RetTy,
9294
=> @bitCast(code.extra[i]),
9395

9496
else => @compileError("bad field type"),
@@ -2126,7 +2128,7 @@ pub const Inst = struct {
21262128
ref_start_index = static_len,
21272129
_,
21282130

2129-
pub const static_len = 71;
2131+
pub const static_len = 70;
21302132

21312133
pub fn toRef(i: Index) Inst.Ref {
21322134
return @enumFromInt(@intFromEnum(Index.ref_start_index) + @intFromEnum(i));
@@ -2229,7 +2231,6 @@ pub const Inst = struct {
22292231
bool_true,
22302232
bool_false,
22312233
empty_tuple,
2232-
generic_poison,
22332234

22342235
/// This Ref does not correspond to any ZIR instruction or constant
22352236
/// value and may instead be used as a sentinel to indicate null.
@@ -2472,24 +2473,31 @@ pub const Inst = struct {
24722473
};
24732474

24742475
/// Trailing:
2475-
/// if (ret_body_len == 1) {
2476+
/// if (ret_ty.body_len == 1) {
24762477
/// 0. return_type: Ref
24772478
/// }
2478-
/// if (ret_body_len > 1) {
2479-
/// 1. return_type: Index // for each ret_body_len
2479+
/// if (ret_ty.body_len > 1) {
2480+
/// 1. return_type: Index // for each ret_ty.body_len
24802481
/// }
24812482
/// 2. body: Index // for each body_len
24822483
/// 3. src_locs: SrcLocs // if body_len != 0
24832484
/// 4. proto_hash: std.zig.SrcHash // if body_len != 0; hash of function prototype
24842485
pub const Func = struct {
2485-
/// If this is 0 it means a void return type.
2486-
/// If this is 1 it means return_type is a simple Ref
2487-
ret_body_len: u32,
2486+
ret_ty: RetTy,
24882487
/// Points to the block that contains the param instructions for this function.
24892488
/// If this is a `declaration`, it refers to the declaration's value body.
24902489
param_block: Index,
24912490
body_len: u32,
24922491

2492+
pub const RetTy = packed struct(u32) {
2493+
/// 0 means `void`.
2494+
/// 1 means the type is a simple `Ref`.
2495+
/// Otherwise, the length of a trailing body.
2496+
body_len: u31,
2497+
/// Whether the return type is generic, i.e. refers to one or more previous parameters.
2498+
is_generic: bool,
2499+
};
2500+
24932501
pub const SrcLocs = struct {
24942502
/// Line index in the source file relative to the parent decl.
24952503
lbrace_line: u32,
@@ -2539,7 +2547,8 @@ pub const Inst = struct {
25392547
has_ret_ty_ref: bool,
25402548
has_ret_ty_body: bool,
25412549
has_any_noalias: bool,
2542-
_: u24 = undefined,
2550+
ret_ty_is_generic: bool,
2551+
_: u23 = undefined,
25432552
};
25442553
};
25452554

@@ -3708,8 +3717,14 @@ pub const Inst = struct {
37083717
pub const Param = struct {
37093718
/// Null-terminated string index.
37103719
name: NullTerminatedString,
3711-
/// The body contains the type of the parameter.
3712-
body_len: u32,
3720+
type: Type,
3721+
3722+
pub const Type = packed struct(u32) {
3723+
/// The body contains the type of the parameter.
3724+
body_len: u31,
3725+
/// Whether the type is generic, i.e. refers to one or more previous parameters.
3726+
is_generic: bool,
3727+
};
37133728
};
37143729

37153730
/// Trailing:
@@ -4492,19 +4507,19 @@ fn findTrackableInner(
44924507

44934508
if (extra.data.body_len == 0) {
44944509
// This is just a prototype. No need to track.
4495-
assert(extra.data.ret_body_len < 2);
4510+
assert(extra.data.ret_ty.body_len < 2);
44964511
return;
44974512
}
44984513

44994514
assert(contents.func_decl == null);
45004515
contents.func_decl = inst;
45014516

45024517
var extra_index: usize = extra.end;
4503-
switch (extra.data.ret_body_len) {
4518+
switch (extra.data.ret_ty.body_len) {
45044519
0 => {},
45054520
1 => extra_index += 1,
45064521
else => {
4507-
const body = zir.bodySlice(extra_index, extra.data.ret_body_len);
4522+
const body = zir.bodySlice(extra_index, extra.data.ret_ty.body_len);
45084523
extra_index += body.len;
45094524
try zir.findTrackableBody(gpa, contents, defers, body);
45104525
},
@@ -4595,7 +4610,7 @@ fn findTrackableInner(
45954610
.param, .param_comptime => {
45964611
const inst_data = datas[@intFromEnum(inst)].pl_tok;
45974612
const extra = zir.extraData(Inst.Param, inst_data.payload_index);
4598-
const body = zir.bodySlice(extra.end, extra.data.body_len);
4613+
const body = zir.bodySlice(extra.end, extra.data.type.body_len);
45994614
try zir.findTrackableBody(gpa, contents, defers, body);
46004615
},
46014616

@@ -4738,6 +4753,7 @@ pub const FnInfo = struct {
47384753
ret_ty_body: []const Inst.Index,
47394754
body: []const Inst.Index,
47404755
ret_ty_ref: Zir.Inst.Ref,
4756+
ret_ty_is_generic: bool,
47414757
total_params_len: u32,
47424758
inferred_error_set: bool,
47434759
};
@@ -4779,6 +4795,7 @@ pub fn getFnInfo(zir: Zir, fn_inst: Inst.Index) FnInfo {
47794795
body: []const Inst.Index,
47804796
ret_ty_ref: Inst.Ref,
47814797
ret_ty_body: []const Inst.Index,
4798+
ret_ty_is_generic: bool,
47824799
ies: bool,
47834800
} = switch (tags[@intFromEnum(fn_inst)]) {
47844801
.func, .func_inferred => |tag| blk: {
@@ -4789,7 +4806,7 @@ pub fn getFnInfo(zir: Zir, fn_inst: Inst.Index) FnInfo {
47894806
var ret_ty_ref: Inst.Ref = .none;
47904807
var ret_ty_body: []const Inst.Index = &.{};
47914808

4792-
switch (extra.data.ret_body_len) {
4809+
switch (extra.data.ret_ty.body_len) {
47934810
0 => {
47944811
ret_ty_ref = .void_type;
47954812
},
@@ -4798,7 +4815,7 @@ pub fn getFnInfo(zir: Zir, fn_inst: Inst.Index) FnInfo {
47984815
extra_index += 1;
47994816
},
48004817
else => {
4801-
ret_ty_body = zir.bodySlice(extra_index, extra.data.ret_body_len);
4818+
ret_ty_body = zir.bodySlice(extra_index, extra.data.ret_ty.body_len);
48024819
extra_index += ret_ty_body.len;
48034820
},
48044821
}
@@ -4811,6 +4828,7 @@ pub fn getFnInfo(zir: Zir, fn_inst: Inst.Index) FnInfo {
48114828
.ret_ty_ref = ret_ty_ref,
48124829
.ret_ty_body = ret_ty_body,
48134830
.body = body,
4831+
.ret_ty_is_generic = extra.data.ret_ty.is_generic,
48144832
.ies = tag == .func_inferred,
48154833
};
48164834
},
@@ -4848,6 +4866,7 @@ pub fn getFnInfo(zir: Zir, fn_inst: Inst.Index) FnInfo {
48484866
.ret_ty_ref = ret_ty_ref,
48494867
.ret_ty_body = ret_ty_body,
48504868
.body = body,
4869+
.ret_ty_is_generic = extra.data.bits.ret_ty_is_generic,
48514870
.ies = extra.data.bits.is_inferred_error,
48524871
};
48534872
},
@@ -4870,6 +4889,7 @@ pub fn getFnInfo(zir: Zir, fn_inst: Inst.Index) FnInfo {
48704889
.ret_ty_ref = info.ret_ty_ref,
48714890
.body = info.body,
48724891
.total_params_len = total_params_len,
4892+
.ret_ty_is_generic = info.ret_ty_is_generic,
48734893
.inferred_error_set = info.ies,
48744894
};
48754895
}
@@ -4967,7 +4987,7 @@ pub fn getAssociatedSrcHash(zir: Zir, inst: Zir.Inst.Index) ?std.zig.SrcHash {
49674987
return null;
49684988
}
49694989
const extra_index = extra.end +
4970-
extra.data.ret_body_len +
4990+
extra.data.ret_ty.body_len +
49714991
extra.data.body_len +
49724992
@typeInfo(Inst.Func.SrcLocs).@"struct".fields.len;
49734993
return @bitCast([4]u32{

src/Air.zig

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1004,7 +1004,6 @@ pub const Inst = struct {
10041004
bool_true = @intFromEnum(InternPool.Index.bool_true),
10051005
bool_false = @intFromEnum(InternPool.Index.bool_false),
10061006
empty_tuple = @intFromEnum(InternPool.Index.empty_tuple),
1007-
generic_poison = @intFromEnum(InternPool.Index.generic_poison),
10081007

10091008
/// This Ref does not correspond to any AIR instruction or constant
10101009
/// value and may instead be used as a sentinel to indicate null.

src/Air/types_resolved.zig

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -455,9 +455,8 @@ pub fn checkVal(val: Value, zcu: *Zcu) bool {
455455

456456
pub fn checkType(ty: Type, zcu: *Zcu) bool {
457457
const ip = &zcu.intern_pool;
458-
return switch (ty.zigTypeTagOrPoison(zcu) catch |err| switch (err) {
459-
error.GenericPoison => return true,
460-
}) {
458+
if (ty.isGenericPoison()) return true;
459+
return switch (ty.zigTypeTag(zcu)) {
461460
.type,
462461
.void,
463462
.bool,

0 commit comments

Comments
 (0)