For example, this allows `finalizer` inlining in the following case:
```julia
# Wrapper around a raw `Ptr{T}`. Declared `mutable` so that a finalizer can be
# attached to instances; the `ptr` field itself is `const` and cannot be
# reassigned after construction.
mutable struct ForeignBuffer{T}
const ptr::Ptr{T}
end
# Global flag, initially `false`; the finalizer registered in `foreign_alloc`
# sets it to `true`, which makes the finalizer's execution observable.
const foreign_buffer_finalized = Ref(false)
# Allocate `sizeof(T) * length` bytes with `Libc.malloc`, wrap the resulting
# pointer in a `ForeignBuffer{T}`, and register a finalizer on it that frees the
# memory. Returns the `ForeignBuffer{T}` (`finalizer` returns the object it was
# given).
function foreign_alloc(::Type{T}, length) where T
ptr = Libc.malloc(sizeof(T) * length)
# `Libc.malloc` yields an untyped pointer; retype it as `Ptr{T}`.
ptr = Base.unsafe_convert(Ptr{T}, ptr)
obj = ForeignBuffer{T}(ptr)
return finalizer(obj) do obj
# Assert the `:notaskstate` and `:nothrow` effects for the finalizer body.
# NOTE(review): presumably these are what the optimizer needs in order to
# inline the finalizer — confirm against the inlining pass.
Base.@assume_effects :notaskstate :nothrow
# Record that the finalizer ran, then release the malloc'ed memory.
foreign_buffer_finalized[] = true
Libc.free(obj.ptr)
end
end
# Example entry point: allocate a `Float64` buffer of `N` elements via
# `foreign_alloc`, read its pointer while the buffer is preserved, and print the
# pointer to `devnull`. The buffer is not returned from this function.
function f_EA_finalizer(N::Int)
workspace = foreign_alloc(Float64, N)
# Keep `workspace` alive for the whole block so that it is not collected (and
# its finalizer not run) while the raw `ptr` is being used.
GC.@preserve workspace begin
(;ptr) = workspace
# The call is kept out-of-line (`@noinline`) and asserted `:nothrow`.
# NOTE(review): `:nothrow` is presumably needed because finalizer inlining
# requires no-throw on every path up to the last use — confirm.
Base.@assume_effects :nothrow @noinline println(devnull, "ptr = ", ptr)
end
end
```
```julia
julia> @code_typed f_EA_finalizer(42)
CodeInfo(
1 ── %1 = Base.mul_int(8, N)::Int64
│ %2 = Core.lshr_int(%1, 63)::Int64
│ %3 = Core.trunc_int(Core.UInt8, %2)::UInt8
│ %4 = Core.eq_int(%3, 0x01)::Bool
└─── goto #3 if not %4
2 ── invoke Core.throw_inexacterror(:convert::Symbol, UInt64::Type, %1::Int64)::Union{}
└─── unreachable
3 ── goto #4
4 ── %9 = Core.bitcast(Core.UInt64, %1)::UInt64
└─── goto #5
5 ── goto #6
6 ── goto #7
7 ── goto #8
8 ── %14 = $(Expr(:foreigncall, :(:malloc), Ptr{Nothing}, svec(UInt64), 0, :(:ccall), :(%9), :(%9)))::Ptr{Nothing}
└─── goto #9
9 ── %16 = Base.bitcast(Ptr{Float64}, %14)::Ptr{Float64}
│ %17 = %new(ForeignBuffer{Float64}, %16)::ForeignBuffer{Float64}
└─── goto #10
10 ─ %19 = $(Expr(:gc_preserve_begin, :(%17)))
│ %20 = Base.getfield(%17, :ptr)::Ptr{Float64}
│ invoke Main.println(Main.devnull::Base.DevNull, "ptr = "::String, %20::Ptr{Float64})::Nothing
│ $(Expr(:gc_preserve_end, :(%19)))
│ %23 = Main.foreign_buffer_finalized::Base.RefValue{Bool}
│ Base.setfield!(%23, :x, true)::Bool
│ %25 = Base.getfield(%17, :ptr)::Ptr{Float64}
│ %26 = Base.bitcast(Ptr{Nothing}, %25)::Ptr{Nothing}
│ $(Expr(:foreigncall, :(:free), Nothing, svec(Ptr{Nothing}), 0, :(:ccall), :(%26), :(%25)))::Nothing
└─── return nothing
) => Nothing
```
However, this is still a WIP. Before merging, I want to improve EA's
precision a bit and at least fix the test case that is currently marked as
`broken`. I also need to check its impact on compiler performance.
Additionally, I believe this feature is not yet practical.
In particular, there is still significant room for improvement in the
following areas:
- EA's interprocedural capabilities: currently, EA is performed ad hoc
on only a limited set of frames for latency reasons, which significantly
reduces its precision in the presence of interprocedural calls.
- Relaxing the `:nothrow` check for finalizer inlining: the current
algorithm requires `:nothrow`-ness on all paths from the allocation of
the mutable struct to its last use, which is not practical for
real-world cases. Even when `:nothrow` cannot be guaranteed, auxiliary
optimizations such as inserting a `finalize` call after the last use
might still be possible.