Skip to content

Commit 6014e7d

Browse files
committed
Don't allow reinterprets that would expose padding
In #25908 it was noted that reinterpreting structures with paddings exposes undef LLVM values to user code. This is problematic, because an LLVM undef value is quite dangerous (it can have a different value at every use, e.g. for `a::Bool` undef, we can have `a || !a == true`. There are proposal in LLVM to create values that are merely arbitrary (but the same at every use), but that capability does not currently exist in LLVM. As such, we should try hard to prevent `undef` showing up in a user-visible way. There are several ways to fix this: 1. Wait until LLVM comes up with a safer `undef` and have the value merely be arbitrary, but not dangerous. 2. Always guarantee that padding bytes will be 0. 3. For contiguous-memory arrays, guarantee that we end up with the underlying bytes from that array. However, for now, I think don't think we should make a choice here. Issues like #21912, may play into the consideration, and I think we should be able to reserve making a choice until that point. So what this PR does is only allow reinterprets when they would not expose padding. This should hopefully cover the most common use cases of reinterpret: - Reinterpreting a vector or matrix of values to StaticVectors of the same element type. These should generally always have compatiable padding (if not, reinterpret was likely the wrong API to use). - Reinterpreting from a Vector{UInt8} to a vector of structs (that may have padding). This PR allows this for reading (but not for writing). Both cases are generally better served by the IO APIs, but hopefully this should still allow the common cases. Fixes #25908
1 parent a60119b commit 6014e7d

File tree

5 files changed

+148
-6
lines changed

5 files changed

+148
-6
lines changed

base/atomics.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,7 @@ inttype(::Type{Float32}) = Int32
337337
inttype(::Type{Float64}) = Int64
338338

339339

340-
alignment(::Type{T}) where {T} = ccall(:jl_alignment, Cint, (Csize_t,), sizeof(T))
340+
gc_alignment(::Type{T}) where {T} = ccall(:jl_alignment, Cint, (Csize_t,), sizeof(T))
341341

342342
# All atomic operations have acquire and/or release semantics, depending on
343343
# whether the load or store values. Most of the time, this is what one wants
@@ -350,13 +350,13 @@ for typ in atomictypes
350350
@eval getindex(x::Atomic{$typ}) =
351351
llvmcall($"""
352352
%ptr = inttoptr i$WORD_SIZE %0 to $lt*
353-
%rv = load atomic $rt %ptr acquire, align $(alignment(typ))
353+
%rv = load atomic $rt %ptr acquire, align $(gc_alignment(typ))
354354
ret $lt %rv
355355
""", $typ, Tuple{Ptr{$typ}}, unsafe_convert(Ptr{$typ}, x))
356356
@eval setindex!(x::Atomic{$typ}, v::$typ) =
357357
llvmcall($"""
358358
%ptr = inttoptr i$WORD_SIZE %0 to $lt*
359-
store atomic $lt %1, $lt* %ptr release, align $(alignment(typ))
359+
store atomic $lt %1, $lt* %ptr release, align $(gc_alignment(typ))
360360
ret void
361361
""", Cvoid, Tuple{Ptr{$typ}, $typ}, unsafe_convert(Ptr{$typ}, x), v)
362362

base/iterators.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1030,6 +1030,9 @@ mutable struct Stateful{T, VS}
10301030
# A bit awkward right now, but adapted to the new iteration protocol
10311031
nextvalstate::Union{VS, Nothing}
10321032
taken::Int
1033+
@inline function Stateful{<:Any, Any}(itr::T) where {T}
1034+
new{T, Any}(itr, iterate(itr), 0)
1035+
end
10331036
@inline function Stateful(itr::T) where {T}
10341037
VS = approx_iter_type(T)
10351038
new{T, VS}(itr, iterate(itr)::VS, 0)

base/reinterpretarray.jl

Lines changed: 122 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ the first dimension.
77
"""
88
struct ReinterpretArray{T,N,S,A<:AbstractArray{S, N}} <: AbstractArray{T, N}
99
parent::A
10+
readable::Bool
11+
writable::Bool
1012
global reinterpret
1113
function reinterpret(::Type{T}, a::A) where {T,N,S,A<:AbstractArray{S, N}}
1214
function throwbits(::Type{S}, ::Type{T}, ::Type{U}) where {S,T,U}
@@ -31,7 +33,28 @@ struct ReinterpretArray{T,N,S,A<:AbstractArray{S, N}} <: AbstractArray{T, N}
3133
dim = size(a)[1]
3234
rem(dim*sizeof(S),sizeof(T)) == 0 || thrownonint(S, T, dim)
3335
end
34-
new{T, N, S, A}(a)
36+
readable = array_subpadding(T, S)
37+
writable = array_subpadding(S, T)
38+
new{T, N, S, A}(a, readable, writable)
39+
end
40+
end
41+
42+
function check_readable(a::ReinterpretArray{T, N, S} where N) where {T,S}
43+
# See comment in check_writable
44+
if !a.readable && !array_subpadding(T, S)
45+
throw(PaddingError(T, S))
46+
end
47+
end
48+
49+
function check_writable(a::ReinterpretArray{T, N, S} where N) where {T,S}
50+
# `array_subpadding` is relatively expensive (compared to a simple arrayref),
51+
# so it is cached in the array. However, it is computable at compile time if,
52+
# inference has the types available. By using this form of the check, we can
53+
# get the best of both worlds for the success case. If the types were not
54+
# available to inference, we simply need to check the field (relatively cheap)
55+
# and if they were we should be able to fold this check away entirely.
56+
if !a.writable && !array_subpadding(S, T)
57+
throw(PaddingError(T, S))
3558
end
3659
end
3760

@@ -53,10 +76,12 @@ unsafe_convert(::Type{Ptr{T}}, a::ReinterpretArray{T,N,S} where N) where {T,S} =
5376
@inline @propagate_inbounds getindex(a::ReinterpretArray) = a[1]
5477

5578
@inline @propagate_inbounds function getindex(a::ReinterpretArray{T,N,S}, inds::Vararg{Int, N}) where {T,N,S}
79+
check_readable(a)
5680
_getindex_ra(a, inds[1], tail(inds))
5781
end
5882

5983
@inline @propagate_inbounds function getindex(a::ReinterpretArray{T,N,S}, i::Int) where {T,N,S}
84+
check_readable(a)
6085
if isa(IndexStyle(a), IndexLinear)
6186
return _getindex_ra(a, i, ())
6287
end
@@ -102,10 +127,12 @@ end
102127
@inline @propagate_inbounds setindex!(a::ReinterpretArray, v) = (a[1] = v)
103128

104129
@inline @propagate_inbounds function setindex!(a::ReinterpretArray{T,N,S}, v, inds::Vararg{Int, N}) where {T,N,S}
130+
check_writable(a)
105131
_setindex_ra!(a, v, inds[1], tail(inds))
106132
end
107133

108134
@inline @propagate_inbounds function setindex!(a::ReinterpretArray{T,N,S}, v, i::Int) where {T,N,S}
135+
check_writable(a)
109136
if isa(IndexStyle(a), IndexLinear)
110137
return _setindex_ra!(a, v, i, ())
111138
end
@@ -165,3 +192,97 @@ end
165192
end
166193
return a
167194
end
195+
196+
# Padding
197+
struct Padding
198+
offset::Int
199+
size::Int
200+
end
201+
function intersect(p1::Padding, p2::Padding)
202+
start = max(p1.offset, p2.offset)
203+
stop = min(p1.offset + p1.size, p2.offset + p2.size)
204+
Padding(start, max(0, stop-start))
205+
end
206+
207+
struct PaddingError
208+
S::Type
209+
T::Type
210+
end
211+
212+
function showerror(io::IO, p::PaddingError)
213+
print(io, "Padding of type $(p.S) is not compatible with type $(p.T).")
214+
end
215+
216+
"""
217+
CyclePadding(padding, total_size)
218+
219+
Cylces an iterator of `Padding` structs, restarting the padding at `total_size`.
220+
E.g. if `padding` is all the padding in a struct and `total_size` is the total
221+
aligned size of that array, `CyclePadding` will correspond to the padding in an
222+
infinite vector of such structs.
223+
"""
224+
struct CyclePadding{P}
225+
padding::P
226+
total_size::Int
227+
end
228+
eltype(::Type{<:CyclePadding}) = Padding
229+
IteratorSize(::Type{<:CyclePadding}) = IsInfinite()
230+
isempty(cp::CyclePadding) = isempty(cp.padding)
231+
function iterate(cp::CyclePadding)
232+
y = iterate(cp.padding)
233+
y === nothing && return nothing
234+
y[1], (0, y[2])
235+
end
236+
function iterate(cp::CyclePadding, state::Tuple)
237+
y = iterate(cp.padding, tail(state)...)
238+
y === nothing && return iterate(cp, (state[1]+cp.total_size,))
239+
Padding(y[1].offset+state[1], y[1].size), (state[1], tail(y)...)
240+
end
241+
242+
"""
243+
Compute the location of padding in a type.
244+
"""
245+
function padding(T)
246+
padding = Padding[]
247+
last_end::Int = 0
248+
for i = 1:fieldcount(T)
249+
offset = fieldoffset(T, i)
250+
fT = fieldtype(T, i)
251+
if offset != last_end
252+
push!(padding, Padding(offset, offset-last_end))
253+
end
254+
last_end = offset + sizeof(fT)
255+
end
256+
padding
257+
end
258+
259+
function CyclePadding(T::DataType)
260+
a, s = datatype_alignment(T), sizeof(T)
261+
as = s + (a - (s % a)) % a
262+
pad = padding(T)
263+
s != as && push!(pad, Padding(s, as - s))
264+
CyclePadding(pad, as)
265+
end
266+
267+
using .Iterators: Stateful
268+
@pure function array_subpadding(S, T)
269+
checked_size = 0
270+
lcm_size = lcm(sizeof(S), sizeof(T))
271+
s, t = Stateful{<:Any, Any}(CyclePadding(S)),
272+
Stateful{<:Any, Any}(CyclePadding(T))
273+
isempty(t) && return true
274+
isempty(s) && return false
275+
while checked_size < lcm_size
276+
# Take padding in T
277+
pad = popfirst!(t)
278+
# See if there's corresponding padding in S
279+
while true
280+
ps = peek(s)
281+
ps.offset > pad.offset && return false
282+
intersect(ps, pad) == pad && break
283+
popfirst!(s)
284+
end
285+
checked_size = pad.offset + pad.size
286+
end
287+
return true
288+
end

base/sysimg.jl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,8 +137,6 @@ include("array.jl")
137137
include("abstractarray.jl")
138138
include("subarray.jl")
139139
include("views.jl")
140-
include("reinterpretarray.jl")
141-
142140

143141
# ## dims-type-converting Array constructors for convenience
144142
# type and dimensionality specified, accepting dims as series of Integers
@@ -205,6 +203,7 @@ include("reduce.jl")
205203

206204
## core structures
207205
include("reshapedarray.jl")
206+
include("reinterpretarray.jl")
208207
include("bitarray.jl")
209208
include("bitset.jl")
210209

test/reinterpretarray.jl

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,22 @@ let a = fill(1.0, 5, 3)
6868
@test all(a[1:2:5,:] .=== reinterpret(Float64, [Int64(4)])[1])
6969
@test all(r .=== Int64(4))
7070
end
71+
72+
# Error on reinterprets that would expose padding
73+
struct S1
74+
a::Int8
75+
b::Int64
76+
end
77+
78+
struct S2
79+
a::Int16
80+
b::Int64
81+
end
82+
83+
A1 = S1[S1(0, 0)]
84+
A2 = S2[S2(0, 0)]
85+
@test reinterpret(S1, A2)[1] == S1(0, 0)
86+
@test_throws Base.PaddingError (reinterpret(S1, A2)[1] = S2(1, 2))
87+
@test_throws Base.PaddingError reinterpret(S2, A1)[1]
88+
reinterpret(S2, A1)[1] = S2(1, 2)
89+
@test A1[1] == S1(1, 2)

0 commit comments

Comments
 (0)