Skip to content

Commit 3b5d398

Browse files
authored
Fix performance problem pooling strings (JuliaArrays#63)
* Optimize pooling of `StringArray` * Minor change * Test pooling of already-pooled arrays
1 parent 30c673f commit 3b5d398

File tree

3 files changed

+15
-2
lines changed

3 files changed

+15
-2
lines changed

src/StructArrays.jl

+7-1
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,16 @@ function __init__()
1818
Requires.@require Tables="bd369af6-aec1-5ad0-b16a-f7cc5008161c" include("tables.jl")
1919
Requires.@require WeakRefStrings="ea10d353-3f73-51f8-a26c-33c1cb351aa5" begin
2020
# For a WeakRefStrings.StringArray, `fastpermute!` simply forwards to `permute!`,
# applying the permutation `p` to `v` in place.
function fastpermute!(v::WeakRefStrings.StringArray, p::AbstractVector)
    return permute!(v, p)
end
21+
# Re-express a `StringArray{String}` as a `StringArray` whose element type is
# `WeakRefString{UInt8}`, using `convert`.
# Shared by `roweq` and the StringArray method of `pool` below.
# NOTE(review): presumably this avoids materializing a `String` per element
# when indexing — confirm against the WeakRefStrings implementation.
function to_weakrefs(a::WeakRefStrings.StringArray{String})
22+
convert(WeakRefStrings.StringArray{WeakRefStrings.WeakRefString{UInt8}}, a)
23+
end
2124
# Return whether elements `i` and `j` of the string array `a` are equal under
# `isequal`. The comparison is made on the `WeakRefString` form of the array
# rather than on `String` elements.
# The indexing is wrapped in `@inbounds`, so the caller is responsible for
# passing valid indices.
# (This diff shows both sides: the inline `convert` line is the removed version,
# the `to_weakrefs(a)` line is its replacement.)
@inline function roweq(a::WeakRefStrings.StringArray{String}, i, j)
22-
weaksa = convert(WeakRefStrings.StringArray{WeakRefStrings.WeakRefString{UInt8}}, a)
25+
weaksa = to_weakrefs(a)
2326
@inbounds isequal(weaksa[i], weaksa[j])
2427
end
28+
# `pool` specialization for `StringArray{String}`.
# When `condition(v)` is true, a `PooledArray` is built from the `WeakRefString`
# form of `v` and its values are mapped back to `String`; the accompanying tests
# expect the result to be a `PooledArray{String}`.
# When `condition(v)` is false, `v` is returned unchanged.
# `condition` defaults to `!isbitstypeeltype`.
function pool(v::WeakRefStrings.StringArray{String}, condition = !isbitstypeeltype)
29+
condition(v) ? map(String, PooledArray(to_weakrefs(v))) : v
30+
end
2531
end
2632
end
2733

src/sort.jl

+2-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ function Base.permute!(c::StructArray, p::AbstractVector)
99
return c
1010
end
1111

12-
pool(v::AbstractArray, condition = !isbitstypeeltype) = condition(v) ? convert(PooledArray, v) : v
12+
# An array that is already a `PooledArray` is returned untouched; `condition`
# is accepted for signature parity with the other `pool` methods but not used.
function pool(v::PooledArray, condition = !isbitstypeeltype)
    return v
end
13+
# Generic fallback: wrap `v` in a `PooledArray` when `condition(v)` holds,
# otherwise hand `v` back as is. `condition` defaults to `!isbitstypeeltype`.
function pool(v::AbstractArray, condition = !isbitstypeeltype)
    return condition(v) ? PooledArray(v) : v
end
1314
# For a `StructArray`, apply `pool` (with the same `condition`) to each storage
# array handed to the callback by `replace_storage`.
function pool(v::StructArray, condition = !isbitstypeeltype)
    return replace_storage(v) do t
        pool(t, condition)
    end
end
1415

1516
struct GroupPerm{V<:AbstractVector, P<:AbstractVector{<:Integer}, U<:AbstractUnitRange}

test/runtests.jl

+6
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,12 @@ end
2828
@test !isa(v_pooled.a, PooledArrays.PooledArray)
2929
@test isa(v_pooled.b, PooledArrays.PooledArray)
3030
@test v_pooled == StructArrays.pool(v)
31+
s = WeakRefStrings.StringArray(["a", "b", "c"])
32+
@test StructArrays.pool(s) isa PooledArrays.PooledArray{String}
33+
@test StructArrays.pool(s)[1] == "a"
34+
@test StructArrays.pool(s)[2] == "b"
35+
@test StructArrays.pool(s)[3] == "c"
36+
@test StructArrays.pool(StructArrays.pool(s)) == StructArrays.pool(s)
3137
end
3238

3339
@testset "roweq" begin

0 commit comments

Comments
 (0)