adrhill · adrhill · May 3, 2024 · May 3, 2024 · May 3, 2024 · May 3, 2024
diff --git a/docs/src/api.md b/docs/src/api.md
@@ -27,3 +27,9 @@ ConnectivityTracer
 JacobianTracer
 HessianTracer
 ```
+
+We also define a custom alternative to sets that can deliver faster `union`:
+
+```@docs
+SortedVector
+```
diff --git a/src/SparseConnectivityTracer.jl b/src/SparseConnectivityTracer.jl
@@ -12,6 +12,7 @@ include("overload_jacobian.jl")
 include("overload_hessian.jl")
 include("pattern.jl")
 include("adtypes.jl")
+include("sortedvector.jl")
 
 export ConnectivityTracer, connectivity_pattern
 export JacobianTracer, jacobian_pattern

diff --git a/src/sortedvector.jl b/src/sortedvector.jl
@@ -0,0 +1,86 @@
+"""
+    SortedVector
+
+A wrapper for sorted vectors, designed for fast unions.
+
+# Constructor
+
+    SortedVector(data::AbstractVector; already_sorted=false)
+
+Unless `already_sorted=true`, `data` is sorted into a new vector first; with
+`already_sorted=true` it is stored as-is, without copying or checking the order.
+
+# Example
+
+```jldoctest
+x = SortedVector([3, 4, 2])
+y = SortedVector([1, 3, 5]; already_sorted=true)
+print(union(x, y))
+
+# output
+
+SortedVector([1, 2, 3, 4, 5])
+```
+"""
+struct SortedVector{T,V<:AbstractVector{T}} <: AbstractVector{T}
+    data::V
+
+    # Sorts a copy of `data` unless the caller states that it is already sorted.
+    function SortedVector{T,V}(data::V; already_sorted=false) where {T,V<:AbstractVector{T}}
+        return new{T,V}(already_sorted ? data : sort(data))
+    end
+
+    function SortedVector{T,V}(x::Number) where {T,V<:AbstractVector{T}}
+        return new{T,V}(convert(V, [T(x)]))
+    end
+
+    function SortedVector{T,V}() where {T,V<:AbstractVector{T}}
+        return new{T,V}(convert(V, T[]))
+    end
+end
+
+Base.eltype(::SortedVector{T}) where {T} = T  # element type of the wrapped data
+Base.size(sv::SortedVector) = size(sv.data)  # same shape as the wrapped data
+Base.getindex(sv::SortedVector, idx) = getindex(sv.data, idx)  # forwarded unchanged
+Base.IndexStyle(::Type{SortedVector{T,V}}) where {T,V} = IndexStyle(V)  # inherit from V
+
+function SortedVector(data::V; already_sorted=false) where {T,V<:AbstractVector{T}}
+    return SortedVector{T,V}(data; already_sorted=already_sorted)  # T, V taken from `data`
+end
+
+Base.show(io::IO, sv::SortedVector) = print(io, "SortedVector(", sv.data, ")")
+
+# Merge two `SortedVector`s into a new one with a single linear pass over both inputs.
+# A value sitting at the current position of both inputs is written only once;
+# duplicates inside one input are kept. The inputs are not checked for sortedness.
+function Base.union(v1::SortedVector{T,V}, v2::SortedVector{T,V}) where {T,V}
+    a, b = v1.data, v2.data
+    out = similar(a, length(a) + length(b))
+    ia, ib, k = 1, 1, 1
+    # phase 1: both inputs still have unread elements
+    # `k` is bounded by `out`, so the loop ends even if neither `<=` below holds
+    @inbounds while ia in eachindex(a) && ib in eachindex(b) && k in eachindex(out)
+        xa, xb = a[ia], b[ib]
+        take_a = xa <= xb
+        take_b = xb <= xa
+        out[k] = ifelse(take_a, xa, xb)
+        k += 1
+        # branch-free advance: on a tie both cursors move
+        ia += take_a
+        ib += take_b
+    end
+    # phase 2: copy what is left of `a`, then of `b`; at most one loop copies anything
+    @inbounds while ia in eachindex(a) && k in eachindex(out)
+        out[k] = a[ia]
+        ia += 1
+        k += 1
+    end
+    @inbounds while ib in eachindex(b) && k in eachindex(out)
+        out[k] = b[ib]
+        ib += 1
+        k += 1
+    end
+    # `out` was sized for the worst case; cut off the unused tail
+    resize!(out, k - 1)
+    return SortedVector(out; already_sorted=true)
+end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -40,6 +40,9 @@ DocMeta.setdocmeta!(
     @testset "Doctests" begin
         Documenter.doctest(SparseConnectivityTracer)
     end
+    @testset "SortedVector" begin
+        include("sortedvector.jl")
+    end
     @testset "Classification of operators by diff'ability" begin
         include("test_differentiability.jl")
     end

diff --git a/test/sortedvector.jl b/test/sortedvector.jl
@@ -0,0 +1,30 @@
+using ADTypes
+using SparseArrays
+using SparseConnectivityTracer
+using SparseConnectivityTracer: SortedVector
+using Test
+
+@testset "Correctness" begin
+    sizes = (0, 10, 100, 1000)
+    @testset "$T - ($k1, $k2)" for T in (Int32, Int64), k1 in sizes, k2 in sizes
+        for _ in 1:100
+            # x is sorted by the constructor, y is sorted by the caller beforehand
+            x = SortedVector(rand(T(1):T(1000), k1); already_sorted=false)
+            y = SortedVector(sort(rand(T(1):T(1000), k2)); already_sorted=true)
+            z = union(x, y)
+            @test eltype(z) == T
+            @test issorted(z.data)
+            @test Set(z.data) == union(Set(x.data), Set(y.data))
+            # first/last comparisons need both inputs to be non-empty
+            if k1 > 0 && k2 > 0
+                @test z[1] == min(x[1], y[1])
+                @test z[end] == max(x[end], y[end])
+            end
+        end
+    end
+end;
+
+sd = TracerSparsityDetector(SortedVector{UInt,Vector{UInt}})
+@test ADTypes.jacobian_sparsity(diff, rand(10), sd) isa SparseMatrixCSC
+@test_broken ADTypes.hessian_sparsity(x -> sum(abs2, diff(x)), rand(10), sd) isa
+    SparseMatrixCSC  # `@test_broken`: this Hessian check is not expected to pass yet