Skip to content

Commit e6cdf37

Browse files
authored
[BlockSparseArrays] Generalize matrix multiplication, dual the axes in adjoint (#1480)
* [BlockSparseArrays] Generalize matrix multiplication, dual the axes in adjoint * [NDTensors] Bump to v0.3.17
1 parent dbb7e7c commit e6cdf37

File tree

10 files changed

+152
-28
lines changed

10 files changed

+152
-28
lines changed

NDTensors/Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "NDTensors"
22
uuid = "23ae76d9-e61a-49c4-8f12-3f1a16adf9cf"
33
authors = ["Matthew Fishman <[email protected]>"]
4-
version = "0.3.16"
4+
version = "0.3.17"
55

66
[deps]
77
Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697"

NDTensors/src/lib/BlockSparseArrays/ext/BlockSparseArraysGradedAxesExt/src/BlockSparseArraysGradedAxesExt.jl

Lines changed: 54 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,22 @@
11
module BlockSparseArraysGradedAxesExt
22
using BlockArrays: AbstractBlockVector, Block, BlockedUnitRange, blocks
33
using ..BlockSparseArrays:
4-
BlockSparseArrays, AbstractBlockSparseArray, BlockSparseArray, block_merge
4+
BlockSparseArrays,
5+
AbstractBlockSparseArray,
6+
AbstractBlockSparseMatrix,
7+
BlockSparseArray,
8+
BlockSparseMatrix,
9+
block_merge
510
using ...GradedAxes:
611
GradedUnitRange,
712
OneToOne,
813
blockmergesortperm,
914
blocksortperm,
15+
dual,
1016
invblockperm,
1117
nondual,
1218
tensor_product
19+
using LinearAlgebra: Adjoint, Transpose
1320
using ...TensorAlgebra:
1421
TensorAlgebra, FusionStyle, BlockReshapeFusion, SectorFusion, fusedims, splitdims
1522

@@ -61,19 +68,59 @@ function Base.eachindex(a::AbstractBlockSparseArray)
6168
return CartesianIndices(nondual.(axes(a)))
6269
end
6370

71+
# TODO: Handle this through some kind of trait dispatch, maybe
72+
# a `SymmetryStyle`-like trait to check if the block sparse
73+
# matrix has graded axes.
74+
function Base.axes(a::Adjoint{<:Any,<:AbstractBlockSparseMatrix})
75+
return dual.(reverse(axes(a')))
76+
end
77+
6478
# This is a temporary fix for `show` being broken for BlockSparseArrays
6579
# with mixed dual and non-dual axes. This shouldn't be needed once
6680
# GradedAxes is rewritten using BlockArrays v1.
6781
# TODO: Delete this once GradedAxes is rewritten.
68-
function Base.show(io::IO, mime::MIME"text/plain", a::BlockSparseArray; kwargs...)
69-
a_nondual = BlockSparseArray(blocks(a), nondual.(axes(a)))
70-
println(io, "typeof(axes) = ", typeof(axes(a)), "\n")
82+
function blocksparse_show(
83+
io::IO, mime::MIME"text/plain", a::AbstractArray, axes_a::Tuple; kwargs...
84+
)
85+
println(io, "typeof(axes) = ", typeof(axes_a), "\n")
7186
println(
7287
io,
7388
"Warning: To temporarily circumvent a bug in printing BlockSparseArrays with mixtures of dual and non-dual axes, the types of the dual axes printed below might not be accurate. The types printed above this message are the correct ones.\n",
7489
)
75-
return invoke(
76-
show, Tuple{IO,MIME"text/plain",AbstractArray}, io, mime, a_nondual; kwargs...
77-
)
90+
return invoke(show, Tuple{IO,MIME"text/plain",AbstractArray}, io, mime, a; kwargs...)
91+
end
92+
93+
# This is a temporary fix for `show` being broken for BlockSparseArrays
94+
# with mixed dual and non-dual axes. This shouldn't be needed once
95+
# GradedAxes is rewritten using BlockArrays v1.
96+
# TODO: Delete this once GradedAxes is rewritten.
97+
function Base.show(io::IO, mime::MIME"text/plain", a::BlockSparseArray; kwargs...)
98+
axes_a = axes(a)
99+
a_nondual = BlockSparseArray(blocks(a), nondual.(axes(a)))
100+
return blocksparse_show(io, mime, a_nondual, axes_a; kwargs...)
101+
end
102+
103+
# This is a temporary fix for `show` being broken for BlockSparseArrays
104+
# with mixed dual and non-dual axes. This shouldn't be needed once
105+
# GradedAxes is rewritten using BlockArrays v1.
106+
# TODO: Delete this once GradedAxes is rewritten.
107+
function Base.show(
108+
io::IO, mime::MIME"text/plain", a::Adjoint{<:Any,<:BlockSparseMatrix}; kwargs...
109+
)
110+
axes_a = axes(a)
111+
a_nondual = BlockSparseArray(blocks(a'), dual.(nondual.(axes(a))))'
112+
return blocksparse_show(io, mime, a_nondual, axes_a; kwargs...)
113+
end
114+
115+
# This is a temporary fix for `show` being broken for BlockSparseArrays
116+
# with mixed dual and non-dual axes. This shouldn't be needed once
117+
# GradedAxes is rewritten using BlockArrays v1.
118+
# TODO: Delete this once GradedAxes is rewritten.
119+
function Base.show(
120+
io::IO, mime::MIME"text/plain", a::Transpose{<:Any,<:BlockSparseMatrix}; kwargs...
121+
)
122+
axes_a = axes(a)
123+
a_nondual = transpose(BlockSparseArray(transpose(blocks(a)), nondual.(axes(a))))
124+
return blocksparse_show(io, mime, a_nondual, axes_a; kwargs...)
78125
end
79126
end

NDTensors/src/lib/BlockSparseArrays/ext/BlockSparseArraysGradedAxesExt/test/runtests.jl

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ using Compat: Returns
33
using Test: @test, @testset, @test_broken
44
using BlockArrays: Block, blocksize
55
using NDTensors.BlockSparseArrays: BlockSparseArray, block_nstored
6-
using NDTensors.GradedAxes: GradedAxes, GradedUnitRange, dual, gradedrange
6+
using NDTensors.GradedAxes: GradedAxes, GradedUnitRange, UnitRangeDual, dual, gradedrange
77
using NDTensors.LabelledNumbers: label
88
using NDTensors.SparseArrayInterface: nstored
99
using NDTensors.TensorAlgebra: fusedims, splitdims
@@ -87,8 +87,28 @@ const elts = (Float32, Float64, Complex{Float32}, Complex{Float64})
8787
for I in eachindex(a)
8888
@test a[I] == a_dense[I]
8989
end
90-
90+
@test axes(a') == dual.(reverse(axes(a)))
91+
# TODO: Define and use `isdual` here.
92+
@test axes(a', 1) isa UnitRangeDual
93+
@test !(axes(a', 2) isa UnitRangeDual)
9194
@test isnothing(show(devnull, MIME("text/plain"), a))
9295
end
96+
@testset "Matrix multiplication" begin
97+
r = gradedrange([U1(0) => 2, U1(1) => 3])
98+
a1 = BlockSparseArray{elt}(dual(r), r)
99+
a1[Block(1, 2)] = randn(elt, size(@view(a1[Block(1, 2)])))
100+
a1[Block(2, 1)] = randn(elt, size(@view(a1[Block(2, 1)])))
101+
a2 = BlockSparseArray{elt}(dual(r), r)
102+
a2[Block(1, 2)] = randn(elt, size(@view(a2[Block(1, 2)])))
103+
a2[Block(2, 1)] = randn(elt, size(@view(a2[Block(2, 1)])))
104+
@test Array(a1 * a2) ≈ Array(a1) * Array(a2)
105+
@test Array(a1' * a2') ≈ Array(a1') * Array(a2')
106+
107+
a2 = BlockSparseArray{elt}(r, dual(r))
108+
a2[Block(1, 2)] = randn(elt, size(@view(a2[Block(1, 2)])))
109+
a2[Block(2, 1)] = randn(elt, size(@view(a2[Block(2, 1)])))
110+
@test Array(a1' * a2) ≈ Array(a1') * Array(a2)
111+
@test Array(a1 * a2') ≈ Array(a1) * Array(a2')
112+
end
93113
end
94114
end

NDTensors/src/lib/BlockSparseArrays/src/BlockSparseArrays.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ include("abstractblocksparsearray/abstractblocksparsevector.jl")
1111
include("abstractblocksparsearray/view.jl")
1212
include("abstractblocksparsearray/arraylayouts.jl")
1313
include("abstractblocksparsearray/sparsearrayinterface.jl")
14-
include("abstractblocksparsearray/linearalgebra.jl")
1514
include("abstractblocksparsearray/broadcast.jl")
1615
include("abstractblocksparsearray/map.jl")
1716
include("blocksparsearray/defaults.jl")

NDTensors/src/lib/BlockSparseArrays/src/abstractblocksparsearray/arraylayouts.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
using ArrayLayouts: ArrayLayouts, MemoryLayout, MulAdd
22
using BlockArrays: BlockLayout
33
using ..SparseArrayInterface: SparseLayout
4-
using LinearAlgebra: mul!
54

65
function ArrayLayouts.MemoryLayout(arraytype::Type{<:BlockSparseArrayLike})
76
outer_layout = typeof(MemoryLayout(blockstype(arraytype)))

NDTensors/src/lib/BlockSparseArrays/src/abstractblocksparsearray/linearalgebra.jl

Lines changed: 0 additions & 12 deletions
This file was deleted.

NDTensors/src/lib/BlockSparseArrays/src/abstractblocksparsearray/wrappedabstractblocksparsearray.jl

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,35 @@ function Base.similar(
8888
return similar(arraytype, eltype(arraytype), axes)
8989
end
9090

91+
# Needed by `BlockArrays` matrix multiplication interface
92+
# TODO: This fixes an ambiguity error with `OffsetArrays.jl`, but
93+
# is only appears to be needed in older versions of Julia like v1.6.
94+
# Delete once we drop support for older versions of Julia.
95+
function Base.similar(
96+
arraytype::Type{<:BlockSparseArrayLike},
97+
axes::Tuple{AbstractUnitRange,Vararg{AbstractUnitRange}},
98+
)
99+
return similar(arraytype, eltype(arraytype), axes)
100+
end
101+
102+
# Needed by `BlockArrays` matrix multiplication interface
103+
# Fixes ambiguity error with `BlockArrays.jl`.
104+
function Base.similar(
105+
arraytype::Type{<:BlockSparseArrayLike},
106+
axes::Tuple{BlockedUnitRange,Vararg{AbstractUnitRange{Int}}},
107+
)
108+
return similar(arraytype, eltype(arraytype), axes)
109+
end
110+
111+
# Needed by `BlockArrays` matrix multiplication interface
112+
# Fixes ambiguity error with `BlockArrays.jl`.
113+
function Base.similar(
114+
arraytype::Type{<:BlockSparseArrayLike},
115+
axes::Tuple{AbstractUnitRange{Int},BlockedUnitRange,Vararg{AbstractUnitRange{Int}}},
116+
)
117+
return similar(arraytype, eltype(arraytype), axes)
118+
end
119+
91120
# Needed for disambiguation
92121
function Base.similar(
93122
arraytype::Type{<:BlockSparseArrayLike}, axes::Tuple{Vararg{BlockedUnitRange}}

NDTensors/src/lib/BlockSparseArrays/src/blocksparsearrayinterface/arraylayouts.jl

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,13 @@ using BlockArrays: BlockLayout
33
using ..SparseArrayInterface: SparseLayout
44
using LinearAlgebra: mul!
55

6+
function blocksparse_muladd!(
7+
α::Number, a1::AbstractMatrix, a2::AbstractMatrix, β::Number, a_dest::AbstractMatrix
8+
)
9+
mul!(blocks(a_dest), blocks(a1), blocks(a2), α, β)
10+
return a_dest
11+
end
12+
613
function ArrayLayouts.materialize!(
714
m::MatMulMatAdd{
815
<:BlockLayout{<:SparseLayout},
@@ -11,6 +18,6 @@ function ArrayLayouts.materialize!(
1118
},
1219
)
1320
α, a1, a2, β, a_dest = m.α, m.A, m.B, m.β, m.C
14-
mul!(a_dest, a1, a2, α, β)
21+
blocksparse_muladd!(α, a1, a2, β, a_dest)
1522
return a_dest
1623
end

NDTensors/src/lib/BlockSparseArrays/src/blocksparsearrayinterface/blocksparsearrayinterface.jl

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,15 @@ end
141141
function Base.getindex(a::SparseTransposeBlocks, index::Vararg{Int,2})
142142
return transpose(blocks(parent(a.array))[reverse(index)...])
143143
end
144+
# TODO: This should be handled by generic `AbstractSparseArray` code.
145+
function Base.getindex(a::SparseTransposeBlocks, index::CartesianIndex{2})
146+
return a[Tuple(index)...]
147+
end
148+
# TODO: Create a generic `parent_index` function to map an index
149+
# a parent index.
150+
function Base.isassigned(a::SparseTransposeBlocks, index::Vararg{Int,2})
151+
return isassigned(blocks(parent(a.array)), reverse(index)...)
152+
end
144153
function SparseArrayInterface.stored_indices(a::SparseTransposeBlocks)
145154
return map(reverse_index, stored_indices(blocks(parent(a.array))))
146155
end
@@ -163,9 +172,22 @@ end
163172
function Base.size(a::SparseAdjointBlocks)
164173
return reverse(size(blocks(parent(a.array))))
165174
end
175+
# TODO: Create a generic `parent_index` function to map an index
176+
# a parent index.
166177
function Base.getindex(a::SparseAdjointBlocks, index::Vararg{Int,2})
167178
return blocks(parent(a.array))[reverse(index)...]'
168179
end
180+
# TODO: Create a generic `parent_index` function to map an index
181+
# a parent index.
182+
# TODO: This should be handled by generic `AbstractSparseArray` code.
183+
function Base.getindex(a::SparseAdjointBlocks, index::CartesianIndex{2})
184+
return a[Tuple(index)...]
185+
end
186+
# TODO: Create a generic `parent_index` function to map an index
187+
# a parent index.
188+
function Base.isassigned(a::SparseAdjointBlocks, index::Vararg{Int,2})
189+
return isassigned(blocks(parent(a.array)), reverse(index)...)
190+
end
169191
function SparseArrayInterface.stored_indices(a::SparseAdjointBlocks)
170192
return map(reverse_index, stored_indices(blocks(parent(a.array))))
171193
end
@@ -229,16 +251,17 @@ end
229251
function Base.size(a::SparseSubArrayBlocks)
230252
return length.(axes(a))
231253
end
232-
function Base.getindex(a::SparseSubArrayBlocks{<:Any,N}, I::CartesianIndex{N}) where {N}
233-
return a[Tuple(I)...]
234-
end
235254
function Base.getindex(a::SparseSubArrayBlocks{<:Any,N}, I::Vararg{Int,N}) where {N}
236255
parent_blocks = @view blocks(parent(a.array))[blockrange(a)...]
237256
parent_block = parent_blocks[I...]
238257
# TODO: Define this using `blockrange(a::AbstractArray, indices::Tuple{Vararg{AbstractUnitRange}})`.
239258
block = Block(ntuple(i -> blockrange(a)[i][I[i]], ndims(a)))
240259
return @view parent_block[blockindices(parent(a.array), block, a.array.indices)...]
241260
end
261+
# TODO: This should be handled by generic `AbstractSparseArray` code.
262+
function Base.getindex(a::SparseSubArrayBlocks{<:Any,N}, I::CartesianIndex{N}) where {N}
263+
return a[Tuple(I)...]
264+
end
242265
function Base.setindex!(a::SparseSubArrayBlocks{<:Any,N}, value, I::Vararg{Int,N}) where {N}
243266
parent_blocks = view(blocks(parent(a.array)), axes(a)...)
244267
return parent_blocks[I...][blockindices(parent(a.array), Block(I), a.array.indices)...] =

NDTensors/src/lib/BlockSparseArrays/test/test_basics.jl

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,18 @@ include("TestBlockSparseArraysUtils.jl")
266266
@test a_dest isa BlockSparseArray{elt}
267267
@test block_nstored(a_dest) == 1
268268
end
269+
@testset "Matrix multiplication" begin
270+
a1 = BlockSparseArray{elt}([2, 3], [2, 3])
271+
a1[Block(1, 2)] = randn(elt, size(@view(a1[Block(1, 2)])))
272+
a1[Block(2, 1)] = randn(elt, size(@view(a1[Block(2, 1)])))
273+
a2 = BlockSparseArray{elt}([2, 3], [2, 3])
274+
a2[Block(1, 2)] = randn(elt, size(@view(a2[Block(1, 2)])))
275+
a2[Block(2, 1)] = randn(elt, size(@view(a2[Block(2, 1)])))
276+
@test Array(a1 * a2) ≈ Array(a1) * Array(a2)
277+
@test Array(a1' * a2) ≈ Array(a1') * Array(a2)
278+
@test Array(a1 * a2') ≈ Array(a1) * Array(a2')
279+
@test Array(a1' * a2') ≈ Array(a1') * Array(a2')
280+
end
269281
@testset "TensorAlgebra" begin
270282
a1 = BlockSparseArray{elt}([2, 3], [2, 3])
271283
a1[Block(1, 1)] = randn(elt, size(@view(a1[Block(1, 1)])))

0 commit comments

Comments (0)