Skip to content

Commit 8bf818a

Browse files
authored
First version of MATX sparse2dense conversion (dispatch to cuSPARSE) (#856)
* First version of MATX sparse2dense conversion (using dispatch to cuSPARSE)
1 parent 55dd664 commit 8bf818a

File tree

7 files changed

+446
-42
lines changed

7 files changed

+446
-42
lines changed

examples/sparse_tensor.cu

+17-8
Original file line numberDiff line numberDiff line change
@@ -90,24 +90,33 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
9090
//
9191
// A very naive way to convert the sparse matrix back to a dense
9292
// matrix. Note that one should **never** use the ()-operator in
93-
// performance critical code, since sparse data structures do
93+
// performance critical code, since sparse storage formats do
9494
// not provide O(1) random access to their elements (compressed
9595
// levels will use some form of search to determine if an element
9696
// is present). Instead, conversions (and other operations) should
97-
// use sparse operations that are tailored for the sparse data
98-
// structure (such as scanning by row for CSR).
97+
// use sparse operations that are tailored for the sparse storage
98+
// format (such as scanning by row for CSR).
9999
//
100-
auto A = make_tensor<float>({4, 8});
100+
auto A1 = make_tensor<float>({4, 8});
101101
for (index_t i = 0; i < 4; i++) {
102102
for (index_t j = 0; j < 8; j++) {
103-
A(i, j) = Acoo(i, j);
103+
A1(i, j) = Acoo(i, j);
104104
}
105105
}
106-
print(A);
106+
print(A1);
107107

108108
//
109-
// SpMM is implemented on COO through cuSPARSE. This is the
110-
// correct way of performing an efficient sparse operation.
109+
// A direct sparse2dense conversion. This is the correct way of
110+
// performing the conversion, since the underlying implementation
111+
// knows how to properly manipulate the sparse storage format.
112+
//
113+
auto A2 = make_tensor<float>({4, 8});
114+
(A2 = sparse2dense(Acoo)).run(exec);
115+
print(A2);
116+
117+
//
118+
// Perform a direct SpMM. This is also the correct way of performing
119+
// an efficient sparse operation.
111120
//
112121
auto B = make_tensor<float, 2>({8, 4});
113122
auto C = make_tensor<float>({4, 4});

include/matx/core/type_utils.h

+18
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#include <cublas_v2.h>
3939
#include <cuda/std/complex>
4040
#include <cuda/std/tuple>
41+
#include <cusparse.h>
4142
#include <type_traits>
4243

4344
#include "cuda_fp16.h"
@@ -1166,6 +1167,23 @@ template <typename T> constexpr cublasComputeType_t MatXTypeToCudaComputeType()
11661167

11671168
return CUBLAS_COMPUTE_32F;
11681169
}
1170+
1171+
// Map a MATX index type to the corresponding cuSPARSE index type enum.
// Supported mappings: uint16_t -> CUSPARSE_INDEX_16U, int32_t ->
// CUSPARSE_INDEX_32I, int64_t / index_t -> CUSPARSE_INDEX_64I.
// Any other T is rejected at compile time; the original chain of
// independent `if constexpr` branches could fall off the end of a
// constexpr function with no return value (undefined behavior).
template <typename T>
constexpr cusparseIndexType_t MatXTypeToCuSparseIndexType() {
  if constexpr (std::is_same_v<T, uint16_t>) {
    return CUSPARSE_INDEX_16U;
  }
  else if constexpr (std::is_same_v<T, int32_t>) {
    return CUSPARSE_INDEX_32I;
  }
  else {
    // Both int64_t and index_t map to 64-bit indices (the original code
    // returned CUSPARSE_INDEX_64I for each separately). Reject every
    // other type with a diagnostic instead of silently reaching the end
    // of the function.
    static_assert(std::is_same_v<T, int64_t> || std::is_same_v<T, index_t>,
                  "unsupported index type for cuSPARSE");
    return CUSPARSE_INDEX_64I;
  }
}
1186+
11691187
} // end namespace detail
11701188

11711189
} // end namespace matx

include/matx/operators/operators.h

+1
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@
9999
#include "matx/operators/shift.h"
100100
#include "matx/operators/sign.h"
101101
#include "matx/operators/slice.h"
102+
#include "matx/operators/sparse2dense.h"
102103
#include "matx/operators/solve.h"
103104
#include "matx/operators/sort.h"
104105
#include "matx/operators/sph2cart.h"

include/matx/operators/sparse2dense.h

+146
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
////////////////////////////////////////////////////////////////////////////////
2+
// BSD 3-Clause License
3+
//
4+
// Copyright (c) 2025, NVIDIA Corporation
5+
// All rights reserved.
6+
//
7+
// Redistribution and use in source and binary forms, with or without
8+
// modification, are permitted provided that the following conditions are met:
9+
//
10+
// 1. Redistributions of source code must retain the above copyright notice, this
11+
// list of conditions and the following disclaimer.
12+
//
13+
// 2. Redistributions in binary form must reproduce the above copyright notice,
14+
// this list of conditions and the following disclaimer in the documentation
15+
// and/or other materials provided with the distribution.
16+
//
17+
// 3. Neither the name of the copyright holder nor the names of its
18+
// contributors may be used to endorse or promote products derived from
19+
// this software without specific prior written permission.
20+
//
21+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22+
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23+
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24+
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25+
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26+
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27+
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28+
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29+
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31+
/////////////////////////////////////////////////////////////////////////////////
32+
33+
#pragma once
34+
35+
#include "matx/core/type_utils.h"
36+
#include "matx/operators/base_operator.h"
37+
#include "matx/transforms/convert/sparse2dense_cusparse.h"
38+
39+
namespace matx {
40+
namespace detail {
41+
42+
// Operator that materializes a sparse input operand into a dense temporary
// tensor via the cuSPARSE-backed sparse2dense_impl transform. The dense
// result is allocated in PreRun(), filled by Exec(), served element-wise
// through operator(), and released in PostRun().
template <typename OpA>
class Sparse2DenseOp : public BaseOp<Sparse2DenseOp<OpA>> {
private:
  typename detail::base_type_t<OpA> op_; // wrapped sparse input operand
  static constexpr int out_rank = OpA::Rank();
  cuda::std::array<index_t, out_rank> sizes_; // dense output extents
  // Temporary dense tensor and its backing allocation. Both are mutable
  // because the PreRun()/PostRun() hooks are const in the operator API.
  mutable detail::tensor_impl_t<typename OpA::value_type, out_rank> dense_;
  mutable typename OpA::value_type *storage_ = nullptr;

public:
  using matxop = bool;
  using matx_transform_op = bool;
  using sparse2dense_xform_op = bool;
  using value_type = typename OpA::value_type;

  // Capture the sparse operand and mirror its extents for the dense output.
  __MATX_INLINE__ Sparse2DenseOp(const OpA &a) : op_(a) {
    for (int d = 0; d < Rank(); d++) {
      sizes_[d] = op_.Size(d);
    }
  }

  __MATX_INLINE__ std::string str() const {
    std::string inner = get_type_str(op_);
    return "sparse2dense(" + inner + ")";
  }

  // Raw pointer to the temporary dense buffer (valid after PreRun()).
  __MATX_HOST__ __MATX_INLINE__ auto Data() const noexcept { return storage_; }

  // Element access reads from the materialized dense temporary.
  template <typename... Is>
  __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto)
  operator()(Is... indices) const {
    return dense_(indices...);
  }

  static __MATX_INLINE__ constexpr __MATX_HOST__ __MATX_DEVICE__ int32_t
  Rank() {
    return out_rank;
  }

  constexpr __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ index_t
  Size(int dim) const {
    return sizes_[dim];
  }

  // Dispatch the conversion: only sparse input -> dense output is supported.
  template <typename Out, typename Executor>
  void Exec([[maybe_unused]] Out &&out, [[maybe_unused]] Executor &&ex) const {
    if constexpr (!is_sparse_tensor_v<OpA>) {
      MATX_THROW(matxNotSupported, "Cannot use sparse2dense on dense input");
    }
    else {
      auto dst = cuda::std::get<0>(out);
      if constexpr (is_sparse_tensor_v<decltype(dst)>) {
        MATX_THROW(matxNotSupported,
                   "Cannot use sparse2dense for sparse output");
      }
      else {
        sparse2dense_impl(dst, op_, ex);
      }
    }
  }

  template <typename ShapeType, typename Executor>
  __MATX_INLINE__ void
  InnerPreRun([[maybe_unused]] ShapeType &&shape,
              [[maybe_unused]] Executor &&ex) const noexcept {
    static_assert(is_sparse_tensor_v<OpA>,
                  "Cannot use sparse2dense on dense input");
  }

  // Allocate the dense temporary and run the conversion into it.
  template <typename ShapeType, typename Executor>
  __MATX_INLINE__ void PreRun([[maybe_unused]] ShapeType &&shape,
                              [[maybe_unused]] Executor &&ex) const noexcept {
    InnerPreRun(std::forward<ShapeType>(shape), std::forward<Executor>(ex));
    detail::AllocateTempTensor(dense_, std::forward<Executor>(ex), sizes_,
                               &storage_);
    Exec(cuda::std::make_tuple(dense_), std::forward<Executor>(ex));
  }

  // Release the temporary dense buffer.
  template <typename ShapeType, typename Executor>
  __MATX_INLINE__ void PostRun([[maybe_unused]] ShapeType &&shape,
                               [[maybe_unused]] Executor &&ex) const noexcept {
    static_assert(is_sparse_tensor_v<OpA>,
                  "Cannot use sparse2dense on dense input");
    matxFree(storage_);
  }
};
127+
128+
} // end namespace detail
129+
130+
/**
 * Convert a sparse tensor into a dense tensor.
 *
 * Wraps the input in a Sparse2DenseOp, which performs the conversion
 * through the cuSPARSE-backed implementation when executed.
 *
 * @tparam OpA
 *   Data type of A tensor
 *
 * @param A
 *   Sparse input tensor
 *
 * @return
 *   Dense output tensor
 */
template <typename OpA> __MATX_INLINE__ auto sparse2dense(const OpA &A) {
  return detail::Sparse2DenseOp<OpA>(A);
}
145+
146+
} // end namespace matx

0 commit comments

Comments
 (0)