@@ -207,16 +207,29 @@ using sparse2dense_cache_t =
     std::unordered_map<Sparse2DenseParams_t, std::any,
                        Sparse2DenseParamsKeyHash, Sparse2DenseParamsKeyEq>;
 
+template <typename Op>
+__MATX_INLINE__ auto getS2DSupportedTensor(const Op &in, cudaStream_t stream) {
+  const auto func = [&]() {
+    if constexpr (is_tensor_view_v<Op>)
+      return in.Stride(Op::Rank() - 1) == 1;
+    return true;
+  };
+  return GetSupportedTensor(in, func, MATX_ASYNC_DEVICE_MEMORY, stream);
+}
+
 } // end namespace detail
 
 template <typename OutputTensorType, typename InputTensorType>
-void sparse2dense_impl(OutputTensorType &o, const InputTensorType &a,
+void sparse2dense_impl(OutputTensorType &O, const InputTensorType &a,
                        const cudaExecutor &exec) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
   const auto stream = exec.getStream();
 
+  // Transform into supported form.
+  auto o = getS2DSupportedTensor(O, stream);
+
   using atype = InputTensorType;
-  using otype = OutputTensorType;
+  using otype = decltype(o);
 
   using TA = typename atype::value_type;
   using TO = typename otype::value_type;
@@ -248,6 +261,11 @@ void sparse2dense_impl(OutputTensorType &o, const InputTensorType &a,
       [&](std::shared_ptr<cache_val_type> cache_type) {
         cache_type->Exec(o, a);
       });
+
+  // Copy transformed output back.
+  if (!o.isSameView(O)) {
+    (O = o).run(stream);
+  }
 }
 
 } // end namespace matx
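
Note on the pattern in this diff: if the destination tensor is not in a form the kernel supports (here, unit innermost stride), the transform runs into a contiguous temporary and the result is copied back afterwards. GetSupportedTensor allocates that temporary in MATX_ASYNC_DEVICE_MEMORY and the copy-back `(O = o).run(stream)` is enqueued on the same CUDA stream, so the detour stays stream-ordered; the `if constexpr` also keeps the `Stride` query from being instantiated for operator types that are not tensor views. The standalone C++ sketch below illustrates the same staging idea without MatX; StridedView, fill_iota, and run_with_staging are illustrative names invented for this sketch, not MatX APIs.

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // A 1-D view over existing storage with an element stride.
    struct StridedView {
      float *data;
      std::size_t size;   // number of logical elements
      std::size_t stride; // distance between consecutive elements
    };

    // Stand-in for a kernel that can only write contiguous output.
    void fill_iota(float *dst, std::size_t n) {
      for (std::size_t i = 0; i < n; ++i)
        dst[i] = static_cast<float>(i);
    }

    void run_with_staging(StridedView out) {
      if (out.stride == 1) {
        // Output is already in supported (unit-stride) form: write in place.
        fill_iota(out.data, out.size);
        return;
      }
      // Otherwise stage through a contiguous temporary...
      std::vector<float> tmp(out.size);
      fill_iota(tmp.data(), out.size);
      // ...and copy the transformed output back, honoring the stride.
      for (std::size_t i = 0; i < out.size; ++i)
        out.data[i * out.stride] = tmp[i];
    }

    int main() {
      std::vector<float> buf(10, -1.0f);
      run_with_staging({buf.data(), 5, 2}); // writes every other element
      assert(buf[0] == 0.0f && buf[2] == 1.0f && buf[1] == -1.0f);
      return 0;
    }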