Move nvcc-specific features behind __CUDACC__ guards and add

tmartin-gh · tmartin-gh · commit 9bb79f1a7a5b · 2025-03-04T10:30:01.000-08:00
static_asserts for signal type
diff --git a/examples/pwelch.cu b/examples/pwelch.cu
@@ -69,7 +69,7 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
   (x = tmp_x).run(exec); // pre-compute x, tmp_x is otherwise lazily evaluated
 
   // Create window
-  auto w = make_tensor<complex>({nperseg});
+  auto w = make_tensor<float>({nperseg});
   (w = flattop<0>({nperseg})).run(exec);
 
   // Create output tensor
diff --git a/include/matx/kernels/pwelch.cuh b/include/matx/kernels/pwelch.cuh
@@ -0,0 +1,101 @@
+////////////////////////////////////////////////////////////////////////////////
+// BSD 3-Clause License
+//
+// Copyright (c) 2023, NVIDIA Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// 1. Redistributions of source code must retain the above copyright notice, this
+//    list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright notice,
+//    this list of conditions and the following disclaimer in the documentation
+//    and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the copyright holder nor the names of its
+//    contributors may be used to endorse or promote products derived from
+//    this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/////////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+
+namespace matx {
+
+  /**
+   * Output scaling applied by detail::pwelch_kernel to the accumulated power.
+   *
+   * In the kernel, each output element starts as the sum over batches of
+   * cuda::std::norm() of the input; the mode selects how that sum is scaled
+   * before it is stored.
+   */
+  enum PwelchOutputScaleMode {
+    PwelchOutputScaleMode_Spectrum,     ///< sum / batches
+    PwelchOutputScaleMode_Density,      ///< sum / (batches * fs)
+    PwelchOutputScaleMode_Spectrum_dB,  ///< 10 * log10(sum / batches)
+    PwelchOutputScaleMode_Density_dB    ///< 10 * log10(sum / (batches * fs))
+  };
+
+  namespace detail {
+
+#ifdef __CUDACC__
+    /**
+     * Reduce a 2D input over its first dimension into one scaled power value
+     * per index of its second dimension.
+     *
+     * Each thread handles one index `tid` along dimension 1 of `t_in`: it sums
+     * cuda::std::norm(t_in(batch, tid)) over every index of dimension 0, scales
+     * the sum as selected by OUTPUT_SCALE_MODE, and writes the result to
+     * t_out(tid). Threads whose `tid` is not below t_in.Shape()[1] do nothing.
+     *
+     * The global index is built from the x components of blockIdx, blockDim and
+     * threadIdx only, so the launch must supply at least t_in.Shape()[1]
+     * threads along x for every output element to be written.
+     *
+     * @tparam OUTPUT_SCALE_MODE
+     *   Scaling branch, selected at compile time via `if constexpr`
+     * @tparam T_IN
+     *   Input operator type; must provide Shape() and operator()(batch, tid)
+     * @tparam T_OUT
+     *   Output operator type; must provide value_type and an assignable
+     *   operator()(tid)
+     * @tparam fsType
+     *   Type of `fs`
+     * @param t_in
+     *   Input, indexed as (batch, tid)
+     * @param t_out
+     *   Output, indexed by tid
+     * @param fs
+     *   Divisor applied together with the batch count in the Density modes
+     *   (presumably the sampling frequency, going by the name); not read in
+     *   the Spectrum modes
+     */
+    template<PwelchOutputScaleMode OUTPUT_SCALE_MODE, typename T_IN, typename T_OUT, typename fsType>
+    __global__ void pwelch_kernel(const T_IN t_in, T_OUT t_out, fsType fs)
+    {
+      // NOTE(review): blockIdx.x * blockDim.x is a product of 32-bit unsigned
+      // built-ins and is evaluated before the conversion to index_t.
+      const index_t tid = blockIdx.x * blockDim.x + threadIdx.x;
+      const index_t batches = t_in.Shape()[0];
+      const index_t nfft = t_in.Shape()[1];
+
+      // Bounds guard: surplus threads in the last block have nothing to do.
+      if (tid < nfft)
+      {
+        // Accumulator and the dB factor both use the output element type.
+        typename T_OUT::value_type pxx = 0;
+        constexpr typename T_OUT::value_type ten = 10;
+
+        // Sum the squared magnitude of this column across all batches.
+        for (index_t batch = 0; batch < batches; batch++)
+        {
+          pxx += cuda::std::norm(t_in(batch, tid));
+        }
+
+        if constexpr (OUTPUT_SCALE_MODE == PwelchOutputScaleMode_Spectrum)
+        {
+          // Mean over batches.
+          t_out(tid) = pxx / batches;
+        }
+        else if constexpr (OUTPUT_SCALE_MODE == PwelchOutputScaleMode_Density)
+        {
+          // Mean over batches, additionally divided by fs.
+          t_out(tid) = pxx / (batches * fs);
+        }
+        else if constexpr (OUTPUT_SCALE_MODE == PwelchOutputScaleMode_Spectrum_dB)
+        {
+          pxx /= batches;
+          // Avoid log10(0): an exactly-zero mean is stored as the lowest
+          // finite value of the output type instead.
+          if (pxx != 0)
+          {
+            t_out(tid) = ten * cuda::std::log10(pxx);
+          }
+          else
+          {
+            t_out(tid) = cuda::std::numeric_limits<typename T_OUT::value_type>::lowest();
+          }
+        }
+        else if constexpr (OUTPUT_SCALE_MODE == PwelchOutputScaleMode_Density_dB)
+        {
+          pxx /= (batches * fs);
+          // Same zero guard as the Spectrum_dB branch.
+          if (pxx != 0)
+          {
+            t_out(tid) = ten * cuda::std::log10(pxx);
+          }
+          else
+          {
+            t_out(tid) = cuda::std::numeric_limits<typename T_OUT::value_type>::lowest();
+          }
+        }
+      }
+    }
+#endif
+
+  };
+};
diff --git a/include/matx/operators/pwelch.h b/include/matx/operators/pwelch.h
@@ -40,16 +40,16 @@
 namespace matx
 {
   namespace detail {
-    template <typename OpX, typename OpW>
-    class PWelchOp : public BaseOp<PWelchOp<OpX,OpW>>
+    template <typename OpX, typename OpW, typename fsType>
+    class PWelchOp : public BaseOp<PWelchOp<OpX,OpW,fsType>>
     {
       public:
+        static_assert(is_complex_v<typename OpX::value_type>, "pwelch() must have a complex input type");
         using matxop = bool;
         using value_type = typename OpX::value_type::value_type;
         using matx_transform_op = bool;
         using pwelch_xform_op = bool;
 
-        static_assert(is_complex_v<typename OpX::value_type>, "pwelch() must have a complex input type");
 
         __MATX_INLINE__ std::string str() const {
           return "pwelch(" + get_type_str(x_) + "," + get_type_str(w_) + ")";
@@ -62,7 +62,7 @@ namespace matx
               index_t noverlap,
               index_t nfft,
               PwelchOutputScaleMode output_scale_mode,
-              value_type fs
+              fsType fs
           ) :
               x_(x),
               w_(w),
@@ -146,7 +146,7 @@ namespace matx
         index_t noverlap_;
         index_t nfft_;
         PwelchOutputScaleMode output_scale_mode_;
-        value_type fs_;
+        fsType fs_;
         cuda::std::array<index_t, 1> out_dims_;
         mutable detail::tensor_impl_t<typename remove_cvref_t<OpX>::value_type, 1> tmp_out_;
         mutable typename remove_cvref_t<OpX>::value_type *ptr = nullptr;
@@ -160,6 +160,8 @@ namespace matx
    *   Input time domain data type
    * @tparam wType
    *   Input window type
+   * @tparam fsType
+   *   Sampling frequency type
    * @param x
    *   Input time domain tensor
    * @param w
@@ -179,32 +181,60 @@ namespace matx
    *
    */
 
-  template <typename xType, typename wType>
+  // NOTE: fsType can only be deduced from an explicitly passed `fs`; a
+  // template parameter is never deduced from a default argument, so a call
+  // that omits `fs` does not select this overload unless fsType is given
+  // explicitly. Such calls resolve to the pwelch() overload without an `fs`
+  // parameter instead.
+  template <
+      typename xType,
+      typename wType,
+      typename fsType>
     __MATX_INLINE__ auto pwelch(
         const xType& x,
         const wType& w,
         index_t nperseg,
         index_t noverlap,
         index_t nfft,
         PwelchOutputScaleMode output_scale_mode = PwelchOutputScaleMode_Spectrum,
-        typename xType::value_type::value_type fs = 1
+        fsType fs = 1
     )
   {
-    MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
-
     return detail::PWelchOp(x, w, nperseg, noverlap, nfft, output_scale_mode, fs);
   }
 
-  template <typename xType>
+  /**
+   * Overload of pwelch() without a window argument.
+   *
+   * Builds a detail::PWelchOp with std::nullopt in place of the window and
+   * forwards every other argument unchanged.
+   *
+   * NOTE: fsType can only be deduced from an explicitly passed `fs`; a
+   * template parameter is never deduced from a default argument, so a call
+   * that omits `fs` does not select this overload unless fsType is given
+   * explicitly. Such calls resolve to the pwelch() overload without an `fs`
+   * parameter instead.
+   *
+   * @tparam xType
+   *   Input time domain data type
+   * @tparam fsType
+   *   Sampling frequency type
+   */
+  template <
+      typename xType,
+      typename fsType>
     __MATX_INLINE__ auto pwelch(
         const xType& x,
         index_t nperseg,
         index_t noverlap,
         index_t nfft,
         PwelchOutputScaleMode output_scale_mode = PwelchOutputScaleMode_Spectrum,
-        typename xType::value_type::value_type fs = 1
+        fsType fs = 1
     )
   {
     return detail::PWelchOp(x, std::nullopt, nperseg, noverlap, nfft, output_scale_mode, fs);
   }
+
+  /**
+   * Overload of pwelch() taking a window but no sampling-frequency argument.
+   *
+   * Builds a detail::PWelchOp from the arguments with fs fixed to 1.f.
+   *
+   * @tparam xType
+   *   Input time domain data type (PWelchOp static_asserts that its
+   *   value_type is complex)
+   * @tparam wType
+   *   Input window type
+   * @param x
+   *   Input time domain tensor
+   * @param w
+   *   Window, passed through to detail::PWelchOp
+   * @param nperseg
+   *   Passed through to detail::PWelchOp as nperseg
+   * @param noverlap
+   *   Passed through to detail::PWelchOp as noverlap
+   * @param nfft
+   *   Passed through to detail::PWelchOp as nfft
+   * @param output_scale_mode
+   *   Output scaling mode; defaults to PwelchOutputScaleMode_Spectrum
+   * @returns
+   *   A detail::PWelchOp operator
+   */
+  template <typename xType, typename wType>
+    __MATX_INLINE__ auto pwelch(
+        const xType& x,
+        const wType& w,
+        index_t nperseg,
+        index_t noverlap,
+        index_t nfft,
+        PwelchOutputScaleMode output_scale_mode = PwelchOutputScaleMode_Spectrum
+    )
+  {
+    // fs is fixed to a float literal here, so PWelchOp's fsType is float.
+    return detail::PWelchOp(x, w, nperseg, noverlap, nfft, output_scale_mode, 1.f);
+  }
+
+  /**
+   * Overload of pwelch() taking neither a window nor a sampling-frequency
+   * argument.
+   *
+   * Builds a detail::PWelchOp with std::nullopt in place of the window and
+   * fs fixed to 1.f.
+   *
+   * @tparam xType
+   *   Input time domain data type (PWelchOp static_asserts that its
+   *   value_type is complex)
+   * @param x
+   *   Input time domain tensor
+   * @param nperseg
+   *   Passed through to detail::PWelchOp as nperseg
+   * @param noverlap
+   *   Passed through to detail::PWelchOp as noverlap
+   * @param nfft
+   *   Passed through to detail::PWelchOp as nfft
+   * @param output_scale_mode
+   *   Output scaling mode; defaults to PwelchOutputScaleMode_Spectrum
+   * @returns
+   *   A detail::PWelchOp operator
+   */
+  template <typename xType>
+    __MATX_INLINE__ auto pwelch(
+        const xType& x,
+        index_t nperseg,
+        index_t noverlap,
+        index_t nfft,
+        PwelchOutputScaleMode output_scale_mode = PwelchOutputScaleMode_Spectrum
+    )
+  {
+    // fs is fixed to a float literal here, so PWelchOp's fsType is float.
+    return detail::PWelchOp(x, std::nullopt, nperseg, noverlap, nfft, output_scale_mode, 1.f);
+  }
 }
diff --git a/include/matx/transforms/pwelch.h b/include/matx/transforms/pwelch.h
@@ -32,74 +32,16 @@
 
 #pragma once
 
+#include "matx/kernels/pwelch.cuh"
+
 namespace matx
 {
-
-  enum PwelchOutputScaleMode {
-    PwelchOutputScaleMode_Spectrum,
-    PwelchOutputScaleMode_Density,
-    PwelchOutputScaleMode_Spectrum_dB,
-    PwelchOutputScaleMode_Density_dB
-  };
-
-  namespace detail {
-    template<PwelchOutputScaleMode OUTPUT_SCALE_MODE, typename T_IN, typename T_OUT>
-    __global__ void pwelch_kernel(const T_IN t_in, T_OUT t_out, typename T_OUT::value_type fs)
-    {
-      const index_t tid = blockIdx.x * blockDim.x + threadIdx.x;
-      const index_t batches = t_in.Shape()[0];
-      const index_t nfft = t_in.Shape()[1];
-
-      if (tid < nfft)
-      {
-        typename T_OUT::value_type pxx = 0;
-        constexpr typename T_OUT::value_type ten = 10;
-
-        for (index_t batch = 0; batch < batches; batch++)
-        {
-          pxx += cuda::std::norm(t_in(batch, tid));
-        }
-
-        if constexpr (OUTPUT_SCALE_MODE == PwelchOutputScaleMode_Spectrum)
-        {
-          t_out(tid) = pxx / batches;
-        }
-        else if constexpr (OUTPUT_SCALE_MODE == PwelchOutputScaleMode_Density)
-        {
-          t_out(tid) = pxx / (batches * fs);
-        }
-        else if constexpr (OUTPUT_SCALE_MODE == PwelchOutputScaleMode_Spectrum_dB)
-        {
-          pxx /= batches;
-          if (pxx != 0)
-          {
-            t_out(tid) = ten * cuda::std::log10(pxx);
-          }
-          else
-          {
-            t_out(tid) = cuda::std::numeric_limits<typename T_OUT::value_type>::lowest();
-          }
-        }
-        else if constexpr (OUTPUT_SCALE_MODE == PwelchOutputScaleMode_Density_dB)
-        {
-          pxx /= (batches * fs);
-          if (pxx != 0)
-          {
-            t_out(tid) = ten * cuda::std::log10(pxx);
-          }
-          else
-          {
-            t_out(tid) = cuda::std::numeric_limits<typename T_OUT::value_type>::lowest();
-          }
-        }
-      }
-    }
-  };
-
-  extern int g_pwelch_alg_mode;
-  template <typename PxxType, typename xType, typename wType>
-    __MATX_INLINE__ void pwelch_impl(PxxType Pxx, const xType& x, const wType& w, index_t nperseg, index_t noverlap, index_t nfft, PwelchOutputScaleMode output_scale_mode, typename PxxType::value_type fs, cudaStream_t stream=0)
-    {
+  template <typename PxxType, typename xType, typename wType, typename fsType>
+    __MATX_INLINE__ void pwelch_impl(PxxType Pxx, const xType& x, const wType& w, index_t nperseg, index_t noverlap, index_t nfft, PwelchOutputScaleMode output_scale_mode, fsType fs, cudaStream_t stream=0)
+  {
+    #ifndef __CUDACC__
+      MATX_THROW(matxNotSupported, "pwelch not supported on host");
+    #else
       MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
 
       MATX_ASSERT_STR(Pxx.Rank() == x.Rank(), matxInvalidDim, "pwelch:  Pxx rank must be the same as x rank");
@@ -141,6 +83,6 @@ namespace matx
       {
         detail::pwelch_kernel<PwelchOutputScaleMode_Density_dB><<<bpk, tpb, 0, stream>>>(X_with_overlaps, Pxx, fs);
       }
-    }
-
+    #endif
+  }
 } // end namespace matx