From 1ea57a09bbb2c08b4a2fccd1813c4de76a16ec59 Mon Sep 17 00:00:00 2001 From: Phy-Ren <909629481@qq.com> Date: Wed, 26 Feb 2025 18:30:42 +0800 Subject: [PATCH 1/3] feat(lapack): add CUDA support for SymMatEVD --- include/qlten/framework/hp_numeric/lapack.h | 129 +++++++++++++++++- .../blk_spar_data_ten/global_operations.h | 4 +- tests/CMakeLists.txt | 23 +++- 3 files changed, 148 insertions(+), 8 deletions(-) diff --git a/include/qlten/framework/hp_numeric/lapack.h b/include/qlten/framework/hp_numeric/lapack.h index 08930b3..e8d433c 100644 --- a/include/qlten/framework/hp_numeric/lapack.h +++ b/include/qlten/framework/hp_numeric/lapack.h @@ -270,7 +270,7 @@ inline void MatQR( // roughly estimate for complex number #endif } -#else // define USE GPU +#else // define USE_GPU #ifndef NDEBUG //row-major matrix A, m: rows, n: cols; lda: leading-dimension, usually be n @@ -659,8 +659,135 @@ inline cusolverStatus_t MatSVD( } +inline cusolverStatus_t SymMatEVD( + const QLTEN_Double *mat, + const size_t n, + QLTEN_Double *d, + QLTEN_Double *u +) { + cusolverDnHandle_t handle = CusolverHandleManager::GetHandle(); + cublasHandle_t cublasHandle = CublasHandleManager::GetHandle(); + + // Device memory allocations + int *devInfo = nullptr; + QLTEN_Double *eigenvalues = nullptr; + QLTEN_Double *d_mat = nullptr; + QLTEN_Double *d_work = nullptr; + + HANDLE_CUDA_ERROR(cudaMalloc(&devInfo, sizeof(int))); + HANDLE_CUDA_ERROR(cudaMalloc(&eigenvalues, n * sizeof(QLTEN_Double))); + HANDLE_CUDA_ERROR(cudaMalloc(&d_mat, n * n * sizeof(QLTEN_Double))); + + // Transpose input matrix to column-major + const QLTEN_Double alpha = 1.0; + const QLTEN_Double beta = 0.0; + HANDLE_CUBLAS_ERROR(cublasDgeam(cublasHandle, CUBLAS_OP_T, CUBLAS_OP_N, + n, n, &alpha, mat, n, &beta, nullptr, n, + d_mat, n)); + + // Query and allocate workspace + int lwork = 0; + HANDLE_CUSOLVER_ERROR(cusolverDnDsyevd_bufferSize(handle, CUSOLVER_EIG_MODE_VECTOR, + CUBLAS_FILL_MODE_UPPER, n, + d_mat, n, eigenvalues, &lwork)); + HANDLE_CUDA_ERROR(cudaMalloc(&d_work, lwork * sizeof(QLTEN_Double))); + + // Compute eigenvalues/vectors + auto status = cusolverDnDsyevd(handle, CUSOLVER_EIG_MODE_VECTOR, + CUBLAS_FILL_MODE_UPPER, n, d_mat, n, + eigenvalues, d_work, lwork, devInfo); + HANDLE_CUSOLVER_ERROR(status); + // Check convergence + int info; + HANDLE_CUDA_ERROR(cudaMemcpy(&info, devInfo, sizeof(int), cudaMemcpyDeviceToHost)); + if (info != 0) { + cudaFree(d_mat); cudaFree(d_work); cudaFree(devInfo); cudaFree(eigenvalues); + return CUSOLVER_STATUS_INTERNAL_ERROR; + } + + // Transpose eigenvectors to row-major + HANDLE_CUBLAS_ERROR(cublasDgeam(cublasHandle, CUBLAS_OP_T, CUBLAS_OP_N, + n, n, &alpha, d_mat, n, &beta, nullptr, n, + u, n)); + + // Set diagonal matrix (device-to-device copy) + HANDLE_CUBLAS_ERROR(cublasDcopy(cublasHandle, n, eigenvalues, 1, d, n+1)); + + // Cleanup + cudaFree(d_mat); + cudaFree(d_work); + cudaFree(devInfo); + cudaFree(eigenvalues); + + return status; +} +inline cusolverStatus_t SymMatEVD( + const QLTEN_Complex *mat, + const size_t n, + QLTEN_Double *d, + QLTEN_Complex *u +) { + cusolverDnHandle_t handle = CusolverHandleManager::GetHandle(); + cublasHandle_t cublasHandle = CublasHandleManager::GetHandle(); + + // Device memory allocations + int *devInfo = nullptr; + QLTEN_Double *eigenvalues = nullptr; + cuDoubleComplex *d_mat = nullptr; + cuDoubleComplex *d_work = nullptr; + + HANDLE_CUDA_ERROR(cudaMalloc(&devInfo, sizeof(int))); + HANDLE_CUDA_ERROR(cudaMalloc(&eigenvalues, n * sizeof(QLTEN_Double))); + HANDLE_CUDA_ERROR(cudaMalloc(&d_mat, n * n * sizeof(cuDoubleComplex))); + + // Transpose input matrix to column-major + const cuDoubleComplex alpha = {1.0, 0.0}; + const cuDoubleComplex beta = {0.0, 0.0}; + HANDLE_CUBLAS_ERROR(cublasZgeam(cublasHandle, CUBLAS_OP_T, CUBLAS_OP_N, + n, n, &alpha, + reinterpret_cast(mat), n, + &beta, nullptr, n, + d_mat, n)); + + // Query and allocate workspace + int lwork = 0; + HANDLE_CUSOLVER_ERROR(cusolverDnZheevd_bufferSize(handle, CUSOLVER_EIG_MODE_VECTOR, + CUBLAS_FILL_MODE_UPPER, n, + d_mat, n, eigenvalues, &lwork)); + HANDLE_CUDA_ERROR(cudaMalloc(&d_work, lwork * sizeof(cuDoubleComplex))); + + // Compute eigenvalues/vectors + auto status = cusolverDnZheevd(handle, CUSOLVER_EIG_MODE_VECTOR, + CUBLAS_FILL_MODE_UPPER, n, d_mat, n, + eigenvalues, d_work, lwork, devInfo); + HANDLE_CUSOLVER_ERROR(status); + + // Check convergence + int info; + HANDLE_CUDA_ERROR(cudaMemcpy(&info, devInfo, sizeof(int), cudaMemcpyDeviceToHost)); + if (info != 0) { + cudaFree(d_mat); cudaFree(d_work); cudaFree(devInfo); cudaFree(eigenvalues); + return CUSOLVER_STATUS_INTERNAL_ERROR; + } + + // Transpose eigenvectors to row-major + HANDLE_CUBLAS_ERROR(cublasZgeam(cublasHandle, CUBLAS_OP_T, CUBLAS_OP_N, + n, n, &alpha, d_mat, n, &beta, nullptr, n, + reinterpret_cast(u), n)); + + // Set diagonal matrix (device-to-device copy) + HANDLE_CUBLAS_ERROR(cublasDcopy(cublasHandle, n, eigenvalues, 1, d, n+1)); + + // Cleanup + cudaFree(d_mat); + cudaFree(d_work); + cudaFree(devInfo); + cudaFree(eigenvalues); + + return status; +} #endif diff --git a/include/qlten/qltensor/blk_spar_data_ten/global_operations.h b/include/qlten/qltensor/blk_spar_data_ten/global_operations.h index cca7c3a..5ced3fe 100644 --- a/include/qlten/qltensor/blk_spar_data_ten/global_operations.h +++ b/include/qlten/qltensor/blk_spar_data_ten/global_operations.h @@ -1050,7 +1050,7 @@ void BlockSparseDataTensor::CollectiveLinearCombine( } RawDataCopy_(source_pointers, dest_pointers, copy_size); } -#ifndef USE_GPU +// #ifndef USE_GPU template void BlockSparseDataTensor::SymMatEVDRawDataDecomposition( BlockSparseDataTensor &u, @@ -1070,6 +1070,6 @@ void BlockSparseDataTensor::SymMatEVDRawDataDecomposition( pu_start + task.data_offset); } } -#endif +// #endif } /* qlten */ #endif /* ifndef QLTEN_QLTENSOR_BLK_SPAR_DATA_TEN_GLOBAL_OPERATIONS_H */ diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 509d4d2..5dbe761 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -11,13 +11,16 @@ if (QLTEN_USE_GPU) # Enable CUDA language support enable_language(CUDA) + set(CUDAToolkit_ROOT /usr/local/cuda) + find_package(CUDA REQUIRED) find_package(CUDAToolkit REQUIRED) # Modern CUDA Toolkit detection + find_library(CUBLAS_LIBRARY cublas HINTS ${CUDAToolkit_ROOT}/lib64) include_directories(${CUDAToolkit_INCLUDE_DIRS}) message(STATUS "CUDA_INCLUDE_DIRS: '${CUDAToolkit_INCLUDE_DIRS}'") # Set CUDA architectures - set(CMAKE_CUDA_ARCHITECTURES 70 80) # For V100 & A100, adjust if needed + set(CMAKE_CUDA_ARCHITECTURES 70 80 86) # For V100 & A100, adjust if needed # Compiler flags set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -lineinfo -Xcompiler=-fPIC") @@ -39,7 +42,7 @@ endif () if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") set(BLA_VENDOR Intel10_64lp) #FOR VENDOR CHOICE: https://cmake.org/cmake/help/latest/module/FindBLAS.html - set(BLAS_INCLUDE_DIR "$ENV{MKLROOT}/include") + set(BLAS_INCLUDE_DIR "$ENV{MKLROOT}") elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "arm64") # (e.g., Apple Silicon) add_definitions(-DUSE_OPENBLAS) set(BLA_VENDOR OpenBLAS) @@ -48,6 +51,7 @@ elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "arm64") # (e.g., Apple Silicon) set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} ${OpenBLAS_ROOT} ${Lapack_ROOT}) # For find set(BLAS_INCLUDE_DIR "${OpenBLAS_ROOT}/include") endif () +message(STATUS "${BLAS_INCLUDE_DIR}") FIND_PACKAGE(BLAS REQUIRED) FIND_PACKAGE(LAPACK REQUIRED) @@ -91,6 +95,8 @@ if (QLTEN_USE_GPU) ) target_link_libraries(${TEST_NAME} PRIVATE CUDA::cublas_static + PRIVATE CUDA::cublasLt + PRIVATE CUDA::cublasLt_static PRIVATE CUDA::cusolver_static PRIVATE ${CUTENSOR_LIBRARY} PRIVATE GTest::GTest GTest::Main @@ -217,13 +223,20 @@ add_unittest(test_ten_svd "test_tensor_manipulation/test_ten_svd.cc" "${BLAS_INCLUDE_DIR}" "" "${MATH_LIB_LINK_FLAGS}" ) -if (NOT QLTEN_USE_GPU) # temporary remove because it is not used in DMRG - # EVD +if (QLTEN_USE_GPU) +#test EVD + # GPU-specific code + add_unittest(test_sym_mat_evd + "test_tensor_manipulation/test_sym_mat_evd.cc" + "${CUDA_INCLUDE_DIRS}" "" "${CUDA_LIBRARIES}" + ) +else() + # CPU-specific code add_unittest(test_sym_mat_evd "test_tensor_manipulation/test_sym_mat_evd.cc" "${BLAS_INCLUDE_DIR}" "" "${MATH_LIB_LINK_FLAGS}" ) -endif () +endif() # Test tensor QR. add_unittest(test_ten_qr "test_tensor_manipulation/test_ten_qr.cc" From f411dc7cf2924d055b05f476fdbd1b8292bf421e Mon Sep 17 00:00:00 2001 From: Phy-Ren <909629481@qq.com> Date: Wed, 26 Feb 2025 23:07:16 +0800 Subject: [PATCH 2/3] perf(lapack): reduce transpositions in SymMatEVD --- include/qlten/framework/hp_numeric/lapack.h | 76 +++++++++++---------- 1 file changed, 40 insertions(+), 36 deletions(-) diff --git a/include/qlten/framework/hp_numeric/lapack.h b/include/qlten/framework/hp_numeric/lapack.h index e8d433c..e678db5 100644 --- a/include/qlten/framework/hp_numeric/lapack.h +++ b/include/qlten/framework/hp_numeric/lapack.h @@ -678,24 +678,24 @@ inline cusolverStatus_t SymMatEVD( HANDLE_CUDA_ERROR(cudaMalloc(&eigenvalues, n * sizeof(QLTEN_Double))); HANDLE_CUDA_ERROR(cudaMalloc(&d_mat, n * n * sizeof(QLTEN_Double))); - // Transpose input matrix to column-major - const QLTEN_Double alpha = 1.0; - const QLTEN_Double beta = 0.0; - HANDLE_CUBLAS_ERROR(cublasDgeam(cublasHandle, CUBLAS_OP_T, CUBLAS_OP_N, - n, n, &alpha, mat, n, &beta, nullptr, n, - d_mat, n)); + // Copy input matrix directly (no transpose needed for symmetric matrix) + HANDLE_CUDA_ERROR(cudaMemcpy(d_mat, mat, n * n * sizeof(QLTEN_Double), cudaMemcpyDeviceToDevice)); // Query and allocate workspace int lwork = 0; - HANDLE_CUSOLVER_ERROR(cusolverDnDsyevd_bufferSize(handle, CUSOLVER_EIG_MODE_VECTOR, - CUBLAS_FILL_MODE_UPPER, n, - d_mat, n, eigenvalues, &lwork)); + HANDLE_CUSOLVER_ERROR(cusolverDnDsyevd_bufferSize( + handle, CUSOLVER_EIG_MODE_VECTOR, + CUBLAS_FILL_MODE_LOWER, n, + d_mat, n, eigenvalues, &lwork + )); HANDLE_CUDA_ERROR(cudaMalloc(&d_work, lwork * sizeof(QLTEN_Double))); - // Compute eigenvalues/vectors - auto status = cusolverDnDsyevd(handle, CUSOLVER_EIG_MODE_VECTOR, - CUBLAS_FILL_MODE_UPPER, n, d_mat, n, - eigenvalues, d_work, lwork, devInfo); + // Compute eigenvalues/vectors using lower fill mode + auto status = cusolverDnDsyevd( + handle, CUSOLVER_EIG_MODE_VECTOR, + CUBLAS_FILL_MODE_LOWER, n, d_mat, n, + eigenvalues, d_work, lwork, devInfo + ); HANDLE_CUSOLVER_ERROR(status); // Check convergence @@ -706,13 +706,17 @@ inline cusolverStatus_t SymMatEVD( return CUSOLVER_STATUS_INTERNAL_ERROR; } - // Transpose eigenvectors to row-major - HANDLE_CUBLAS_ERROR(cublasDgeam(cublasHandle, CUBLAS_OP_T, CUBLAS_OP_N, - n, n, &alpha, d_mat, n, &beta, nullptr, n, - u, n)); + // Transpose eigenvectors to row-major (columns to rows) + const QLTEN_Double alpha = 1.0; + const QLTEN_Double beta = 0.0; + HANDLE_CUBLAS_ERROR(cublasDgeam( + cublasHandle, CUBLAS_OP_T, CUBLAS_OP_N, + n, n, &alpha, d_mat, n, &beta, nullptr, n, + u, n + )); // Set diagonal matrix (device-to-device copy) - HANDLE_CUBLAS_ERROR(cublasDcopy(cublasHandle, n, eigenvalues, 1, d, n+1)); + HANDLE_CUBLAS_ERROR(cublasDcopy(cublasHandle, n, eigenvalues, 1, d, n + 1)); // Cleanup cudaFree(d_mat); @@ -742,26 +746,23 @@ inline cusolverStatus_t SymMatEVD( HANDLE_CUDA_ERROR(cudaMalloc(&eigenvalues, n * sizeof(QLTEN_Double))); HANDLE_CUDA_ERROR(cudaMalloc(&d_mat, n * n * sizeof(cuDoubleComplex))); - // Transpose input matrix to column-major - const cuDoubleComplex alpha = {1.0, 0.0}; - const cuDoubleComplex beta = {0.0, 0.0}; - HANDLE_CUBLAS_ERROR(cublasZgeam(cublasHandle, CUBLAS_OP_T, CUBLAS_OP_N, - n, n, &alpha, - reinterpret_cast(mat), n, - &beta, nullptr, n, - d_mat, n)); + // Direct copy input matrix (no initial transpose needed) + HANDLE_CUDA_ERROR(cudaMemcpy(d_mat, mat, n * n * sizeof(cuDoubleComplex), + cudaMemcpyDeviceToDevice)); // Query and allocate workspace int lwork = 0; - HANDLE_CUSOLVER_ERROR(cusolverDnZheevd_bufferSize(handle, CUSOLVER_EIG_MODE_VECTOR, - CUBLAS_FILL_MODE_UPPER, n, - d_mat, n, eigenvalues, &lwork)); + HANDLE_CUSOLVER_ERROR(cusolverDnZheevd_bufferSize( + handle, CUSOLVER_EIG_MODE_VECTOR, + CUBLAS_FILL_MODE_LOWER, n, // Use lower triangle for column-major interpretation + d_mat, n, eigenvalues, &lwork)); HANDLE_CUDA_ERROR(cudaMalloc(&d_work, lwork * sizeof(cuDoubleComplex))); // Compute eigenvalues/vectors - auto status = cusolverDnZheevd(handle, CUSOLVER_EIG_MODE_VECTOR, - CUBLAS_FILL_MODE_UPPER, n, d_mat, n, - eigenvalues, d_work, lwork, devInfo); + auto status = cusolverDnZheevd( + handle, CUSOLVER_EIG_MODE_VECTOR, + CUBLAS_FILL_MODE_LOWER, n, + d_mat, n, eigenvalues, d_work, lwork, devInfo); HANDLE_CUSOLVER_ERROR(status); // Check convergence @@ -772,10 +773,13 @@ inline cusolverStatus_t SymMatEVD( return CUSOLVER_STATUS_INTERNAL_ERROR; } - // Transpose eigenvectors to row-major - HANDLE_CUBLAS_ERROR(cublasZgeam(cublasHandle, CUBLAS_OP_T, CUBLAS_OP_N, - n, n, &alpha, d_mat, n, &beta, nullptr, n, - reinterpret_cast(u), n)); + // Conjugate transpose eigenvectors (CUBLAS_OP_C) for Hermitian correctness + const cuDoubleComplex alpha = {1.0, 0.0}; + const cuDoubleComplex beta = {0.0, 0.0}; + HANDLE_CUBLAS_ERROR(cublasZgeam( + cublasHandle, CUBLAS_OP_C, CUBLAS_OP_N, // Changed to conjugate transpose + n, n, &alpha, d_mat, n, &beta, nullptr, n, + reinterpret_cast(u), n)); // Set diagonal matrix (device-to-device copy) HANDLE_CUBLAS_ERROR(cublasDcopy(cublasHandle, n, eigenvalues, 1, d, n+1)); From aae9bd96ef0d1681f64169de3b742cdf13333e37 Mon Sep 17 00:00:00 2001 From: Hao-Xin Wang Date: Thu, 6 Mar 2025 20:06:40 +0800 Subject: [PATCH 3/3] correct Cmake --- include/qlten/framework/hp_numeric/lapack.h | 8 ++--- tests/CMakeLists.txt | 33 ++++++--------------- 2 files changed, 13 insertions(+), 28 deletions(-) diff --git a/include/qlten/framework/hp_numeric/lapack.h b/include/qlten/framework/hp_numeric/lapack.h index e678db5..caa5510 100644 --- a/include/qlten/framework/hp_numeric/lapack.h +++ b/include/qlten/framework/hp_numeric/lapack.h @@ -746,7 +746,7 @@ inline cusolverStatus_t SymMatEVD( HANDLE_CUDA_ERROR(cudaMalloc(&eigenvalues, n * sizeof(QLTEN_Double))); HANDLE_CUDA_ERROR(cudaMalloc(&d_mat, n * n * sizeof(cuDoubleComplex))); - // Direct copy input matrix (no initial transpose needed) + // Direct copy input matrix (no initial transpose) HANDLE_CUDA_ERROR(cudaMemcpy(d_mat, mat, n * n * sizeof(cuDoubleComplex), cudaMemcpyDeviceToDevice)); @@ -773,15 +773,15 @@ inline cusolverStatus_t SymMatEVD( return CUSOLVER_STATUS_INTERNAL_ERROR; } - // Conjugate transpose eigenvectors (CUBLAS_OP_C) for Hermitian correctness + // Conjugate transpose eigenvectors const cuDoubleComplex alpha = {1.0, 0.0}; const cuDoubleComplex beta = {0.0, 0.0}; HANDLE_CUBLAS_ERROR(cublasZgeam( - cublasHandle, CUBLAS_OP_C, CUBLAS_OP_N, // Changed to conjugate transpose + cublasHandle, CUBLAS_OP_C, CUBLAS_OP_N, n, n, &alpha, d_mat, n, &beta, nullptr, n, reinterpret_cast(u), n)); - // Set diagonal matrix (device-to-device copy) + // Set diagonal matrix HANDLE_CUBLAS_ERROR(cublasDcopy(cublasHandle, n, eigenvalues, 1, d, n+1)); // Cleanup diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 261a0fd..aa3586b 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,8 +1,8 @@ # SPDX-License-Identifier: LGPL-3.0-only -# +# # Author: Hao-Xin Wang # Creation Date: 2024-Jan-28 -# +# # Description: QuantumLiquids/tensor project. CMake file to control unittest. # @@ -11,16 +11,13 @@ if (QLTEN_USE_GPU) # Enable CUDA language support enable_language(CUDA) - set(CUDAToolkit_ROOT /usr/local/cuda) - find_package(CUDA REQUIRED) find_package(CUDAToolkit REQUIRED) # Modern CUDA Toolkit detection - find_library(CUBLAS_LIBRARY cublas HINTS ${CUDAToolkit_ROOT}/lib64) include_directories(${CUDAToolkit_INCLUDE_DIRS}) message(STATUS "CUDA_INCLUDE_DIRS: '${CUDAToolkit_INCLUDE_DIRS}'") # Set CUDA architectures - set(CMAKE_CUDA_ARCHITECTURES 70 80 86) # For V100 & A100, adjust if needed + set(CMAKE_CUDA_ARCHITECTURES 70 80) # For V100 & A100, adjust if needed # Compiler flags set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -lineinfo -Xcompiler=-fPIC") @@ -42,7 +39,7 @@ endif () if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") set(BLA_VENDOR Intel10_64lp) #FOR VENDOR CHOICE: https://cmake.org/cmake/help/latest/module/FindBLAS.html - set(BLAS_INCLUDE_DIR "$ENV{MKLROOT}") + set(BLAS_INCLUDE_DIR "$ENV{MKLROOT}/include") elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "arm64") # (e.g., Apple Silicon) add_definitions(-DUSE_OPENBLAS) set(BLA_VENDOR OpenBLAS) @@ -51,7 +48,6 @@ elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "arm64") # (e.g., Apple Silicon) set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} ${OpenBLAS_ROOT} ${Lapack_ROOT}) # For find set(BLAS_INCLUDE_DIR "${OpenBLAS_ROOT}/include") endif () -message(STATUS "${BLAS_INCLUDE_DIR}") FIND_PACKAGE(BLAS REQUIRED) FIND_PACKAGE(LAPACK REQUIRED) @@ -95,8 +91,6 @@ if (QLTEN_USE_GPU) ) target_link_libraries(${TEST_NAME} PRIVATE CUDA::cublas_static - PRIVATE CUDA::cublasLt - PRIVATE CUDA::cublasLt_static PRIVATE CUDA::cusolver_static PRIVATE ${CUTENSOR_LIBRARY} PRIVATE GTest::GTest GTest::Main @@ -225,20 +219,11 @@ add_unittest(test_ten_svd "test_tensor_manipulation/test_ten_svd.cc" "${BLAS_INCLUDE_DIR}" "" "${MATH_LIB_LINK_FLAGS}" ) -if (QLTEN_USE_GPU) -#test EVD - # GPU-specific code - add_unittest(test_sym_mat_evd - "test_tensor_manipulation/test_sym_mat_evd.cc" - "${CUDA_INCLUDE_DIRS}" "" "${CUDA_LIBRARIES}" - ) -else() - # CPU-specific code - add_unittest(test_sym_mat_evd - "test_tensor_manipulation/test_sym_mat_evd.cc" - "${BLAS_INCLUDE_DIR}" "" "${MATH_LIB_LINK_FLAGS}" - ) -endif() +# EVD +add_unittest(test_sym_mat_evd + "test_tensor_manipulation/test_sym_mat_evd.cc" + "${BLAS_INCLUDE_DIR}" "" "${MATH_LIB_LINK_FLAGS}" +) # Test tensor QR. add_unittest(test_ten_qr "test_tensor_manipulation/test_ten_qr.cc"