From 3debb2970d1222f562ddec1fa992bc6ad2a26c92 Mon Sep 17 00:00:00 2001 From: James Osborn Date: Fri, 22 Nov 2024 13:20:16 -0600 Subject: [PATCH] clean up some casting in tests --- tests/contract_ft_test.cpp | 11 +- tests/gauge_alg_test.cpp | 2 +- tests/host_reference/contract_ft_reference.h | 8 +- .../host_reference/gauge_force_reference.cpp | 43 +++--- tests/host_reference/hisq_force_reference.cpp | 139 ++++++++---------- .../staggered_dslash_reference.cpp | 8 +- tests/laph_test.cpp | 4 +- 7 files changed, 102 insertions(+), 113 deletions(-) diff --git a/tests/contract_ft_test.cpp b/tests/contract_ft_test.cpp index cc793ede5b..740adf6cc9 100644 --- a/tests/contract_ft_test.cpp +++ b/tests/contract_ft_test.cpp @@ -171,9 +171,9 @@ inline int launch_contract_test(const QudaContractType cType, const std::array(buffs, X, dof); - for (int s = 0; s < nprops; ++s, off += spinor_field_floats * sizeof(Float)) { - spinorX[s] = (void *)((uintptr_t)buffs[0].data() + off); - spinorY[s] = (void *)((uintptr_t)buffs[1].data() + off); + for (int s = 0; s < nprops; ++s, off += spinor_field_floats) { + spinorX[s] = static_cast(buffs[0].data() + off); + spinorY[s] = static_cast(buffs[1].data() + off); } // Perform GPU contraction: void *d_result_ = static_cast(d_result.data()); @@ -181,9 +181,8 @@ inline int launch_contract_test(const QudaContractType cType, const std::array((Float **)spinorX.data(), (Float **)spinorY.data(), d_result.data(), cType, - src_colors, X.data(), source_position.data(), n_mom, mom.data(), fft_type.data()); + int faults = contractionFT_reference(spinorX.data(), spinorY.data(), d_result.data(), cType, src_colors, + X.data(), source_position.data(), n_mom, mom.data(), fft_type.data()); return faults; } diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp index c5197af83c..2994904e29 100644 --- a/tests/gauge_alg_test.cpp +++ b/tests/gauge_alg_test.cpp @@ -79,7 +79,7 @@ struct GaugeAlgTest : public ::testing::TestWithParam { #ifndef QUDA_BUILD_NATIVE_FFT // skip FFT tests if FFT not available const ::testing::TestInfo *const test_info = ::testing::UnitTest::GetInstance()->current_test_info(); const char *name = test_info->name(); - if (strcmp(name, "Landau_FFT") == 0 || strcmp(name, "Coulomb_FFT") == 0) { + if (strncmp(name, "Landau_FFT", 10) == 0 || strncmp(name, "Coulomb_FFT", 11) == 0) { execute = false; GTEST_SKIP(); } diff --git a/tests/host_reference/contract_ft_reference.h b/tests/host_reference/contract_ft_reference.h index 697ed550e2..e811d4ec99 100644 --- a/tests/host_reference/contract_ft_reference.h +++ b/tests/host_reference/contract_ft_reference.h @@ -58,7 +58,7 @@ template inline void FourierPhase(Float z[2], const Float theta }; template -void contractFTHost(Float **h_prop_array_flavor_1, Float **h_prop_array_flavor_2, double *h_result, +void contractFTHost(void **h_prop_array_flavor_1, void **h_prop_array_flavor_2, double *h_result, const QudaContractType cType, const int src_colors, const int *X, const int *const source_position, const int n_mom, const int *const mom_modes, const QudaFFTSymmType *const fft_type) { @@ -126,8 +126,8 @@ void contractFTHost(Float **h_prop_array_flavor_1, Float **h_prop_array_flavor_2 for (int c1 = 0; c1 < src_colors; c1++) { // color contraction size_t off = nSpin * 3 * 2 * (Vh * parity + cb_idx); - contractColors(h_prop_array_flavor_1[s1 * src_colors + c1] + off, - h_prop_array_flavor_2[s2 * src_colors + c1] + off, nSpin, M.data()); + contractColors(static_cast(h_prop_array_flavor_1[s1 * src_colors + c1]) + off, + static_cast(h_prop_array_flavor_2[s2 * src_colors + c1]) + off, nSpin, M.data()); // apply gamma matrices here @@ -158,7 +158,7 @@ void contractFTHost(Float **h_prop_array_flavor_1, Float **h_prop_array_flavor_2 }; template -int contractionFT_reference(Float **spinorX, Float **spinorY, const double *const d_result, const QudaContractType cType, +int contractionFT_reference(void **spinorX, void **spinorY, const double *const d_result, const QudaContractType cType, const int src_colors, const int *X, const int *const source_position, const int n_mom, const int *const mom_modes, const QudaFFTSymmType *const fft_type) { diff --git a/tests/host_reference/gauge_force_reference.cpp b/tests/host_reference/gauge_force_reference.cpp index ce285a6b99..741efdf0b7 100644 --- a/tests/host_reference/gauge_force_reference.cpp +++ b/tests/host_reference/gauge_force_reference.cpp @@ -426,7 +426,7 @@ static void update_gauge(su3_matrix *gauge, int dir, su3_matrix **sitelink, su3_ /* This function only computes one direction @dir * */ -void gauge_force_reference_dir(void *refMom, int dir, double eb3, void *const *sitelink, void *const *sitelink_ex, +void gauge_force_reference_dir(void *refMom, int dir, double eb3, quda::GaugeField &u, quda::GaugeField &u_ex, QudaPrecision prec, int **path_dir, int *length, void *loop_coeff, int num_paths, const lattice_t &lat, bool compute_force) { @@ -437,26 +437,30 @@ void gauge_force_reference_dir(void *refMom, int dir, double eb3, void *const *s for (int i = 0; i < num_paths; i++) { if (prec == QUDA_DOUBLE_PRECISION) { double *my_loop_coeff = (double *)loop_coeff; - compute_path_product((dsu3_matrix *)staple, (dsu3_matrix **)sitelink_ex, path_dir[i], length[i], my_loop_coeff[i], - dir, lat); + compute_path_product((dsu3_matrix *)staple, u_ex.data_array().data, path_dir[i], length[i], + my_loop_coeff[i], dir, lat); } else { float *my_loop_coeff = (float *)loop_coeff; - compute_path_product((fsu3_matrix *)staple, (fsu3_matrix **)sitelink_ex, path_dir[i], length[i], my_loop_coeff[i], - dir, lat); + compute_path_product((fsu3_matrix *)staple, u_ex.data_array().data, path_dir[i], length[i], + my_loop_coeff[i], dir, lat); } } if (compute_force) { if (prec == QUDA_DOUBLE_PRECISION) { - update_mom((danti_hermitmat *)refMom, dir, (dsu3_matrix **)sitelink, (dsu3_matrix *)staple, (double)eb3, lat); + update_mom((danti_hermitmat *)refMom, dir, u.data_array().data, (dsu3_matrix *)staple, (double)eb3, + lat); } else { - update_mom((fanti_hermitmat *)refMom, dir, (fsu3_matrix **)sitelink, (fsu3_matrix *)staple, (float)eb3, lat); + update_mom((fanti_hermitmat *)refMom, dir, u.data_array().data, (fsu3_matrix *)staple, (float)eb3, + lat); } } else { if (prec == QUDA_DOUBLE_PRECISION) { - update_gauge((dsu3_matrix *)refMom, dir, (dsu3_matrix **)sitelink, (dsu3_matrix *)staple, (double)eb3, lat); + update_gauge((dsu3_matrix *)refMom, dir, u.data_array().data, (dsu3_matrix *)staple, (double)eb3, + lat); } else { - update_gauge((fsu3_matrix *)refMom, dir, (fsu3_matrix **)sitelink, (fsu3_matrix *)staple, (float)eb3, lat); + update_gauge((fsu3_matrix *)refMom, dir, u.data_array().data, (fsu3_matrix *)staple, (float)eb3, + lat); } } host_free(staple); @@ -465,8 +469,6 @@ void gauge_force_reference_dir(void *refMom, int dir, double eb3, void *const *s void gauge_force_reference(void *refMom, double eb3, quda::GaugeField &u, int ***path_dir, int *length, void *loop_coeff, int num_paths, bool compute_force) { - void *sitelink[] = {u.data(0), u.data(1), u.data(2), u.data(3)}; - // created extended field quda::lat_dim_t R; for (int d = 0; d < 4; d++) R[d] = 2 * quda::comm_dim_partitioned(d); @@ -475,13 +477,12 @@ void gauge_force_reference(void *refMom, double eb3, quda::GaugeField &u, int ** param.gauge_order = QUDA_QDP_GAUGE_ORDER; param.t_boundary = QUDA_PERIODIC_T; - auto qdp_ex = quda::createExtendedGauge((void **)sitelink, param, R); + auto qdp_ex = quda::createExtendedGauge(u.data_array().data, param, R); lattice_t lat(*qdp_ex); - void *sitelink_ex[] = {qdp_ex->data(0), qdp_ex->data(1), qdp_ex->data(2), qdp_ex->data(3)}; for (int dir = 0; dir < 4; dir++) { - gauge_force_reference_dir(refMom, dir, eb3, sitelink, sitelink_ex, u.Precision(), path_dir[dir], length, loop_coeff, - num_paths, lat, compute_force); + gauge_force_reference_dir(refMom, dir, eb3, u, *qdp_ex, u.Precision(), path_dir[dir], length, loop_coeff, num_paths, + lat, compute_force); } delete qdp_ex; @@ -490,8 +491,6 @@ void gauge_force_reference(void *refMom, double eb3, quda::GaugeField &u, int ** void gauge_loop_trace_reference(quda::GaugeField &u, std::vector &loop_traces, double factor, int **input_path, int *length, double *path_coeff, int num_paths) { - void *sitelink[] = {u.data(0), u.data(1), u.data(2), u.data(3)}; - // create extended field quda::lat_dim_t R; for (int d = 0; d < 4; d++) R[d] = 2 * quda::comm_dim_partitioned(d); @@ -499,20 +498,20 @@ void gauge_loop_trace_reference(quda::GaugeField &u, std::vector setGaugeParam(param); param.gauge_order = QUDA_QDP_GAUGE_ORDER; param.t_boundary = QUDA_PERIODIC_T; - - auto qdp_ex = quda::createExtendedGauge((void **)sitelink, param, R); + auto qdp_ex = quda::createExtendedGauge(u.data_array().data, param, R); lattice_t lat(*qdp_ex); - void *sitelink_ex[] = {qdp_ex->data(0), qdp_ex->data(1), qdp_ex->data(2), qdp_ex->data(3)}; std::vector loop_tr_dbl(2 * num_paths); for (int i = 0; i < num_paths; i++) { if (u.Precision() == QUDA_DOUBLE_PRECISION) { - dcomplex tr = compute_loop_trace((dsu3_matrix **)sitelink_ex, input_path[i], length[i], path_coeff[i], lat); + dcomplex tr + = compute_loop_trace(qdp_ex->data_array().data, input_path[i], length[i], path_coeff[i], lat); loop_tr_dbl[2 * i] = factor * tr.real; loop_tr_dbl[2 * i + 1] = factor * tr.imag; } else { - dcomplex tr = compute_loop_trace((fsu3_matrix **)sitelink_ex, input_path[i], length[i], path_coeff[i], lat); + dcomplex tr + = compute_loop_trace(qdp_ex->data_array().data, input_path[i], length[i], path_coeff[i], lat); loop_tr_dbl[2 * i] = factor * tr.real; loop_tr_dbl[2 * i + 1] = factor * tr.imag; } diff --git a/tests/host_reference/hisq_force_reference.cpp b/tests/host_reference/hisq_force_reference.cpp index 1bef7b74bb..33d773ed95 100644 --- a/tests/host_reference/hisq_force_reference.cpp +++ b/tests/host_reference/hisq_force_reference.cpp @@ -117,9 +117,9 @@ void computeLinkOrderedOuterProduct(su3_vector *src, quda::GaugeField &dest, siz void computeLinkOrderedOuterProduct(void *src, quda::GaugeField &dst, QudaPrecision precision, size_t nhops) { if (precision == QUDA_SINGLE_PRECISION) { - computeLinkOrderedOuterProduct((fsu3_vector *)src, dst, nhops); + computeLinkOrderedOuterProduct(static_cast(src), dst, nhops); } else { - computeLinkOrderedOuterProduct((dsu3_vector *)src, dst, nhops); + computeLinkOrderedOuterProduct(static_cast(src), dst, nhops); } } @@ -342,7 +342,7 @@ template class LoadStore void loadMatrixFromField(const Real *const field, int oddBit, int half_lattice_index, Matrix<3, std::complex> *const mat) const; - void loadMatrixFromField(const Real *const field, int oddBit, int dir, int half_lattice_index, + void loadMatrixFromField(const Real *const *const field, int oddBit, int dir, int half_lattice_index, Matrix<3, std::complex> *const mat) const; void storeMatrixToField(const Matrix<3, std::complex> &mat, int oddBit, int half_lattice_index, @@ -352,12 +352,12 @@ template class LoadStore Real *const) const; void addMatrixToField(const Matrix<3, std::complex> &mat, int oddBit, int dir, int half_lattice_index, - Real coeff, Real *const) const; + Real coeff, Real *const *const) const; void storeMatrixToMomentumField(const Matrix<3, std::complex> &mat, int oddBit, int dir, int half_lattice_index, Real coeff, Real *const) const; - Real getData(const Real *const field, int idx, int dir, int oddBit, int offset, int hfv) const; - void addData(Real *const field, int idx, int dir, int oddBit, int offset, Real, int hfv) const; + Real getData(const Real *const *const field, int idx, int dir, int oddBit, int offset, int hfv) const; + void addData(Real *const *const field, int idx, int dir, int oddBit, int offset, Real, int hfv) const; int half_idx_conversion_ex2normal(int half_lattice_index, const int *dim, int oddBit) const; int half_idx_conversion_normal2ex(int half_lattice_index, const int *dim, int oddBit) const; }; @@ -423,16 +423,16 @@ int LoadStore::half_idx_conversion_normal2ex(int half_lattice_index, const } template -Real LoadStore::getData(const Real *const field, int idx, int dir, int oddBit, int offset, int hfv) const +Real LoadStore::getData(const Real *const *const field, int idx, int dir, int oddBit, int offset, int hfv) const { // QDP format - return ((Real **)field)[dir][(hfv * oddBit + idx) * 18 + offset]; + return field[dir][(hfv * oddBit + idx) * 18 + offset]; } template -void LoadStore::addData(Real *const field, int idx, int dir, int oddBit, int offset, Real v, int hfv) const +void LoadStore::addData(Real *const *const field, int idx, int dir, int oddBit, int offset, Real v, int hfv) const { // QDP format - ((Real **)field)[dir][(hfv * oddBit + idx) * 18 + offset] += v; + field[dir][(hfv * oddBit + idx) * 18 + offset] += v; } template @@ -455,7 +455,7 @@ void LoadStore::loadMatrixFromField(const Real *const field, int oddBit, i } template -void LoadStore::loadMatrixFromField(const Real *const field, int oddBit, int dir, int half_lattice_index, +void LoadStore::loadMatrixFromField(const Real *const *const field, int oddBit, int dir, int half_lattice_index, Matrix<3, std::complex> *const mat) const { #ifdef MULTI_GPU @@ -464,11 +464,10 @@ void LoadStore::loadMatrixFromField(const Real *const field, int oddBit, i int hfv = Vh; #endif - // const Real* const local_field = field + ((oddBit*half_volume + half_lattice_index)*4 + dir)*18; int offset = 0; for (int i = 0; i < 3; ++i) { for (int j = 0; j < 3; ++j) { - (*mat)(i, j) = (getData(field, half_lattice_index, dir, oddBit, offset++, hfv)); + (*mat)(i, j) = getData(field, half_lattice_index, dir, oddBit, offset++, hfv); (*mat)(i, j) += std::complex(0, getData(field, half_lattice_index, dir, oddBit, offset++, hfv)); } } @@ -515,23 +514,18 @@ void LoadStore::addMatrixToField(const Matrix<3, std::complex> &mat, template void LoadStore::addMatrixToField(const Matrix<3, std::complex> &mat, int oddBit, int dir, - int half_lattice_index, Real coeff, Real *const field) const + int half_lattice_index, Real coeff, Real *const *const field) const { - #ifdef MULTI_GPU int hfv = Vh_ex; #else int hfv = Vh; #endif - // Real* const local_field = field + ((oddBit*half_volume + half_lattice_index)*4 + dir)*18; int offset = 0; for (int i = 0; i < 3; ++i) { for (int j = 0; j < 3; ++j) { - // local_field[offset++] += coeff*mat(i,j).real(); addData(field, half_lattice_index, dir, oddBit, offset++, coeff * mat(i, j).real(), hfv); - - // local_field[offset++] += coeff*mat(i,j).imag(); addData(field, half_lattice_index, dir, oddBit, offset++, coeff * mat(i, j).imag(), hfv); } } @@ -762,7 +756,8 @@ void computeOneLinkSite( #else const int[], #endif - int half_lattice_index, const Real *const oprod, int sig, Real coeff, const LoadStore &ls, Real *const output) + int half_lattice_index, const Real *const *const oprod, int sig, Real coeff, const LoadStore &ls, + Real *const *const output) { if (GOES_FORWARDS(sig)) { typename ColorMatrix::Type colorMatW; @@ -777,7 +772,7 @@ void computeOneLinkSite( } template -void computeOneLinkField(const int dim[4], const Real *const oprod, int sig, Real coeff, Real *const output) +void computeOneLinkField(const int dim[4], const Real *const *const oprod, int sig, Real coeff, Real *const *const output) { int volume = 1; for (int dir = 0; dir < 4; ++dir) volume *= dim[dir]; @@ -795,10 +790,10 @@ void computeOneLinkField(const int dim[4], const Real *const oprod, int sig, Rea // middleLinkKernel compiles for now, but lots of debugging to be done template void computeMiddleLinkSite(int half_lattice_index, // half_lattice_index to better match the GPU code. - const int dim[4], const Real *const oprod, const Real *const Qprev, const Real *const link, + const int dim[4], void *const oprod, const Real *const Qprev, const Real *const *const link, int sig, int mu, Real coeff, const LoadStore &ls, // pass a function object to read from and write to matrix fields - Real *const Pmu, Real *const P3, Real *const Qmu, Real *const newOprod) + Real *const Pmu, Real *const P3, Real *const Qmu, Real *const *const newOprod) { const bool mu_positive = (GOES_FORWARDS(mu)) ? true : false; const bool sig_positive = (GOES_FORWARDS(sig)) ? true : false; @@ -842,13 +837,13 @@ void computeMiddleLinkSite(int half_lattice_index, // half_lattice_index to bett if (Qprev == NULL) { if (sig_positive) { - ls.loadMatrixFromField(oprod, 1 - oddBit, sig, point_d, &colorMatY); + ls.loadMatrixFromField(static_cast(oprod), 1 - oddBit, sig, point_d, &colorMatY); } else { - ls.loadMatrixFromField(oprod, oddBit, OPP_DIR(sig), point_c, &colorMatY); + ls.loadMatrixFromField(static_cast(oprod), oddBit, OPP_DIR(sig), point_c, &colorMatY); colorMatY = conj(colorMatY); } } else { // Qprev != NULL - ls.loadMatrixFromField(oprod, oddBit, point_c, &colorMatY); + ls.loadMatrixFromField(static_cast(oprod), oddBit, point_c, &colorMatY); } colorMatW = (!mu_positive) ? bc_link * colorMatY : conj(bc_link) * colorMatY; @@ -880,9 +875,9 @@ void computeMiddleLinkSite(int half_lattice_index, // half_lattice_index to bett } // computeMiddleLinkSite template -void computeMiddleLinkField(const int dim[4], const Real *const oprod, const Real *const Qprev, const Real *const link, +void computeMiddleLinkField(const int dim[4], void *const oprod, const Real *const Qprev, const Real *const *const link, int sig, int mu, Real coeff, Real *const Pmu, Real *const P3, Real *const Qmu, - Real *const newOprod) + Real *const *const newOprod) { int volume = 1; @@ -911,9 +906,9 @@ template void computeSideLinkSite(int half_lattice_index, // half_lattice_index to better match the GPU code. const int dim[4], const Real *const P3, const Real *const Qprod, // why? - const Real *const link, int sig, int mu, Real coeff, Real accumu_coeff, + const Real *const *const link, int sig, int mu, Real coeff, Real accumu_coeff, const LoadStore &ls, // pass a function object to read from and write to matrix fields - Real *const shortP, Real *const newOprod) + Real *const shortP, Real *const *const newOprod) { const bool mu_positive = (GOES_FORWARDS(mu)) ? true : false; @@ -979,8 +974,8 @@ void computeSideLinkSite(int half_lattice_index, // half_lattice_index to better template void computeSideLinkField(const int dim[4], const Real *const P3, const Real *const Qprod, // why? - const Real *const link, int sig, int mu, Real coeff, Real accumu_coeff, Real *const shortP, - Real *const newOprod) + const Real *const *const link, int sig, int mu, Real coeff, Real accumu_coeff, + Real *const shortP, Real *const *const newOprod) { // Need some way of setting half_volume int volume = 1; @@ -1005,10 +1000,10 @@ void computeSideLinkField(const int dim[4], const Real *const P3, template void computeAllLinkSite(int half_lattice_index, // half_lattice_index to better match the GPU code. - const int dim[4], const Real *const oprod, const Real *const Qprev, const Real *const link, - int sig, int mu, Real coeff, Real accumu_coeff, + const int dim[4], const Real *const oprod, const Real *const Qprev, + const Real *const *const link, int sig, int mu, Real coeff, Real accumu_coeff, const LoadStore &ls, // pass a function object to read from and write to matrix fields - Real *const shortP, Real *const newOprod) + Real *const shortP, Real *const *const newOprod) { const bool mu_positive = (GOES_FORWARDS(mu)) ? true : false; @@ -1092,8 +1087,9 @@ void computeAllLinkSite(int half_lattice_index, // half_lattice_index to better } // allLinkKernel template -void computeAllLinkField(const int dim[4], const Real *const oprod, const Real *const Qprev, const Real *const link, - int sig, int mu, Real coeff, Real accumu_coeff, Real *const shortP, Real *const newOprod) +void computeAllLinkField(const int dim[4], const Real *const oprod, const Real *const Qprev, + const Real *const *const link, int sig, int mu, Real coeff, Real accumu_coeff, + Real *const shortP, Real *const *const newOprod) { int volume = 1; for (int dir = 0; dir < 4; ++dir) volume *= dim[dir]; @@ -1132,8 +1128,8 @@ template struct PathCoefficients { }; template -void doHisqStaplesForceCPU(const int dim[4], PathCoefficients staple_coeff, Real *oprod, Real *link, - Real **tempmat, Real *newOprod) +void doHisqStaplesForceCPU(const int dim[4], PathCoefficients staple_coeff, Real **oprod, Real **link, + Real **tempmat, Real **newOprod) { Real OneLink, ThreeSt, FiveSt, SevenSt, Lepage, coeff; @@ -1217,9 +1213,6 @@ void hisqStaplesForceCPU(const double *path_coeff, quda::GaugeField &oprod, quda uint64_t len = 1; for (int dir = 0; dir < 4; ++dir) len *= X_[dir]; #endif - // allocate memory for temporary fields - void *tempmat[6]; - for (int i = 0; i < 6; i++) { tempmat[i] = safe_malloc(len * 18 * precision); } PathCoefficients act_path_coeff; act_path_coeff.one = path_coeff[0]; @@ -1229,27 +1222,29 @@ void hisqStaplesForceCPU(const double *path_coeff, quda::GaugeField &oprod, quda act_path_coeff.seven = path_coeff[4]; act_path_coeff.lepage = path_coeff[5]; - void *oprod_array[] = {oprod.data(0), oprod.data(1), oprod.data(2), oprod.data(3)}; - void *link_array[] = {link.data(0), link.data(1), link.data(2), link.data(3)}; - void *noprod_array[] = {newOprod->data(0), newOprod->data(1), newOprod->data(2), newOprod->data(3)}; - if (precision == QUDA_DOUBLE_PRECISION) { - doHisqStaplesForceCPU(X_, act_path_coeff, reinterpret_cast(oprod_array), - reinterpret_cast(link_array), (double **)tempmat, - reinterpret_cast(noprod_array)); - } else if (precision == QUDA_SINGLE_PRECISION) { - doHisqStaplesForceCPU(X_, act_path_coeff, reinterpret_cast(oprod_array), - reinterpret_cast(link_array), (float **)tempmat, - reinterpret_cast(noprod_array)); + if (precision == QUDA_SINGLE_PRECISION) { + // allocate memory for temporary fields + float *tempmat[6]; + for (int i = 0; i < 6; i++) { tempmat[i] = static_cast(safe_malloc(len * 18 * precision)); } + doHisqStaplesForceCPU(X_, act_path_coeff, oprod.data_array().data, link.data_array().data, + tempmat, newOprod->data_array().data); + for (int i = 0; i < 6; ++i) { host_free(tempmat[i]); } + } else if (precision == QUDA_DOUBLE_PRECISION) { + // allocate memory for temporary fields + double *tempmat[6]; + for (int i = 0; i < 6; i++) { tempmat[i] = static_cast(safe_malloc(len * 18 * precision)); } + doHisqStaplesForceCPU(X_, act_path_coeff, oprod.data_array().data, + link.data_array().data, tempmat, newOprod->data_array().data); + for (int i = 0; i < 6; ++i) { host_free(tempmat[i]); } } else { errorQuda("Unsupported precision"); } - - for (int i = 0; i < 6; ++i) { host_free(tempmat[i]); } } template -void computeLongLinkSite(int half_lattice_index, const int dim[4], const Real *const oprod, const Real *const link, - int sig, Real coeff, const LoadStore &ls, Real *const output) +void computeLongLinkSite(int half_lattice_index, const int dim[4], const Real *const *const oprod, + const Real *const *const link, int sig, Real coeff, const LoadStore &ls, + Real *const *const output) { if (GOES_FORWARDS(sig)) { @@ -1296,8 +1291,8 @@ void computeLongLinkSite(int half_lattice_index, const int dim[4], const Real *c } template -void computeLongLinkField(const int dim[4], const Real *const oprod, const Real *const link, int sig, Real coeff, - Real *const output) +void computeLongLinkField(const int dim[4], const Real *const *const oprod, const Real *const *const link, int sig, + Real coeff, Real *const *const output) { int volume = 1; for (int dir = 0; dir < 4; ++dir) volume *= dim[dir]; @@ -1321,16 +1316,13 @@ void hisqLongLinkForceCPU(double coeff, quda::GaugeField &oprod, quda::GaugeFiel for (int d = 0; d < 4; d++) X_[d] = oprod.X()[d] - 2 * oprod.R()[d]; QudaPrecision precision = oprod.Precision(); - void *oprod_array[] = {oprod.data(0), oprod.data(1), oprod.data(2), oprod.data(3)}; - void *link_array[] = {link.data(0), link.data(1), link.data(2), link.data(3)}; - void *noprod_array[] = {newOprod->data(0), newOprod->data(1), newOprod->data(2), newOprod->data(3)}; for (int sig = 0; sig < 4; ++sig) { if (precision == QUDA_SINGLE_PRECISION) { - computeLongLinkField(X_, reinterpret_cast(oprod_array), reinterpret_cast(link_array), - sig, coeff, reinterpret_cast(noprod_array)); + computeLongLinkField(X_, oprod.data_array().data, link.data_array().data, sig, coeff, + newOprod->data_array().data); } else if (precision == QUDA_DOUBLE_PRECISION) { - computeLongLinkField(X_, reinterpret_cast(oprod_array), reinterpret_cast(link_array), - sig, coeff, reinterpret_cast(noprod_array)); + computeLongLinkField(X_, oprod.data_array().data, link.data_array().data, sig, coeff, + newOprod->data_array().data); } else { errorQuda("Unrecognised precision"); } @@ -1344,8 +1336,8 @@ void completeForceSite(int half_lattice_index, #else const int[], #endif - const Real *const oprod, const Real *const link, int sig, const LoadStore &ls, - Real *const mom) + const Real *const *const oprod, const Real *const *const link, int sig, + const LoadStore &ls, Real *const mom) { typename ColorMatrix::Type colorMatX, colorMatY, linkW; @@ -1366,7 +1358,8 @@ void completeForceSite(int half_lattice_index, } template -void completeForceField(const int dim[4], const Real *const oprod, const Real *const link, int sig, Real *const mom) +void completeForceField(const int dim[4], const Real *const *const oprod, const Real *const *const link, int sig, + Real *const mom) { int volume = dim[0] * dim[1] * dim[2] * dim[3]; const int half_volume = volume / 2; @@ -1384,15 +1377,13 @@ void hisqCompleteForceCPU(quda::GaugeField &oprod, quda::GaugeField &link, quda: for (int d = 0; d < 4; d++) X_[d] = oprod.X()[d] - 2 * oprod.R()[d]; QudaPrecision precision = oprod.Precision(); - void *oprod_array[] = {oprod.data(0), oprod.data(1), oprod.data(2), oprod.data(3)}; - void *link_array[] = {link.data(0), link.data(1), link.data(2), link.data(3)}; for (int sig = 0; sig < 4; ++sig) { if (precision == QUDA_SINGLE_PRECISION) { - completeForceField(X_, reinterpret_cast(oprod_array), reinterpret_cast(link_array), sig, + completeForceField(X_, oprod.data_array().data, link.data_array().data, sig, mom->data()); } else if (precision == QUDA_DOUBLE_PRECISION) { - completeForceField(X_, reinterpret_cast(oprod_array), reinterpret_cast(link_array), - sig, mom->data()); + completeForceField(X_, oprod.data_array().data, link.data_array().data, sig, + mom->data()); } else { errorQuda("Unrecognised precision"); } diff --git a/tests/host_reference/staggered_dslash_reference.cpp b/tests/host_reference/staggered_dslash_reference.cpp index 212549dc38..a04d28dedb 100644 --- a/tests/host_reference/staggered_dslash_reference.cpp +++ b/tests/host_reference/staggered_dslash_reference.cpp @@ -139,15 +139,15 @@ void stag_dslash(ColorSpinorField &out, const GaugeField &fat_link, const GaugeF long_link.Ghost()[3].data()}; if (in.Precision() == QUDA_DOUBLE_PRECISION) { - staggeredDslashReference(static_cast(out.data()), reinterpret_cast(qdp_fatlink), + staggeredDslashReference(out.data(), reinterpret_cast(qdp_fatlink), reinterpret_cast(qdp_longlink), reinterpret_cast(ghost_fatlink), - reinterpret_cast(ghost_longlink), static_cast(in.data()), + reinterpret_cast(ghost_longlink), in.data(), reinterpret_cast(in.fwdGhostFaceBuffer), reinterpret_cast(in.backGhostFaceBuffer), oddBit, daggerBit, dslash_type); } else if (in.Precision() == QUDA_SINGLE_PRECISION) { - staggeredDslashReference(static_cast(out.data()), reinterpret_cast(qdp_fatlink), + staggeredDslashReference(out.data(), reinterpret_cast(qdp_fatlink), reinterpret_cast(qdp_longlink), reinterpret_cast(ghost_fatlink), - reinterpret_cast(ghost_longlink), static_cast(in.data()), + reinterpret_cast(ghost_longlink), in.data(), reinterpret_cast(in.fwdGhostFaceBuffer), reinterpret_cast(in.backGhostFaceBuffer), oddBit, daggerBit, dslash_type); } diff --git a/tests/laph_test.cpp b/tests/laph_test.cpp index ac20bce407..75f244d406 100644 --- a/tests/laph_test.cpp +++ b/tests/laph_test.cpp @@ -114,8 +114,8 @@ auto laph_test(test_t param) std::vector qudaRes(nSink * nEv * Lt * nSpin, 0.); int X[4] = {xdim, ydim, zdim, tdim}; - laphSinkProject((__complex__ double *)qudaRes.data(), (void **)snkPtr.data(), nSink, tileSink, - (void **)evPtr.data(), nEv, tileEv, &invParam, X); + laphSinkProject((__complex__ double *)qudaRes.data(), snkPtr.data(), nSink, tileSink, evPtr.data(), nEv, tileEv, + &invParam, X); printfQuda("laphSinkProject Done: %g secs, %g Gflops\n", invParam.secs, invParam.gflops / invParam.secs); auto tol = getTolerance(cuda_prec);