diff --git a/README.md b/README.md index 8b1afc8b43..274f708b3e 100644 --- a/README.md +++ b/README.md @@ -272,6 +272,7 @@ Advanced Scientific Computing (PASC21) [arXiv:2104.05615[hep-lat]]. * Hwancheol Jeong (Indiana University) * Xiangyu Jiang (ITP, Chinese Academy of Sciences) * Balint Joo (OLCF, Oak Ridge National Laboratory, formerly Jefferson Lab) +* Rohith Karur (UC Berkeley, Lawrence Berkeley National Laboratory) * Hyung-Jin Kim (Samsung Advanced Institute of Technology) * Bartosz Kostrzewa (HPC/A-Lab, University of Bonn) * Damon McDougall (AMD) diff --git a/include/quda.h b/include/quda.h index 7501b4c9dd..5230d18eda 100644 --- a/include/quda.h +++ b/include/quda.h @@ -873,6 +873,8 @@ extern "C" { double alpha3; /**< The coefficient used in HYP smearing step 1*/ unsigned int meas_interval; /**< Perform the requested measurements on the gauge field at this interval */ QudaGaugeSmearType smear_type; /**< The smearing type to perform */ + unsigned int adj_n_save; /**< How many intermediate gauge fields to save at each large nblock to perform adj flow*/ + unsigned int hier_threshold; /**< Minimum *hierarchical* threshold for adj gradient flow*/ QudaBoolean restart; /**< Used to restart the smearing from existing gaugeSmeared */ double t0; /**< Starting flow time for Wilson flow */ int dir_ignore; /**< The direction to be ignored by the smearing algorithm @@ -1707,6 +1709,26 @@ extern "C" { void performGFlowQuda(void **h_out, void **h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param, QudaGaugeObservableParam *obs_param, const size_t nSpinors); + /** + * Performs Adjoint Gradient Flow (gauge + fermion) the "safe" way on gaugePrecise and stores it in gaugeSmeared + * @param[out] h_out Output fermion field + * @param[in] h_in Input fermion field + * @param[in] smear_param Parameter struct that defines the computation parameters + * @param[in,out] obs_param Parameter struct that defines which + * observables we are making and the resulting observables. + */ + void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param); + + /** + * Performs Adjoint Gradient Flow (gauge + fermion) the Hierarchical way on gaugePrecise and stores it in gaugeSmeared + * @param[out] h_out Output fermion field + * @param[in] h_in Input fermion field + * @param[in] smear_param Parameter struct that defines the computation parameters + * @param[in,out] obs_param Parameter struct that defines which + * observables we are making and the resulting observables. + */ + void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param); + /** * @brief Calculates a variety of gauge-field observables. If a * smeared gauge field is presently loaded (in gaugeSmeared) the diff --git a/lib/check_params.h b/lib/check_params.h index ee5063f4c2..a0554356ba 100644 --- a/lib/check_params.h +++ b/lib/check_params.h @@ -1161,6 +1161,7 @@ void printQudaGaugeSmearParam(QudaGaugeSmearParam *param) #if defined CHECK_PARAM if (param->struct_size != (size_t)INVALID_INT && param->struct_size != sizeof(*param)) errorQuda("Unexpected QudaGaugeSmearParam struct size %lu, expected %lu", param->struct_size, sizeof(*param)); + #else P(struct_size, (size_t)INVALID_INT); #endif @@ -1176,6 +1177,8 @@ void printQudaGaugeSmearParam(QudaGaugeSmearParam *param) P(smear_anisotropy, 1.0); P(rk_order, 3); P(restart, QUDA_BOOLEAN_FALSE); + P(adj_n_save, 5); + P(hier_threshold, 6); P(t0, 0.0); P(alpha1, 0.0); P(alpha2, 0.0); @@ -1190,6 +1193,8 @@ void printQudaGaugeSmearParam(QudaGaugeSmearParam *param) P(smear_anisotropy, INVALID_DOUBLE); P(rk_order, (unsigned int)INVALID_INT); P(restart, QUDA_BOOLEAN_INVALID); + P(adj_n_save, (unsigned int)INVALID_INT); + P(hier_threshold, (unsigned int)INVALID_INT); P(t0, INVALID_DOUBLE); P(alpha1, INVALID_DOUBLE); P(alpha2, INVALID_DOUBLE); diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index 6b010fe066..3266167d5d 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -196,6 +197,11 @@ static TimeProfile profileWFlow("wFlowQuda"); //!< Profiler for gFlowQuda static TimeProfile profileGFlow("gFlowQuda"); +//!< Profiler for gFlowQuda +static TimeProfile profileAdjGFlowSafe("AdjgFlowSafeQuda"); + +static TimeProfile profileAdjGFlowHier("AdjgFlowHierQuda"); + //!< Profiler for projectSU3Quda static TimeProfile profileProject("projectSU3Quda"); @@ -5468,6 +5474,451 @@ void performGFlowQuda(void **h_out, void **h_in, QudaInvertParam *inv_param, Qud } /* end of performGFlowQuda */ +// perform adjoint (backwards) gradient flow on gauge and spinor field following the algorithm in arXiv:1302.5246 +// (Appendix E) the gauge flow steps are identical to Wilson Flow algorithm in arXiv:1006.4518 (Vt <-> W3) +void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param) +{ + + auto profile = pushProfile(profileAdjGFlowSafe); + pushOutputPrefix("performAdjGFlowQudaSafe: "); + checkGaugeSmearParam(smear_param); + + pushVerbosity(inv_param->verbosity); + if (getVerbosity() >= QUDA_DEBUG_VERBOSE) printQudaInvertParam(inv_param); + + if (smear_param->restart) { + if (gaugeSmeared == nullptr) errorQuda("gaugeSmeared must be loaded"); + } else { + if (gaugePrecise == nullptr) errorQuda("Gauge field must be loaded"); + freeUniqueGaugeQuda(QUDA_SMEARED_LINKS); + gaugeSmeared = createExtendedGauge(*gaugePrecise, R, profileAdjGFlowSafe); + } + + GaugeFieldParam gParamDummy(*gaugeSmeared); + GaugeField gaugeW0(gParamDummy); + GaugeField gaugeW1(gParamDummy); + GaugeField gaugeW2(gParamDummy); + GaugeField gaugeVT(gParamDummy); + + GaugeFieldParam gParam(*gaugePrecise); + gParam.reconstruct = QUDA_RECONSTRUCT_NO; // temporary field is not on manifold so cannot use reconstruct + GaugeField gaugeTemp(gParam); + + const GaugeField gin = *gaugeSmeared; + GaugeField &g_W0 = *gaugeSmeared; + GaugeField &g_W1 = gaugeW1; + GaugeField &g_W2 = gaugeW2; + GaugeField &g_VT = gaugeVT; + + // helper gauge field for Laplace operator + GaugeField precise; + GaugeFieldParam gParam_helper(*gaugePrecise); + gParam_helper.create = QUDA_NULL_FIELD_CREATE; + precise = GaugeField(gParam_helper); + + // spinor fields + ColorSpinorParam cpuParam(h_in, *inv_param, gaugePrecise->X(), false, inv_param->input_location); + ColorSpinorField fin_h(cpuParam); + + ColorSpinorParam deviceParam(cpuParam, *inv_param, QUDA_CUDA_FIELD_LOCATION); + ColorSpinorField fin(deviceParam); + fin = fin_h; + + deviceParam.create = QUDA_NULL_FIELD_CREATE; + ColorSpinorField fout(deviceParam); + + int parity = 0; + + // initialize a and b for Laplace operator + double a = 1.; + double b = -8.; + + int comm_dim[4] = {}; + // Will add fermion measruement utilities later + // int measurement_n = 0; // The nth measurement to take + // only switch on comms needed for directions with a derivative + for (int i = 0; i < 4; i++) { comm_dim[i] = comm_dim_partitioned(i); } + + // auxilliary fermion fields [0], [1], [2] and [3] + ColorSpinorField f_temp0(deviceParam); + ColorSpinorField f_temp1(deviceParam); + ColorSpinorField f_temp2(deviceParam); + ColorSpinorField f_temp3(deviceParam); + ColorSpinorField f_temp4(deviceParam); + + // set [3] = input spinor + f_temp3 = fin; + + for (unsigned int j = 0; j < smear_param->n_steps; j++) { + for (unsigned int i = 0; i < smear_param->n_steps - j; i++) { + + if (i == 0) + g_W0 = gin; + else + std::swap(g_W0, g_VT); + + GFlowStep(g_W1, gaugeTemp, g_W0, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W1); + GFlowStep(g_W2, gaugeTemp, g_W1, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W2); + GFlowStep(g_VT, gaugeTemp, g_W2, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_VT); + } + + // init auxilliary fields [0], [1] and [2] as [3] + f_temp0 = f_temp3; + f_temp1 = f_temp3; + f_temp2 = f_temp3; + + copyExtendedGauge(precise, g_W2, QUDA_CUDA_FIELD_LOCATION); + precise.exchangeGhost(); + ApplyLaplace(f_temp4, f_temp0, precise, 4, a, b, f_temp0, parity, comm_dim, profileAdjGFlowSafe); + + blas::ax(smear_param->epsilon * 3. / 4., f_temp4); + + f_temp2 = f_temp4; + + copyExtendedGauge(precise, g_W1, QUDA_CUDA_FIELD_LOCATION); + precise.exchangeGhost(); + ApplyLaplace(f_temp4, f_temp2, precise, 4, a, b, f_temp2, parity, comm_dim, profileAdjGFlowSafe); + + blas::axpy(smear_param->epsilon * 8. / 9., f_temp4, f_temp3); + + f_temp1 = f_temp3; + f_temp4 = f_temp1; + + blas::axpy(-8. / 9., f_temp2, f_temp4); + + copyExtendedGauge(precise, g_W0, QUDA_CUDA_FIELD_LOCATION); + precise.exchangeGhost(); + ApplyLaplace(f_temp0, f_temp4, precise, 4, a, b, f_temp4, parity, comm_dim, profileAdjGFlowSafe); + + blas::ax(smear_param->epsilon * 1. / 4., f_temp0); + blas::axpy(1., f_temp2, f_temp0); + blas::axpy(1., f_temp1, f_temp0); + + fout = f_temp0; + // redefining f_temp0 to restart loop + f_temp3 = f_temp0; + } + cpuParam.v = h_out; + cpuParam.location = inv_param->output_location; + ColorSpinorField fout_h(cpuParam); + fout_h = fout; + + popOutputPrefix(); +} + +void adjSafeEvolve(std::vector> sf_list, + std::vector> gf_list, QudaGaugeSmearParam *smear_param, + unsigned int ns_safe, TimeProfile &profile, std::vector> meas_cinf) +{ + const GaugeField gin = gf_list[0].get(); + GaugeField &g_W0 = gf_list[0].get(); + GaugeField &g_W1 = gf_list[1].get(); + GaugeField &g_W2 = gf_list[2].get(); + GaugeField &g_VT = gf_list[3].get(); + GaugeField &gaugeTemp = gf_list[4].get(); + GaugeField &precise = gf_list[5].get(); + + ColorSpinorField &f_temp0 = sf_list[0].get(); + ColorSpinorField &f_temp1 = sf_list[1].get(); + ColorSpinorField &f_temp2 = sf_list[2].get(); + ColorSpinorField &f_temp3 = sf_list[3].get(); + ColorSpinorField &f_temp4 = sf_list[4].get(); + + int &i_glob = meas_cinf[0].get(); + int &measurement_n = meas_cinf[1].get(); + measurement_n = 0; + + int parity = 0; + + // initialize a and b for Laplace operator + double a = 1.; + double b = -8.; + + int comm_dim[4] = {}; + // only switch on comms needed for directions with a derivative + for (int i = 0; i < 4; i++) { comm_dim[i] = comm_dim_partitioned(i); } + + for (unsigned int j = 0; j < ns_safe; j++) { + for (unsigned int i = 0; i < ns_safe - j; i++) { + + if (i == 0) + g_W0 = gin; + else + std::swap(g_W0, g_VT); + + GFlowStep(g_W1, gaugeTemp, g_W0, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W1); + GFlowStep(g_W2, gaugeTemp, g_W1, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W2); + GFlowStep(g_VT, gaugeTemp, g_W2, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_VT); + } + // init auxilliary fields [0], [1] and [2] as [3] + f_temp0 = f_temp3; + f_temp1 = f_temp3; + f_temp2 = f_temp3; + + // [4] = Lap2 [0] + copyExtendedGauge(precise, g_W2, QUDA_CUDA_FIELD_LOCATION); + precise.exchangeGhost(); + ApplyLaplace(f_temp4, f_temp0, precise, 4, a, b, f_temp0, parity, comm_dim, profile); + + // [4] -> 3/4 eps [4] + blas::ax(smear_param->epsilon * 3. / 4., f_temp4); + + // [2] = [4] + f_temp2 = f_temp4; + + // [4] = Lap1 [2] + copyExtendedGauge(precise, g_W1, QUDA_CUDA_FIELD_LOCATION); + precise.exchangeGhost(); + ApplyLaplace(f_temp4, f_temp2, precise, 4, a, b, f_temp2, parity, comm_dim, profile); + + // [3] -> [3] + 8/9 eps [4] + blas::axpy(smear_param->epsilon * 8. / 9., f_temp4, f_temp3); + + // [1], [4] <- [3] + f_temp1 = f_temp3; + f_temp4 = f_temp1; + + // [4] <- [4] - 8/9 [2] + blas::axpy(-8. / 9., f_temp2, f_temp4); + + // [0] <- Lap0 [4] + copyExtendedGauge(precise, g_W0, QUDA_CUDA_FIELD_LOCATION); + precise.exchangeGhost(); + ApplyLaplace(f_temp0, f_temp4, precise, 4, a, b, f_temp4, parity, comm_dim, profile); + + // [0] <- 1/4 eps [0]; [0] <- [2] + [0]; [0] <- [1] + [0] + blas::ax(smear_param->epsilon * 1. / 4., f_temp0); + blas::axpy(1., f_temp2, f_temp0); + blas::axpy(1., f_temp1, f_temp0); + + // redefining f_temp0 to restart loop + f_temp3 = f_temp0; + + i_glob++; + } +} + +/* total_dist == n_steps, n_b is dividing factor of each block, n_Save is the size of the list, "front" denotes whether + * split hierarchy goes to existing or new subhierarchy */ +std::vector get_hier_list(int total_dist, int n_b, int n_save, bool front = true) +{ + + std::vector hier_list; + int counter = 0; + + int val = total_dist; + for (int i_s = 0; i_s < n_save; i_s++) { + val = (val <= 1) ? 1 : val / n_b; + hier_list.push_back(val); + counter += val; + } + + if (front) + hier_list.at(0) += total_dist - counter; + else + hier_list.back() += total_dist - counter; + + return hier_list; +} + +int modify_hier_list(std::vector &hier_list, int n_b, int n_save, int threshold) +{ + + int result = -1; + int current_size = hier_list.size(); + std::vector temp_list; + if (current_size > n_save) errorQuda("something isnt right\n"); + + int diff = n_save - current_size; + + for (int i = current_size - 1; i >= 0; --i) { + + if (hier_list[i] > threshold) { + + temp_list = get_hier_list(hier_list[i], n_b, diff + 1, false); + hier_list.erase(hier_list.begin() + i); + hier_list.insert(hier_list.begin() + i, temp_list.begin(), temp_list.end()); + result = i; + break; + } + } + + return result; +} + +void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param) +{ + + auto profile = pushProfile(profileAdjGFlowHier); + pushOutputPrefix("performAdjGFlowQudaHier: "); + checkGaugeSmearParam(smear_param); + + if (smear_param->n_steps <= smear_param->adj_n_save) { + + errorQuda("Not good practice to have adj_n_save (%d) >= n_steps (%d); adj_n_save should be manually altered to " + "min(nsteps, %d): \n", + smear_param->n_steps, smear_param->adj_n_save, smear_param->n_steps - 1); + } + + pushVerbosity(inv_param->verbosity); + if (getVerbosity() >= QUDA_DEBUG_VERBOSE) printQudaInvertParam(inv_param); + + if (smear_param->restart) { + if (gaugeSmeared == nullptr) errorQuda("gaugeSmeared must be loaded"); + } else { + if (gaugePrecise == nullptr) errorQuda("Gauge field must be loaded"); + freeUniqueGaugeQuda(QUDA_SMEARED_LINKS); + gaugeSmeared = createExtendedGauge(*gaugePrecise, R, profileAdjGFlowHier); + } + + GaugeFieldParam gParamDummy(*gaugeSmeared); + GaugeField gaugeW0(gParamDummy); + GaugeField gaugeW1(gParamDummy); + GaugeField gaugeW2(gParamDummy); + GaugeField gaugeVT(gParamDummy); + GaugeField gauge_out(gParamDummy); + + GaugeFieldParam gParam(*gaugePrecise); + gParam.reconstruct = QUDA_RECONSTRUCT_NO; // temporary field is not on manifold so cannot use reconstruct + GaugeField gaugeTemp(gParam); + + auto n = smear_param->adj_n_save; + + std::vector gauge_stages(n, gParamDummy); + gauge_stages[0] = *gaugeSmeared; + // Can also do below + // creates copies std::vector gauge_stages(n,*gaugeSmeared); + + GaugeField &gin = *gaugeSmeared; + GaugeField &gout = gauge_out; + + // helper gauge field for Laplace operator + GaugeField precise; + GaugeFieldParam gParam_helper(*gaugePrecise); + gParam_helper.create = QUDA_NULL_FIELD_CREATE; + precise = GaugeField(gParam_helper); + + // spinor fields + ColorSpinorParam cpuParam(h_in, *inv_param, gaugePrecise->X(), false, inv_param->input_location); + ColorSpinorField fin_h(cpuParam); + + ColorSpinorParam deviceParam(cpuParam, *inv_param, QUDA_CUDA_FIELD_LOCATION); + ColorSpinorField fin(deviceParam); + fin = fin_h; + + deviceParam.create = QUDA_NULL_FIELD_CREATE; + ColorSpinorField fout(deviceParam); + + ColorSpinorField f_temp0(deviceParam); + ColorSpinorField f_temp1(deviceParam); + ColorSpinorField f_temp2(deviceParam); + ColorSpinorField f_temp3(deviceParam); + ColorSpinorField f_temp4(deviceParam); + + // IMPORTANT initializing step: set [3] = input spinor + f_temp3 = fin; + + int n_b = ceil(pow(1. * smear_param->n_steps, 1. / (smear_param->adj_n_save + 1))); + logQuda(QUDA_SUMMARIZE, "Hierarchical block n_b: %d\n\n", n_b); + int ret_idx = 0; + int threshold = smear_param->hier_threshold; + std::vector hier_list; + // The first stage is saved at the very beginning, so its presence is implicit + hier_list = get_hier_list(smear_param->n_steps, n_b, smear_param->adj_n_save); + logQuda(QUDA_SUMMARIZE, "hier list size (number of gauge fields to save) is %d\n", (int)hier_list.size()); + if (threshold < hier_list.back()) { + threshold = hier_list.back(); + logQuda(QUDA_SUMMARIZE, "threshold changed to %d", threshold); + } else + logQuda(QUDA_SUMMARIZE, "threshold is %d\n", threshold); + + if (hier_list.empty()) errorQuda("hier_list is not populated\n"); + if (hier_list.size() != gauge_stages.size()) errorQuda("hier_list is not same size as gauge_stages\n"); + + for (unsigned int i = 0; i < hier_list.size() - 1; i++) { + + if (i == 0) { + logQuda(QUDA_VERBOSE, "we first set gin to the first index of the gauge_steps vector\n"); + gauge_stages[0] = gin; + } + if (i > 0) std::swap(gout, gin); + + for (unsigned int j = 0; j < (unsigned int)hier_list[i]; j++) { + if (j > 0) std::swap(gout, gin); + + WFlowStep(gout, gaugeTemp, gin, smear_param->epsilon, smear_param->smear_type, smear_param->smear_anisotropy, + smear_param->rk_order); + } + gauge_stages[i + 1] = gout; + } + + std::vector> sf_list; + sf_list = {f_temp0, f_temp1, f_temp2, f_temp3, f_temp4}; + std::vector> gf_list; + gf_list = {gauge_stages.back(), gaugeW1, gaugeW2, gaugeVT, gaugeTemp, precise}; + + // first one is global counter, second is meas counter + int i_glob = 0, measurement_n = 0; + std::vector> meas_cinf {i_glob, measurement_n}; + + int hier_loop_counter = 0; + while (ret_idx != -1) { + logQuda(QUDA_DEBUG_VERBOSE, "Hier loop count %d has begun \n", hier_loop_counter); + logQuda(QUDA_DEBUG_VERBOSE, "Starting a hierarchical loop log: \n"); + + adjSafeEvolve(sf_list, gf_list, smear_param, hier_list.back(), profileAdjGFlowHier, meas_cinf); + + logQuda(QUDA_DEBUG_VERBOSE, "Previous hier list elements: \n"); + for (int j = 0; j < (int)hier_list.size(); j++) { logQuda(QUDA_DEBUG_VERBOSE, "%d \n", (int)hier_list[j]); } + logQuda(QUDA_DEBUG_VERBOSE, "\n"); + + hier_list.pop_back(); + gauge_stages.pop_back(); + ret_idx = modify_hier_list(hier_list, n_b, smear_param->adj_n_save, threshold); + if (ret_idx == -1) { + logQuda(QUDA_VERBOSE, " now in final serial stage of hierarchial evolution \n"); + for (int i = gauge_stages.size() - 1; i >= 0; --i) { + // first load correct gauge field (for beginning of the loop, it is the final gauge list element) + + gf_list.at(0) = std::ref(gauge_stages[i]); + + adjSafeEvolve(sf_list, gf_list, smear_param, hier_list[i], profileAdjGFlowHier, meas_cinf); + + logQuda(QUDA_DEBUG_VERBOSE, " block number %d successfully deployed \n", i); + } + logQuda(QUDA_VERBOSE, "Hierarchial evolution completed \n"); + break; + } + + GaugeField g_2(gParamDummy); + GaugeField g_1 = gauge_stages[ret_idx]; + + logQuda(QUDA_DEBUG_VERBOSE, "Modified hier list elements: \n"); + for (int j = 0; j < (int)hier_list.size(); j++) { logQuda(QUDA_DEBUG_VERBOSE, "%d \n", (int)hier_list[j]); } + logQuda(QUDA_DEBUG_VERBOSE, "\n"); + + for (unsigned int j = 0; j < (unsigned int)hier_list[ret_idx]; j++) { + if (j > 0) std::swap(g_2, g_1); + WFlowStep(g_2, gaugeTemp, g_1, smear_param->epsilon, smear_param->smear_type, smear_param->smear_anisotropy, + smear_param->rk_order); + } + + gauge_stages.insert(gauge_stages.begin() + ret_idx + 1, g_2); + logQuda(QUDA_DEBUG_VERBOSE, "recycled gauge field placed *before* index %d\n\n", ret_idx + 1); + gf_list.at(0) = std::ref(gauge_stages.back()); + hier_loop_counter += 1; + } + + cpuParam.v = h_out; + cpuParam.location = inv_param->output_location; + ColorSpinorField fout_h(cpuParam); + fout_h = sf_list[0].get(); + logQuda(QUDA_DEBUG_VERBOSE, "Spinor written to cpu \n"); + popOutputPrefix(); +} + +/* save list of gauge vectors */ + int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const unsigned int Nsteps, const unsigned int verbose_interval, const double relax_boost, const double tolerance, const unsigned int reunit_interval, const unsigned int stopWtheta, QudaGaugeParam *param) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index a0ab337328..45e6b26cb2 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -199,6 +199,13 @@ target_link_libraries(su3_test ${TEST_LIBS}) quda_checkbuildtest(su3_test QUDA_BUILD_ALL_TESTS) install(TARGETS su3_test ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR}) +if(QUDA_DIRAC_LAPLACE) + add_executable(su3_fermion_test su3_fermion_test.cpp) + target_link_libraries(su3_fermion_test ${TEST_LIBS}) + quda_checkbuildtest(su3_fermion_test QUDA_BUILD_ALL_TESTS) + install(TARGETS su3_fermion_test ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR}) +endif() + add_executable(pack_test pack_test.cpp) target_link_libraries(pack_test ${TEST_LIBS}) quda_checkbuildtest(pack_test QUDA_BUILD_ALL_TESTS) @@ -1450,6 +1457,12 @@ if (QUDA_DIRAC_LAPLACE AND QUDA_DIRAC_WILSON) --dim 2 4 6 8 --enable-testing true --gtest_output=xml:laph_test.xml) + + add_test(NAME su3_fermion_test + COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} + --dim 2 4 6 8 + --enable-testing true + --gtest_output=xml:su3_fermion_test.xml) endif() diff --git a/tests/su3_fermion_test.cpp b/tests/su3_fermion_test.cpp new file mode 100644 index 0000000000..672735bd74 --- /dev/null +++ b/tests/su3_fermion_test.cpp @@ -0,0 +1,313 @@ +// In a typical application, quda.h is the only QUDA header required. +#include +#include +#include + +#include "host_utils.h" +#include "gauge_utils.h" +#include "command_line_params.h" +#include "dslash_reference.h" +#include "misc.h" +#include "test.h" + +using test_t = ::testing::tuple; + +class SU3FermionTest : public ::testing::TestWithParam +{ +protected: + QudaPrecision precision; + QudaGaugeSmearType smear_type; + +public: + SU3FermionTest() : precision(::testing::get<0>(GetParam())), smear_type(::testing::get<1>(GetParam())) { } +}; + +int argc_copy; +char **argv_copy; + +void run(test_t param) +{ + prec = ::testing::get<0>(param); + gauge_smear_type = ::testing::get<1>(param); + + using namespace quda; + + if (!is_enabled_spin(4)) errorQuda("Test requires Wilson-type fermion enablement"); + + QudaGaugeParam gauge_param = newQudaGaugeParam(); + if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec; + if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon; + + setWilsonGaugeParam(gauge_param); + gauge_param.t_boundary = QUDA_PERIODIC_T; + + void *gauge[4], *new_gauge[4]; + for (int dir = 0; dir < 4; dir++) { + gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size); + new_gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size); + } + + constructHostGaugeField(gauge, gauge_param, argc_copy, argv_copy); + // Load the gauge field to the device + loadGaugeQuda((void *)gauge, &gauge_param); + saveGaugeQuda(new_gauge, &gauge_param); + // start the timer + quda::host_timer_t host_timer, host_safe_timer, host_hier_timer, host_fwd_timer; + + // The commented out section is all geared towards gauge observables, so unlikely to be needed for now + // // Prepare various perf info + // long long flops_plaquette = 6ll * 597 * V; + // long long flops_ploop = 198ll * V + 6 * V / gauge_param.X[3]; + + // // Prepare a gauge observable struct + // QudaGaugeObservableParam param = newQudaGaugeObservableParam(); + + // The user may specify which measurements they wish to perform/omit + // using the QudaGaugeObservableParam struct, and whether or not to + // perform suN projection at each measurement step. We recommend that + // users perform suN projection. + // A unique observable param struct is constructed for each measurement. + + // Gauge Smearing Routines + //--------------------------------------------------------------------------- + // Stout smearing should be equivalent to APE smearing + // on D dimensional lattices for rho = alpha/2*(D-1). + // Typical values for + // APE: alpha=0.6 + // Stout: rho=0.1 + // Over Improved Stout: rho=0.08, epsilon=-0.25 + // + // Typically, the user will use smearing for Q charge data only, so + // we hardcode to compute Q only and not the plaquette. Users may + // of course set these as they wish. SU(N) projection su_project=true is recommended. + QudaGaugeObservableParam *obs_param = new QudaGaugeObservableParam[gauge_smear_steps / measurement_interval + 1]; + for (int i = 0; i < gauge_smear_steps / measurement_interval + 1; i++) { + obs_param[i] = newQudaGaugeObservableParam(); + obs_param[i].compute_plaquette = QUDA_BOOLEAN_FALSE; + obs_param[i].compute_qcharge = QUDA_BOOLEAN_TRUE; + obs_param[i].su_project = su_project ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; + } + + // We here set all the problem parameters for all possible smearing types. + QudaGaugeSmearParam smear_param = newQudaGaugeSmearParam(); + smear_param.smear_type = gauge_smear_type; + smear_param.n_steps = gauge_smear_steps; + smear_param.adj_n_save = gauge_n_save; + smear_param.hier_threshold = hier_threshold; + smear_param.meas_interval = measurement_interval; + smear_param.alpha = gauge_smear_alpha; + smear_param.rho = gauge_smear_rho; + smear_param.epsilon = gauge_smear_epsilon; + smear_param.alpha1 = gauge_smear_alpha1; + smear_param.alpha2 = gauge_smear_alpha2; + smear_param.alpha3 = gauge_smear_alpha3; + smear_param.dir_ignore = gauge_smear_dir_ignore; + + quda::ColorSpinorField check, check_safe, check_hier, check_fwd; + QudaInvertParam invParam = newQudaInvertParam(); + invParam.cpu_prec = QUDA_DOUBLE_PRECISION; + invParam.cuda_prec = cuda_prec; + invParam.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; + invParam.dirac_order = QUDA_DIRAC_ORDER; + invParam.verbosity = verbosity; + + quda::ColorSpinorParam cs_param; + + constructWilsonTestSpinorParam(&cs_param, &invParam, &gauge_param); + check = quda::ColorSpinorField(cs_param); + // Add noise to spinor + spinorNoise(check, 1234, QUDA_NOISE_GAUSS); + + check_safe = quda::ColorSpinorField(cs_param); + check_hier = quda::ColorSpinorField(cs_param); + check_fwd = quda::ColorSpinorField(cs_param); + + void *check_arr[] = {check.data()}; + void *check_fwdarr[] = {check_fwd.data()}; + + printf("Inspecting the very first element of the random fermion we will use:\n"); + check.PrintVector(0, 0, 0); + printf("Inspecting the very first element of the 3 un-evolved fermions (should be zero):\n"); + printf("Hierarchical method:\n"); + check_hier.PrintVector(0, 0, 0); + printf("Safe method:\n"); + check_safe.PrintVector(0, 0, 0); + printf("Forward method:\n"); + check_fwd.PrintVector(0, 0, 0); + + host_timer.start(); // start the timer + switch (smear_param.smear_type) { + case QUDA_GAUGE_SMEAR_APE: + case QUDA_GAUGE_SMEAR_STOUT: + case QUDA_GAUGE_SMEAR_OVRIMP_STOUT: + case QUDA_GAUGE_SMEAR_HYP: { + performGaugeSmearQuda(&smear_param, obs_param); + break; + } + + // Here we use a typical use case which is different from simple smearing in that + // the user will want to compute the plaquette values to compute the gauge energy. + case QUDA_GAUGE_SMEAR_WILSON_FLOW: + case QUDA_GAUGE_SMEAR_SYMANZIK_FLOW: { + for (int i = 0; i < gauge_smear_steps / measurement_interval + 1; i++) { + obs_param[i].compute_plaquette = QUDA_BOOLEAN_TRUE; + } + + // Perform two adjoint flow algorithms, these methods dont alter the final value for the gauge so we excecute them first + host_hier_timer.start(); + performAdjGFlowHier(check_hier.data(), check.data(), &invParam, &smear_param); + host_hier_timer.stop(); + host_safe_timer.start(); + performAdjGFlowSafe(check_safe.data(), check.data(), &invParam, &smear_param); + host_safe_timer.stop(); + // Perform forward flow algorithm + host_fwd_timer.start(); + performGFlowQuda(check_fwdarr, check_arr, &invParam, &smear_param, obs_param, 1); + host_fwd_timer.stop(); + + printfQuda("Time elapsed for adjoint hierarchical fermion/gauge smearing = %g secs\n", host_hier_timer.last()); + printfQuda("Time elapsed for adjoint safe fermion/gauge smearing = %g secs\n", host_safe_timer.last()); + printfQuda("Time elapsed for forward fermion/gauge smearing = %g secs\n", host_fwd_timer.last()); + + break; + } + default: errorQuda("Undefined gauge smear type %d given", smear_param.smear_type); + } + + host_timer.stop(); // stop the timer + + printfQuda("Total time for collective fermion/gauge smearing = %g secs\n", host_timer.last()); + printf("Now, inspecting the very first element of the 3 evolved fermions:\n"); + printf("Hierarchical method:\n"); + check_hier.PrintVector(0, 0, 0); + printf("Safe method:\n"); + check_safe.PrintVector(0, 0, 0); + printf("Forward method:\n"); + check_fwd.PrintVector(0, 0, 0); + + double method_adj_diff = 0.; + /* To access the ith complex entry in a raw vector, do, for example: check.data*>()[i]*/ + for (int i = 0; i < V * 24; i++) { + method_adj_diff += pow(fabs(check_safe.data()[i] - check_hier.data()[i]), 2); + } + double method_adj_check = sqrt(method_adj_diff) / (V * 24.); + printf( + "Mean of mag errors between Safe and Hierarchical Adj methods (should be zero up to machine precision) = %1.5e\n", + method_adj_check); + + std::complex trace_fwd, trace_adj; + trace_fwd = twoColorSpinorContract(check.data *>(), check_fwd.data *>()); + trace_adj = twoColorSpinorContract(check.data *>(), check_safe.data *>()); + + auto trace_diff_err = 2. * std::fabs(trace_fwd - std::conj(trace_adj)) / std::fabs(trace_fwd + std::conj(trace_adj)); + + printf("The two numbers below should be complex conjugates of one another\n"); + printf(" is %1.5e, %1.5e \n", trace_adj.real(), trace_adj.imag()); + printf(" is %1.5e, %1.5e \n", trace_fwd.real(), trace_fwd.imag()); + printf("Fractional error of ( - .conj()) = %1.5e \n", trace_diff_err); + + auto eps = getTolerance(prec); + + printfQuda("Checking adjoint safe/hier match\n"); + EXPECT_LE(method_adj_check, gauge_smear_steps * gauge_smear_steps * eps); + if (method_adj_check > gauge_smear_steps * gauge_smear_steps * eps) + warningQuda("Adjoint safe/hier difference %e greater than tolerance %e\n", method_adj_check, + gauge_smear_steps * gauge_smear_steps * eps); + + printfQuda("Checking fractional error\n"); + EXPECT_LE(trace_diff_err, gauge_smear_steps * gauge_smear_steps * eps); + if (trace_diff_err > gauge_smear_steps * gauge_smear_steps * eps) + warningQuda("Fractional error %e greater than tolerance %e\n", trace_diff_err, + gauge_smear_steps * gauge_smear_steps * eps); + + if (verify_results) check_gauge(gauge, new_gauge, 1e-3, gauge_param.cpu_prec); + + for (int dir = 0; dir < 4; dir++) { + host_free(gauge[dir]); + host_free(new_gauge[dir]); + } + + check = {}; + check_hier = {}; + check_safe = {}; + check_fwd = {}; + + freeGaugeQuda(); +} + +TEST_P(SU3FermionTest, verify) +{ + if (!quda::is_enabled(precision)) GTEST_SKIP(); + run(GetParam()); +} + +struct su3_fermion_test : quda_test { + + void display_info() const override + { + quda_test::display_info(); + printfQuda("prec sloppy_prec link_recon sloppy_link_recon S_dimension T_dimension\n"); + printfQuda("%s %s %s %s %d/%d/%d %d\n", get_prec_str(prec), + get_prec_str(prec_sloppy), get_recon_str(link_recon), get_recon_str(link_recon_sloppy), xdim, ydim, zdim, + tdim); + + // Specific test + printfQuda("\n%s smearing\n", get_gauge_smear_str(gauge_smear_type)); + switch (gauge_smear_type) { + case QUDA_GAUGE_SMEAR_APE: printfQuda(" - alpha %f\n", gauge_smear_alpha); break; + case QUDA_GAUGE_SMEAR_STOUT: printfQuda(" - rho %f\n", gauge_smear_rho); break; + case QUDA_GAUGE_SMEAR_OVRIMP_STOUT: + printfQuda(" - rho %f\n", gauge_smear_rho); + printfQuda(" - epsilon %f\n", gauge_smear_epsilon); + break; + case QUDA_GAUGE_SMEAR_HYP: + printfQuda(" - alpha1 %f\n", gauge_smear_alpha1); + printfQuda(" - alpha2 %f\n", gauge_smear_alpha2); + printfQuda(" - alpha3 %f\n", gauge_smear_alpha3); + break; + case QUDA_GAUGE_SMEAR_WILSON_FLOW: + case QUDA_GAUGE_SMEAR_SYMANZIK_FLOW: printfQuda(" - epsilon %f\n", gauge_smear_epsilon); break; + default: errorQuda("Undefined test type %d given", test_type); + } + printfQuda(" - smearing steps %d\n", gauge_smear_steps); + printfQuda(" - smearing ignore direction %d\n", gauge_smear_dir_ignore); + printfQuda(" - Measurement interval %d\n", measurement_interval); + } + + void add_command_line_group(std::shared_ptr app) const override + { + quda_test::add_command_line_group(app); + add_su3_option_group(app); + } + + su3_fermion_test(int argc, char **argv) : quda_test("SU3 Fermion Test", argc, argv) { } +}; + +auto test_str = [](testing::TestParamInfo param) { + return std::string(get_prec_str(::testing::get<0>(param.param))) + "_" + + get_gauge_smear_str(::testing::get<1>(param.param)); +}; + +INSTANTIATE_TEST_SUITE_P(Flow, SU3FermionTest, + ::testing::Combine(::testing::Values(QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION), + ::testing::Values(QUDA_GAUGE_SMEAR_WILSON_FLOW, + QUDA_GAUGE_SMEAR_SYMANZIK_FLOW)), + test_str); + +int main(int argc, char **argv) +{ + argc_copy = argc; + argv_copy = argv; + + su3_fermion_test test(argc, argv); + test.init(); + if (enable_testing) { + return test.execute(); + } else { + if (gauge_smear_type != QUDA_GAUGE_SMEAR_WILSON_FLOW && gauge_smear_type != QUDA_GAUGE_SMEAR_SYMANZIK_FLOW) { + warningQuda("Smear type %s not supported, setting to Wilson Flow", get_gauge_smear_str(gauge_smear_type)); + gauge_smear_type = QUDA_GAUGE_SMEAR_WILSON_FLOW; + } + run(test_t {prec, gauge_smear_type}); + }; +} diff --git a/tests/utils/command_line_params.cpp b/tests/utils/command_line_params.cpp index 7e0146d8fc..ff2de0184e 100644 --- a/tests/utils/command_line_params.cpp +++ b/tests/utils/command_line_params.cpp @@ -313,6 +313,8 @@ double gauge_smear_alpha1 = 0.75; double gauge_smear_alpha2 = 0.6; double gauge_smear_alpha3 = 0.3; int gauge_smear_steps = 5; +int gauge_n_save = 3; +int hier_threshold = 6; int gauge_smear_dir_ignore = -1; int measurement_interval = 5; bool su_project = true; @@ -1158,6 +1160,11 @@ void add_su3_option_group(std::shared_ptr quda_app) opgroup->add_option("--su3-smear-steps", gauge_smear_steps, "The number of smearing steps to perform (default 10)"); + opgroup->add_option("--su3-adj-gauge-nsave", gauge_n_save, + "The number of gauge steps to save for hierarchical adj grad flow"); + + opgroup->add_option("--su3-hier-threshold", hier_threshold, "Minimum threshold for hierarchical adj grad flow"); + opgroup->add_option("--su3-measurement-interval", measurement_interval, "Measure the field energy and/or topological charge every Nth step (default 5) "); diff --git a/tests/utils/command_line_params.h b/tests/utils/command_line_params.h index f0739a2ea6..8641ee0a0d 100644 --- a/tests/utils/command_line_params.h +++ b/tests/utils/command_line_params.h @@ -561,6 +561,8 @@ extern double gauge_smear_alpha1; extern double gauge_smear_alpha2; extern double gauge_smear_alpha3; extern int gauge_smear_steps; +extern int gauge_n_save; +extern int hier_threshold; extern int gauge_smear_dir_ignore; extern int measurement_interval; extern QudaGaugeSmearType gauge_smear_type; diff --git a/tests/utils/host_utils.cpp b/tests/utils/host_utils.cpp index bd953f9ddb..2a31748a7a 100644 --- a/tests/utils/host_utils.cpp +++ b/tests/utils/host_utils.cpp @@ -964,6 +964,46 @@ void check_gauge(void **oldG, void **newG, double epsilon, QudaPrecision precisi checkGauge((float **)oldG, (float **)newG, epsilon); } +std::complex twoColorSpinorContract(std::complex *spinor1, std::complex *spinor2) +{ + int col_inc = 3; + + std::vector col_st {0, 1, 2}; + std::vector row_st {0, 3, 6}; + + std::vector> test_contract(9 * V); + complex trace = {0., 0.}; + double trace_re, trace_im; + for (int i = 0; i < V; i++) { + + for (int ii = 0; ii < 9; ii++) { + int which_col_idx = (ii % 3), which_row_idx = (ii - (ii % 3)) / 3; + + std::complex dot = {0., 0.}; + + for (int i_s = 0; i_s < 4; i_s++) { + + int s_row_idx = i * 12 + col_st[which_row_idx] + i_s * col_inc; + int s_col_idx = i * 12 + col_st[which_col_idx] + i_s * col_inc; + + auto m1 = std::conj(spinor1[s_row_idx]); + auto m2 = spinor2[s_col_idx]; + + dot += m1 * m2; + } + test_contract[i * 9 + ii] = dot; + } + trace += (test_contract[i * 9] + test_contract[i * 9 + 4] + test_contract[i * 9 + 8]); + } + trace_re = trace.real(); + trace_im = trace.imag(); + quda::comm_allreduce_sum(trace_re); + quda::comm_allreduce_sum(trace_im); + + std::complex trace_fin = {trace_re, trace_im}; + return trace_fin; +} + void createSiteLinkCPU(void *const *gauge, QudaPrecision precision, SiteLinkType phase) { if (phase == SiteLinkType::SITELINK_PHASE_NO) { diff --git a/tests/utils/host_utils.h b/tests/utils/host_utils.h index 53f261a612..cc1e71751e 100644 --- a/tests/utils/host_utils.h +++ b/tests/utils/host_utils.h @@ -137,6 +137,7 @@ void constructPointSpinorSource(void *v, QudaPrecision precision, const int *con void constructWallSpinorSource(void *v, int nSpin, int nColor, QudaPrecision precision, const int dil); void constructRandomSpinorSource(void *v, int nSpin, int nColor, QudaPrecision precision, QudaSolutionType sol_type, const int *const x, int nDim, quda::RNG &rng); +std::complex twoColorSpinorContract(std::complex *spinor1, std::complex *spinor2); //------------------------------------------------------ // Helper functions diff --git a/tests/utils/misc.cpp b/tests/utils/misc.cpp index b3041e85f5..3bcc32e22a 100644 --- a/tests/utils/misc.cpp +++ b/tests/utils/misc.cpp @@ -240,10 +240,10 @@ const char *get_gauge_smear_str(QudaGaugeSmearType type) switch (type) { case QUDA_GAUGE_SMEAR_APE: ret = "APE"; break; case QUDA_GAUGE_SMEAR_STOUT: ret = "Stout"; break; - case QUDA_GAUGE_SMEAR_OVRIMP_STOUT: ret = "Over Improved"; break; + case QUDA_GAUGE_SMEAR_OVRIMP_STOUT: ret = "Over_Improved"; break; case QUDA_GAUGE_SMEAR_HYP: ret = "HYP"; break; - case QUDA_GAUGE_SMEAR_WILSON_FLOW: ret = "Wilson Flow"; break; - case QUDA_GAUGE_SMEAR_SYMANZIK_FLOW: ret = "Symanzik Flow"; break; + case QUDA_GAUGE_SMEAR_WILSON_FLOW: ret = "Wilson_Flow"; break; + case QUDA_GAUGE_SMEAR_SYMANZIK_FLOW: ret = "Symanzik_Flow"; break; default: ret = "unknown"; break; }