Merge branch 'develop' into feature/sycl

lattice · Nov 8, 2024 · c573936 · c573936
2 parents 3b1154e + 3414317
commit c573936
Show file tree

Hide file tree

Showing 7 changed files with 35 additions and 18 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -421,6 +421,7 @@ if(QUDA_DOWNLOAD_EIGEN)
     NAME Eigen
     VERSION ${QUDA_EIGEN_VERSION}
     URL https://gitlab.com/libeigen/eigen/-/archive/${QUDA_EIGEN_VERSION}/eigen-${QUDA_EIGEN_VERSION}.tar.bz2
+    URL_HASH SHA256=B4C198460EBA6F28D34894E3A5710998818515104D6E74E5CC331CE31E46E626
     DOWNLOAD_ONLY YES
     SYSTEM YES)
   target_include_directories(Eigen SYSTEM INTERFACE ${Eigen_SOURCE_DIR})

diff --git a/include/color_spinor_field_order.h b/include/color_spinor_field_order.h
@@ -1824,8 +1824,8 @@ namespace quda
       {
         for (int s = 0; s < Ns; s++) {
           for (int c = 0; c < Nc; c++) {
-            v[s * Nc + c] = complex(field[(((0 * Nc + c) * Ns + s) * 2 + (1 - parity)) * volumeCB + x],
-                                    field[(((1 * Nc + c) * Ns + s) * 2 + (1 - parity)) * volumeCB + x]);
+            v[s * Nc + c] = complex(field[(((0 * Nc + c) * Ns + s) * 2 + parity) * volumeCB + x],
+                                    field[(((1 * Nc + c) * Ns + s) * 2 + parity) * volumeCB + x]);
           }
         }
       }
@@ -1834,8 +1834,8 @@ namespace quda
       {
         for (int s = 0; s < Ns; s++) {
           for (int c = 0; c < Nc; c++) {
-            field[(((0 * Nc + c) * Ns + s) * 2 + (1 - parity)) * volumeCB + x] = v[s * Nc + c].real();
-            field[(((1 * Nc + c) * Ns + s) * 2 + (1 - parity)) * volumeCB + x] = v[s * Nc + c].imag();
+            field[(((0 * Nc + c) * Ns + s) * 2 + parity) * volumeCB + x] = v[s * Nc + c].real();
+            field[(((1 * Nc + c) * Ns + s) * 2 + parity) * volumeCB + x] = v[s * Nc + c].imag();
           }
         }
       }

diff --git a/include/kernels/copy_color_spinor.cuh b/include/kernels/copy_color_spinor.cuh
@@ -16,6 +16,30 @@ namespace quda
 
   using namespace colorspinor;
 
+  /**
+   * @brief A helper function to figure out what parity to use for an input or output field.
+   * @details Pick the parity from the field's site order. Additionally, QDPJIT fields
+   *  may need a relative parity flip compared to what is expected when dealing with
+   *  only the odd parity, since the pointer is always to the top of the full spinor.
+   * @param[in] f Reference to the field for parity computation
+   * @return the computed parity (0 or 1)
+   */
+  inline int computeParity(const ColorSpinorField &f)
+  {
+
+    // Account for odd-even vs. even-odd site orders
+    int ret_val = f.SiteOrder() == QUDA_ODD_EVEN_SITE_ORDER ? 1 : 0;
+
+    // Account for a potential parity flip to access single-parity subset QDP-JIT fields.
+    // The flip is only needed for offsetting into odd-parity fields.
+    if (f.FieldOrder() == QUDA_QDPJIT_FIELD_ORDER && f.SiteSubset() == QUDA_PARITY_SITE_SUBSET
+        && f.SuggestedParity() == QUDA_ODD_PARITY) {
+      ret_val = 1 - ret_val;
+    }
+
+    return ret_val;
+  }
+
   template <typename FloatOut, typename FloatIn, int nSpin_, int nColor_, typename Out, typename In,
             template <int, int> class Basis_>
   struct CopyColorSpinorArg : kernel_param<> {
@@ -32,8 +56,8 @@ namespace quda
       kernel_param(dim3(in.VolumeCB(), in.SiteSubset(), 1)),
       out(out, 1, Out_),
       in(in, 1, const_cast<FloatIn *>(In_)),
-      outParity(out.SiteOrder() == QUDA_ODD_EVEN_SITE_ORDER ? 1 : 0),
-      inParity(in.SiteOrder() == QUDA_ODD_EVEN_SITE_ORDER ? 1 : 0)
+      outParity(computeParity(out)),
+      inParity(computeParity(in))
     {
     }
   };

diff --git a/lib/copy_color_spinor.cuh b/lib/copy_color_spinor.cuh
@@ -181,11 +181,6 @@ namespace quda
       errorQuda("Copying to full fields with lexicographical ordering is not currently supported");
     }
 
-    if (dst.SiteSubset() == QUDA_FULL_SITE_SUBSET
-        && (src.FieldOrder() == QUDA_QDPJIT_FIELD_ORDER || dst.FieldOrder() == QUDA_QDPJIT_FIELD_ORDER)) {
-      errorQuda("QDPJIT field ordering not supported for full site fields");
-    }
-
     genericCopyColorSpinor<Ns, Nc>(param);
   }
 

diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp
@@ -4256,7 +4256,6 @@ void computeHISQForceQuda(void* const milc_momentum,
 
   using namespace quda;
   using namespace quda::fermion_force;
-  if (gParam->gauge_order != QUDA_MILC_GAUGE_ORDER) errorQuda("Unsupported input field order %d", gParam->gauge_order);
 
   {
     // default settings for the unitarization
@@ -4399,7 +4398,6 @@ void computeHISQForceQuda(void* const milc_momentum,
   GaugeFieldParam param(*gParam);
   param.location = QUDA_CPU_FIELD_LOCATION;
   param.create = QUDA_REFERENCE_FIELD_CREATE;
-  param.order = QUDA_MILC_GAUGE_ORDER;
   param.link_type = QUDA_ASQTAD_MOM_LINKS;
   param.reconstruct = QUDA_RECONSTRUCT_10;
   param.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
@@ -4421,7 +4419,6 @@ void computeHISQForceQuda(void* const milc_momentum,
   GaugeFieldParam wParam(gParam_field);
   wParam.location = QUDA_CPU_FIELD_LOCATION;
   wParam.create = QUDA_REFERENCE_FIELD_CREATE;
-  wParam.order = QUDA_MILC_GAUGE_ORDER;
   wParam.link_type = QUDA_GENERAL_LINKS;
   wParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
   wParam.gauge = (void *)w_link;
@@ -5590,7 +5587,7 @@ void gaugeObservablesQuda(QudaGaugeObservableParam *param)
   auto profile = pushProfile(profileGaugeObs);
   checkGaugeObservableParam(param);
 
-  if (!gaugePrecise) errorQuda("Cannot compute Polyakov loop as there is no resident gauge field");
+  if (!gaugePrecise) errorQuda("Cannot compute gauge observables as there is no resident gauge field");
 
   GaugeField *gauge = nullptr;
   if (!gaugeSmeared) {

diff --git a/lib/multigrid.cpp b/lib/multigrid.cpp
@@ -966,7 +966,7 @@ namespace quda
       }
 
       transfer->R(x_coarse[0], tmp2);
-      static_cast<DiracCoarse *>(diracCoarseResidual)->M(r_coarse, tmp_coarse);
+      static_cast<DiracCoarse *>(diracCoarseResidual)->M(r_coarse[0], tmp_coarse);
 
 #if 0 // enable to print out emulated and actual coarse-grid operator vectors for debugging
       setOutputPrefix("");

diff --git a/lib/solve.cpp b/lib/solve.cpp
@@ -146,7 +146,7 @@ namespace quda
     // rescale the source and solution vectors to help prevent the onset of underflow
     if (param.solver_normalization == QUDA_SOURCE_NORMALIZATION) {
       auto nb_inv(nb);
-      for (auto bi : nb_inv) bi = 1 / sqrt(bi);
+      for (auto &bi : nb_inv) bi = 1 / sqrt(bi);
       blas::ax(nb_inv, b);
       blas::ax(nb_inv, x);
     }
@@ -299,7 +299,7 @@ namespace quda
 
     if (param.solver_normalization == QUDA_SOURCE_NORMALIZATION) {
       // rescale the solution
-      for (auto bi : nb) bi = sqrt(bi);
+      for (auto &bi : nb) bi = sqrt(bi);
       blas::ax(nb, x);
     }