lattice · maddyscientist · Oct 10, 2024 · Jun 26, 2024 · Jul 9, 2024 · Jul 9, 2024
@@ -635,6 +635,12 @@ typedef enum QudaExtLibType_s {
   QUDA_EXTLIB_INVALID = QUDA_INVALID_ENUM
 } QudaExtLibType;
 
+typedef enum QudaWFlowStepType_s {
+  WFLOW_STEP_W1,
+  WFLOW_STEP_W2,
+  WFLOW_STEP_VT,
+} QudaWFlowStepType;
+
 #ifdef __cplusplus
 }
 #endif

@@ -145,6 +145,21 @@ namespace quda
   */
   void WFlowStep(GaugeField &out, GaugeField &temp, GaugeField &in, double epsilon, QudaGaugeSmearType smear_type);
 
+  /**
+     @brief Apply intermediary Wilson Flow steps W1, W2 or Vt to the gauge field.
+     This routine assumes that the input and output fields are
+     extended, with the input field being exchanged prior to calling
+     this function.  On exit from this routine, the output field will
+     have been exchanged.
+     @param[out] dataDs Output smeared field
+     @param[in] dataTemp Temp space
+     @param[in] dataOr Input gauge field
+     @param[in] epsilon Step size
+     @param[in] smear_type Wilson (1x1) or Symanzik improved (2x1) staples, else error
+     @param[in] step_type Which intermediary Wilson Flow step (W1, W2 or Vt) to perform
+  */
+  void GFlowStep(GaugeField &out, GaugeField &temp, GaugeField &in, double epsilon, QudaGaugeSmearType smear_type, QudaWFlowStepType step_type);
+
   /**
    * @brief Gauge fixing with overrelaxation with support for single and multi GPU.
    * @param[in,out] data, quda gauge field

@@ -9,24 +9,18 @@
 namespace quda
 {
 
-  enum WFlowStepType {
-    WFLOW_STEP_W1,
-    WFLOW_STEP_W2,
-    WFLOW_STEP_VT,
-  };
-
-  template <typename Float, int nColor_, QudaReconstructType recon_, int wflow_dim_, QudaGaugeSmearType wflow_type_,
-            WFlowStepType step_type_>
+  template <typename Float, int nColor_, QudaReconstructType recon_, int wflow_dim_,
+            QudaGaugeSmearType wflow_type_, QudaWFlowStepType step_type_>
   struct GaugeWFlowArg : kernel_param<> {
     using real = typename mapper<Float>::type;
     static constexpr int nColor = nColor_;
     static_assert(nColor == 3, "Only nColor=3 enabled at this time");
     static constexpr QudaReconstructType recon = recon_;
     static constexpr int wflow_dim = wflow_dim_;
     static constexpr QudaGaugeSmearType wflow_type = wflow_type_;
-    static constexpr WFlowStepType step_type = step_type_;
-    typedef typename gauge_mapper<Float, recon>::type Gauge;
-    typedef typename gauge_mapper<Float, QUDA_RECONSTRUCT_NO>::type Matrix; // temp field not on the manifold
+    static constexpr QudaWFlowStepType step_type = step_type_;
+    typedef typename gauge_mapper<Float,recon>::type Gauge;
+    typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_NO>::type Matrix; // temp field not on the manifold
 
     Gauge out;
     Matrix temp;

@@ -1687,6 +1687,16 @@ extern "C" {
    */
   void performWFlowQuda(QudaGaugeSmearParam *smear_param, QudaGaugeObservableParam *obs_param);
 
+  /**
+   * Performs Gradient Flow (gauge + fermion) on gaugePrecise and stores it in gaugeSmeared
+   * @param[out] h_out Output fermion field
+   * @param[in] h_in Input fermion field
+   * @param[in] smear_param Parameter struct that defines the computation parameters
+   * @param[in,out] obs_param Parameter struct that defines which
+   * observables we are making and the resulting observables.
+   */
+  void performGFlowQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param, QudaGaugeObservableParam *obs_param);
+
   /**
    * @brief Calculates a variety of gauge-field observables.  If a
    * smeared gauge field is presently loaded (in gaugeSmeared) the

@@ -15,7 +15,7 @@ namespace quda {
     const GaugeField &in;
     const real epsilon;
     const QudaGaugeSmearType wflow_type;
-    const WFlowStepType step_type;
+    const QudaWFlowStepType step_type;
 
     unsigned int minThreads() const { return in.LocalVolumeCB(); }
     unsigned int maxSharedBytesPerBlock() const {
@@ -32,7 +32,7 @@ namespace quda {
     }
 
   public:
-    GaugeWFlowStep(GaugeField &out, GaugeField &temp, const GaugeField &in, const double epsilon, const QudaGaugeSmearType wflow_type, const WFlowStepType step_type) :
+    GaugeWFlowStep(GaugeField &out, GaugeField &temp, const GaugeField &in, const double epsilon, const QudaGaugeSmearType wflow_type, const QudaWFlowStepType step_type) :
       TunableKernel3D(in, 2, wflow_dim),
       out(out),
       temp(temp),
@@ -59,7 +59,7 @@ namespace quda {
       getProfile().TPSTOP(QUDA_PROFILE_COMPUTE);
     }
 
-    template <QudaGaugeSmearType wflow_type, WFlowStepType step_type> using Arg =
+    template <QudaGaugeSmearType wflow_type, QudaWFlowStepType step_type> using Arg =
       GaugeWFlowArg<Float, nColor, recon, wflow_dim, wflow_type, step_type>;
 
     void apply(const qudaStream_t &stream)
@@ -151,4 +151,17 @@ namespace quda {
     out.exchangeExtendedGhost(out.R(), false);
   }
 
+  void GFlowStep(GaugeField &out, GaugeField &temp, GaugeField &in, const double epsilon, const QudaGaugeSmearType smear_type, const QudaWFlowStepType step_type)
+  {
+    checkPrecision(out, temp, in);
+    checkReconstruct(out, in);
+    checkNative(out, in);
+    if (temp.Reconstruct() != QUDA_RECONSTRUCT_NO) errorQuda("Temporary vector must not use reconstruct");
+    if (!(smear_type == QUDA_GAUGE_SMEAR_WILSON_FLOW || smear_type == QUDA_GAUGE_SMEAR_SYMANZIK_FLOW))
+      errorQuda("Gauge smear type %d not supported for flow kernels", smear_type);
+
+    instantiate<GaugeWFlowStep>(out, temp, in, epsilon, smear_type, step_type);
+    out.exchangeExtendedGhost(out.R(), false);
+  }
+
 }
@@ -194,6 +194,9 @@ static TimeProfile profileGaugeSmear("gaugeSmearQuda");
 //!< Profiler for wFlowQuda
 static TimeProfile profileWFlow("wFlowQuda");
 
+//!< Profiler for gFlowQuda
+static TimeProfile profileGFlow("gFlowQuda");
+
 //!< Profiler for projectSU3Quda
 static TimeProfile profileProject("projectSU3Quda");
 
@@ -1407,6 +1410,7 @@ void endQuda(void)
     profileGaussianSmear.Print();
     profileGaugeSmear.Print();
     profileWFlow.Print();
+    profileGFlow.Print();
     profileProject.Print();
     profilePhase.Print();
     profileMomAction.Print();
@@ -5574,6 +5578,167 @@ void performWFlowQuda(QudaGaugeSmearParam *smear_param, QudaGaugeObservableParam
   popOutputPrefix();
 }
 
+// perform forward gradient flow on gauge and spinor field following the algorithm in arXiv:1302.5246 (Appendix D)
+// the gauge flow steps are identical to Wilson Flow algorithm in arXiv:1006.4518 (Vt <-> W3)
+void performGFlowQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param, QudaGaugeObservableParam *obs_param)
+{
+
+  auto profile = pushProfile(profileGFlow);
+  pushOutputPrefix("performGFlowQuda: ");
+  checkGaugeSmearParam(smear_param);
+
+  pushVerbosity(inv_param->verbosity);
+  if (getVerbosity() >= QUDA_DEBUG_VERBOSE) printQudaInvertParam(inv_param);
+
+  if (smear_param->restart) {
+    if (gaugeSmeared == nullptr) errorQuda("gaugeSmeared must be loaded");
+  } else {
+    if (gaugePrecise == nullptr) errorQuda("Gauge field must be loaded");
+    freeUniqueGaugeQuda(QUDA_SMEARED_LINKS);
+    gaugeSmeared = createExtendedGauge(*gaugePrecise, R, profileGFlow);
+  }
+
+  GaugeFieldParam gParamEx(*gaugeSmeared);
+  GaugeField gaugeAux(gParamEx);
+
+  GaugeFieldParam gParam(*gaugePrecise);
+  gParam.reconstruct = QUDA_RECONSTRUCT_NO; // temporary field is not on manifold so cannot use reconstruct
+  GaugeField gaugeTemp(gParam);
+
+  GaugeField &gin = *gaugeSmeared;
+  GaugeField &gout = gaugeAux;
+
+  // helper gauge field for Laplace operator
+  GaugeField *precise = nullptr;
+  GaugeFieldParam gParam_helper(*gaugePrecise);
+  gParam_helper.create = QUDA_NULL_FIELD_CREATE;
+  precise = new GaugeField(gParam_helper);
+
+  // spinor fields
+  ColorSpinorParam cpuParam(h_in, *inv_param, gaugePrecise->X(), false, inv_param->input_location);
+  ColorSpinorField fin_h(cpuParam);
+
+  ColorSpinorParam cudaParam(cpuParam, *inv_param, QUDA_CUDA_FIELD_LOCATION);
+  ColorSpinorField fin(cudaParam);
+  fin = fin_h;
+
+  cudaParam.create = QUDA_NULL_FIELD_CREATE;
+  ColorSpinorField fout(cudaParam);
+
+  int parity = 0;
+
+  // initialize a and b for Laplace operator
+  double a =  1.;
+  double b = -8.;
+
+  int comm_dim[4] = { };
+  // only switch on comms needed for directions with a derivative
+  for (int i = 0; i < 4; i++) {
+    comm_dim[i] = comm_dim_partitioned(i);
+  }
+
+  // auxilliary fermion fields [0], [1], [2] and [3]
+  ColorSpinorField f_temp0 ( cudaParam );
+  ColorSpinorField f_temp1 ( cudaParam );
+  ColorSpinorField f_temp2 ( cudaParam );
+  ColorSpinorField f_temp3 ( cudaParam );
+  ColorSpinorField f_temp4 ( cudaParam );
+
+  // set [3] = input spinor
+  f_temp3 = fin;
+
+  int measurement_n = 0; // The nth measurement to take
+
+  gaugeObservables(gin, obs_param[measurement_n]);
+
+  logQuda(QUDA_SUMMARIZE, "flow t, plaquette, norm(f_spinor)\n");
+  logQuda(QUDA_SUMMARIZE, "%le %.16e %+.16e\n", smear_param->t0, obs_param[0].plaquette[0], blas::norm2(fin));
+
+  // loop, iterations of gf
+  for (unsigned int i = 0; i < smear_param->n_steps; i++) {
+
+    if ( i > 0 ) std::swap ( gin, gout ); // output from prior step becomes input for next step
+
+    // init auxilliary fields [0], [1] and [2] as [3]
+    f_temp0 = f_temp3;
+    f_temp1 = f_temp3;
+    f_temp2 = f_temp3;
+
+    // STEP 1
+    // [4] = Laplace [0]
+    copyExtendedGauge(*precise, gin, QUDA_CUDA_FIELD_LOCATION);
+    precise->exchangeGhost();
+    ApplyLaplace ( f_temp4, f_temp0, *precise, 4, a, b, f_temp0, parity, false, comm_dim, profileGFlow );
+
+    // [0] = [4] = Laplace [0] = Laplace [3]
+    f_temp0 = f_temp4;
+
+    // [1] <- epsilon/4 x [0] + [1] = [3] + epsilon /4 x Laplace [3]
+    blas::axpy( smear_param->epsilon/4., f_temp0, f_temp1);
+
+    // apply step W1 of gauge field flow part
+    GFlowStep ( gout, gaugeTemp, gin, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W1);
+
+    // STEP 2
+    // [3] <- [1]
+    f_temp3 = f_temp1;
+
+    // [4] <- Laplace [1]
+    copyExtendedGauge(*precise, gout, QUDA_CUDA_FIELD_LOCATION);
+    precise->exchangeGhost();
+    ApplyLaplace ( f_temp4, f_temp1, *precise, 4, a, b, f_temp1, parity, false, comm_dim, profileGFlow );
+
+    // [1] <- [4]
+    f_temp1 =  f_temp4;
+
+    // [2] <- 8/9 x epsilon x [1] + [2]
+    blas::axpy (  smear_param->epsilon*8./9., f_temp1, f_temp2 );
+
+    // [2] <- -2/9 x epsilon x [0] + [2]
+    blas::axpy ( -smear_param->epsilon*2./9., f_temp0, f_temp2 );
+
+    // apply step W2 of gauge field flow part
+    GFlowStep ( gin, gaugeTemp, gout, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W2);
+
+    // STEP 3
+    // [4] <- Laplace [2]
+    copyExtendedGauge(*precise, gin, QUDA_CUDA_FIELD_LOCATION);
+    precise->exchangeGhost();
+    ApplyLaplace ( f_temp4, f_temp2, *precise, 4, a, b, f_temp2, parity, false, comm_dim, profileGFlow );
+
+    // [2] <- [4] = Laplace [2]
+    f_temp2 = f_temp4;
+
+    // [3] <- 3/4 x epsilon x [2] + [3]
+    blas::axpy ( smear_param->epsilon*3./4., f_temp2, f_temp3 );
+
+    // set output spinor = [3]
+    fout = f_temp3;
+
+    // apply step W3 (Vt) of gauge field flow part
+    GFlowStep ( gout, gaugeTemp, gin, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_VT);
+
+    if ((i + 1) % smear_param->meas_interval == 0) {
+      measurement_n++; // increment measurements.
+      gaugeObservables(gout, obs_param[measurement_n]);
+      logQuda(QUDA_SUMMARIZE, "%le %.16e %+.16e\n", (smear_param->t0 + smear_param->epsilon * (i + 1)),
+              obs_param[measurement_n].plaquette[0], blas::norm2(fout));
+    }
+  }  /* end of one iteration of GF application */
+
+  // copy gout to gaugeSmeared so that flowed gauge can be saved to host and WFlow can be restarted 
+  copyExtendedGauge(*gaugeSmeared, gout, QUDA_CUDA_FIELD_LOCATION);
+  gaugeSmeared->exchangeExtendedGhost( gaugeSmeared->R() );
+
+  cpuParam.v = h_out;
+  cpuParam.location = inv_param->output_location;
+  ColorSpinorField fout_h(cpuParam);
+  fout_h = fout;
+
+  popOutputPrefix();
+
+}  /* end of performGFlowQuda */
+
 int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const unsigned int Nsteps,
                               const unsigned int verbose_interval, const double relax_boost, const double tolerance,
                               const unsigned int reunit_interval, const unsigned int stopWtheta, QudaGaugeParam *param)