From 288a3052a862b09493eda06a6d9c752f0b0147b0 Mon Sep 17 00:00:00 2001 From: Marcogarofalo Date: Tue, 21 Nov 2023 11:58:43 +0100 Subject: [PATCH] detratio derivative with quda --- monomial/cloverdet_monomial.c | 2 +- monomial/cloverdetratio_monomial.c | 98 +++++++++++++++++++++--------- quda_interface.c | 14 ++++- quda_interface.h | 3 +- 4 files changed, 83 insertions(+), 34 deletions(-) diff --git a/monomial/cloverdet_monomial.c b/monomial/cloverdet_monomial.c index b64f49319..e25ca7cd5 100644 --- a/monomial/cloverdet_monomial.c +++ b/monomial/cloverdet_monomial.c @@ -123,7 +123,7 @@ void cloverdet_derivative(const int id, hamiltonian_field_t * const hf) { memcpy(debug_derivative[0], hf->derivative[0], 4*VOLUME*sizeof(su3adj)); } - compute_cloverdet_derivative_quda(mnl, hf, mnl->w_fields[1]); + compute_cloverdet_derivative_quda(mnl, hf, mnl->w_fields[1], NULL, 0); if (g_debug_level > 3){ su3adj **given = hf->derivative; diff --git a/monomial/cloverdetratio_monomial.c b/monomial/cloverdetratio_monomial.c index 4dea03408..03d9d60bb 100644 --- a/monomial/cloverdetratio_monomial.c +++ b/monomial/cloverdetratio_monomial.c @@ -214,39 +214,79 @@ void cloverdetratio_derivative(const int no, hamiltonian_field_t * const hf) { mnl->forceprec, g_relative_precision_flag, VOLUME/2, mnl->Qsq, mnl->solver); chrono_add_solution(mnl->w_fields[1], mnl->csg_field, mnl->csg_index_array, mnl->csg_N, &mnl->csg_n, VOLUME/2); - // Apply Q_{-} to get Y_W -> w_fields[0] - tm_stopwatch_push(&g_timers, "Qm_diff", ""); - mnl->Qm(mnl->w_fields[0], mnl->w_fields[1]); - // Compute phi - Y_W -> w_fields[0] - diff(mnl->w_fields[0], mnl->w_fields[0], mnl->pf, VOLUME/2); - tm_stopwatch_pop(&g_timers, 0, 1, ""); - /* apply Hopping Matrix M_{eo} */ - /* to get the even sites of X */ - tm_stopwatch_push(&g_timers, "H_eo_sw_inv_psi", ""); - H_eo_sw_inv_psi(mnl->w_fields[2], mnl->w_fields[1], EE, -1, mnl->mu); - tm_stopwatch_pop(&g_timers, 0, 1, ""); - /* \delta Q sandwitched by Y_o^\dagger and X_e */ - deriv_Sb(OE, mnl->w_fields[0], mnl->w_fields[2], hf, mnl->forcefactor); - - /* to get the even sites of Y */ - tm_stopwatch_push(&g_timers, "H_eo_sw_inv_psi", ""); - H_eo_sw_inv_psi(mnl->w_fields[3], mnl->w_fields[0], EE, +1, mnl->mu); - tm_stopwatch_pop(&g_timers, 0, 1, ""); - /* \delta Q sandwitched by Y_e^\dagger and X_o */ - deriv_Sb(EO, mnl->w_fields[3], mnl->w_fields[1], hf, mnl->forcefactor); + if ( mnl->solver_params.external_inverter == QUDA_INVERTER){ + if(!mnl->even_odd_flag) { + fatal_error("QUDA support only even_odd_flag",__func__); + } + #ifdef TM_USE_QUDA + if (g_debug_level > 3) { +#ifdef TM_USE_MPI + // FIXME: here we do not need to set to zero the interior but only the halo + for(int i = 0; i < (VOLUMEPLUSRAND + g_dbw2rand);i++) { + for(int mu=0;mu<4;mu++) { + _zero_su3adj(debug_derivative[i][mu]); + } + } +#endif + // we copy only the interior + memcpy(debug_derivative[0], hf->derivative[0], 4*VOLUME*sizeof(su3adj)); + } - // here comes the clover term... - // computes the insertion matrices for S_eff - // result is written to swp and swm - // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e - sw_spinor_eo(EO, mnl->w_fields[2], mnl->w_fields[3], mnl->forcefactor); - - // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o - sw_spinor_eo(OE, mnl->w_fields[0], mnl->w_fields[1], mnl->forcefactor); + compute_cloverdet_derivative_quda(mnl, hf, mnl->w_fields[1], mnl->pf, 1 ); - sw_all(hf, mnl->kappa, mnl->c_sw); + if (g_debug_level > 3){ + su3adj **given = hf->derivative; + hf->derivative = debug_derivative; + int store_solver = mnl->solver; + mnl->solver_params.external_inverter = NO_EXT_INV; + mnl->solver = CG; + tm_debug_printf( 0, 3, "Recomputing the derivative from tmLQCD\n"); + cloverdetratio_derivative(no, hf); + #ifdef TM_USE_MPI + xchange_deri(hf->derivative);// this function use ddummy inside + #endif + compare_derivative(mnl, given, hf->derivative); + mnl->solver_params.external_inverter = QUDA_INVERTER; + mnl->solver = store_solver; + hf->derivative = given; + } + #endif // no other option, TM_USE_QUDA already checked by solver + } + else{ + // Apply Q_{-} to get Y_W -> w_fields[0] + tm_stopwatch_push(&g_timers, "Qm_diff", ""); + mnl->Qm(mnl->w_fields[0], mnl->w_fields[1]); + // Compute phi - Y_W -> w_fields[0] + diff(mnl->w_fields[0], mnl->w_fields[0], mnl->pf, VOLUME/2); + tm_stopwatch_pop(&g_timers, 0, 1, ""); + /* apply Hopping Matrix M_{eo} */ + /* to get the even sites of X */ + tm_stopwatch_push(&g_timers, "H_eo_sw_inv_psi", ""); + H_eo_sw_inv_psi(mnl->w_fields[2], mnl->w_fields[1], EE, -1, mnl->mu); + tm_stopwatch_pop(&g_timers, 0, 1, ""); + /* \delta Q sandwitched by Y_o^\dagger and X_e */ + deriv_Sb(OE, mnl->w_fields[0], mnl->w_fields[2], hf, mnl->forcefactor); + + /* to get the even sites of Y */ + tm_stopwatch_push(&g_timers, "H_eo_sw_inv_psi", ""); + H_eo_sw_inv_psi(mnl->w_fields[3], mnl->w_fields[0], EE, +1, mnl->mu); + tm_stopwatch_pop(&g_timers, 0, 1, ""); + /* \delta Q sandwitched by Y_e^\dagger and X_o */ + deriv_Sb(EO, mnl->w_fields[3], mnl->w_fields[1], hf, mnl->forcefactor); + + // here comes the clover term... + // computes the insertion matrices for S_eff + // result is written to swp and swm + // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e + sw_spinor_eo(EO, mnl->w_fields[2], mnl->w_fields[3], mnl->forcefactor); + + // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o + sw_spinor_eo(OE, mnl->w_fields[0], mnl->w_fields[1], mnl->forcefactor); + + sw_all(hf, mnl->kappa, mnl->c_sw); + } mnl_backup_restore_globals(TM_RESTORE_GLOBALS); tm_stopwatch_pop(&g_timers, 0, 1, ""); return; diff --git a/quda_interface.c b/quda_interface.c index f07308f94..a563d5dc3 100644 --- a/quda_interface.c +++ b/quda_interface.c @@ -2523,19 +2523,23 @@ void compute_gauge_derivative_quda(monomial * const mnl, hamiltonian_field_t * c tm_stopwatch_pop(&g_timers, 0, 1, "TM_QUDA"); } -void compute_cloverdet_derivative_quda(monomial * const mnl, hamiltonian_field_t * const hf, spinor ** const X_o) { +void compute_cloverdet_derivative_quda(monomial * const mnl, hamiltonian_field_t * const hf, spinor ** const X_o, spinor ** const phi, int detratio) { tm_stopwatch_push(&g_timers, __func__, ""); _initQuda(); _initMomQuda(); void *spinorIn; + void *spinorPhi; const int nr_sf_in = 1; spinor ** in_o; spinorIn = (void*)X_o; reorder_spinor_eo_toQuda((double*)spinorIn, inv_param.cpu_prec, 0, 1); - + if (detratio){ + spinorPhi = (void*)phi; + reorder_spinor_eo_toQuda((double*)spinorPhi, inv_param.cpu_prec, 0, 1); + } #pragma omp parallel for for(int i = 0; i < 4; i++){ @@ -2559,13 +2563,17 @@ void compute_cloverdet_derivative_quda(monomial * const mnl, hamiltonian_field_t // &inv_param); inv_param.kappa= mnl->kappa; inv_param.clover_csw= mnl->c_sw; - computeTMCloverForceQuda(mom_quda, &spinorIn, coeff, nvector, &f_gauge_param, &inv_param); + computeTMCloverForceQuda(mom_quda, &spinorIn, &spinorPhi, coeff, nvector, &f_gauge_param, &inv_param, detratio); tm_stopwatch_pop(&g_timers, 0, 1, "TM_QUDA"); reorder_mom_fromQuda(mom_quda); add_mom_to_derivative(hf->derivative); + // if we want to compare the force to tmLQCD native implementation we should restore the source + if (detratio && g_debug_level > 3){ + reorder_spinor_eo_fromQuda((double*)spinorPhi, inv_param.cpu_prec, 0, 1); + } tm_stopwatch_pop(&g_timers, 0, 1, "TM_QUDA"); } diff --git a/quda_interface.h b/quda_interface.h index ee454c66c..cf9d37faf 100644 --- a/quda_interface.h +++ b/quda_interface.h @@ -173,7 +173,8 @@ int invert_eo_quda_twoflavour_mshift(spinor ** const out_up, spinor ** const out CompressionType compression); void compute_gauge_derivative_quda(monomial * const mnl, hamiltonian_field_t * const hf); -void compute_cloverdet_derivative_quda(monomial * const mnl, hamiltonian_field_t * const hf, spinor ** const X_o); +void compute_cloverdet_derivative_quda(monomial * const mnl, hamiltonian_field_t * const hf, + spinor ** const X_o, spinor ** const phi, int ratio); void compute_WFlow_quda(const double eps ,const double tmax, const int traj, FILE* outfile); #endif /* QUDA_INTERFACE_H_ */