Skip to content

Commit

Permalink
Merge pull request #563 from etmc/write_gauge_solver_fail
Browse files Browse the repository at this point in the history
write out gauge field when the solver fails during monomial_solve
  • Loading branch information
kostrzewa authored Jun 16, 2023
2 parents 21dbfaa + 5d5e6c4 commit 6ac0c6e
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 8 deletions.
2 changes: 1 addition & 1 deletion default_input_values.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@
#define _default_subprocess_flag 0
#define _default_lowmem_flag 0

#define _default_g_barrier_monomials_convergence 0
#define _default_g_barrier_monomials_convergence 1

/* default input values for QUDA interface */
/* These follow the recommendations of https://github.com/lattice/quda/wiki/Multigrid-Solver */
Expand Down
11 changes: 10 additions & 1 deletion quda_interface.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@
#include "tm_debug_printf.h"
#include "phmc.h"
#include "quda_gauge_paths.inc"
#include "io/gauge.h"
#include "measure_gauge_action.h"

// nstore is generally like a gauge id, for measurements it identifies the gauge field
// uniquely
Expand Down Expand Up @@ -2235,9 +2237,16 @@ int invert_eo_degenerate_quda(spinor * const out,
rel_prec, even_odd_flag, solver_params,
sloppy_precision, compression, QpQm);
if (ret_value >= max_iter) {
char outname[200];
snprintf(outname, 200, "conf_mg_refresh_fail.%.6f.%04d", g_gauge_state.gauge_id, nstore);
paramsXlfInfo * xlfInfo = construct_paramsXlfInfo(
measure_plaquette((const su3**)g_gauge_field)/(6.*VOLUME*g_nproc), nstore);
int status = write_gauge_field(outname, 64, xlfInfo);
free(xlfInfo);

char errmsg[200];
snprintf(errmsg, 200, "QUDA-MG solver failed to converge in %d iterations even after forced setup refresh. Terminating!",
max_iter);
max_iter);
fatal_error(errmsg, __func__);
return -1;
} else {
Expand Down
23 changes: 17 additions & 6 deletions solver/monomial_solve.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,24 @@
#endif
#include "fatal_error.h"

#include <io/params.h>
#include <io/spinor.h>
#include "io/params.h"
#include "io/spinor.h"
#include "io/gauge.h"
#include "measure_gauge_action.h"

#ifdef TM_USE_QUDA
# include "quda_interface.h"
#endif

/**
 * Write the current gauge field to disk and then terminate via fatal_error().
 *
 * The callers visible in this file invoke it when a solver returned -1
 * iterations, so that the configuration on which the solve failed is kept
 * for later inspection.
 *
 * The output file name is built as
 *   conf_monomial_solve_fail.<g_gauge_state.gauge_id>.<nstore>
 * and the plaquette value handed to construct_paramsXlfInfo() is the result
 * of measure_plaquette() divided by 6*VOLUME*g_nproc.
 *
 * If write_gauge_field() reports a problem, this is no longer dropped
 * silently: the fatal error message then also names the file and the status
 * code, so nobody goes looking for a dump that was never (completely)
 * written.
 *
 * @param solver  name of the calling solve routine; forwarded to
 *                fatal_error() as the reporting function.
 */
void solve_fail_write_config_and_abort(const char * const solver) {
  char outname[200];
  snprintf(outname, sizeof outname, "conf_monomial_solve_fail.%.6f.%04d",
           g_gauge_state.gauge_id, nstore);

  paramsXlfInfo * xlfInfo = construct_paramsXlfInfo(
      measure_plaquette((const su3**)g_gauge_field)/(6.*VOLUME*g_nproc), nstore);
  /* NOTE(review): the second argument (64) is presumably the output precision
   * in bits -- confirm against io/gauge.h. */
  const int status = write_gauge_field(outname, 64, xlfInfo);
  free(xlfInfo);

  /* NOTE(review): a non-zero status is assumed to signal a failed write --
   * confirm against the write_gauge_field() contract. */
  if (status != 0) {
    char errmsg[400];
    snprintf(errmsg, sizeof errmsg,
             "Error: solver reported -1 iterations. "
             "Additionally, writing the gauge field to '%s' failed with status %d.",
             outname, status);
    fatal_error(errmsg, solver);
  } else {
    fatal_error("Error: solver reported -1 iterations.", solver);
  }
}

int solve_degenerate(spinor * const P, spinor * const Q, solver_params_t solver_params,
const int max_iter, double eps_sq, const int rel_prec,
const int N, matrix_mult f, int solver_type){
Expand Down Expand Up @@ -216,7 +227,7 @@ int solve_degenerate(spinor * const P, spinor * const Q, solver_params_t solver_
tm_stopwatch_pop(&g_timers, 0, 1, "");

if (iteration_count == -1 && g_barrier_monomials_convergence) {
fatal_error("Error: solver reported -1 iterations.", "solve_degenerate");
solve_fail_write_config_and_abort("solve_degenerate");
}

return (iteration_count);
Expand Down Expand Up @@ -425,7 +436,7 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
tm_stopwatch_pop(&g_timers, 0, 1, "");

if (iteration_count == -1 && g_barrier_monomials_convergence) {
fatal_error("Error: solver reported -1 iterations.", "solve_mms_tm");
solve_fail_write_config_and_abort("solve_mms_tm");
}

return(iteration_count);
Expand Down Expand Up @@ -671,7 +682,7 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
tm_stopwatch_pop(&g_timers, 0, 1, "");

if (iteration_count == -1 && g_barrier_monomials_convergence) {
fatal_error("Error: solver reported -1 iterations.", "solve_mms_nd");
solve_fail_write_config_and_abort("solve_mms_nd");
}

return (iteration_count);
Expand Down Expand Up @@ -726,7 +737,7 @@ int solve_mms_nd_plus(spinor ** const Pup, spinor ** const Pdn,
tm_stopwatch_pop(&g_timers, 0, 1, "");

if (iteration_count == -1 && g_barrier_monomials_convergence) {
fatal_error("Error: solver reported -1 iterations.", "solve_mms_nd_plus");
solve_fail_write_config_and_abort("solve_mms_nd_plus");
}

return iteration_count;
Expand Down

0 comments on commit 6ac0c6e

Please sign in to comment.