diff --git a/src/nvector/parhyp/nvector_parhyp.c b/src/nvector/parhyp/nvector_parhyp.c index 5e089e93b3..2a266b6f0d 100644 --- a/src/nvector/parhyp/nvector_parhyp.c +++ b/src/nvector/parhyp/nvector_parhyp.c @@ -575,9 +575,9 @@ N_Vector N_VCloneEmpty_ParHyp(N_Vector w) NV_OWN_PARVEC_PH(v) = SUNFALSE; NV_HYPRE_PARVEC_PH(v) = NULL; - #if defined(SUNDIALS_HYPRE_BACKENDS_CUDA_OR_HIP) +#if defined(SUNDIALS_HYPRE_BACKENDS_CUDA_OR_HIP) // ... - #endif +#endif // /* Create content */ // content = NULL; @@ -1858,7 +1858,7 @@ int N_VDotProdMulti_ParHyp(int nvec, N_Vector x, N_Vector* Y, int N_VDotProdMultiLocal_ParHyp(int nvec, N_Vector x, N_Vector* Y, realtype* dotprods) { - sunindextype N; + sunindextype i, N; realtype* xd=NULL; /* invalid number of vectors */ @@ -1875,7 +1875,7 @@ int N_VDotProdMultiLocal_ParHyp(int nvec, N_Vector x, N_Vector* Y, xd = NV_DATA_PH(x); #if defined(SUNDIALS_HYPRE_BACKENDS_SERIAL) - sunindextype i, j; + sunindextype j; realtype* yd=NULL; /* compute multiple dot products */ for (i=0; i<<>> @@ -1911,7 +1912,7 @@ int N_VDotProdMultiLocal_ParHyp(int nvec, N_Vector x, N_Vector* Y, // Get result from the GPU ReductionBuffer_CopyFromDevice(x, nvec); - for (sunindextype i = 0; i < nvec; ++i) + for (i = 0; i < nvec; ++i) { dotprods[i] = NV_HBUFFERp_PH(x)[i]; } @@ -2119,11 +2120,7 @@ int N_VConstVectorArray_ParHyp(int nvec, realtype c, N_Vector* Z) int N_VWrmsNormVectorArray_ParHyp(int nvec, N_Vector* X, N_Vector* W, realtype* nrm) { - int i, retval; - sunindextype j, Nl, Ng; - realtype* wd=NULL; - realtype* xd=NULL; - MPI_Comm comm; + sunindextype i, Nl, Ng; /* invalid number of vectors */ if (nvec < 1) return(-1); @@ -2135,10 +2132,13 @@ int N_VWrmsNormVectorArray_ParHyp(int nvec, N_Vector* X, N_Vector* W, realtype* } /* get vector lengths and communicator */ - Nl = NV_LOCLENGTH_PH(X[0]); - Ng = NV_GLOBLENGTH_PH(X[0]); - comm = NV_COMM_PH(X[0]); + Nl = NV_LOCLENGTH_PH(X[0]); + Ng = NV_GLOBLENGTH_PH(X[0]); +#if defined(SUNDIALS_HYPRE_BACKENDS_SERIAL) + sunindextype j; + realtype* xd=NULL; + realtype* wd=NULL; /* compute the WRMS norm for each vector in the vector array */ for (i=0; i<<>> + ( + nvec, + Xd, + Wd, + NV_DBUFFERp_PH(W[0]), + Nl + ); + PostKernelLaunch(); + + /* Copy local square sums from device buffer to host buffer */ + ReductionBuffer_CopyFromDevice(W[0], nvec); + for (i = 0; i < nvec; ++i) + { + nrm[i] = NV_HBUFFERp_PH(x)[i]; + } +#endif + + /* Reduce local square sums into global square sums */ + int retval = MPI_Allreduce(MPI_IN_PLACE, nrm, nvec, MPI_SUNREALTYPE, MPI_SUM, NV_COMM_PH(X[0])); + /* Root of mean of global square sums */ for (i=0; i