From 8cf1ed61fad0a59121096bc4ff9fb3f06b020936 Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Wed, 15 Feb 2023 10:57:41 -0700 Subject: [PATCH] tpetra: add missing fences for async deep_copy use. Add missing fences following async deep_copy calls to ensure the copy completes before handoff to send. This resolves a test failure in TpetraCore_Issue1454 with CUDA-aware MPI in cuda/11.4.2 builds without UVM. --- packages/tpetra/core/src/Tpetra_Details_DistributorActor.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/tpetra/core/src/Tpetra_Details_DistributorActor.hpp b/packages/tpetra/core/src/Tpetra_Details_DistributorActor.hpp index db317c46eef8..0c552dc9b48b 100644 --- a/packages/tpetra/core/src/Tpetra_Details_DistributorActor.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_DistributorActor.hpp @@ -488,6 +488,8 @@ void DistributorActor::doPosts(const DistributorPlan& plan, packOffset(sendArray, exports, sendArrayOffset, plan.getIndicesTo()[j]*numPackets, numPackets); sendArrayOffset += numPackets; } + typename ExpView::execution_space().fence(); + ImpView tmpSend = subview_offset(sendArray, size_t(0), plan.getLengthsTo()[p]*numPackets); @@ -847,6 +849,8 @@ void DistributorActor::doPosts(const DistributorPlan& plan, indicesOffsets[j], numExportPacketsPerLID[j]); sendArrayOffset += numExportPacketsPerLID[j]; } + typename ExpView::execution_space().fence(); + if (numPacketsTo_p > 0) { ImpView tmpSend = subview_offset(sendArray, size_t(0), numPacketsTo_p);