diff --git a/flang/lib/Lower/CMakeLists.txt b/flang/lib/Lower/CMakeLists.txt index ba6622d8504a4f..f57f0e7a77a018 100644 --- a/flang/lib/Lower/CMakeLists.txt +++ b/flang/lib/Lower/CMakeLists.txt @@ -29,6 +29,7 @@ add_flang_library(FortranLower OpenMP/DataSharingProcessor.cpp OpenMP/Decomposer.cpp OpenMP/OpenMP.cpp + OpenMP/PrivateReductionUtils.cpp OpenMP/ReductionProcessor.cpp OpenMP/Utils.cpp PFTBuilder.cpp diff --git a/flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp b/flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp new file mode 100644 index 00000000000000..83f0d4e93ca548 --- /dev/null +++ b/flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp @@ -0,0 +1,236 @@ +//===-- PrivateReductionUtils.cpp -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/ +// +//===----------------------------------------------------------------------===// + +#include "PrivateReductionUtils.h" + +#include "flang/Optimizer/Builder/FIRBuilder.h" +#include "flang/Optimizer/Builder/HLFIRTools.h" +#include "flang/Optimizer/Builder/Todo.h" +#include "flang/Optimizer/HLFIR/HLFIROps.h" +#include "flang/Optimizer/Support/FatalError.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "mlir/IR/Location.h" + +static void createCleanupRegion(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Type argType, + mlir::Region &cleanupRegion) { + assert(cleanupRegion.empty()); + mlir::Block *block = builder.createBlock(&cleanupRegion, cleanupRegion.end(), + {argType}, {loc}); + builder.setInsertionPointToEnd(block); + + auto typeError = [loc]() { + fir::emitFatalError(loc, + "Attempt to create an omp cleanup region " + "for a type that 
wasn't allocated", + /*genCrashDiag=*/true); + }; + + mlir::Type valTy = fir::unwrapRefType(argType); + if (auto boxTy = mlir::dyn_cast_or_null(valTy)) { + if (!mlir::isa(boxTy.getEleTy())) { + mlir::Type innerTy = fir::extractSequenceType(boxTy); + if (!mlir::isa(innerTy)) + typeError(); + } + + mlir::Value arg = builder.loadIfRef(loc, block->getArgument(0)); + assert(mlir::isa(arg.getType())); + + // Deallocate box + // The FIR type system doesn't necessarily know that this is a mutable box + // if we allocated the thread local array on the heap to avoid looped stack + // allocations. + mlir::Value addr = + hlfir::genVariableRawAddress(loc, builder, hlfir::Entity{arg}); + mlir::Value isAllocated = builder.genIsNotNullAddr(loc, addr); + fir::IfOp ifOp = + builder.create(loc, isAllocated, /*withElseRegion=*/false); + builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); + + mlir::Value cast = builder.createConvert( + loc, fir::HeapType::get(fir::dyn_cast_ptrEleTy(addr.getType())), addr); + builder.create(loc, cast); + + builder.setInsertionPointAfter(ifOp); + builder.create(loc); + return; + } + + typeError(); +} + +fir::ShapeShiftOp Fortran::lower::omp::getShapeShift(fir::FirOpBuilder &builder, + mlir::Location loc, + mlir::Value box) { + fir::SequenceType sequenceType = mlir::cast( + hlfir::getFortranElementOrSequenceType(box.getType())); + const unsigned rank = sequenceType.getDimension(); + llvm::SmallVector lbAndExtents; + lbAndExtents.reserve(rank * 2); + + mlir::Type idxTy = builder.getIndexType(); + for (unsigned i = 0; i < rank; ++i) { + // TODO: ideally we want to hoist box reads out of the critical section. 
+ // We could do this by having box dimensions in block arguments like + // OpenACC does + mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i); + auto dimInfo = + builder.create(loc, idxTy, idxTy, idxTy, box, dim); + lbAndExtents.push_back(dimInfo.getLowerBound()); + lbAndExtents.push_back(dimInfo.getExtent()); + } + + auto shapeShiftTy = fir::ShapeShiftType::get(builder.getContext(), rank); + auto shapeShift = + builder.create(loc, shapeShiftTy, lbAndExtents); + return shapeShift; +} + +void Fortran::lower::omp::populateByRefInitAndCleanupRegions( + fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type argType, + mlir::Value scalarInitValue, mlir::Block *initBlock, + mlir::Value allocatedPrivVarArg, mlir::Value moldArg, + mlir::Region &cleanupRegion) { + mlir::Type ty = fir::unwrapRefType(argType); + builder.setInsertionPointToEnd(initBlock); + auto yield = [&](mlir::Value ret) { + builder.create(loc, ret); + }; + + if (fir::isa_trivial(ty)) { + builder.setInsertionPointToEnd(initBlock); + + if (scalarInitValue) + builder.createStoreWithConvert(loc, scalarInitValue, allocatedPrivVarArg); + yield(allocatedPrivVarArg); + return; + } + + // check if an allocatable box is unallocated. If so, initialize the boxAlloca + // to be unallocated e.g. + // %box_alloca = fir.alloca !fir.box<!fir.heap<...>> + // %addr = fir.box_addr %box + // if (%addr == 0) { + // %nullbox = fir.embox %addr + // fir.store %nullbox to %box_alloca + // } else { + // // ... 
+ // fir.store %something to %box_alloca + // } + // omp.yield %box_alloca + moldArg = builder.loadIfRef(loc, moldArg); + auto handleNullAllocatable = [&](mlir::Value boxAlloca) -> fir::IfOp { + mlir::Value addr = builder.create(loc, moldArg); + mlir::Value isNotAllocated = builder.genIsNullAddr(loc, addr); + fir::IfOp ifOp = builder.create(loc, isNotAllocated, + /*withElseRegion=*/true); + builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); + // just embox the null address and return + mlir::Value nullBox = builder.create(loc, ty, addr); + builder.create(loc, nullBox, boxAlloca); + return ifOp; + }; + + // all arrays are boxed + if (auto boxTy = mlir::dyn_cast_or_null(ty)) { + bool isAllocatableOrPointer = + mlir::isa(boxTy.getEleTy()); + + builder.setInsertionPointToEnd(initBlock); + mlir::Value boxAlloca = allocatedPrivVarArg; + mlir::Type innerTy = fir::unwrapRefType(boxTy.getEleTy()); + if (fir::isa_trivial(innerTy)) { + // boxed non-sequence value e.g. !fir.box<!fir.heap<i32>> + if (!isAllocatableOrPointer) + TODO(loc, + "Reduction/Privatization of non-allocatable trivial typed box"); + + fir::IfOp ifUnallocated = handleNullAllocatable(boxAlloca); + + builder.setInsertionPointToStart(&ifUnallocated.getElseRegion().front()); + mlir::Value valAlloc = builder.create(loc, innerTy); + if (scalarInitValue) + builder.createStoreWithConvert(loc, scalarInitValue, valAlloc); + mlir::Value box = builder.create(loc, ty, valAlloc); + builder.create(loc, box, boxAlloca); + + createCleanupRegion(builder, loc, argType, cleanupRegion); + builder.setInsertionPointAfter(ifUnallocated); + yield(boxAlloca); + return; + } + innerTy = fir::extractSequenceType(boxTy); + if (!mlir::isa(innerTy)) + TODO(loc, "Unsupported boxed type for reduction/privatization"); + + fir::IfOp ifUnallocated{nullptr}; + if (isAllocatableOrPointer) { + ifUnallocated = handleNullAllocatable(boxAlloca); + builder.setInsertionPointToStart(&ifUnallocated.getElseRegion().front()); + } + + // Create the private 
copy from the initial fir.box: + mlir::Value loadedBox = builder.loadIfRef(loc, moldArg); + hlfir::Entity source = hlfir::Entity{loadedBox}; + + // Allocating on the heap in case the whole reduction is nested inside of a + // loop + // TODO: compare performance here to using allocas - this could be made to + // work by inserting stacksave/stackrestore around the reduction in + // openmpirbuilder + auto [temp, needsDealloc] = createTempFromMold(loc, builder, source); + // if needsDealloc isn't statically false, add cleanup region. Always + // do this for allocatable boxes because they might have been re-allocated + // in the body of the loop/parallel region + + std::optional cstNeedsDealloc = + fir::getIntIfConstant(needsDealloc); + assert(cstNeedsDealloc.has_value() && + "createTempFromMold decides this statically"); + if (cstNeedsDealloc.has_value() && *cstNeedsDealloc != false) { + mlir::OpBuilder::InsertionGuard guard(builder); + createCleanupRegion(builder, loc, argType, cleanupRegion); + } else { + assert(!isAllocatableOrPointer && + "Pointer-like arrays must be heap allocated"); + } + + // Put the temporary inside of a box: + // hlfir::genVariableBox doesn't handle non-default lower bounds + mlir::Value box; + fir::ShapeShiftOp shapeShift = getShapeShift(builder, loc, loadedBox); + mlir::Type boxType = loadedBox.getType(); + if (mlir::isa(temp.getType())) + // the box created by the declare from createTempFromMold is missing lower + // bounds info + box = builder.create(loc, boxType, temp, shapeShift, + /*shift=*/mlir::Value{}); + else + box = builder.create( + loc, boxType, temp, shapeShift, + /*slice=*/mlir::Value{}, + /*typeParams=*/llvm::ArrayRef{}); + + if (scalarInitValue) + builder.create(loc, scalarInitValue, box); + builder.create(loc, box, boxAlloca); + if (ifUnallocated) + builder.setInsertionPointAfter(ifUnallocated); + yield(boxAlloca); + return; + } + + TODO(loc, + "creating reduction/privatization init region for unsupported type"); + return; 
+} diff --git a/flang/lib/Lower/OpenMP/PrivateReductionUtils.h b/flang/lib/Lower/OpenMP/PrivateReductionUtils.h new file mode 100644 index 00000000000000..b4abc40cd4b674 --- /dev/null +++ b/flang/lib/Lower/OpenMP/PrivateReductionUtils.h @@ -0,0 +1,51 @@ +//===-- Lower/OpenMP/PrivateReductionUtils.h --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/ +// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_LOWER_OPENMP_PRIVATEREDUCTIONUTILS_H +#define FORTRAN_LOWER_OPENMP_PRIVATEREDUCTIONUTILS_H + +#include "mlir/IR/Location.h" +#include "mlir/IR/Value.h" + +namespace mlir { +class Region; +} // namespace mlir + +namespace fir { +class FirOpBuilder; +class ShapeShiftOp; +} // namespace fir + +namespace Fortran { +namespace lower { +namespace omp { + +/// Generate init and cleanup regions suitable for reduction or privatizer +/// declarations. `scalarInitValue` may be nullptr if there is no default +/// initialization (for privatization). +void populateByRefInitAndCleanupRegions(fir::FirOpBuilder &builder, + mlir::Location loc, mlir::Type argType, + mlir::Value scalarInitValue, + mlir::Block *initBlock, + mlir::Value allocatedPrivVarArg, + mlir::Value moldArg, + mlir::Region &cleanupRegion); + +/// Generate a fir::ShapeShift op describing the provided boxed array. 
+fir::ShapeShiftOp getShapeShift(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Value box); + +} // namespace omp +} // namespace lower +} // namespace Fortran + +#endif // FORTRAN_LOWER_OPENMP_PRIVATEREDUCTIONUTILS_H diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index 736de2ee511bef..2cd21107a916e4 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -12,6 +12,7 @@ #include "ReductionProcessor.h" +#include "PrivateReductionUtils.h" #include "flang/Lower/AbstractConverter.h" #include "flang/Lower/ConvertType.h" #include "flang/Lower/SymbolMap.h" @@ -294,33 +295,6 @@ mlir::Value ReductionProcessor::createScalarCombiner( return reductionOp; } -/// Generate a fir::ShapeShift op describing the provided boxed array. -static fir::ShapeShiftOp getShapeShift(fir::FirOpBuilder &builder, - mlir::Location loc, mlir::Value box) { - fir::SequenceType sequenceType = mlir::cast( - hlfir::getFortranElementOrSequenceType(box.getType())); - const unsigned rank = sequenceType.getDimension(); - llvm::SmallVector lbAndExtents; - lbAndExtents.reserve(rank * 2); - - mlir::Type idxTy = builder.getIndexType(); - for (unsigned i = 0; i < rank; ++i) { - // TODO: ideally we want to hoist box reads out of the critical section. 
- // We could do this by having box dimensions in block arguments like - // OpenACC does - mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i); - auto dimInfo = - builder.create(loc, idxTy, idxTy, idxTy, box, dim); - lbAndExtents.push_back(dimInfo.getLowerBound()); - lbAndExtents.push_back(dimInfo.getExtent()); - } - - auto shapeShiftTy = fir::ShapeShiftType::get(builder.getContext(), rank); - auto shapeShift = - builder.create(loc, shapeShiftTy, lbAndExtents); - return shapeShift; -} - /// Create reduction combiner region for reduction variables which are boxed /// arrays static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, @@ -422,59 +396,6 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc, TODO(loc, "OpenMP genCombiner for unsupported reduction variable type"); } -static void -createReductionCleanupRegion(fir::FirOpBuilder &builder, mlir::Location loc, - mlir::omp::DeclareReductionOp &reductionDecl) { - mlir::Type redTy = reductionDecl.getType(); - - mlir::Region &cleanupRegion = reductionDecl.getCleanupRegion(); - assert(cleanupRegion.empty()); - mlir::Block *block = - builder.createBlock(&cleanupRegion, cleanupRegion.end(), {redTy}, {loc}); - builder.setInsertionPointToEnd(block); - - auto typeError = [loc]() { - fir::emitFatalError(loc, - "Attempt to create an omp reduction cleanup region " - "for a type that wasn't allocated", - /*genCrashDiag=*/true); - }; - - mlir::Type valTy = fir::unwrapRefType(redTy); - if (auto boxTy = mlir::dyn_cast_or_null(valTy)) { - if (!mlir::isa(boxTy.getEleTy())) { - mlir::Type innerTy = fir::extractSequenceType(boxTy); - if (!mlir::isa(innerTy)) - typeError(); - } - - mlir::Value arg = block->getArgument(0); - arg = builder.loadIfRef(loc, arg); - assert(mlir::isa(arg.getType())); - - // Deallocate box - // The FIR type system doesn't nesecarrily know that this is a mutable box - // if we allocated the thread local array on the heap to avoid looped stack - // allocations. 
- mlir::Value addr = - hlfir::genVariableRawAddress(loc, builder, hlfir::Entity{arg}); - mlir::Value isAllocated = builder.genIsNotNullAddr(loc, addr); - fir::IfOp ifOp = - builder.create(loc, isAllocated, /*withElseRegion=*/false); - builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); - - mlir::Value cast = builder.createConvert( - loc, fir::HeapType::get(fir::dyn_cast_ptrEleTy(addr.getType())), addr); - builder.create(loc, cast); - - builder.setInsertionPointAfter(ifOp); - builder.create(loc); - return; - } - - typeError(); -} - // like fir::unwrapSeqOrBoxedSeqType except it also works for non-sequence boxes static mlir::Type unwrapSeqOrBoxedType(mlir::Type ty) { if (auto seqTy = mlir::dyn_cast(ty)) @@ -517,154 +438,31 @@ static void createReductionAllocAndInitRegions( mlir::Value initValue = ReductionProcessor::getReductionInitValue( loc, unwrapSeqOrBoxedType(ty), redId, builder); + if (isByRef) { + populateByRefInitAndCleanupRegions(builder, loc, type, initValue, initBlock, + reductionDecl.getInitializerAllocArg(), + reductionDecl.getInitializerMoldArg(), + reductionDecl.getCleanupRegion()); + } + if (fir::isa_trivial(ty)) { if (isByRef) { // alloc region - { - builder.setInsertionPointToEnd(allocBlock); - mlir::Value alloca = builder.create(loc, ty); - yield(alloca); - } - - // init region - { - builder.setInsertionPointToEnd(initBlock); - // block arg is mapped to the alloca yielded from the alloc region - mlir::Value alloc = reductionDecl.getInitializerAllocArg(); - builder.createStoreWithConvert(loc, initValue, alloc); - yield(alloc); - } + builder.setInsertionPointToEnd(allocBlock); + mlir::Value alloca = builder.create(loc, ty); + yield(alloca); return; } // by val yield(initValue); return; } + assert(isByRef && "passing non-trivial types by val is unsupported"); - // check if an allocatable box is unallocated. If so, initialize the boxAlloca - // to be unallocated e.g. 
- // %box_alloca = fir.alloca !fir.box> - // %addr = fir.box_addr %box - // if (%addr == 0) { - // %nullbox = fir.embox %addr - // fir.store %nullbox to %box_alloca - // } else { - // // ... - // fir.store %something to %box_alloca - // } - // omp.yield %box_alloca - mlir::Value moldArg = - builder.loadIfRef(loc, reductionDecl.getInitializerMoldArg()); - auto handleNullAllocatable = [&](mlir::Value boxAlloca) -> fir::IfOp { - mlir::Value addr = builder.create(loc, moldArg); - mlir::Value isNotAllocated = builder.genIsNullAddr(loc, addr); - fir::IfOp ifOp = builder.create(loc, isNotAllocated, - /*withElseRegion=*/true); - builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); - // just embox the null address and return - mlir::Value nullBox = builder.create(loc, ty, addr); - builder.create(loc, nullBox, boxAlloca); - return ifOp; - }; - - // all arrays are boxed - if (auto boxTy = mlir::dyn_cast_or_null(ty)) { - assert(isByRef && "passing boxes by value is unsupported"); - bool isAllocatableOrPointer = - mlir::isa(boxTy.getEleTy()); - - // alloc region - { - builder.setInsertionPointToEnd(allocBlock); - mlir::Value boxAlloca = builder.create(loc, ty); - yield(boxAlloca); - } - - // init region - builder.setInsertionPointToEnd(initBlock); - mlir::Value boxAlloca = reductionDecl.getInitializerAllocArg(); - mlir::Type innerTy = fir::unwrapRefType(boxTy.getEleTy()); - if (fir::isa_trivial(innerTy)) { - // boxed non-sequence value e.g. 
!fir.box> - if (!isAllocatableOrPointer) - TODO(loc, "Reduction of non-allocatable trivial typed box"); - - fir::IfOp ifUnallocated = handleNullAllocatable(boxAlloca); - - builder.setInsertionPointToStart(&ifUnallocated.getElseRegion().front()); - mlir::Value valAlloc = builder.create(loc, innerTy); - builder.createStoreWithConvert(loc, initValue, valAlloc); - mlir::Value box = builder.create(loc, ty, valAlloc); - builder.create(loc, box, boxAlloca); - - auto insPt = builder.saveInsertionPoint(); - createReductionCleanupRegion(builder, loc, reductionDecl); - builder.restoreInsertionPoint(insPt); - builder.setInsertionPointAfter(ifUnallocated); - yield(boxAlloca); - return; - } - innerTy = fir::extractSequenceType(boxTy); - if (!mlir::isa(innerTy)) - TODO(loc, "Unsupported boxed type for reduction"); - - fir::IfOp ifUnallocated{nullptr}; - if (isAllocatableOrPointer) { - ifUnallocated = handleNullAllocatable(boxAlloca); - builder.setInsertionPointToStart(&ifUnallocated.getElseRegion().front()); - } - - // Create the private copy from the initial fir.box: - mlir::Value loadedBox = builder.loadIfRef(loc, moldArg); - hlfir::Entity source = hlfir::Entity{loadedBox}; - - // Allocating on the heap in case the whole reduction is nested inside of a - // loop - // TODO: compare performance here to using allocas - this could be made to - // work by inserting stacksave/stackrestore around the reduction in - // openmpirbuilder - auto [temp, needsDealloc] = createTempFromMold(loc, builder, source); - // if needsDealloc isn't statically false, add cleanup region. 
Always - // do this for allocatable boxes because they might have been re-allocated - // in the body of the loop/parallel region - - std::optional cstNeedsDealloc = - fir::getIntIfConstant(needsDealloc); - assert(cstNeedsDealloc.has_value() && - "createTempFromMold decides this statically"); - if (cstNeedsDealloc.has_value() && *cstNeedsDealloc != false) { - mlir::OpBuilder::InsertionGuard guard(builder); - createReductionCleanupRegion(builder, loc, reductionDecl); - } else { - assert(!isAllocatableOrPointer && - "Pointer-like arrays must be heap allocated"); - } - - // Put the temporary inside of a box: - // hlfir::genVariableBox doesn't handle non-default lower bounds - mlir::Value box; - fir::ShapeShiftOp shapeShift = getShapeShift(builder, loc, loadedBox); - mlir::Type boxType = loadedBox.getType(); - if (mlir::isa(temp.getType())) - // the box created by the declare form createTempFromMold is missing lower - // bounds info - box = builder.create(loc, boxType, temp, shapeShift, - /*shift=*/mlir::Value{}); - else - box = builder.create( - loc, boxType, temp, shapeShift, - /*slice=*/mlir::Value{}, - /*typeParams=*/llvm::ArrayRef{}); - - builder.create(loc, initValue, box); - builder.create(loc, box, boxAlloca); - if (ifUnallocated) - builder.setInsertionPointAfter(ifUnallocated); - yield(boxAlloca); - return; - } - - TODO(loc, "createReductionInitRegion for unsupported type"); + // alloc region + builder.setInsertionPointToEnd(allocBlock); + mlir::Value boxAlloca = builder.create(loc, ty); + yield(boxAlloca); } mlir::omp::DeclareReductionOp ReductionProcessor::createDeclareReduction(