[flang][OpenMP][NFC] Move reduction init and cleanup region gen to helper (llvm#120761)

This will allow code sharing between reduction and privatization after
my (still WIP) changes to `omp.private` to use an `alloc` region similar
to the one used for reduction declarations.
tblah authored Jan 7, 2025
1 parent 2e637db commit 8557a57
Showing 4 changed files with 304 additions and 218 deletions.
1 change: 1 addition & 0 deletions flang/lib/Lower/CMakeLists.txt
@@ -29,6 +29,7 @@ add_flang_library(FortranLower
  OpenMP/DataSharingProcessor.cpp
  OpenMP/Decomposer.cpp
  OpenMP/OpenMP.cpp
  OpenMP/PrivateReductionUtils.cpp
  OpenMP/ReductionProcessor.cpp
  OpenMP/Utils.cpp
  PFTBuilder.cpp
236 changes: 236 additions & 0 deletions flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp
@@ -0,0 +1,236 @@
//===-- PrivateReductionUtils.cpp -------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
//
//===----------------------------------------------------------------------===//

#include "PrivateReductionUtils.h"

#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/HLFIRTools.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/Support/FatalError.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/IR/Location.h"

static void createCleanupRegion(fir::FirOpBuilder &builder, mlir::Location loc,
                                mlir::Type argType,
                                mlir::Region &cleanupRegion) {
  assert(cleanupRegion.empty());
  mlir::Block *block = builder.createBlock(&cleanupRegion, cleanupRegion.end(),
                                           {argType}, {loc});
  builder.setInsertionPointToEnd(block);

  auto typeError = [loc]() {
    fir::emitFatalError(loc,
                        "Attempt to create an omp cleanup region "
                        "for a type that wasn't allocated",
                        /*genCrashDiag=*/true);
  };

  mlir::Type valTy = fir::unwrapRefType(argType);
  if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(valTy)) {
    if (!mlir::isa<fir::HeapType, fir::PointerType>(boxTy.getEleTy())) {
      mlir::Type innerTy = fir::extractSequenceType(boxTy);
      if (!mlir::isa<fir::SequenceType>(innerTy))
        typeError();
    }

    mlir::Value arg = builder.loadIfRef(loc, block->getArgument(0));
    assert(mlir::isa<fir::BaseBoxType>(arg.getType()));

    // Deallocate the box. The FIR type system doesn't necessarily know that
    // this is a mutable box if we allocated the thread-local array on the heap
    // to avoid looped stack allocations.
    mlir::Value addr =
        hlfir::genVariableRawAddress(loc, builder, hlfir::Entity{arg});
    mlir::Value isAllocated = builder.genIsNotNullAddr(loc, addr);
    fir::IfOp ifOp =
        builder.create<fir::IfOp>(loc, isAllocated, /*withElseRegion=*/false);
    builder.setInsertionPointToStart(&ifOp.getThenRegion().front());

    mlir::Value cast = builder.createConvert(
        loc, fir::HeapType::get(fir::dyn_cast_ptrEleTy(addr.getType())), addr);
    builder.create<fir::FreeMemOp>(loc, cast);

    builder.setInsertionPointAfter(ifOp);
    builder.create<mlir::omp::YieldOp>(loc);
    return;
  }

  typeError();
}

fir::ShapeShiftOp Fortran::lower::omp::getShapeShift(fir::FirOpBuilder &builder,
                                                     mlir::Location loc,
                                                     mlir::Value box) {
  fir::SequenceType sequenceType = mlir::cast<fir::SequenceType>(
      hlfir::getFortranElementOrSequenceType(box.getType()));
  const unsigned rank = sequenceType.getDimension();
  llvm::SmallVector<mlir::Value> lbAndExtents;
  lbAndExtents.reserve(rank * 2);

  mlir::Type idxTy = builder.getIndexType();
  for (unsigned i = 0; i < rank; ++i) {
    // TODO: ideally we want to hoist box reads out of the critical section.
    // We could do this by having box dimensions in block arguments like
    // OpenACC does.
    mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i);
    auto dimInfo =
        builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, box, dim);
    lbAndExtents.push_back(dimInfo.getLowerBound());
    lbAndExtents.push_back(dimInfo.getExtent());
  }

  auto shapeShiftTy = fir::ShapeShiftType::get(builder.getContext(), rank);
  auto shapeShift =
      builder.create<fir::ShapeShiftOp>(loc, shapeShiftTy, lbAndExtents);
  return shapeShift;
}

void Fortran::lower::omp::populateByRefInitAndCleanupRegions(
    fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type argType,
    mlir::Value scalarInitValue, mlir::Block *initBlock,
    mlir::Value allocatedPrivVarArg, mlir::Value moldArg,
    mlir::Region &cleanupRegion) {
  mlir::Type ty = fir::unwrapRefType(argType);
  builder.setInsertionPointToEnd(initBlock);
  auto yield = [&](mlir::Value ret) {
    builder.create<mlir::omp::YieldOp>(loc, ret);
  };

  if (fir::isa_trivial(ty)) {
    builder.setInsertionPointToEnd(initBlock);

    if (scalarInitValue)
      builder.createStoreWithConvert(loc, scalarInitValue, allocatedPrivVarArg);
    yield(allocatedPrivVarArg);
    return;
  }

  // Check if an allocatable box is unallocated. If so, initialize the boxAlloca
  // to be unallocated, e.g.
  //   %box_alloca = fir.alloca !fir.box<!fir.heap<...>>
  //   %addr = fir.box_addr %box
  //   if (%addr == 0) {
  //     %nullbox = fir.embox %addr
  //     fir.store %nullbox to %box_alloca
  //   } else {
  //     // ...
  //     fir.store %something to %box_alloca
  //   }
  //   omp.yield %box_alloca
  moldArg = builder.loadIfRef(loc, moldArg);
  auto handleNullAllocatable = [&](mlir::Value boxAlloca) -> fir::IfOp {
    mlir::Value addr = builder.create<fir::BoxAddrOp>(loc, moldArg);
    mlir::Value isNotAllocated = builder.genIsNullAddr(loc, addr);
    fir::IfOp ifOp = builder.create<fir::IfOp>(loc, isNotAllocated,
                                               /*withElseRegion=*/true);
    builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
    // Just embox the null address and return.
    mlir::Value nullBox = builder.create<fir::EmboxOp>(loc, ty, addr);
    builder.create<fir::StoreOp>(loc, nullBox, boxAlloca);
    return ifOp;
  };

  // All arrays are boxed.
  if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(ty)) {
    bool isAllocatableOrPointer =
        mlir::isa<fir::HeapType, fir::PointerType>(boxTy.getEleTy());

    builder.setInsertionPointToEnd(initBlock);
    mlir::Value boxAlloca = allocatedPrivVarArg;
    mlir::Type innerTy = fir::unwrapRefType(boxTy.getEleTy());
    if (fir::isa_trivial(innerTy)) {
      // Boxed non-sequence value, e.g. !fir.box<!fir.heap<i32>>.
      if (!isAllocatableOrPointer)
        TODO(loc,
             "Reduction/Privatization of non-allocatable trivial typed box");

      fir::IfOp ifUnallocated = handleNullAllocatable(boxAlloca);

      builder.setInsertionPointToStart(&ifUnallocated.getElseRegion().front());
      mlir::Value valAlloc = builder.create<fir::AllocMemOp>(loc, innerTy);
      if (scalarInitValue)
        builder.createStoreWithConvert(loc, scalarInitValue, valAlloc);
      mlir::Value box = builder.create<fir::EmboxOp>(loc, ty, valAlloc);
      builder.create<fir::StoreOp>(loc, box, boxAlloca);

      createCleanupRegion(builder, loc, argType, cleanupRegion);
      builder.setInsertionPointAfter(ifUnallocated);
      yield(boxAlloca);
      return;
    }
    innerTy = fir::extractSequenceType(boxTy);
    if (!mlir::isa<fir::SequenceType>(innerTy))
      TODO(loc, "Unsupported boxed type for reduction/privatization");

    fir::IfOp ifUnallocated{nullptr};
    if (isAllocatableOrPointer) {
      ifUnallocated = handleNullAllocatable(boxAlloca);
      builder.setInsertionPointToStart(&ifUnallocated.getElseRegion().front());
    }

    // Create the private copy from the initial fir.box:
    mlir::Value loadedBox = builder.loadIfRef(loc, moldArg);
    hlfir::Entity source = hlfir::Entity{loadedBox};

    // Allocate on the heap in case the whole reduction is nested inside of a
    // loop.
    // TODO: compare performance here to using allocas - this could be made to
    // work by inserting stacksave/stackrestore around the reduction in
    // OpenMPIRBuilder.
    auto [temp, needsDealloc] = createTempFromMold(loc, builder, source);
    // If needsDealloc isn't statically false, add a cleanup region. Always do
    // this for allocatable boxes because they might have been re-allocated in
    // the body of the loop/parallel region.

    std::optional<int64_t> cstNeedsDealloc =
        fir::getIntIfConstant(needsDealloc);
    assert(cstNeedsDealloc.has_value() &&
           "createTempFromMold decides this statically");
    if (cstNeedsDealloc.has_value() && *cstNeedsDealloc != false) {
      mlir::OpBuilder::InsertionGuard guard(builder);
      createCleanupRegion(builder, loc, argType, cleanupRegion);
    } else {
      assert(!isAllocatableOrPointer &&
             "Pointer-like arrays must be heap allocated");
    }

    // Put the temporary inside of a box:
    // hlfir::genVariableBox doesn't handle non-default lower bounds.
    mlir::Value box;
    fir::ShapeShiftOp shapeShift = getShapeShift(builder, loc, loadedBox);
    mlir::Type boxType = loadedBox.getType();
    if (mlir::isa<fir::BaseBoxType>(temp.getType()))
      // The box created by the declare from createTempFromMold is missing
      // lower bounds info.
      box = builder.create<fir::ReboxOp>(loc, boxType, temp, shapeShift,
                                         /*shift=*/mlir::Value{});
    else
      box = builder.create<fir::EmboxOp>(
          loc, boxType, temp, shapeShift,
          /*slice=*/mlir::Value{},
          /*typeParams=*/llvm::ArrayRef<mlir::Value>{});

    if (scalarInitValue)
      builder.create<hlfir::AssignOp>(loc, scalarInitValue, box);
    builder.create<fir::StoreOp>(loc, box, boxAlloca);
    if (ifUnallocated)
      builder.setInsertionPointAfter(ifUnallocated);
    yield(boxAlloca);
    return;
  }

  TODO(loc,
       "creating reduction/privatization init region for unsupported type");
  return;
}
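For orientation, here is a minimal, hypothetical caller sketch, not part of this commit, showing how a lowering component such as the reduction or privatization processor might drive the helper defined above. The function name and the `initBlock`, `allocArg`, and `moldArg` values are placeholders for state the caller would already have when building its declaration op; passing a null `scalarInitValue` selects the no-default-initialization (privatization) behaviour.

// Hypothetical usage sketch; assumes it is compiled alongside the new files in
// flang/lib/Lower/OpenMP so the local header include resolves.
#include "PrivateReductionUtils.h"

#include "flang/Optimizer/Builder/FIRBuilder.h"

// Fill the by-ref init block and cleanup region for an integer '+' reduction.
// `initBlock`, `allocArg` and `moldArg` stand in for the block and block
// arguments the caller created on its declare-reduction-like op.
static void buildByRefIntAddInit(fir::FirOpBuilder &builder, mlir::Location loc,
                                 mlir::Type byRefTy, mlir::Block *initBlock,
                                 mlir::Value allocArg, mlir::Value moldArg,
                                 mlir::Region &cleanupRegion) {
  // Neutral element of the reduction. A privatizer would pass a null Value
  // instead, so no store is emitted into the private copy.
  mlir::Value zero =
      builder.createIntegerConstant(loc, builder.getI32Type(), 0);
  Fortran::lower::omp::populateByRefInitAndCleanupRegions(
      builder, loc, byRefTy, zero, initBlock, allocArg, moldArg, cleanupRegion);
}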
51 changes: 51 additions & 0 deletions flang/lib/Lower/OpenMP/PrivateReductionUtils.h
@@ -0,0 +1,51 @@
//===-- Lower/OpenMP/PrivateReductionUtils.h --------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
//
//===----------------------------------------------------------------------===//

#ifndef FORTRAN_LOWER_OPENMP_PRIVATEREDUCTIONUTILS_H
#define FORTRAN_LOWER_OPENMP_PRIVATEREDUCTIONUTILS_H

#include "mlir/IR/Location.h"
#include "mlir/IR/Value.h"

namespace mlir {
class Region;
} // namespace mlir

namespace fir {
class FirOpBuilder;
class ShapeShiftOp;
} // namespace fir

namespace Fortran {
namespace lower {
namespace omp {

/// Generate init and cleanup regions suitable for reduction or privatizer
/// declarations. `scalarInitValue` may be nullptr if there is no default
/// initialization (for privatization).
void populateByRefInitAndCleanupRegions(fir::FirOpBuilder &builder,
                                        mlir::Location loc, mlir::Type argType,
                                        mlir::Value scalarInitValue,
                                        mlir::Block *initBlock,
                                        mlir::Value allocatedPrivVarArg,
                                        mlir::Value moldArg,
                                        mlir::Region &cleanupRegion);

/// Generate a fir::ShapeShift op describing the provided boxed array.
fir::ShapeShiftOp getShapeShift(fir::FirOpBuilder &builder, mlir::Location loc,
                                mlir::Value box);

} // namespace omp
} // namespace lower
} // namespace Fortran

#endif // FORTRAN_LOWER_OPENMP_PRIVATEREDUCTIONUTILS_H
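As a second illustrative sketch, again not from the commit and under the same include assumptions as above, `getShapeShift` can be used to embox a freshly allocated copy so that it keeps the extents and lower bounds of the mold box, mirroring the pattern in PrivateReductionUtils.cpp. `emboxLikeMold`, `mold`, and `rawTemp` are hypothetical names.

#include "PrivateReductionUtils.h"

#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Dialect/FIROps.h" // fir::EmboxOp, fir::ShapeShiftOp

// Embox `rawTemp` with the same extents and lower bounds as the loaded mold
// box `mold`, so the copy observes the original Fortran bounds. `rawTemp` is
// assumed to be a raw (unboxed) allocation of a matching array type.
static mlir::Value emboxLikeMold(fir::FirOpBuilder &builder, mlir::Location loc,
                                 mlir::Value mold, mlir::Value rawTemp) {
  fir::ShapeShiftOp shapeShift =
      Fortran::lower::omp::getShapeShift(builder, loc, mold);
  return builder.create<fir::EmboxOp>(
      loc, mold.getType(), rawTemp, shapeShift,
      /*slice=*/mlir::Value{},
      /*typeParams=*/llvm::ArrayRef<mlir::Value>{});
}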