base for nvptx sched #1

Open · wants to merge 1 commit into ptx-base
1 change: 1 addition & 0 deletions llvm/lib/Target/NVPTX/CMakeLists.txt
@@ -31,6 +31,7 @@ set(NVPTXCodeGen_sources
  NVPTXPrologEpilogPass.cpp
  NVPTXRegisterInfo.cpp
  NVPTXReplaceImageHandles.cpp
  NVPTXSchedStrategy.cpp
  NVPTXSubtarget.cpp
  NVPTXTargetMachine.cpp
  NVPTXTargetTransformInfo.cpp
306 changes: 306 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXSchedStrategy.cpp
@@ -0,0 +1,306 @@
//===-- NVPTXSchedStrategy.cpp - NVPTX Scheduler Strategy ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This contains a MachineSchedStrategy implementation for maximizing warp
/// occupancy on NVPTX hardware.
///
/// This pass will apply multiple scheduling stages to the same function.
/// Regions are first recorded in PTXScheduleDAGMILive::schedule. The actual
/// entry point for the scheduling of those regions is
/// PTXScheduleDAGMILive::runSchedStages.

/// Generally, the reason for having multiple scheduling stages is to account
/// for the kernel-wide effect of register usage on occupancy. Usually, only a
/// few scheduling regions will have register pressure high enough to limit
/// occupancy for the kernel, so constraints can be relaxed to improve ILP in
/// other regions.
///
//===----------------------------------------------------------------------===//
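
// Illustrative only: a rough sketch of the stage-driven flow described above.
// PTXScheduleDAGMILive and its stages are not part of this file, so the names
// and loop structure below are assumptions rather than the actual code:
//
//   void PTXScheduleDAGMILive::runSchedStages() {
//     for (auto &Stage : SchedStages)          // e.g. an occupancy-first pass,
//       for (auto &Region : RecordedRegions)   //   then ILP-relaxation passes
//         if (Stage.initRegion(Region))        // skip regions whose pressure
//           scheduleRegion(Region, Stage);     //   does not limit occupancy
//   }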

#include "NVPTXSchedStrategy.h"
#include "NVPTXSubtarget.h"
#include "NVPTXMachineFunctionInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"

#define DEBUG_TYPE "machine-scheduler"

using namespace llvm;

NVPTXSchedStrategy::NVPTXSchedStrategy(const MachineSchedContext *C)
    : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
      HasHighPressure(false) {}

void NVPTXSchedStrategy::initialize(ScheduleDAGMI *DAG) {
  GenericScheduler::initialize(DAG);

  MF = &DAG->MF;

  const NVPTXSubtarget &ST = MF->getSubtarget<NVPTXSubtarget>();

  NVPTXMachineFunctionInfo &MFI = *MF->getInfo<NVPTXMachineFunctionInfo>();
  // Set the initial TargetOccupancy to the maximum occupancy that we can
  // achieve for this function. This effectively sets a lower bound on the
  // 'Critical' register limits in the scheduler.
  // Allow for lower occupancy targets if the kernel is warp limited or memory
  // bound and the relaxed occupancy feature is in use.
  // TargetOccupancy = MFI.getOccupancy();
  //   RelaxedOcc ? MFI.getMinAllowedOccupancy() : MFI.getOccupancy();
}
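
// A minimal sketch (an assumption, not part of this patch) of how a target
// occupancy in warps per SM could translate into the per-thread register
// budget that the 'Critical' limits are meant to protect. The register-file
// size is illustrative; the real value depends on the SM architecture.
#if 0
static unsigned estimateRegBudgetPerThread(unsigned TargetOccupancy) {
  const unsigned RegsPerSM = 64 * 1024; // 32-bit registers per SM (example)
  const unsigned WarpSize = 32;         // threads per warp
  if (TargetOccupancy == 0)
    return RegsPerSM / WarpSize;        // avoid division by zero
  return RegsPerSM / (TargetOccupancy * WarpSize);
}
#endif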

void NVPTXSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                       bool AtTop,
                                       const RegPressureTracker &RPTracker,
                                       const NVPTXRegisterInfo *SRI,
                                       unsigned SGPRPressure,
                                       unsigned VGPRPressure) {
  Cand.SU = SU;
  Cand.AtTop = AtTop;

  if (!DAG->isTrackingPressure())
    return;

  // getDownwardPressure() and getUpwardPressure() make temporary changes to
  // the tracker, so we need to pass those functions a non-const copy.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  Pressure.clear();
  MaxPressure.clear();

  if (AtTop)
    TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
  else {
    // FIXME: I think for bottom-up scheduling, the register pressure is cached
    // and can be retrieved by DAG->getPressureDiff(SU).
    TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
  }

  unsigned NewSGPRPressure = Pressure[NVPTX::RegisterPressureSets::Int16Regs];
  unsigned NewVGPRPressure = Pressure[NVPTX::RegisterPressureSets::Float32Regs];

  // If two instructions increase the pressure of different register sets
  // by the same amount, the generic scheduler will prefer to schedule the
  // instruction that increases the set with the least amount of registers,
  // which in our case would be SGPRs. This is rarely what we want, so
  // when we report excess/critical register pressure, we do it either
  // only for VGPRs or only for SGPRs.

  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
  const unsigned MaxVGPRPressureInc = 16;
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
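
  // For example (hypothetical numbers): with VGPRExcessLimit = 64 and a
  // current Float32Regs pressure of 50, ShouldTrackVGPRs is already true
  // because 50 + 16 >= 64, so only Float32Regs excess is reported for this
  // node even if Int16Regs pressure is also close to its own limit.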

  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
  // to increase the likelihood we don't go over the limits. We should improve
  // the analysis to look through dependencies to find the path with the least
  // register pressure.

  // We only need to update the RPDelta for instructions that increase register
  // pressure. Instructions that decrease or keep reg pressure the same will be
  // marked as RegExcess in tryCandidate() when they are compared with
  // instructions that increase the register pressure.
  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
    HasHighPressure = true;
    Cand.RPDelta.Excess =
        PressureChange(NVPTX::RegisterPressureSets::Float32Regs);
    Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
  }

  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
    HasHighPressure = true;
    Cand.RPDelta.Excess =
        PressureChange(NVPTX::RegisterPressureSets::Int16Regs);
    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
  }

  // Register pressure is considered 'CRITICAL' if it is approaching a value
  // that would reduce the warp occupancy for the execution unit. When register
  // pressure is 'CRITICAL', increasing SGPR or VGPR pressure has the same
  // cost, so we don't need to prefer one over the other.

  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;

  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    HasHighPressure = true;
    if (SGPRDelta > VGPRDelta) {
      Cand.RPDelta.CriticalMax =
          PressureChange(NVPTX::RegisterPressureSets::Int16Regs);
      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
    } else {
      Cand.RPDelta.CriticalMax =
          PressureChange(NVPTX::RegisterPressureSets::Float32Regs);
      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
    }
  }
}
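
// Note for readers of this patch: the RPDelta.Excess and RPDelta.CriticalMax
// values filled in by initCandidate() are consumed by the inherited
// GenericScheduler::tryCandidate(), which (among its other heuristics) prefers
// the candidate with the smaller excess-pressure increase and then the smaller
// critical-pressure increase, so nodes that stay under the limits win over
// nodes that push a pressure set past them.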

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeFromQueue()
void NVPTXSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
                                           const CandPolicy &ZonePolicy,
                                           const RegPressureTracker &RPTracker,
                                           SchedCandidate &Cand) {
  const NVPTXRegisterInfo *SRI = static_cast<const NVPTXRegisterInfo*>(TRI);
  ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
  unsigned SGPRPressure = 0;
  unsigned VGPRPressure = 0;
  if (DAG->isTrackingPressure()) {
    SGPRPressure = Pressure[NVPTX::RegisterPressureSets::Int16Regs];
    VGPRPressure = Pressure[NVPTX::RegisterPressureSets::Float32Regs];
  }
  ReadyQueue &Q = Zone.Available;
  for (SUnit *SU : Q) {
    SchedCandidate TryCand(ZonePolicy);
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
                  VGPRPressure);
    // Pass SchedBoundary only when comparing nodes from the same boundary.
    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
    tryCandidate(Cand, TryCand, ZoneArg);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(Zone.DAG, SchedModel);
      Cand.setBest(TryCand);
      LLVM_DEBUG(traceCandidate(Cand));
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional()
SUnit *NVPTXSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  // Set the bottom-up policy based on the state of the current bottom zone and
  // the instructions outside the zone, including the top zone.
  CandPolicy BotPolicy;
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  // Set the top-down policy based on the state of the current top zone and
  // the instructions outside the zone, including the bottom zone.
  CandPolicy TopPolicy;
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

  // See if BotCand is still valid (because we previously scheduled from Top).
  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
      BotCand.Policy != BotPolicy) {
    BotCand.reset(CandPolicy());
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(BotCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
      assert(TCand.SU == BotCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Check if the top Q has a better candidate.
  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
      TopCand.Policy != TopPolicy) {
    TopCand.reset(CandPolicy());
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(TopCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
      assert(TCand.SU == TopCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Pick best from BotCand and TopCand.
  LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
             dbgs() << "Bot Cand: "; traceCandidate(BotCand););
  SchedCandidate Cand = BotCand;
  TopCand.Reason = NoCand;
  tryCandidate(Cand, TopCand, nullptr);
  if (TopCand.Reason != NoCand) {
    Cand.setBest(TopCand);
  }
  LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););

  IsTopNode = Cand.AtTop;
  return Cand.SU;
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNode()
SUnit *NVPTXSchedStrategy::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  do {
    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    } else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                    << *SU->getInstr());
  return SU;
}

#if 0
static ScheduleDAGInstrs *createNVPTXMachineScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMILive(C, std::make_unique<NVPTXSchedStrategy>(C));
}

static MachineSchedRegistry NVPTXSchedRegistry("nvptx-base",
                                               "Run NVPTX's custom scheduler",
                                               createNVPTXMachineScheduler);
#endif
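
// Usage sketch (an assumption, relevant once the block above is enabled): the
// MachineSchedRegistry entry makes this strategy selectable by name through
// the common machine-scheduler option, e.g.
//
//   llc -march=nvptx64 -enable-misched -misched=nvptx-base kernel.ll
//
// assuming the pre-RA machine scheduler itself is enabled for the NVPTX
// subtarget.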