Skip to content

Aco better ready list #172

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ endif()
# If asserts are enabled opt-sched must be built with "IS_DEBUG".
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DIS_DEBUG")

add_compile_options(-g)

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

Expand Down
17 changes: 11 additions & 6 deletions include/opt-sched/Scheduler/aco.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ Last Update: Jan. 2020
#define OPTSCHED_ACO_H

#include "opt-sched/Scheduler/gen_sched.h"
#include "opt-sched/Scheduler/simplified_aco_ds.h"
#include "opt-sched/Scheduler/ready_list.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
Expand All @@ -20,8 +22,6 @@ Last Update: Jan. 2020
namespace llvm {
namespace opt_sched {

typedef double pheromone_t;

enum class DCF_OPT {
OFF,
GLOBAL_ONLY,
Expand Down Expand Up @@ -50,7 +50,7 @@ class ACOScheduler : public ConstrainedScheduler {
private:
pheromone_t &Pheromone(SchedInstruction *from, SchedInstruction *to);
pheromone_t &Pheromone(InstCount from, InstCount to);
pheromone_t Score(SchedInstruction *from, Choice choice);
pheromone_t Score(InstCount FromId, InstCount ToId, HeurType ToHeuristic);
bool shouldReplaceSchedule(InstSchedule *OldSched, InstSchedule *NewSched,
bool IsGlobal);
DCF_OPT ParseDCFOpt(const std::string &opt);
Expand All @@ -73,12 +73,17 @@ class ACOScheduler : public ConstrainedScheduler {
llvm::SetVector<SchedInstruction *> &Visited);

// pheromone Graph Debugging end

Choice SelectInstruction(const llvm::ArrayRef<Choice> &ready,
SchedInstruction *lastInst);
InstCount SelectInstruction(SchedInstruction *lastInst);
void UpdatePheromone(InstSchedule *schedule);
void UpdateACOReadyList(SchedInstruction *Inst);
std::unique_ptr<InstSchedule> FindOneSchedule(InstCount TargetRPCost);
llvm::SmallVector<pheromone_t, 0> pheromone_;
// New data-structure representations
ACOReadyList ReadyLs;
KeysHelper KHelper;
pheromone_t MaxPriorityInv;
InstCount MaxScoringInst;

pheromone_t initialValue_;
bool use_fixed_bias;
int count_;
Expand Down
6 changes: 6 additions & 0 deletions include/opt-sched/Scheduler/defines.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ typedef int64_t Milliseconds;
// Instruction count.
typedef int InstCount;

// Integer type used for ACO heuristic values and for ready-list keys.
using HeurType = unsigned long;

// Floating-point type used for pheromone values.
using pheromone_t = double;

// A generic sentinel value. Should be used with care.
// TODO(max): Get rid of this in favor of type- or purpose-specific sentinels.
const int INVALID_VALUE = -1;
Expand Down
6 changes: 5 additions & 1 deletion include/opt-sched/Scheduler/gen_sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,14 +105,18 @@ class ConstrainedScheduler : public InstScheduler {
// Constructs a constrained scheduler for the given machine and dependence
// graph, with the specified upper bound.
ConstrainedScheduler(DataDepGraph *dataDepGraph, MachineModel *machMdl,
InstCount schedUprBound);
InstCount schedUprBound, bool ACOEn=false);
// Deallocates memory used by the scheduler.
virtual ~ConstrainedScheduler();

// Calculates the schedule and returns it in the passed argument.
virtual FUNC_RESULT FindSchedule(InstSchedule *sched, SchedRegion *rgn) = 0;

protected:
// Whether this instance of ConstrainedScheduler is being used with ACO and
// therefore can use the aco optimizations
bool IsACO;

// The data dependence graph to be scheduled.
DataDepGraph *dataDepGraph_;
// The current schedule.
Expand Down
72 changes: 48 additions & 24 deletions include/opt-sched/Scheduler/ready_list.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,50 @@ Last Update: Sept. 2013
namespace llvm {
namespace opt_sched {

// Describes where one priority's field sits inside a key: how wide the field
// is and where it starts. (Presumably both are measured in bits -- confirm
// against the KeysHelper implementation.)
struct PriorityEntry {
  uint16_t Width;  // size of the field
  uint16_t Offset; // position of the field within the key
};

// Helper that builds priority keys for instructions from a set of scheduling
// priorities. Region-dependent data is prepared by initForRegion().
class KeysHelper {
public:
  // Creates a helper for the given priorities; the per-priority layout table
  // starts out zeroed.
  KeysHelper(SchedPriorities Prirts) : Priorities(Prirts), Entries{} {}

  // Creates a helper from value-initialized priorities.
  KeysHelper() : KeysHelper(SchedPriorities{}) {}

  // Pre-computes the region information.
  void initForRegion(DataDepGraph *DDG);

  // Computes the key for an instruction. IncludeDynamic presumably selects
  // whether dynamic priorities contribute to the key -- confirm against the
  // implementation.
  HeurType computeKey(SchedInstruction *Inst, bool IncludeDynamic) const;

  // Computes the key from an array of values.
  // NOTE(review): presumably one value per priority -- confirm against the
  // implementation.
  HeurType computeKey(const uint64_t *Values) const;

  // Returns the layout information (width and offset) stored at Indx.
  // No bounds checking is done on Indx.
  PriorityEntry getPriorityEntry(int16_t Indx) const { return Entries[Indx]; }

  // Returns the pre-computed key size in bits.
  HeurType getKeySizeInBits() const { return KeysSz; }

  // Returns the pre-computed maximum key value.
  HeurType getMaxValue() const { return MaxValue; }

private:
  // Scheduling priorities used for this KeysHelper.
  SchedPriorities Priorities;

  // Width and offset info for each priority.
  PriorityEntry Entries[MAX_SCHED_PRIRTS];

  // Pre-computed size of all keys for this region.
  uint16_t KeysSz = 0;

  // Pre-computed max key value.
  HeurType MaxValue = 0;
  // NOTE(review): presumably the maxima for node ID and input schedule
  // order -- confirm against the implementation.
  HeurType MaxNID = 0;
  HeurType MaxISO = 0;

  // Records whether this KeysHelper was initialized.
  bool WasInitialized = false;
};

// A priority list of instruction that are ready to schedule at a given point
// during the scheduling process.
class ReadyList {
Expand Down Expand Up @@ -90,6 +134,9 @@ class ReadyList {
// An ordered vector of priorities
SchedPriorities prirts_;

// The KeysHelper for the key computations
KeysHelper KHelper;

// The priority list containing the actual instructions.
PriorityList<SchedInstruction> prirtyLst_;

Expand All @@ -100,36 +147,13 @@ class ReadyList {
llvm::SmallVector<KeyedEntry<SchedInstruction, unsigned long> *, 0>
keyedEntries_;

// Is there a priority scheme that needs to be changed dynamically
// bool isDynmcPrirty_;

// The maximum values for each part of the priority key.
InstCount maxUseCnt_;
InstCount maxCrtclPath_;
InstCount maxScsrCnt_;
InstCount maxLtncySum_;
InstCount maxNodeID_;
InstCount maxInptSchedOrder_;

unsigned long maxPriority_;

// The number of bits for each part of the priority key.
int16_t useCntBits_;
int16_t crtclPathBits_;
int16_t scsrCntBits_;
int16_t ltncySumBits_;
int16_t nodeID_Bits_;
int16_t inptSchedOrderBits_;
int16_t LUCOffset;

// Adds instructions at the bottom of a given list which have not been added
// to the ready list already.
void AddLatestSubList_(LinkedList<SchedInstruction> *lst);

// Calculates a new priority key given an existing key of size keySize by
// appending bitCnt bits holding the value val, assuming val < maxVal.
static void AddPrirtyToKey_(unsigned long &key, int16_t &keySize,
int16_t bitCnt, unsigned long val,
unsigned long maxVal);
};

} // namespace opt_sched
Expand Down
136 changes: 136 additions & 0 deletions include/opt-sched/Scheduler/simplified_aco_ds.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
/*******************************************************************************
Description: Implements smaller more performant data structures for ACO
Author: Paul McHugh
Created: Jun. 2021
*******************************************************************************/

#ifndef OPTSCHED_SIMPLIFIED_ACO_H
#define OPTSCHED_SIMPLIFIED_ACO_H

#include "opt-sched/Scheduler/defines.h"
#include "opt-sched/Scheduler/sched_basic_data.h"
#include <cstdint>

namespace llvm {
namespace opt_sched {

//forward declarations to reduce the number of classes that need to be included
class DataDepGraph;

// Tracks the schedule cycle state: the current cycle and slot, together with
// the issue rate it was constructed with.
class ACOCycleState {
public:
  // Stores the issue rate; cycle and slot start at zero.
  ACOCycleState(InstCount IssueRate) : MIssueRate(IssueRate) {}

  // Issue rate of the CPU (kept here for convenience).
  const InstCount MIssueRate;

  // Current schedule cycle and slot.
  InstCount cycle = 0;
  InstCount slot = 0;
};

// One ready-list record: the instruction's id, the value stored as its
// ready-on time, its heuristic value and its score.
struct ACOReadyListEntry {
  InstCount InstId;
  InstCount ReadyOn;
  HeurType Heuristic;
  pheromone_t Score;
};

// ACO-specific ready list. For each ready instruction it stores the
// instruction id, the cycle it becomes ready on, its dynamic heuristic value
// and its score, each kind of attribute in its own array.
// It uses a (generous) heuristic to decide how much space to allocate. If that
// space is exceeded, it handles the overflow gracefully by making a larger
// allocation and copying the data into it. THIS WILL KILL PERFORMANCE
// (ESPECIALLY ON THE GPU). That is why it also reports that its heuristic
// maximum size was overrun. Strongly consider fixing such warnings.
class ACOReadyList {

protected:

// Used for the sizing heuristic.
InstCount InstrCount;
InstCount PrimaryBufferCapacity;

// NOTE(review): Overflowed presumably records that the primary capacity was
// exceeded -- confirm against the implementation.
bool Overflowed;
// Capacity of the current allocation and number of entries in use, both
// counted in entries.
InstCount CurrentCapacity;
InstCount CurrentSize;

// Allocation pointers, one per element type.
InstCount *IntAllocation;
HeurType *HeurAllocation;
pheromone_t *ScoreAllocation;

// Base pointers of the arrays that hold each ready-list entry attribute
// (presumably each points into the allocation of the matching element type
// above -- confirm against the implementation).
InstCount *InstrBase;
InstCount *ReadyOnBase;
HeurType *HeurBase;
pheromone_t *ScoreBase;

// Decides how large the primary buffer's capacity should be.
InstCount computePrimaryCapacity(InstCount RegionSize);

public:

// Construction, copy, move and destruction; all are defined outside this
// header.
ACOReadyList();
explicit ACOReadyList(InstCount RegionSize);
ACOReadyList(const ACOReadyList &Other);
ACOReadyList &operator=(const ACOReadyList &Other);
ACOReadyList(ACOReadyList &&Other) noexcept;
ACOReadyList &operator=(ACOReadyList &&Other) noexcept;
~ACOReadyList();

// Stores the total score of all instructions in the ready list.
pheromone_t ScoreSum;

// Gets the total size in bytes of the storage, computed from the current
// capacity.
size_t getTotalSizeInBytes() const;

// Gets the number of instructions in the ready list.
InstCount getReadyListSize() const { return CurrentSize; }

// IMPORTANT NOTE: ADDING OR REMOVING INSTRUCTIONS CAN/WILL CAUSE THE
// INSTRUCTIONS IN THE READY LIST TO BE MOVED TO NEW INDICES.
// DO NOT RELY ON AN INSTRUCTION'S INDEX IN THE READY LIST STAYING THE SAME
// FOLLOWING A REMOVAL/INSERTION.
// Get a pointer to one attribute of the instruction at an index. The
// accessors perform no bounds checking.
InstCount *getInstIdAtIndex(InstCount Indx) const;
InstCount *getInstReadyOnAtIndex(InstCount Indx) const;
HeurType *getInstHeuristicAtIndex(InstCount Indx) const;
pheromone_t *getInstScoreAtIndex(InstCount Indx) const;

// Add a new instruction to the ready list.
void addInstructionToReadyList(const ACOReadyListEntry &Entry);
// Remove the instruction at an index and return its entry.
ACOReadyListEntry removeInstructionAtIndex(InstCount Indx);
// Reset the list to empty (sets the size and ScoreSum to zero).
void clearReadyList();

};

// ----
// ACOReadyList
// ----
// Returns the storage size in bytes for CurrentCapacity entries, where each
// entry accounts for two InstCount-sized values, one heuristic value and one
// score.
inline size_t ACOReadyList::getTotalSizeInBytes() const {
  const size_t BytesPerEntry = 2 * sizeof(*IntAllocation) +
                               sizeof(*HeurAllocation) +
                               sizeof(*ScoreAllocation);
  return BytesPerEntry * CurrentCapacity;
}

// Returns the address of the instruction-id slot at position Indx.
// No bounds checking is performed.
inline InstCount *ACOReadyList::getInstIdAtIndex(InstCount Indx) const {
  InstCount *const Slot = InstrBase + Indx;
  return Slot;
}

// Returns the address of the ready-on slot at position Indx.
// No bounds checking is performed.
inline InstCount *ACOReadyList::getInstReadyOnAtIndex(InstCount Indx) const {
  InstCount *const Slot = ReadyOnBase + Indx;
  return Slot;
}

// Returns the address of the heuristic slot at position Indx.
// No bounds checking is performed.
inline HeurType *ACOReadyList::getInstHeuristicAtIndex(InstCount Indx) const {
  HeurType *const Slot = HeurBase + Indx;
  return Slot;
}

// Returns the address of the score slot at position Indx.
// No bounds checking is performed.
inline pheromone_t *ACOReadyList::getInstScoreAtIndex(InstCount Indx) const {
  pheromone_t *const Slot = ScoreBase + Indx;
  return Slot;
}

// Empties the ready list by zeroing the running score total and the entry
// count. No other member is modified.
inline void ACOReadyList::clearReadyList() {
  ScoreSum = 0;
  CurrentSize = 0;
}

} // namespace opt_sched
} // namespace llvm

#endif
1 change: 1 addition & 0 deletions lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ set(OPTSCHED_SRCS Scheduler/aco.cpp
Scheduler/relaxed_sched.cpp
Scheduler/sched_basic_data.cpp
Scheduler/sched_region.cpp
Scheduler/simplified_aco_ds.cpp
Scheduler/stats.cpp
Wrapper/OptimizingScheduler.cpp
Wrapper/OptSchedMachineWrapper.cpp
Expand Down
Loading