Skip to content

Commit

Permalink
BC changes and autotuner support for GPU backend
Browse files Browse the repository at this point in the history
  • Loading branch information
AjayBrahmakshatriya committed Apr 21, 2020
1 parent 6754d68 commit 08c3ae8
Show file tree
Hide file tree
Showing 27 changed files with 968 additions and 101 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,6 @@
.settings/
.idea/
build/
autotune/*.json
*.graphit_bin
*.graphit_sbin
1 change: 1 addition & 0 deletions autotune/compile_gpu.sh
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
# Build the GPU binary for the algorithm that is currently being tuned.
#
# Abort on the first failing command: without this, a failed GraphIt compile
# would fall through to nvcc, which could then rebuild from a stale test.cu
# left behind by an earlier run and report a successful build.
set -e

# Step 1: translate algotorun.gt with the schedule in schedule_0 into test.cu.
python ../build/bin/graphitc.py -a algotorun.gt -f schedule_0 -o test.cu
# Step 2: compile test.cu into ./test for compute_70/sm_70 (NUM_CTA=80, CTA_SIZE=512, FRONTIER_MULTIPLIER=3).
/usr/local/cuda/bin/nvcc -ccbin /usr/bin/c++ -std=c++11 -I ../src/runtime_lib/ -o test -Xcompiler "-w" -O3 test.cu -DNUM_CTA=80 -DCTA_SIZE=512 -Wno-deprecated-gpu-targets -gencode arch=compute_70,code=sm_70 --use_fast_math -Xptxas "-v -dlcm=ca --maxrregcount=64" -rdc=true -DFRONTIER_MULTIPLIER=3
# Alternative build line for compute_61/sm_61 (NUM_CTA=60, FRONTIER_MULTIPLIER=2); swap it with the line above to use it.
#/usr/local/cuda/bin/nvcc -ccbin /usr/bin/c++ -std=c++11 -I ../src/runtime_lib/ -o test -Xcompiler "-w" -O3 test.cu -DNUM_CTA=60 -DCTA_SIZE=512 -Wno-deprecated-gpu-targets -gencode arch=compute_61,code=sm_61 --use_fast_math -Xptxas "-v -dlcm=ca --maxrregcount=64" -rdc=true -DFRONTIER_MULTIPLIER=2
41 changes: 41 additions & 0 deletions autotune/gpu_apps/bfs.gt
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
% Element types used by the graph below.
element Vertex end
element Edge end

% Edge set loaded from the path given as the first program argument.
const edges : edgeset{Edge}(Vertex,Vertex) = load (argv[1]);
% Vertex set obtained from the loaded edge set.
const vertices : vertexset{Vertex} = edges.getVertices();
% Per-vertex parent id, initialised to -1; toFilter treats -1 as "no parent assigned yet".
const parent : vector{Vertex}(int) = -1;


% Edge update for BFS: record src as the parent of dst.
func updateEdge(src : Vertex, dst : Vertex)
parent[dst] = src;
end

% Destination filter for BFS: output is true only while v still has the
% initial parent value -1, i.e. no parent has been written for it yet.
func toFilter(v : Vertex) -> output : bool
output = parent[v] == -1;
end

% Restore v's parent entry to the initial value -1 (applied to every vertex
% at the start of each timed run in main).
func reset(v: Vertex)
parent[v] = -1;
end

% BFS driver: repeats a timed traversal for every value of the range 0:10
% and prints the elapsed time of each run.
func main()
for trail in 0:10
% Fresh frontier for this run; the start vertex is added explicitly below.
var frontier : vertexset{Vertex} = new vertexset{Vertex}(0);
startTimer();
% Clear parent[] left over from the previous run.
vertices.apply(reset);
% The start vertex id comes from the second program argument.
var start_vertex : int = atoi(argv[2]);
frontier.addVertex(start_vertex);
% The start vertex is its own parent, so toFilter no longer selects it.
parent[start_vertex] = start_vertex;

% #s0# and #s1# are the labels a schedule refers to (e.g. "s0:s1").
% Keep expanding until the frontier is empty.
#s0# while (frontier.getVertexSetSize() != 0)
% Traverse edges leaving the frontier into vertices accepted by toFilter,
% applying updateEdge; the result becomes the next frontier.
% NOTE(review): the meaning of the trailing `true` argument is not visible here - confirm.
#s1# var output : vertexset{Vertex} = edges.from(frontier).to(toFilter).applyModified(updateEdge,parent, true);
% Release the old frontier before replacing it with the new one.
delete frontier;
frontier = output;
end
var elapsed_time : float = stopTimer();
delete frontier;
print "elapsed time: ";
print elapsed_time;
end
end

55 changes: 55 additions & 0 deletions autotune/gpu_apps/cc.gt
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
% Element types used by the graph below.
element Vertex end
element Edge end

% Edge set loaded from the path given as the first program argument.
const edges : edgeset{Edge}(Vertex,Vertex) = load (argv[1]);

% Vertex set obtained from the loaded edge set.
const vertices : vertexset{Vertex} = edges.getVertices();
% Per-vertex component label; init overwrites each entry with the vertex's own id.
const IDs : vector{Vertex}(int) = 1;

% Single-element flag vector: pjump sets update[0] to 1 when it changes a label.
const update: vector[1](int);

% Edge update for connected components. Reads the current labels of src and
% dst, then uses those labels as indices into IDs: IDs[dst_id] is min-reduced
% with IDs[src_id], and afterwards IDs[src_id] is min-reduced with IDs[dst_id].
func updateEdge(src : Vertex, dst : Vertex)
var src_id: Vertex = IDs[src];
var dst_id: Vertex = IDs[dst];

IDs[dst_id] min= IDs[src_id];
IDs[src_id] min= IDs[dst_id];
end

% Give v its own id as its initial label.
func init(v : Vertex)
IDs[v] = v;
end

% Label-shortening step for v: y is v's label and x is the label of y.
% When they differ, v's label is replaced by x and update[0] is set to 1 so
% that the caller's loop runs another pass.
func pjump(v: Vertex)
var y: Vertex = IDs[v];
var x: Vertex = IDs[y];
if x != y
IDs[v] = x;
update[0] = 1;
end
end

% Connected-components driver: repeats a timed run for every value of the
% range 0:10 and prints the elapsed time of each run.
func main()
var n : int = edges.getVertices();
for trail in 0:10
% NOTE(review): presumably a frontier that initially holds all n vertices -
% confirm against the vertexset constructor semantics.
var frontier : vertexset{Vertex} = new vertexset{Vertex}(n);
startTimer();
% Reset every label to the vertex's own id.
vertices.apply(init);
% #s0# and #s1# are the labels a schedule refers to (e.g. "s0:s1").
% Iterate until applyModified produces an empty frontier.
#s0# while (frontier.getVertexSetSize() != 0)
% Apply updateEdge along edges leaving the frontier; the returned vertex set
% becomes the next frontier.
#s1# var output: vertexset{Vertex} = edges.from(frontier).applyModified(updateEdge,IDs);
% Release the old frontier before replacing it with the new one.
delete frontier;
frontier = output;
% Force at least one pjump pass, then repeat until a pass changes nothing.
update[0] = 1;
while update[0] != 0
update[0] = 0;
vertices.apply(pjump);
end
end
var elapsed_time : float = stopTimer();
delete frontier;
print "elapsed time: ";
print elapsed_time;
end
end


53 changes: 53 additions & 0 deletions autotune/gpu_apps/pagerank.gt
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
% Element types used by the graph below.
element Vertex end
element Edge end
% Edge set loaded from the path given as the first program argument.
const edges : edgeset{Edge}(Vertex,Vertex) = load (argv[1]);
% Vertex set obtained from the loaded edge set.
const vertices : vertexset{Vertex} = edges.getVertices();
% Rank from the previous iteration, initialised to 1/|V|.
const old_rank : vector{Vertex}(float) = 1.0/vertices.size();
% Rank accumulator for the current iteration.
const new_rank : vector{Vertex}(float) = 0.0;
% Out-degree of every vertex, taken from the edge set.
const out_degree : vector {Vertex}(int) = edges.getOutDegrees();
% Per-vertex contribution (old_rank / out_degree), filled by computeContrib.
const contrib : vector{Vertex}(float) = 0.0;
% Per-vertex absolute rank change, written by updateVertex.
const error : vector{Vertex}(float) = 0.0;
% Damping factor used in updateVertex.
const damp : float = 0.85;
% Constant term added to every rank: (1 - damp) / |V|.
const beta_score : float = (1.0 - damp) / vertices.size();

% Compute the contribution v passes along each outgoing edge: its previous
% rank divided by its out-degree.
% NOTE(review): no guard for out_degree[v] == 0 is visible here - confirm that is acceptable.
func computeContrib(v : Vertex)
contrib[v] = old_rank[v] / out_degree[v];
end

% Edge update for PageRank: add src's contribution to dst's new rank.
func updateEdge(src : Vertex, dst : Vertex)
new_rank[dst] += contrib[src];
end

% Finish one PageRank iteration for v:
%   1. turn the accumulated contributions into the damped rank,
%   2. record the absolute change against the previous rank in error[v],
%   3. make the new rank the previous rank for the next iteration,
%   4. clear the accumulator so updateEdge starts from 0.0 again.
% The previously declared local old_score was never read and has been removed.
func updateVertex(v : Vertex)
    new_rank[v] = beta_score + damp*(new_rank[v]);
    % Must run before old_rank[v] is overwritten below.
    error[v] = fabs(new_rank[v] - old_rank[v]);
    old_rank[v] = new_rank[v];
    new_rank[v] = 0.0;
end

% Print the stored rank of v. Not called from main in this file.
func printRank(v : Vertex)
print old_rank[v];
end

% Restore v's rank state to its initial values (1/|V| and 0.0) so that each
% timed run in main starts from the same point.
func reset(v: Vertex)
old_rank[v] = 1.0/vertices.size();
new_rank[v] = 0.0;
end

% PageRank driver: repeats a timed run for every value of the range 0:10 and
% prints the elapsed time of each run.
func main()
for trail in 0:10
startTimer();
% Start every run from the initial rank values.
vertices.apply(reset);
% #s0# and #s1# are the labels a schedule refers to (e.g. "s0:s1").
% Fixed number of iterations given by the range 0:20; error[] is not consulted here.
#s0# for i in 0:20
vertices.apply(computeContrib);
% Accumulate contributions along every edge.
#s1# edges.apply(updateEdge);
vertices.apply(updateVertex);
end

var elapsed_time : float = stopTimer();
print "elapsed time: ";
print elapsed_time;
end
end

138 changes: 104 additions & 34 deletions autotune/graphit_gpu_autotuner.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,57 +29,117 @@ def manipulator(self):
Define the search space by creating a
ConfigurationManipulator
"""
manipulator = ConfigurationManipulator()
if self.args.edge_only:
#manipulator.add_parameter(EnumParameter('LB_0', ['VERTEX_BASED','TWC', 'TWCE', 'WM', 'CM', 'STRICT', 'EDGE_ONLY']))
manipulator.add_parameter(EnumParameter('LB_0', ['VERTEX_BASED','TWC', 'TWCE', 'WM', 'CM', 'EDGE_ONLY']))
manipulator.add_parameter(EnumParameter('EB_0', ['ENABLED', 'DISABLED']))
manipulator.add_parameter(IntegerParameter('BS_0', 1, 20))
else:
#manipulator.add_parameter(EnumParameter('LB_0', ['VERTEX_BASED','TWC', 'TWCE', 'WM', 'CM', 'STRICT']))
manipulator.add_parameter(EnumParameter('LB_0', ['VERTEX_BASED','TWC', 'TWCE', 'WM', 'CM']))

manipulator.add_parameter(EnumParameter('direction_0', ['PUSH', 'PULL']))
manipulator.add_parameter(EnumParameter('dedup_0', ['ENABLED', 'DISABLED']))
manipulator.add_parameter(EnumParameter('frontier_output_0', ['FUSED', 'UNFUSED_BITMAP', 'UNFUSED_BOOLMAP']))
manipulator.add_parameter(EnumParameter('pull_rep_0', ['BITMAP', 'BOOLMAP']))

if self.args.hybrid_schedule:
#manipulator.add_parameter(EnumParameter('LB_1', ['VERTEX_BASED','TWC', 'TWCE', 'WM', 'CM', 'STRICT']))
manipulator.add_parameter(EnumParameter('LB_1', ['VERTEX_BASED','TWC', 'TWCE', 'WM', 'CM']))

manipulator.add_parameter(EnumParameter('direction_1', ['PUSH', 'PULL']))
manipulator.add_parameter(EnumParameter('dedup_1', ['ENABLED', 'DISABLED']))
manipulator.add_parameter(EnumParameter('frontier_output_1', ['FUSED', 'UNFUSED_BITMAP', 'UNFUSED_BOOLMAP']))
manipulator.add_parameter(EnumParameter('pull_rep_1', ['BITMAP', 'BOOLMAP']))

# We also choose the hybrid schedule threshold here
manipulator.add_parameter(IntegerParameter('threshold', 0, 1000))



manipulator = ConfigurationManipulator()
manipulator.add_parameter(
EnumParameter('LB',
['VERTEX_BASED','TWC', 'TWCE', 'WM', 'CM', 'STRICT']))

#'edge-aware-dynamic-vertex-parallel' not supported with the latest g++ cilk implementation
manipulator.add_parameter(EnumParameter('direction', ['PUSH', 'PULL']))
manipulator.add_parameter(EnumParameter('dedup', ['ENABLED', 'DISABLED']))
manipulator.add_parameter(EnumParameter('frontier_output', ['FUSED', 'UNFUSED_BITMAP', 'UNFUSED_BOOLMAP']))
# adding new parameters for PriorityGraph (Ordered GraphIt)
manipulator.add_parameter(IntegerParameter('delta', 1, self.args.max_delta))
# Currently since delta is allowed to be configured only once for the entire program, we will make a single decision even if the schedule is hybrid
if self.args.tune_delta:
manipulator.add_parameter(IntegerParameter('delta', 1, self.args.max_delta))


if self.args.kernel_fusion:
manipulator.add_parameter(EnumParameter('kernel_fusion', ['DISABLED', 'ENABLED']))

manipulator.add_parameter(EnumParameter('kernel_fusion', ['DISABLED', 'ENABLED']))
manipulator.add_parameter(EnumParameter('pull_rep', ['BITMAP', 'BOOLMAP']))
return manipulator


def write_cfg_to_schedule(self, cfg):
#write into a schedule file the configuration
direction = cfg['direction']
delta = cfg['delta']
dedup = cfg['dedup']
frontier_output = cfg['frontier_output']
kernel_fusion = cfg['kernel_fusion']
pull_rep = cfg['pull_rep']
LB = cfg['LB']

direction_0 = cfg['direction_0']
if self.args.tune_delta:
delta_0 = cfg['delta']
dedup_0 = cfg['dedup_0']
frontier_output_0 = cfg['frontier_output_0']
pull_rep_0 = cfg['pull_rep_0']
LB_0 = cfg['LB_0']

new_schedule = "schedule:\n"

new_schedule += "SimpleGPUSchedule s1;\n";
new_schedule += "s1.configLoadBalance(" + LB + ");\n"
new_schedule += "s1.configFrontierCreation(" + frontier_output + ");\n"
if direction == "PULL":
new_schedule += "s1.configDirection(PULL, " + pull_rep + ");\n"
if LB_0 == "EDGE_ONLY" and cfg['EB_0'] == "ENABLED":
new_schedule += "s1.configLoadBalance(EDGE_ONLY, BLOCKED, " + str(int(int(self.args.num_vertices)/cfg['BS_0'])) + ");\n"
direction_0 = "PUSH"
else:
new_schedule += "s1.configLoadBalance(" + LB_0 + ");\n"
new_schedule += "s1.configFrontierCreation(" + frontier_output_0 + ");\n"
if direction_0 == "PULL":
new_schedule += "s1.configDirection(PULL, " + pull_rep_0 + ");\n"
else:
new_schedule += "s1.configDirection(PUSH);\n"
new_schedule += "s1.configDelta(" + str(delta) + ");\n"
new_schedule += "s1.configDeduplication(" + dedup + ");\n"
new_schedule += "program->applyGPUSchedule(\"s0:s1\", s1);\n"
new_schedule += "SimpleGPUSchedule s0;\n"
new_schedule += "s0.configKernelFusion(" + kernel_fusion + ");\n"
# We will currently not apply this. Use this after kernel fusion is fixed
#new_schedule += "program->applyGPUSchedule(\"s0\", s0);\n"
if self.args.tune_delta:
new_schedule += "s1.configDelta(" + str(delta_0) + ");\n"
new_schedule += "s1.configDeduplication(" + dedup_0 + ");\n"

if self.args.hybrid_schedule:
direction_1 = cfg['direction_1']
if self.args.tune_delta:
delta_1 = cfg['delta']
dedup_1 = cfg['dedup_1']
frontier_output_1 = cfg['frontier_output_1']
pull_rep_1 = cfg['pull_rep_1']
LB_1 = cfg['LB_1']

#threshold = self.args.hybrid_threshold
threshold = cfg['threshold']

new_schedule += "SimpleGPUSchedule s2;\n";
new_schedule += "s2.configLoadBalance(" + LB_1 + ");\n"
new_schedule += "s2.configFrontierCreation(" + frontier_output_1 + ");\n"
if direction_1 == "PULL":
new_schedule += "s2.configDirection(PULL, " + pull_rep_1 + ");\n"
else:
new_schedule += "s2.configDirection(PUSH);\n"
if self.args.tune_delta:
new_schedule += "s2.configDelta(" + str(delta_1) + ");\n"
new_schedule += "s2.configDeduplication(" + dedup_1 + ");\n"

new_schedule += "HybridGPUSchedule h1(INPUT_VERTEXSET_SIZE, " + str(threshold/1000) + ", s1, s2);\n"
new_schedule += "program->applyGPUSchedule(\"s0:s1\", h1);\n"

else:
new_schedule += "program->applyGPUSchedule(\"s0:s1\", s1);\n"



if self.args.kernel_fusion:
kernel_fusion = cfg['kernel_fusion']
new_schedule += "SimpleGPUSchedule s0;\n"
new_schedule += "s0.configKernelFusion(" + kernel_fusion + ");\n"
new_schedule += "program->applyGPUSchedule(\"s0\", s0);\n"

print (cfg)
print (new_schedule)
#print (new_schedule)

self.new_schedule_file_name = 'schedule_0'
print (self.new_schedule_file_name)
#print (self.new_schedule_file_name)
f1 = open (self.new_schedule_file_name, 'w')
f1.write(new_schedule)
f1.close()
Expand Down Expand Up @@ -174,7 +234,7 @@ def compile_and_run(self, desired_result, input, limit):
Compile and run a given configuration then
return performance
"""
print ("input graph: " + self.args.graph)
# print ("input graph: " + self.args.graph)

cfg = desired_result.configuration.data

Expand All @@ -190,7 +250,7 @@ def compile_and_run(self, desired_result, input, limit):
def save_final_config(self, configuration):
"""called at the end of tuning"""
print ('Final Configuration:', configuration.data)
self.manipulator().save_to_file(configuration.data,'final_config.json')
self.manipulator().save_to_file(configuration.data, self.args.final_config)



Expand All @@ -200,11 +260,21 @@ def save_final_config(self, configuration):
parser.add_argument('--start_vertex', type=str, default="0", help="Start vertex if applicable")

parser.add_argument('--algo_file', type=str, required=True, help='input algorithm file')
parser.add_argument('--final_config', type=str, help='Final config file', default="final_config.json")
parser.add_argument('--default_schedule_file', type=str, required=False, default="", help='default schedule file')
parser.add_argument('--runtime_limit', type=float, default=300, help='a limit on the running time of each program')
parser.add_argument('--max_delta', type=int, default=800000, help='maximum delta used for priority coarsening')
parser.add_argument('--memory_limit', type=int, default=-1,help='set memory limit on unix based systems [does not quite work yet]')
parser.add_argument('--killed_process_report_runtime_limit', type=int, default=0, help='reports runtime_limit when a process is killed by the shell. 0 for disable (default), 1 for enable')

def _parse_bool_flag(text):
    """Convert a command-line string into a bool.

    argparse's ``type=bool`` calls ``bool()`` on the raw string, so every
    non-empty value -- including "False" and "0" -- is parsed as True and an
    option can never be switched off explicitly. This converter maps the usual
    "off" spellings (case-insensitive) and the empty string to False. Every
    other value stays True, which keeps previously working invocations such as
    ``--kernel_fusion True`` or ``--kernel_fusion 1`` unchanged.
    """
    return text.strip().lower() not in ('', '0', 'false', 'f', 'no', 'n', 'off')

# Optional tuning dimensions. Each defaults to False and takes an explicit value.
parser.add_argument('--kernel_fusion', type=_parse_bool_flag, default=False, help='Choose if you want to also tune kernel fusion')
parser.add_argument('--hybrid_schedule', type=_parse_bool_flag, default=False, help='Choose if you want to also explore hybrid schedules')
parser.add_argument('--edge_only', type=_parse_bool_flag, default=False, help='Choose if you want to also enable EDGE_ONLY schedules')
# Used to derive the block count for blocked EDGE_ONLY schedules.
parser.add_argument('--num_vertices', type=int, required=True, help='Supply number of vertices in the graph')
parser.add_argument('--tune_delta', type=_parse_bool_flag, default=False, help='Also tune the delta parameter')
parser.add_argument('--hybrid_threshold', type=int, default=1000, help='Threshold value on 1000')


args = parser.parse_args()
# pass the arguments into the tuner
GraphItTuner.main(args)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class AssignFunctionContext : mir::MIRVisitor {
void visit(mir::UpdatePriorityEdgeSetApplyExpr::Ptr);
void visit(mir::PullEdgeSetApplyExpr::Ptr);
void visit(mir::VertexSetApplyExpr::Ptr);
void visit(mir::VertexSetWhereExpr::Ptr);
private:
MIRContext *mir_context_;
};
Expand Down
11 changes: 9 additions & 2 deletions include/graphit/backend/codegen_gpu/codegen_gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ class CodeGenGPU: public mir::MIRVisitor{
void genPropertyArrayAlloca(mir::VarDecl::Ptr);

void genFusedWhileLoop(mir::WhileStmt::Ptr);
void genEdgeSetApplyExpr(mir::EdgeSetApplyExpr::Ptr, mir::Expr::Ptr);
virtual void genEdgeSetApplyExpr(mir::EdgeSetApplyExpr::Ptr, mir::Expr::Ptr);

EdgesetApplyFunctionDeclGenerator* edgeset_apply_func_gen_;

Expand Down Expand Up @@ -142,6 +142,12 @@ class CodeGenGPU: public mir::MIRVisitor{

virtual void visit(mir::EnqueueVertex::Ptr) override;

virtual void visit(mir::VertexSetWhereExpr::Ptr) override;


virtual void visit(mir::ListType::Ptr) override;
virtual void visit(mir::ListAllocExpr::Ptr) override;

void genPriorityUpdateOperator(mir::PriorityUpdateOperator::Ptr);

};
Expand Down Expand Up @@ -179,14 +185,15 @@ class CodeGenGPUFusedKernel: public CodeGenGPU {
}
current_while_stmt->used_priority_queues.push_back(var);
}
void genEdgeSetApplyExpr(mir::EdgeSetApplyExpr::Ptr, mir::Expr::Ptr);
virtual void genEdgeSetApplyExpr(mir::EdgeSetApplyExpr::Ptr, mir::Expr::Ptr) override;
virtual void visit(mir::StmtBlock::Ptr) override;
virtual void visit(mir::AssignStmt::Ptr) override;
virtual void visit(mir::VarDecl::Ptr) override;
virtual void visit(mir::VarExpr::Ptr) override;
virtual void visit(mir::PrintStmt::Ptr) override;
virtual void visit(mir::HybridGPUStmt::Ptr) override;
virtual void visit(mir::VertexSetDedupExpr::Ptr) override;
virtual void visit(mir::VertexSetApplyExpr::Ptr) override;

std::string var_name (std::string var) {
//return current_kernel_name + "_" + var;
Expand Down
Loading

0 comments on commit 08c3ae8

Please sign in to comment.