Skip to content

Commit

Permalink
rework2
Browse files Browse the repository at this point in the history
  • Loading branch information
AndreyPavlenko committed Nov 13, 2024
1 parent 0ca7a92 commit cdfd5b3
Show file tree
Hide file tree
Showing 7 changed files with 282 additions and 216 deletions.
20 changes: 5 additions & 15 deletions include/gc/Transforms/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,10 @@ def GpuToGpuOcl : Pass<"gpu-to-gpuocl", "ModuleOp"> {
def GpuTilingAndFusion : Pass<"gpu-tiling", "func::FuncOp"> {
let summary = "GPU tiling and fusion path.";
let description = [{
This pass tiles linalg operations and creates an inner loop that is mapped to the block sizes, when converting
to gpu.launch. The tiles calculation is based on the GPU device properties, retrieved from the DLTI attributes.
If the DLTI attributes are not specified, defaults to the pass options.
This pass tiles linalg operations and creates two nested scf.forall loops. When converting to gpu.launch,
the inner loop is mapped to the block sizes and the outer loop to the grid sizes. The tile calculation is based
on the GPU device properties, retrieved from the DLTI attributes. If the DLTI attributes are not specified,
it defaults to the pass options.
}];
let options = [
Option<"numEus", "num-eus", "size_t",
Expand All @@ -143,18 +144,7 @@ def GpuTilingAndFusion : Pass<"gpu-tiling", "func::FuncOp"> {
"Execution Unit cache size.">,
Option<"vectorWidth", "vector-width", "size_t",
/*default=*/"512",
"The maximum width of EU's vector registers.">
];
}

def GpuLoopTiling : Pass<"gpu-loop-tiling", "func::FuncOp"> {
let summary = "Create nested parallel loops to be mapped to GPU.";
let description = [{
This pass tiles the loops created by the GpuTilingAndFusion pass and converted to parallel loops. The tiles
calculation is based on the max_work_group_size DLTI attribute. If the attribute is not specified,
defaults to the pass options.
}];
let options = [
"The maximum width of EU's vector registers.">,
Option<"workGroupSize", "work-group-size", "size_t",
/*default=*/"64",
"The maximum workgroup size.">
Expand Down
1 change: 0 additions & 1 deletion lib/gc/Transforms/GPU/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ set_property(GLOBAL APPEND PROPERTY IMEX_LIBS ${IMEX_LIBS})
gc_add_mlir_library(GcGpuPasses
AddContextArg.cpp
AllocsToSLM.cpp
GpuLoopTiling.cpp
GpuTilingAndFusion.cpp
GpuToGpuOcl.cpp
LinalgToXeGPU.cpp
Expand Down
69 changes: 0 additions & 69 deletions lib/gc/Transforms/GPU/GpuLoopTiling.cpp

This file was deleted.

Loading

0 comments on commit cdfd5b3

Please sign in to comment.