Skip to content

Commit

Permalink
Testcases: Down adjust L2/L3 cache size for GPU targets
Browse files Browse the repository at this point in the history
  • Loading branch information
antonysigma committed Sep 6, 2024
1 parent 06a6219 commit de0a195
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 4 deletions.
6 changes: 5 additions & 1 deletion apps/bilateral_grid/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,11 @@ add_halide_library(bilateral_grid_auto_schedule FROM bilateral_grid.generator
GENERATOR bilateral_grid
STMT bilateral_grid_auto_schedule_STMT
SCHEDULE bilateral_grid_auto_schedule_SCHEDULE
AUTOSCHEDULER Halide::Mullapudi2016)
AUTOSCHEDULER Halide::Mullapudi2016
# When target=host-cuda or host-metal, limit the GPU shared
# memory per block to avoid GPU kernel launch failures.
PARAMS autoscheduler.last_level_cache_size=20000
)

# Main executable
add_executable(bilateral_grid_process filter.cpp)
Expand Down
6 changes: 5 additions & 1 deletion apps/local_laplacian/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,11 @@ add_halide_generator(local_laplacian.generator
add_halide_library(local_laplacian FROM local_laplacian.generator)
add_halide_library(local_laplacian_auto_schedule FROM local_laplacian.generator
GENERATOR local_laplacian
AUTOSCHEDULER Halide::Mullapudi2016)
AUTOSCHEDULER Halide::Mullapudi2016
# When target=host-cuda or host-metal, limit the GPU shared
# memory per block to avoid GPU kernel launch failures.
PARAMS autoscheduler.last_level_cache_size=30000
)

# Main executable
add_executable(local_laplacian_process process.cpp)
Expand Down
6 changes: 5 additions & 1 deletion apps/stencil_chain/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@ add_halide_generator(stencil_chain.generator SOURCES stencil_chain_generator.cpp
add_halide_library(stencil_chain FROM stencil_chain.generator)
add_halide_library(stencil_chain_auto_schedule FROM stencil_chain.generator
GENERATOR stencil_chain
AUTOSCHEDULER Halide::Mullapudi2016)
AUTOSCHEDULER Halide::Mullapudi2016
# When target=host-cuda or host-metal, limit the GPU shared
# memory per block to avoid GPU kernel launch failures.
PARAMS autoscheduler.last_level_cache_size=15000
)

# Main executable
add_executable(stencil_chain_process process.cpp)
Expand Down
2 changes: 1 addition & 1 deletion src/autoschedulers/mullapudi2016/AutoSchedule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ struct ArchParams {
* CACHE_SIZE to 48 KB.
*/
constexpr ArchParams(bool has_gpu_feature)
: parallelism(has_gpu_feature ? 128 : 16), last_level_cache_size(has_gpu_feature ? 48 * 1024 : 16 * 1024 * 1024),
: parallelism(has_gpu_feature ? 128 : 16), last_level_cache_size(has_gpu_feature ? 35 * 1024 : 16 * 1024 * 1024),
balance(has_gpu_feature ? 20 : 40) {
}
};
Expand Down

0 comments on commit de0a195

Please sign in to comment.