From 96adfab914679c28b00950d4d41a210980a5c8a5 Mon Sep 17 00:00:00 2001 From: Nirvedh Meshram Date: Wed, 17 Jul 2024 10:56:40 -0600 Subject: [PATCH] Add e2e tests that model switching costs --- build_tools/ci/cpu_comparison/run_test.sh | 9 +++-- .../test_files/matmul_f32_8_4_8.mlir | 26 ++++++++++++++ .../test_files/matmul_f32_8_8_4.mlir | 32 +++++++++++++++++ .../test_files/two_matmul_switching.mlir | 35 +++++++++++++++++++ 4 files changed, 99 insertions(+), 3 deletions(-) create mode 100644 build_tools/ci/cpu_comparison/test_files/matmul_f32_8_4_8.mlir create mode 100644 build_tools/ci/cpu_comparison/test_files/matmul_f32_8_8_4.mlir create mode 100644 build_tools/ci/cpu_comparison/test_files/two_matmul_switching.mlir diff --git a/build_tools/ci/cpu_comparison/run_test.sh b/build_tools/ci/cpu_comparison/run_test.sh index 9e255375b..85620c6f3 100755 --- a/build_tools/ci/cpu_comparison/run_test.sh +++ b/build_tools/ci/cpu_comparison/run_test.sh @@ -148,8 +148,6 @@ fi source $XRT_DIR/setup.sh -# Circumvent xclbin security (no longer needed as of April 2024 XDNA driver) -export XRT_HACK_UNSECURE_LOADING_XCLBIN=1 cd ${OUTPUT_DIR} @@ -329,7 +327,7 @@ function run_test() { --iree-amd-aie-vitis-install-dir=${vitis_path} \ --iree-hal-dump-executable-files-to=$PWD \ --iree-amd-aie-show-invoked-commands \ - --mlir-disable-threading -o ${aie_vmfb}" + --iree-scheduling-optimize-bindings=false -o ${aie_vmfb}" # TODO(newling) The following logic is copied from run_matmul_test.sh, @@ -406,6 +404,11 @@ run_test --test_file ${THIS_DIR}/test_files/matmul_int32.mlir # An example of an arbitrary graph with three matmuls which form three dispatches. 
run_test --test_file ${THIS_DIR}/test_files/three_matmuls.mlir --function 'three_$mm$' +# Tests that model kernel switching costs. +run_test --test_file ${THIS_DIR}/test_files/two_matmul_switching.mlir +run_test --test_file ${THIS_DIR}/test_files/matmul_f32_8_8_4.mlir +run_test --test_file ${THIS_DIR}/test_files/matmul_f32_8_4_8.mlir + # Example of generating a matmul test from a template, and then running it. test_name=${OUTPUT_DIR}/test_from_template.mlir matmul_template_dir=${THIS_DIR}/matmul_template diff --git a/build_tools/ci/cpu_comparison/test_files/matmul_f32_8_4_8.mlir b/build_tools/ci/cpu_comparison/test_files/matmul_f32_8_4_8.mlir new file mode 100644 index 000000000..1f0ad5109 --- /dev/null +++ b/build_tools/ci/cpu_comparison/test_files/matmul_f32_8_4_8.mlir @@ -0,0 +1,26 @@ +// This test is useful to compare against the `two_matmul_switching` when no switching happens +// and we successively call the same matmul. + +// These 2 lines are required by the script which generates input data: +// +// input 8x8xf32 +// input 8x4xf32 + +!A_TYPE = tensor<8x8xf32> +!B_TYPE = tensor<8x4xf32> +!C_TYPE = tensor<8x4xf32> +func.func @matmul_8_4_8(%lhs : !A_TYPE, + %rhs : !B_TYPE) -> !C_TYPE { + %empty = tensor.empty() : !C_TYPE + %cst = arith.constant 0.0 : f32 + %fill = linalg.fill ins(%cst : f32) outs(%empty : !C_TYPE) -> !C_TYPE + %1 = linalg.matmul ins(%lhs, %rhs : !A_TYPE, !B_TYPE) + outs(%fill : !C_TYPE) -> !C_TYPE + %2 = linalg.matmul ins(%lhs, %1 : !A_TYPE, !B_TYPE) + outs(%fill : !C_TYPE) -> !C_TYPE + %3 = linalg.matmul ins(%lhs, %2 : !A_TYPE, !B_TYPE) + outs(%fill : !C_TYPE) -> !C_TYPE + %4 = linalg.matmul ins(%lhs, %3 : !A_TYPE, !B_TYPE) + outs(%fill : !C_TYPE) -> !C_TYPE + return %4 : !C_TYPE +} diff --git a/build_tools/ci/cpu_comparison/test_files/matmul_f32_8_8_4.mlir b/build_tools/ci/cpu_comparison/test_files/matmul_f32_8_8_4.mlir new file mode 100644 index 000000000..9427515c7 --- /dev/null +++ 
b/build_tools/ci/cpu_comparison/test_files/matmul_f32_8_8_4.mlir @@ -0,0 +1,32 @@ +// This test is useful to compare against the `two_matmul_switching` when no switching happens +// and we successively call the same matmul. + +// These 2 lines are required by the script which generates input data: +// +// input 8x4xf32 +// input 4x8xf32 + +!A_TYPE = tensor<8x4xf32> +!B_TYPE = tensor<4x8xf32> +!C_TYPE = tensor<8x8xf32> +func.func @matmul_8_8_4(%lhs : !A_TYPE, + %rhs : !B_TYPE) -> !C_TYPE { + %empty = tensor.empty() : !C_TYPE + %cst = arith.constant 0.0 : f32 + %fill = linalg.fill ins(%cst : f32) outs(%empty : !C_TYPE) -> !C_TYPE + %1 = linalg.matmul ins(%lhs, %rhs : !A_TYPE, !B_TYPE) + outs(%fill : !C_TYPE) -> !C_TYPE + %slice1 = tensor.extract_slice %1[0, 0][4, 8][1, 1] : + !C_TYPE to !B_TYPE + %2 = linalg.matmul ins(%lhs, %slice1 : !A_TYPE, !B_TYPE) + outs(%fill : !C_TYPE) -> !C_TYPE + %slice2 = tensor.extract_slice %2[0, 0][4, 8][1, 1] : + !C_TYPE to !B_TYPE + %3 = linalg.matmul ins(%lhs, %slice2 : !A_TYPE, !B_TYPE) + outs(%fill : !C_TYPE) -> !C_TYPE + %slice3 = tensor.extract_slice %3[0, 0][4, 8][1, 1] : + !C_TYPE to !B_TYPE + %4 = linalg.matmul ins(%lhs, %slice3 : !A_TYPE, !B_TYPE) + outs(%fill : !C_TYPE) -> !C_TYPE + return %4 : !C_TYPE +} diff --git a/build_tools/ci/cpu_comparison/test_files/two_matmul_switching.mlir b/build_tools/ci/cpu_comparison/test_files/two_matmul_switching.mlir new file mode 100644 index 000000000..3bec22115 --- /dev/null +++ b/build_tools/ci/cpu_comparison/test_files/two_matmul_switching.mlir @@ -0,0 +1,35 @@ +// This test shows switching between two matmuls and is useful to model the switching cost. + +// These 2 lines are required by the script which generates input data: +// +// input 8x4xf32 +// input 4x8xf32 + +!A_TYPE = tensor<8x4xf32> +!B_TYPE = tensor<4x8xf32> +!C_TYPE = tensor<8x8xf32> +func.func @matmul_small(%lhs : !A_TYPE, + %rhs : !B_TYPE) -> !A_TYPE { + %empty = tensor.empty() : !C_TYPE + %empty2 = tensor.empty() : !A_TYPE 
+ %cst = arith.constant 0.0 : f32 + %fill = linalg.fill ins(%cst : f32) outs(%empty : !C_TYPE) -> !C_TYPE + %fill2 = linalg.fill ins(%cst : f32) outs(%empty2 : !A_TYPE) -> !A_TYPE + %1 = linalg.matmul ins(%lhs, %rhs : !A_TYPE, !B_TYPE) + outs(%fill : !C_TYPE) -> !C_TYPE + %2 = linalg.matmul ins(%1, %lhs : !C_TYPE, !A_TYPE) + outs(%fill2 : !A_TYPE) -> !A_TYPE + %3 = linalg.matmul ins(%2, %rhs : !A_TYPE, !B_TYPE) + outs(%fill : !C_TYPE) -> !C_TYPE + %4 = linalg.matmul ins(%3, %lhs : !C_TYPE, !A_TYPE) + outs(%fill2 : !A_TYPE) -> !A_TYPE + %5 = linalg.matmul ins(%4, %rhs : !A_TYPE, !B_TYPE) + outs(%fill : !C_TYPE) -> !C_TYPE + %6 = linalg.matmul ins(%5, %lhs : !C_TYPE, !A_TYPE) + outs(%fill2 : !A_TYPE) -> !A_TYPE + %7 = linalg.matmul ins(%6, %rhs : !A_TYPE, !B_TYPE) + outs(%fill : !C_TYPE) -> !C_TYPE + %8 = linalg.matmul ins(%7, %lhs : !C_TYPE, !A_TYPE) + outs(%fill2 : !A_TYPE) -> !A_TYPE + return %8 : !A_TYPE +}