Skip to content

Commit

Permalink
Merge branch 'master' into c4_update
Browse files Browse the repository at this point in the history
  • Loading branch information
yolanda15 authored Oct 23, 2024
2 parents 290e2c4 + 8731139 commit 2e2670b
Show file tree
Hide file tree
Showing 987 changed files with 6,027 additions and 35,264 deletions.
33 changes: 29 additions & 4 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,6 @@ MICROKERNEL_HDRS = [
"src/xnnpack/pad.h",
"src/xnnpack/pavgpool.h",
"src/xnnpack/ppmm.h",
"src/xnnpack/prelu.h",
"src/xnnpack/quantization.h",
"src/xnnpack/raddexpminusmax.h",
"src/xnnpack/raddextexp.h",
Expand All @@ -244,9 +243,6 @@ MICROKERNEL_HDRS = [
"src/xnnpack/unpool.h",
"src/xnnpack/vbinary.h",
"src/xnnpack/vcvt.h",
"src/xnnpack/vhswish.h",
"src/xnnpack/vlog.h",
"src/xnnpack/vlrelu.h",
"src/xnnpack/vmulcaddc.h",
"src/xnnpack/vscaleexpminusmax.h",
"src/xnnpack/vscaleextexp.h",
Expand Down Expand Up @@ -323,6 +319,7 @@ xnnpack_cc_library(
deps = [
":common",
":config_hdrs",
":fp16",
":math",
":memory",
":microparams",
Expand Down Expand Up @@ -1247,6 +1244,18 @@ config_setting(
define_values = {"xnn_enable_avx512skx": "false"},
)

# Enables usage of Intel AVX512VBMI (evex512) kernels.
# Matches only when the build sets the define xnn_enable_avx512vbmi to "true"
# (e.g. --define xnn_enable_avx512vbmi=true).
config_setting(
name = "xnn_enable_avx512vbmi_explicit_true",
define_values = {"xnn_enable_avx512vbmi": "true"},
)

# Disables usage of Intel AVX512VBMI (evex512) kernels.
# Matches only when the build sets the define xnn_enable_avx512vbmi to "false"
# (e.g. --define xnn_enable_avx512vbmi=false).
config_setting(
name = "xnn_enable_avx512vbmi_explicit_false",
define_values = {"xnn_enable_avx512vbmi": "false"},
)

# Enables usage of Intel AVX512VNNI (evex512) kernels.
config_setting(
name = "xnn_enable_avx512vnni_explicit_true",
Expand Down Expand Up @@ -1704,6 +1713,22 @@ alias(
}),
)

# Platform default for AVX512VBMI kernels: matches when any entry in match_any
# matches (currently only //build_config:x86). Used as the fallback branch of
# the "avx512vbmi_enabled" alias when neither explicit define setting matches.
selects.config_setting_group(
name = "avx512vbmi_enabled_by_default",
match_any = [
"//build_config:x86",
],
)

# Single condition label answering "should AVX512VBMI kernels be built?".
# An explicit xnn_enable_avx512vbmi define takes priority; otherwise the
# platform default (avx512vbmi_enabled_by_default) decides.
alias(
name = "avx512vbmi_enabled",
actual = select({
":xnn_enable_avx512vbmi_explicit_true": ":xnn_enable_avx512vbmi_explicit_true",
# NOTE(review): mapping the explicit-false case to the explicit-true setting
# looks deliberate rather than a typo: that setting requires the define to be
# "true", so it cannot match in this branch and the alias evaluates as
# "not enabled". Confirm before changing it.
":xnn_enable_avx512vbmi_explicit_false": ":xnn_enable_avx512vbmi_explicit_true",
"//conditions:default": ":avx512vbmi_enabled_by_default",
}),
)

selects.config_setting_group(
name = "avx512vnni_enabled_by_default",
match_any = [
Expand Down
90 changes: 46 additions & 44 deletions CMakeLists.txt

Large diffs are not rendered by default.

68 changes: 55 additions & 13 deletions bench/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ OPERATOR_BENCHMARK_DEPS = [
"//:math",
]

############################### Helper libraries ###############################

xnnpack_cxx_library(
name = "bench_utils",
srcs = ["utils.cc"],
Expand Down Expand Up @@ -98,8 +100,6 @@ cc_library(
],
)

######################### Benchmarks for micro-kernels #########################

xnnpack_cxx_library(
name = "gemm_benchmark",
srcs = [
Expand All @@ -115,6 +115,29 @@ xnnpack_cxx_library(
],
)

# Header-only helper library (no srcs) exposing packw-benchmark.h.
# Depends on :bgemm and Google Benchmark in addition to the common
# micro-kernel benchmark deps.
xnnpack_cxx_library(
name = "packw_benchmark",
hdrs = [
"packw-benchmark.h",
],
deps = MICROKERNEL_BENCHMARK_DEPS + [
":bgemm",
"@com_google_benchmark//:benchmark",
],
)

# Header-only helper library (no srcs) exposing bgemm.h, so that other targets
# in this file can depend on ":bgemm" instead of listing the header in srcs.
xnnpack_cxx_library(
name = "bgemm",
hdrs = [
"bgemm.h",
],
deps = MICROKERNEL_BENCHMARK_DEPS + [
"@com_google_benchmark//:benchmark",
],
)

######################### Benchmarks for micro-kernels #########################

[xnnpack_benchmark(
name = "%s_bench" % kernel,
srcs = [
Expand Down Expand Up @@ -167,12 +190,12 @@ xnnpack_cxx_library(
xnnpack_benchmark(
name = "f32_bgemm_bench",
srcs = [
"bgemm.h",
"f32-bgemm.cc",
],
copts = xnnpack_optional_ruy_copts(),
tags = xnnpack_slow_benchmark_tags(),
deps = MICROKERNEL_BENCHMARK_DEPS + [
":bgemm",
"//:allocator",
] + xnnpack_optional_ruy_deps(),
)
Expand All @@ -192,6 +215,19 @@ xnnpack_benchmark(
]),
)

# Benchmark target built from qp8-f32-qb4w-gemm.cc.
# Preprocessor defines come from xnnpack_kleidiai_defines() and tags from
# xnnpack_slow_benchmark_tags(); the KleidiAI matmul dependency is wrapped in
# xnnpack_if_kleidiai_enabled(), so presumably it is only added when KleidiAI
# support is enabled (helper is defined outside this view; confirm).
xnnpack_benchmark(
name = "qp8_f32_qb4w_gemm",
srcs = ["qp8-f32-qb4w-gemm.cc"],
defines = xnnpack_kleidiai_defines(),
tags = xnnpack_slow_benchmark_tags(),
deps = MICROKERNEL_BENCHMARK_DEPS + [
":gemm_benchmark",
"//:isa_checks",
] + xnnpack_if_kleidiai_enabled([
"@KleidiAI//kai/ukernels/matmul",
]),
)

[xnnpack_benchmark(
name = "%s_bench" % kernel,
srcs = [
Expand Down Expand Up @@ -292,6 +328,7 @@ xnnpack_benchmark(
srcs = [
"qs8-dwconv.cc",
],
tags = xnnpack_slow_benchmark_tags(),
deps = MICROKERNEL_BENCHMARK_DEPS + [
":dwconv",
"//:indirection",
Expand Down Expand Up @@ -470,22 +507,23 @@ xnnpack_benchmark(
xnnpack_cxx_library(
name = "packq_benchmark",
srcs = [
"bgemm.h",
"packq-benchmark.cc",
],
hdrs = ["packq-benchmark.h"],
deps = MICROKERNEL_BENCHMARK_DEPS + [
":bgemm",
"@com_google_benchmark//:benchmark",
],
)

xnnpack_benchmark(
name = "x8_packq_bench",
srcs = [
"bgemm.h",
"x8-packq.cc",
],
tags = xnnpack_slow_benchmark_tags(),
deps = MICROKERNEL_BENCHMARK_DEPS + [
":bgemm",
":packq_benchmark",
"//:allocator",
],
Expand All @@ -494,47 +532,51 @@ xnnpack_benchmark(
xnnpack_benchmark(
name = "x8_packw_bench",
srcs = [
"bgemm.h",
"packw-benchmark.h",
"x8-packw.cc",
],
tags = xnnpack_slow_benchmark_tags(),
deps = MICROKERNEL_BENCHMARK_DEPS + [
":bgemm",
":packw_benchmark",
"//:allocator",
],
)

xnnpack_benchmark(
name = "qs8_packw_bench",
srcs = [
"bgemm.h",
"packw-benchmark.h",
"qs8-packw.cc",
],
tags = xnnpack_slow_benchmark_tags(),
deps = MICROKERNEL_BENCHMARK_DEPS + [
":bgemm",
":packw_benchmark",
"//:allocator",
],
)

xnnpack_benchmark(
name = "x16_packw_bench",
srcs = [
"bgemm.h",
"packw-benchmark.h",
"x16-packw.cc",
],
tags = xnnpack_slow_benchmark_tags(),
deps = MICROKERNEL_BENCHMARK_DEPS + [
":bgemm",
":packw_benchmark",
"//:allocator",
],
)

xnnpack_benchmark(
name = "x32_packw_bench",
srcs = [
"bgemm.h",
"packw-benchmark.h",
"x32-packw.cc",
],
tags = xnnpack_slow_benchmark_tags(),
deps = MICROKERNEL_BENCHMARK_DEPS + [
":bgemm",
":packw_benchmark",
"//:allocator",
],
)
Expand Down
6 changes: 3 additions & 3 deletions bench/abs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include "xnnpack.h"

#include "unary_operator.h"
#include "bench/utils.h"
#include "utils.h"
#include "xnnpack.h"
#include "xnnpack/math.h"
#include <benchmark/benchmark.h>
#ifdef BENCHMARK_TENSORFLOW_LITE
#include "tensorflow/lite/schema/schema_generated.h"
Expand Down
2 changes: 1 addition & 1 deletion bench/average-pooling.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/version.h"
#endif // BENCHMARK_TENSORFLOW_LITE
#include "bench/utils.h"
#include "utils.h"

static void xnnpack_average_pooling_qu8(benchmark::State& state, const char* net) {
const size_t batch_size = state.range(0);
Expand Down
6 changes: 3 additions & 3 deletions bench/bankers-rounding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include "xnnpack.h"

#include "unary_operator.h"
#include "bench/utils.h"
#include "utils.h"
#include "xnnpack.h"
#include "xnnpack/math.h"
#include <benchmark/benchmark.h>
#ifdef BENCHMARK_TENSORFLOW_LITE
#include "tensorflow/lite/schema/schema_generated.h"
Expand Down
2 changes: 1 addition & 1 deletion bench/batch-matrix-multiply.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#include "xnnpack.h"

#include <benchmark/benchmark.h>
#include "bench/utils.h"
#include "utils.h"
#include "xnnpack/buffer.h"
#ifdef BENCHMARK_TENSORFLOW_LITE
#include "flatbuffers/include/flatbuffers/flatbuffers.h"
Expand Down
4 changes: 2 additions & 2 deletions bench/bf16-gemm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
#include <random>
#include <vector>

#include "bench/gemm.h"
#include "bench/utils.h"
#include "gemm.h"
#include "utils.h"
#include "xnnpack.h"
#include "xnnpack/common.h"
#include "xnnpack/gemm.h"
Expand Down
6 changes: 3 additions & 3 deletions bench/ceiling.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include "xnnpack.h"

#include "unary_operator.h"
#include "bench/utils.h"
#include "utils.h"
#include "xnnpack.h"
#include "xnnpack/math.h"
#include <benchmark/benchmark.h>
#ifdef BENCHMARK_TENSORFLOW_LITE
#include "tensorflow/lite/schema/schema_generated.h"
Expand Down
2 changes: 1 addition & 1 deletion bench/channel-shuffle.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#include "xnnpack.h"

#include <benchmark/benchmark.h>
#include "bench/utils.h"
#include "utils.h"
#include "xnnpack/buffer.h"


Expand Down
14 changes: 5 additions & 9 deletions bench/convert.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include "xnnpack.h"

#include <limits>

#include "unary_operator.h"
#include "bench/utils.h"
#include "utils.h"
#include "xnnpack.h"
#include "xnnpack/math.h"
#include <benchmark/benchmark.h>
#ifdef BENCHMARK_TENSORFLOW_LITE
#include "flatbuffers/include/flatbuffers/flatbuffers.h"
Expand All @@ -33,9 +33,7 @@ void xnnpack_convert_f32_qs8(benchmark::State& state) {
benchmark_unary_operator<float, int8_t>(
[](uint32_t flags, xnn_operator_t* op) {
return xnn_create_convert_nc_f32_qs8(
1.0f / 128.0f /* scale */, 1 /* zero point */,
std::numeric_limits<int8_t>::min(),
std::numeric_limits<int8_t>::max(), flags, op);
1.0f / 128.0f /* scale */, 1 /* zero point */, flags, op);
},
xnn_reshape_convert_nc_f32_qs8, xnn_setup_convert_nc_f32_qs8, state);
}
Expand All @@ -44,9 +42,7 @@ void xnnpack_convert_f32_qu8(benchmark::State& state) {
benchmark_unary_operator<float, uint8_t>(
[](uint32_t flags, xnn_operator_t* op) {
return xnn_create_convert_nc_f32_qu8(
1.0f / 128.0f /* scale */, 127 /* zero point */,
std::numeric_limits<uint8_t>::min(),
std::numeric_limits<uint8_t>::max(), flags, op);
1.0f / 128.0f /* scale */, 127 /* zero point */, flags, op);
},
xnn_reshape_convert_nc_f32_qu8, xnn_setup_convert_nc_f32_qu8, state);
}
Expand Down
2 changes: 1 addition & 1 deletion bench/convolution.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/version.h"
#endif // BENCHMARK_TENSORFLOW_LITE
#include "bench/utils.h"
#include "utils.h"
#include "xnnpack/buffer.h"

void xnnpack_convolution_qu8(benchmark::State& state, const char* net) {
Expand Down
2 changes: 1 addition & 1 deletion bench/deconvolution.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/version.h"
#endif // BENCHMARK_TENSORFLOW_LITE */
#include "bench/utils.h"
#include "utils.h"
#include "xnnpack/buffer.h"

void xnnpack_deconvolution_qu8(benchmark::State& state, const char* net) {
Expand Down
6 changes: 3 additions & 3 deletions bench/elu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include "xnnpack.h"

#include <cstdint>
#include <limits>

#include "unary_operator.h"
#include "bench/utils.h"
#include "utils.h"
#include "xnnpack.h"
#include "xnnpack/math.h"
#include <benchmark/benchmark.h>
#ifdef BENCHMARK_TENSORFLOW_LITE
#include "flatbuffers/include/flatbuffers/flatbuffer_builder.h"
Expand Down
Loading

0 comments on commit 2e2670b

Please sign in to comment.