From edd5da7ab5b2f5265e88333182fefa1c31d7b261 Mon Sep 17 00:00:00 2001 From: Misha Gutman Date: Thu, 24 Oct 2024 05:54:40 -0700 Subject: [PATCH] Replaced gavgpool and gsumpool with static_reduce. PiperOrigin-RevId: 689348265 --- CMakeLists.txt | 10 - bench/BUILD.bazel | 9 - bench/f16-gavgpool-cw.cc | 76 - bench/f32-gavgpool-cw.cc | 106 -- bench/global-average-pooling.cc | 322 ----- build_srcs.bzl | 5 - cmake/gen/neon_microkernels.cmake | 1 - cmake/gen/neonfp16arith_microkernels.cmake | 1 - cmake/gen/scalar_microkernels.cmake | 1 - cmake/gen/sse_microkernels.cmake | 1 - cmake/gen/wasmsimd_microkernels.cmake | 2 - gen/neon_microkernels.bzl | 1 - gen/neonfp16arith_microkernels.bzl | 1 - gen/scalar_microkernels.bzl | 1 - gen/sse_microkernels.bzl | 1 - gen/wasmsimd_microkernels.bzl | 2 - include/xnnpack.h | 190 +-- scripts/generate-tests.sh | 4 - src/configs/gavgpool-cw-config.c | 103 -- .../f16-gavgpool-cw-neonfp16arith-u8.c | 78 - src/f32-gavgpool-cw/f32-gavgpool-cw-neon-u4.c | 74 - .../f32-gavgpool-cw-scalar-u1.c | 58 - src/f32-gavgpool-cw/f32-gavgpool-cw-sse-u4.c | 121 -- .../f32-gavgpool-cw-wasmsimd-arm-u4.c | 120 -- .../f32-gavgpool-cw-wasmsimd-x86-u4.c | 120 -- src/operators/global-average-pooling-ncw.c | 373 ----- src/operators/global-average-pooling-nwc.c | 851 ----------- src/subgraph/deprecated.c | 120 ++ src/subgraph/global-average-pooling.c | 442 ------ src/subgraph/global-sum-pooling.c | 315 ---- src/xnnpack/config.h | 3 - src/xnnpack/operator-type-defs.h | 8 - test/BUILD.bazel | 47 - test/f16-gavgpool-cw.cc | 89 -- test/f16-gavgpool-cw.yaml | 8 - test/f32-gavgpool-cw.cc | 349 ----- test/f32-gavgpool-cw.yaml | 22 - test/gavgpool-cw-microkernel-tester.h | 196 --- test/global-average-pooling-1d.cc | 481 +------ test/global-average-pooling-2d.cc | 487 +------ test/global-average-pooling-ncw.cc | 102 -- test/global-average-pooling-nwc.cc | 1261 ----------------- test/global-average-pooling-operator-tester.h | 731 ---------- test/global-sum-pooling-1d.cc | 294 +--- test/global-sum-pooling-2d.cc | 301 +--- test/global-sum-pooling-nwc.cc | 483 ------- test/global-sum-pooling-operator-tester.h | 301 ---- tools/generate-gavgpool-cw-test.py | 188 --- 48 files changed, 198 insertions(+), 8662 deletions(-) delete mode 100644 bench/f16-gavgpool-cw.cc delete mode 100644 bench/f32-gavgpool-cw.cc delete mode 100644 bench/global-average-pooling.cc delete mode 100644 src/configs/gavgpool-cw-config.c delete mode 100644 src/f16-gavgpool-cw/f16-gavgpool-cw-neonfp16arith-u8.c delete mode 100644 src/f32-gavgpool-cw/f32-gavgpool-cw-neon-u4.c delete mode 100644 src/f32-gavgpool-cw/f32-gavgpool-cw-scalar-u1.c delete mode 100644 src/f32-gavgpool-cw/f32-gavgpool-cw-sse-u4.c delete mode 100644 src/f32-gavgpool-cw/f32-gavgpool-cw-wasmsimd-arm-u4.c delete mode 100644 src/f32-gavgpool-cw/f32-gavgpool-cw-wasmsimd-x86-u4.c delete mode 100644 src/operators/global-average-pooling-ncw.c delete mode 100644 src/operators/global-average-pooling-nwc.c delete mode 100644 src/subgraph/global-average-pooling.c delete mode 100644 src/subgraph/global-sum-pooling.c delete mode 100644 test/f16-gavgpool-cw.cc delete mode 100644 test/f16-gavgpool-cw.yaml delete mode 100644 test/f32-gavgpool-cw.cc delete mode 100644 test/f32-gavgpool-cw.yaml delete mode 100644 test/gavgpool-cw-microkernel-tester.h delete mode 100644 test/global-average-pooling-ncw.cc delete mode 100644 test/global-average-pooling-nwc.cc delete mode 100644 test/global-average-pooling-operator-tester.h delete mode 100644 test/global-sum-pooling-nwc.cc delete mode 100644 test/global-sum-pooling-operator-tester.h delete mode 100755 tools/generate-gavgpool-cw-test.py diff --git a/CMakeLists.txt b/CMakeLists.txt index ac7e8c2250a..7b26c530497 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -425,8 +425,6 @@ SET(OPERATOR_SRCS src/operators/deconvolution-nhwc.c src/operators/dynamic-fully-connected-nc.c src/operators/fully-connected-nc.c - src/operators/global-average-pooling-ncw.c - src/operators/global-average-pooling-nwc.c src/operators/lut-elementwise-nc.c src/operators/max-pooling-nhwc.c src/operators/pack-lh.c @@ -468,8 +466,6 @@ SET(SUBGRAPH_SRCS src/subgraph/fully-connected-sparse.c src/subgraph/fully-connected.c src/subgraph/gelu.c - src/subgraph/global-average-pooling.c - src/subgraph/global-sum-pooling.c src/subgraph/hardswish.c src/subgraph/leaky-relu.c src/subgraph/log.c @@ -512,7 +508,6 @@ SET(XNNPACK_SRCS src/configs/dwconv2d-chw-config.c src/configs/experiments-config.c src/configs/gavgpool-config.c - src/configs/gavgpool-cw-config.c src/configs/gemm-config.c src/configs/ibilinear-chw-config.c src/configs/ibilinear-config.c @@ -1480,7 +1475,6 @@ IF(XNNPACK_BUILD_TESTS) f16-conv-hwc2chw f16-f32acc-rdsum f16-f32acc-rsum - f16-gavgpool-cw f16-gavgpool-minmax f16-ibilinear-chw f16-ibilinear @@ -1492,7 +1486,6 @@ IF(XNNPACK_BUILD_TESTS) f16-vmulcaddc-minmax f32-conv-hwc f32-conv-hwc2chw - f32-gavgpool-cw f32-gavgpool-minmax f32-ibilinear-chw f32-ibilinear @@ -1950,7 +1943,6 @@ IF(XNNPACK_BUILD_BENCHMARKS) deconvolution elu floor - global-average-pooling hardswish leaky-relu max-pooling @@ -1983,7 +1975,6 @@ IF(XNNPACK_BUILD_BENCHMARKS) f16-f32acc-igemm f16-f32acc-rdsum f16-f32acc-rsum - f16-gavgpool-cw f16-gemm f16-gemm-minmax f16-igemm @@ -1999,7 +1990,6 @@ IF(XNNPACK_BUILD_BENCHMARKS) f32-dwconv f32-dwconv2d-chw f32-f16-vcvt - f32-gavgpool-cw f32-gemm f32-gemm-goi-minmax f32-gemm-minmax diff --git a/bench/BUILD.bazel b/bench/BUILD.bazel index 447555f1a53..8efbd1f459c 100644 --- a/bench/BUILD.bazel +++ b/bench/BUILD.bazel @@ -277,12 +277,10 @@ xnnpack_benchmark( ], deps = MICROKERNEL_BENCHMARK_DEPS, ) for kernel in [ - "f16_gavgpool_cw", "f16_raddstoreexpminusmax", "f16_rmax", "f16_rminmax", "f16_rmin", - "f32_gavgpool_cw", "f32_raddexpminusmax", "f32_raddextexp", "f32_raddstoreexpminusmax", @@ -641,13 +639,6 @@ xnnpack_benchmark( deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps(), ) -xnnpack_benchmark( - name = "global_average_pooling_bench", - srcs = ["global-average-pooling.cc"], - tags = xnnpack_slow_benchmark_tags(), - deps = OPERATOR_BENCHMARK_DEPS, -) - xnnpack_benchmark( name = "max_pooling_bench", srcs = ["max-pooling.cc"], diff --git a/bench/f16-gavgpool-cw.cc b/bench/f16-gavgpool-cw.cc deleted file mode 100644 index b8913fffb10..00000000000 --- a/bench/f16-gavgpool-cw.cc +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright 2022 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. - -#include -#include -#include -#include -#include - -#include "utils.h" -#include "xnnpack.h" -#include "xnnpack/aligned-allocator.h" -#include "xnnpack/common.h" -#include "xnnpack/gavgpool.h" -#include "xnnpack/math.h" -#include "xnnpack/microfnptr.h" -#include "xnnpack/microparams-init.h" -#include - -void f16_gavgpool_cw( - benchmark::State& state, - xnn_f16_gavgpool_cw_ukernel_fn gavgpool_cw, - xnn_init_f16_gavgpool_neon_params_fn init_params, - benchmark::utils::IsaCheckFunction isa_check = nullptr) -{ - if (isa_check && !isa_check(state)) { - return; - } - const size_t channels = state.range(0); - const size_t elements = state.range(1); - - std::vector> input(elements * channels + XNN_EXTRA_BYTES / sizeof(xnn_float16)); - std::vector output(channels); - std::iota(input.begin(), input.end(), 0); - - // Prepare parameters. - union xnn_f16_gavgpool_params params; - init_params(¶ms, - UINT16_C(0x3C00) /* scale */, UINT16_C(0xFC00) /* -inf */, UINT16_C(0x7C00) /* inf */, elements); - - for (auto _ : state) { - gavgpool_cw(elements, channels, input.data(), output.data(), ¶ms); - } - - const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency(); - if (cpu_frequency != 0) { - state.counters["cpufreq"] = cpu_frequency; - } -} - -static void BenchmarkBatch(benchmark::internal::Benchmark* b) -{ - b->ArgNames({"channels", "elements"}); - b->Args({1, 1024}); - b->Args({2, 1024}); - b->Args({4, 1024}); - b->Args({6, 1024}); - b->Args({8, 1024}); - b->Args({16, 1024}); - b->Args({1024, 1024}); -} - -#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - BENCHMARK_CAPTURE(f16_gavgpool_cw, f16_neon_u8, - xnn_f16_gavgpool_cw_ukernel__neonfp16arith_u8, - xnn_init_f16_gavgpool_scalar_params, - benchmark::utils::CheckNEONFP16ARITH) - ->Apply(BenchmarkBatch) - ->UseRealTime(); -#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - -#ifndef XNNPACK_BENCHMARK_NO_MAIN -BENCHMARK_MAIN(); -#endif diff --git a/bench/f32-gavgpool-cw.cc b/bench/f32-gavgpool-cw.cc deleted file mode 100644 index fd19411217b..00000000000 --- a/bench/f32-gavgpool-cw.cc +++ /dev/null @@ -1,106 +0,0 @@ -// Copyright 2022 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. - -#include -#include -#include -#include -#include -#include - -#include "utils.h" -#include - -#include "xnnpack.h" -#include "xnnpack/aligned-allocator.h" -#include "xnnpack/common.h" -#include "xnnpack/gavgpool.h" -#include "xnnpack/microfnptr.h" -#include "xnnpack/microparams-init.h" - - -void f32_gavgpool_cw( - benchmark::State& state, - xnn_f32_gavgpool_cw_ukernel_fn gavgpool_cw, - xnn_init_f32_gavgpool_params_fn init_params, - benchmark::utils::IsaCheckFunction isa_check = nullptr) -{ - if (isa_check != nullptr && !isa_check(state)) { - return; - } - const size_t channels = state.range(0); - const size_t elements = state.range(1); - - std::vector> input(elements * channels + XNN_EXTRA_BYTES / sizeof(float)); - std::vector output(channels); - std::iota(input.begin(), input.end(), 0.0f); - - // Prepare parameters. - union xnn_f32_gavgpool_params params; - init_params(¶ms, - 1.0f /* scale */, -std::numeric_limits::infinity(), +std::numeric_limits::infinity(), elements); - - for (auto _ : state) { - gavgpool_cw(elements, channels, input.data(), output.data(), ¶ms); - } - - const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency(); - if (cpu_frequency != 0) { - state.counters["cpufreq"] = cpu_frequency; - } -} - -static void BenchmarkBatch(benchmark::internal::Benchmark* b) -{ - b->ArgNames({"channels", "elements"}); - b->Args({1, 1024}); - b->Args({2, 1024}); - b->Args({4, 1024}); - b->Args({6, 1024}); - b->Args({8, 1024}); - b->Args({16, 1024}); - b->Args({1024, 1024}); -} - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - BENCHMARK_CAPTURE(f32_gavgpool_cw, f32_neon_u4, - xnn_f32_gavgpool_cw_ukernel__neon_u4, - xnn_init_f32_gavgpool_neon_params, - benchmark::utils::CheckNEON) - ->Apply(BenchmarkBatch) - ->UseRealTime(); -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - BENCHMARK_CAPTURE(f32_gavgpool_cw, f32_sse_u4, - xnn_f32_gavgpool_cw_ukernel__sse_u4, - xnn_init_f32_gavgpool_sse_params) - ->Apply(BenchmarkBatch) - ->UseRealTime(); -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - -#if XNN_ARCH_WASMSIMD - BENCHMARK_CAPTURE(f32_gavgpool_cw, f32_wasmsimd_arm_u4, - xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_u4, - xnn_init_f32_gavgpool_scalar_params) - ->Apply(BenchmarkBatch) - ->UseRealTime(); - BENCHMARK_CAPTURE(f32_gavgpool_cw, f32_wasmsimd_x86_u4, - xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_u4, - xnn_init_f32_gavgpool_scalar_params) - ->Apply(BenchmarkBatch) - ->UseRealTime(); -#endif // XNN_ARCH_WASMSIMD - -BENCHMARK_CAPTURE(f32_gavgpool_cw, f32_scalar_u1, - xnn_f32_gavgpool_cw_ukernel__scalar_u1, - xnn_init_f32_gavgpool_scalar_params) - ->Apply(BenchmarkBatch) - ->UseRealTime(); - - -#ifndef XNNPACK_BENCHMARK_NO_MAIN -BENCHMARK_MAIN(); -#endif diff --git a/bench/global-average-pooling.cc b/bench/global-average-pooling.cc deleted file mode 100644 index a9adbba20ff..00000000000 --- a/bench/global-average-pooling.cc +++ /dev/null @@ -1,322 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -// All rights reserved. -// -// Copyright 2019 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. - -#include -#include -#include -#include -#include -#include -#include - -#include "xnnpack.h" -#include "xnnpack/aligned-allocator.h" -#include "xnnpack/common.h" -#include "xnnpack/math.h" - -#include -#include "utils.h" - -static void global_average_pooling_qu8(benchmark::State& state) { - const size_t batch_size = state.range(0); - const size_t input_height = state.range(1); - const size_t input_width = state.range(2); - const size_t channels = state.range(3); - - std::random_device random_device; - auto rng = std::mt19937(random_device()); - auto u8rng = std::bind(std::uniform_int_distribution(0, std::numeric_limits::max()), std::ref(rng)); - - std::vector input(batch_size * input_height * input_width * channels); - std::generate(input.begin(), input.end(), std::ref(u8rng)); - std::vector output(batch_size * channels); - - xnn_status status = xnn_initialize(nullptr /* allocator */); - if (status != xnn_status_success) { - state.SkipWithError("failed to initialize XNNPACK"); - } - - xnn_operator_t global_pooling_op = nullptr; - status = xnn_create_global_average_pooling_nwc_qu8( - 127 /* input zero point */, 0.75f /* input scale */, - 127 /* output zero point */, 1.25f /* output scale */, - 0, 255, - 0 /* flags */, &global_pooling_op); - if (status != xnn_status_success) { - state.SkipWithError("failed to create Global Average Pooling operator"); - } - - size_t workspace_size = 0; - size_t workspace_alignment = 0; - status = xnn_reshape_global_average_pooling_nwc_qu8( - global_pooling_op, - batch_size, input_height * input_width, - channels, channels /* input stride */, channels /* output stride */, - &workspace_size, &workspace_alignment, - /*threadpool=*/nullptr); - if (status != xnn_status_success) { - state.SkipWithError("failed to reshape Global Average Pooling operator"); - } - - std::vector> workspace(workspace_size); - - status = xnn_setup_global_average_pooling_nwc_qu8( - global_pooling_op, - workspace.data(), - input.data(), output.data()); - if (status != xnn_status_success) { - state.SkipWithError("failed to setup Global Average Pooling operator"); - } - - for (auto _ : state) { - xnn_run_operator(global_pooling_op, /*threadpool=*/nullptr); - } - - status = xnn_delete_operator(global_pooling_op); - if (status != xnn_status_success) { - state.SkipWithError("failed to delete Global Average Pooling operator"); - } - global_pooling_op = nullptr; - - const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency(); - if (cpu_frequency != 0) { - state.counters["cpufreq"] = cpu_frequency; - } - - state.counters["bytes"] = benchmark::Counter( - uint64_t(state.iterations()) * - batch_size * (input_height * input_width + 1) * channels * sizeof(uint8_t), - benchmark::Counter::kIsRate); -} - -static void global_average_pooling_qs8(benchmark::State& state) { - const size_t batch_size = state.range(0); - const size_t input_height = state.range(1); - const size_t input_width = state.range(2); - const size_t channels = state.range(3); - - std::random_device random_device; - auto rng = std::mt19937(random_device()); - auto i8rng = std::bind( - std::uniform_int_distribution(std::numeric_limits::min(), std::numeric_limits::max()), std::ref(rng)); - - std::vector input(batch_size * input_height * input_width * channels); - std::generate(input.begin(), input.end(), std::ref(i8rng)); - std::vector output(batch_size * channels); - - xnn_status status = xnn_initialize(nullptr /* allocator */); - if (status != xnn_status_success) { - state.SkipWithError("failed to initialize XNNPACK"); - } - - xnn_operator_t global_pooling_op = nullptr; - status = xnn_create_global_average_pooling_nwc_qs8( - -1 /* input zero point */, 0.75f /* input scale */, - -1 /* output zero point */, 1.25f /* output scale */, - -128, 127, - 0 /* flags */, &global_pooling_op); - if (status != xnn_status_success) { - state.SkipWithError("failed to create Global Average Pooling operator"); - } - - size_t workspace_size = 0; - size_t workspace_alignment = 0; - status = xnn_reshape_global_average_pooling_nwc_qs8( - global_pooling_op, - batch_size, input_height * input_width, - channels, channels /* input stride */, channels /* output stride */, - &workspace_size, &workspace_alignment, - /*threadpool=*/nullptr); - if (status != xnn_status_success) { - state.SkipWithError("failed to reshape Global Average Pooling operator"); - } - - std::vector> workspace(workspace_size); - status = xnn_setup_global_average_pooling_nwc_qs8( - global_pooling_op, - workspace.data(), - input.data(), output.data()); - if (status != xnn_status_success) { - state.SkipWithError("failed to setup Global Average Pooling operator"); - } - - for (auto _ : state) { - xnn_run_operator(global_pooling_op, /*threadpool=*/nullptr); - } - - status = xnn_delete_operator(global_pooling_op); - if (status != xnn_status_success) { - state.SkipWithError("failed to delete Global Average Pooling operator"); - } - global_pooling_op = nullptr; - - const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency(); - if (cpu_frequency != 0) { - state.counters["cpufreq"] = cpu_frequency; - } - - state.counters["bytes"] = benchmark::Counter( - uint64_t(state.iterations()) * - batch_size * (input_height * input_width + 1) * channels * sizeof(int8_t), - benchmark::Counter::kIsRate); -} - -static void global_average_pooling_f16(benchmark::State& state) { - const size_t batch_size = state.range(0); - const size_t input_height = state.range(1); - const size_t input_width = state.range(2); - const size_t channels = state.range(3); - - std::random_device random_device; - auto rng = std::mt19937(random_device()); - auto f32rng = std::bind(std::uniform_real_distribution(0.1f, 1.0f), std::ref(rng)); - - std::vector input(batch_size * input_height * input_width * channels); - std::generate(input.begin(), input.end(), f32rng); - std::vector output(batch_size * channels); - - xnn_status status = xnn_initialize(nullptr /* allocator */); - if (status != xnn_status_success) { - state.SkipWithError("failed to initialize XNNPACK"); - } - - xnn_operator_t global_pooling_op = nullptr; - status = xnn_create_global_average_pooling_nwc_f16( - -std::numeric_limits::infinity(), +std::numeric_limits::infinity(), - 0 /* flags */, &global_pooling_op); - if (status != xnn_status_success) { - state.SkipWithError("failed to create Global Average Pooling operator"); - } - - size_t workspace_size = 0; - size_t workspace_alignment = 0; - status = xnn_reshape_global_average_pooling_nwc_f16( - global_pooling_op, - batch_size, input_height * input_width, - channels, channels /* input stride */, channels /* output stride */, - &workspace_size, &workspace_alignment, - /*threadpool=*/nullptr); - if (status != xnn_status_success) { - state.SkipWithError("failed to reshape Global Average Pooling operator"); - } - - std::vector> workspace(workspace_size); - status = xnn_setup_global_average_pooling_nwc_f16( - global_pooling_op, - workspace.data(), - input.data(), output.data()); - if (status != xnn_status_success) { - state.SkipWithError("failed to setup Global Average Pooling operator"); - } - - for (auto _ : state) { - xnn_run_operator(global_pooling_op, /*threadpool=*/nullptr); - } - - status = xnn_delete_operator(global_pooling_op); - if (status != xnn_status_success) { - state.SkipWithError("failed to delete Global Average Pooling operator"); - } - global_pooling_op = nullptr; - - const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency(); - if (cpu_frequency != 0) { - state.counters["cpufreq"] = cpu_frequency; - } - - state.counters["bytes"] = benchmark::Counter( - uint64_t(state.iterations()) * - batch_size * (input_height * input_width + 1) * channels * sizeof(xnn_float16), - benchmark::Counter::kIsRate); -} - -static void global_average_pooling_f32(benchmark::State& state) { - const size_t batch_size = state.range(0); - const size_t input_height = state.range(1); - const size_t input_width = state.range(2); - const size_t channels = state.range(3); - - std::random_device random_device; - auto rng = std::mt19937(random_device()); - auto f32rng = std::bind(std::uniform_real_distribution(), std::ref(rng)); - - std::vector input(batch_size * input_height * input_width * channels); - std::generate(input.begin(), input.end(), std::ref(f32rng)); - std::vector output(batch_size * channels); - - xnn_status status = xnn_initialize(nullptr /* allocator */); - if (status != xnn_status_success) { - state.SkipWithError("failed to initialize XNNPACK"); - } - - xnn_operator_t global_pooling_op = nullptr; - status = xnn_create_global_average_pooling_nwc_f32( - -std::numeric_limits::infinity(), +std::numeric_limits::infinity(), - 0 /* flags */, &global_pooling_op); - if (status != xnn_status_success) { - state.SkipWithError("failed to create Global Average Pooling operator"); - } - - size_t workspace_size = 0; - size_t workspace_alignment = 0; - status = xnn_reshape_global_average_pooling_nwc_f32( - global_pooling_op, - batch_size, input_height * input_width, - channels, channels /* input stride */, channels /* output stride */, - &workspace_size, &workspace_alignment, - /*threadpool=*/nullptr); - if (status != xnn_status_success) { - state.SkipWithError("failed to reshape Global Average Pooling operator"); - } - - std::vector> workspace(workspace_size); - status = xnn_setup_global_average_pooling_nwc_f32( - global_pooling_op, - workspace.data(), - input.data(), output.data()); - if (status != xnn_status_success) { - state.SkipWithError("failed to setup Global Average Pooling operator"); - } - - for (auto _ : state) { - xnn_run_operator(global_pooling_op, /*threadpool=*/nullptr); - } - - status = xnn_delete_operator(global_pooling_op); - if (status != xnn_status_success) { - state.SkipWithError("failed to delete Global Average Pooling operator"); - } - global_pooling_op = nullptr; - - const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency(); - if (cpu_frequency != 0) { - state.counters["cpufreq"] = cpu_frequency; - } - - state.counters["bytes"] = benchmark::Counter( - uint64_t(state.iterations()) * - batch_size * (input_height * input_width + 1) * channels * sizeof(float), - benchmark::Counter::kIsRate); -} - -static void ImageNetArguments(benchmark::internal::Benchmark* b) { - b->ArgNames({"N", "H", "W", "C"}); - - /* N IH IW C */ - b->Args({1, 7, 7, 1000}); - b->Args({1, 13, 13, 1000}); -} - -BENCHMARK(global_average_pooling_qu8)->Apply(ImageNetArguments)->UseRealTime(); -BENCHMARK(global_average_pooling_qs8)->Apply(ImageNetArguments)->UseRealTime(); -BENCHMARK(global_average_pooling_f16)->Apply(ImageNetArguments)->UseRealTime(); -BENCHMARK(global_average_pooling_f32)->Apply(ImageNetArguments)->UseRealTime(); - -#ifndef XNNPACK_BENCHMARK_NO_MAIN -BENCHMARK_MAIN(); -#endif diff --git a/build_srcs.bzl b/build_srcs.bzl index c2e1fa7155c..53d0491e94e 100644 --- a/build_srcs.bzl +++ b/build_srcs.bzl @@ -20,8 +20,6 @@ OPERATOR_SRCS = [ "src/operators/deconvolution-nhwc.c", "src/operators/dynamic-fully-connected-nc.c", "src/operators/fully-connected-nc.c", - "src/operators/global-average-pooling-ncw.c", - "src/operators/global-average-pooling-nwc.c", "src/operators/lut-elementwise-nc.c", "src/operators/max-pooling-nhwc.c", "src/operators/pack-lh.c", @@ -64,8 +62,6 @@ SUBGRAPH_SRCS = [ "src/subgraph/fully-connected-sparse.c", "src/subgraph/fully-connected.c", "src/subgraph/gelu.c", - "src/subgraph/global-average-pooling.c", - "src/subgraph/global-sum-pooling.c", "src/subgraph/hardswish.c", "src/subgraph/leaky-relu.c", "src/subgraph/log.c", @@ -113,7 +109,6 @@ XNNPACK_SRCS = [ "src/configs/dwconv-config.c", "src/configs/dwconv2d-chw-config.c", "src/configs/gavgpool-config.c", - "src/configs/gavgpool-cw-config.c", "src/configs/gemm-config.c", "src/configs/ibilinear-chw-config.c", "src/configs/ibilinear-config.c", diff --git a/cmake/gen/neon_microkernels.cmake b/cmake/gen/neon_microkernels.cmake index a35d80ac0e0..a9a6d6a7ae3 100644 --- a/cmake/gen/neon_microkernels.cmake +++ b/cmake/gen/neon_microkernels.cmake @@ -27,7 +27,6 @@ SET(PROD_NEON_MICROKERNEL_SRCS src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-5x5p2-minmax-neon-1x4.c src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-5x5s2p2-minmax-neon-1x4.c src/f32-f16-vcvt/gen/f32-f16-vcvt-neon-u8.c - src/f32-gavgpool-cw/f32-gavgpool-cw-neon-u4.c src/f32-gavgpool/f32-gavgpool-7p7x-minmax-neon-c4.c src/f32-gavgpool/f32-gavgpool-7x-minmax-neon-c4.c src/f32-gemm/gen/f32-gemm-1x8-minmax-neon-lane-ld64.c diff --git a/cmake/gen/neonfp16arith_microkernels.cmake b/cmake/gen/neonfp16arith_microkernels.cmake index aa8ca503c40..62dbe4a7a37 100644 --- a/cmake/gen/neonfp16arith_microkernels.cmake +++ b/cmake/gen/neonfp16arith_microkernels.cmake @@ -24,7 +24,6 @@ SET(PROD_NEONFP16ARITH_MICROKERNEL_SRCS src/f16-dwconv2d-chw/gen/f16-dwconv2d-chw-5x5s2p2-minmax-neonfp16arith-1x8.c src/f16-f32acc-rdsum/gen/f16-f32acc-rdsum-7p7x-minmax-neonfp16arith-c16.c src/f16-f32acc-rsum/gen/f16-f32acc-rsum-neonfp16arith-u32-acc4.c - src/f16-gavgpool-cw/f16-gavgpool-cw-neonfp16arith-u8.c src/f16-gavgpool/gen/f16-gavgpool-7p7x-minmax-neonfp16arith-c8.c src/f16-gavgpool/gen/f16-gavgpool-7x-minmax-neonfp16arith-c8.c src/f16-gemm/gen/f16-gemm-1x8-minmax-neonfp16arith-ld64.c diff --git a/cmake/gen/scalar_microkernels.cmake b/cmake/gen/scalar_microkernels.cmake index 4a1afffec47..30082cad98f 100644 --- a/cmake/gen/scalar_microkernels.cmake +++ b/cmake/gen/scalar_microkernels.cmake @@ -40,7 +40,6 @@ SET(PROD_SCALAR_MICROKERNEL_SRCS src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-5x5s2p2-minmax-scalar-2x1-acc2.c src/f32-f16-vcvt/gen/f32-f16-vcvt-scalar-bitcast-u4.c src/f32-f16-vcvt/gen/f32-f16-vcvt-scalar-fabsf-u2.c - src/f32-gavgpool-cw/f32-gavgpool-cw-scalar-u1.c src/f32-gavgpool/f32-gavgpool-7p7x-minmax-scalar-c1.c src/f32-gavgpool/f32-gavgpool-7x-minmax-scalar-c1.c src/f32-gemm/gen/f32-gemm-1x4-minmax-scalar.c diff --git a/cmake/gen/sse_microkernels.cmake b/cmake/gen/sse_microkernels.cmake index a682a18f490..b8777e138f4 100644 --- a/cmake/gen/sse_microkernels.cmake +++ b/cmake/gen/sse_microkernels.cmake @@ -22,7 +22,6 @@ SET(PROD_SSE_MICROKERNEL_SRCS src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-sse-1x4-acc3.c src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-5x5p2-minmax-sse-4x4.c src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-5x5s2p2-minmax-sse-2x4.c - src/f32-gavgpool-cw/f32-gavgpool-cw-sse-u4.c src/f32-gavgpool/f32-gavgpool-7p7x-minmax-sse-c4.c src/f32-gavgpool/f32-gavgpool-7x-minmax-sse-c4.c src/f32-gemm/gen/f32-gemm-1x8-minmax-sse-load1.c diff --git a/cmake/gen/wasmsimd_microkernels.cmake b/cmake/gen/wasmsimd_microkernels.cmake index 4a5c9d0db28..b8d29cb8180 100644 --- a/cmake/gen/wasmsimd_microkernels.cmake +++ b/cmake/gen/wasmsimd_microkernels.cmake @@ -48,8 +48,6 @@ SET(PROD_WASMSIMD_MICROKERNEL_SRCS src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-arm-splat-1x4-acc2.c src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-x86-splat-1x4-acc2.c src/f32-f16-vcvt/gen/f32-f16-vcvt-wasmsimd-u24.c - src/f32-gavgpool-cw/f32-gavgpool-cw-wasmsimd-arm-u4.c - src/f32-gavgpool-cw/f32-gavgpool-cw-wasmsimd-x86-u4.c src/f32-gavgpool/f32-gavgpool-7p7x-minmax-wasmsimd-arm-c4.c src/f32-gavgpool/f32-gavgpool-7p7x-minmax-wasmsimd-x86-c4.c src/f32-gavgpool/f32-gavgpool-7x-minmax-wasmsimd-arm-c4.c diff --git a/gen/neon_microkernels.bzl b/gen/neon_microkernels.bzl index db2bf55c00a..80e0b69fa88 100644 --- a/gen/neon_microkernels.bzl +++ b/gen/neon_microkernels.bzl @@ -23,7 +23,6 @@ PROD_NEON_MICROKERNEL_SRCS = [ "src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-5x5p2-minmax-neon-1x4.c", "src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-5x5s2p2-minmax-neon-1x4.c", "src/f32-f16-vcvt/gen/f32-f16-vcvt-neon-u8.c", - "src/f32-gavgpool-cw/f32-gavgpool-cw-neon-u4.c", "src/f32-gavgpool/f32-gavgpool-7p7x-minmax-neon-c4.c", "src/f32-gavgpool/f32-gavgpool-7x-minmax-neon-c4.c", "src/f32-gemm/gen/f32-gemm-1x8-minmax-neon-lane-ld64.c", diff --git a/gen/neonfp16arith_microkernels.bzl b/gen/neonfp16arith_microkernels.bzl index 018ca23cdfd..3477376ceb5 100644 --- a/gen/neonfp16arith_microkernels.bzl +++ b/gen/neonfp16arith_microkernels.bzl @@ -20,7 +20,6 @@ PROD_NEONFP16ARITH_MICROKERNEL_SRCS = [ "src/f16-dwconv2d-chw/gen/f16-dwconv2d-chw-5x5s2p2-minmax-neonfp16arith-1x8.c", "src/f16-f32acc-rdsum/gen/f16-f32acc-rdsum-7p7x-minmax-neonfp16arith-c16.c", "src/f16-f32acc-rsum/gen/f16-f32acc-rsum-neonfp16arith-u32-acc4.c", - "src/f16-gavgpool-cw/f16-gavgpool-cw-neonfp16arith-u8.c", "src/f16-gavgpool/gen/f16-gavgpool-7p7x-minmax-neonfp16arith-c8.c", "src/f16-gavgpool/gen/f16-gavgpool-7x-minmax-neonfp16arith-c8.c", "src/f16-gemm/gen/f16-gemm-1x8-minmax-neonfp16arith-ld64.c", diff --git a/gen/scalar_microkernels.bzl b/gen/scalar_microkernels.bzl index 1f07560dcea..dc83dabaae6 100644 --- a/gen/scalar_microkernels.bzl +++ b/gen/scalar_microkernels.bzl @@ -36,7 +36,6 @@ PROD_SCALAR_MICROKERNEL_SRCS = [ "src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-5x5s2p2-minmax-scalar-2x1-acc2.c", "src/f32-f16-vcvt/gen/f32-f16-vcvt-scalar-bitcast-u4.c", "src/f32-f16-vcvt/gen/f32-f16-vcvt-scalar-fabsf-u2.c", - "src/f32-gavgpool-cw/f32-gavgpool-cw-scalar-u1.c", "src/f32-gavgpool/f32-gavgpool-7p7x-minmax-scalar-c1.c", "src/f32-gavgpool/f32-gavgpool-7x-minmax-scalar-c1.c", "src/f32-gemm/gen/f32-gemm-1x4-minmax-scalar.c", diff --git a/gen/sse_microkernels.bzl b/gen/sse_microkernels.bzl index e912651df93..d27e5296e19 100644 --- a/gen/sse_microkernels.bzl +++ b/gen/sse_microkernels.bzl @@ -18,7 +18,6 @@ PROD_SSE_MICROKERNEL_SRCS = [ "src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-sse-1x4-acc3.c", "src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-5x5p2-minmax-sse-4x4.c", "src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-5x5s2p2-minmax-sse-2x4.c", - "src/f32-gavgpool-cw/f32-gavgpool-cw-sse-u4.c", "src/f32-gavgpool/f32-gavgpool-7p7x-minmax-sse-c4.c", "src/f32-gavgpool/f32-gavgpool-7x-minmax-sse-c4.c", "src/f32-gemm/gen/f32-gemm-1x8-minmax-sse-load1.c", diff --git a/gen/wasmsimd_microkernels.bzl b/gen/wasmsimd_microkernels.bzl index ec627c74f10..39965b5bb9e 100644 --- a/gen/wasmsimd_microkernels.bzl +++ b/gen/wasmsimd_microkernels.bzl @@ -44,8 +44,6 @@ PROD_WASMSIMD_MICROKERNEL_SRCS = [ "src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-arm-splat-1x4-acc2.c", "src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-x86-splat-1x4-acc2.c", "src/f32-f16-vcvt/gen/f32-f16-vcvt-wasmsimd-u24.c", - "src/f32-gavgpool-cw/f32-gavgpool-cw-wasmsimd-arm-u4.c", - "src/f32-gavgpool-cw/f32-gavgpool-cw-wasmsimd-x86-u4.c", "src/f32-gavgpool/f32-gavgpool-7p7x-minmax-wasmsimd-arm-c4.c", "src/f32-gavgpool/f32-gavgpool-7p7x-minmax-wasmsimd-x86-c4.c", "src/f32-gavgpool/f32-gavgpool-7x-minmax-wasmsimd-arm-c4.c", diff --git a/include/xnnpack.h b/include/xnnpack.h index a08ac9b09cd..7d32db7ce00 100644 --- a/include/xnnpack.h +++ b/include/xnnpack.h @@ -690,7 +690,7 @@ enum xnn_status xnn_define_depth_to_space( /// dimensions defined in the @a subgraph. /// @param flags - binary features of the 1D Global Average Pooling Node. The only currently supported value is /// XNN_FLAG_KEEP_DIMS. -enum xnn_status xnn_define_global_average_pooling_1d( +XNN_DEPRECATED enum xnn_status xnn_define_global_average_pooling_1d( xnn_subgraph_t subgraph, float output_min, float output_max, @@ -710,7 +710,7 @@ enum xnn_status xnn_define_global_average_pooling_1d( /// dimensions defined in the @a subgraph. /// @param flags - binary features of the 2D Global Average Pooling Node. The only currently supported value is /// XNN_FLAG_KEEP_DIMS. -enum xnn_status xnn_define_global_average_pooling_2d( +XNN_DEPRECATED enum xnn_status xnn_define_global_average_pooling_2d( xnn_subgraph_t subgraph, float output_min, float output_max, @@ -729,7 +729,7 @@ enum xnn_status xnn_define_global_average_pooling_2d( /// dimensions defined in the @a subgraph. /// @param flags - binary features of the 1D Global Sum Pooling Node. The only currently supported value is /// XNN_FLAG_KEEP_DIMS. -enum xnn_status xnn_define_global_sum_pooling_1d( +XNN_DEPRECATED enum xnn_status xnn_define_global_sum_pooling_1d( xnn_subgraph_t subgraph, float output_min, float output_max, @@ -749,7 +749,7 @@ enum xnn_status xnn_define_global_sum_pooling_1d( /// dimensions defined in the @a subgraph. /// @param flags - binary features of the 2D Global Sum Pooling Node. The only currently supported value is /// XNN_FLAG_KEEP_DIMS. -enum xnn_status xnn_define_global_sum_pooling_2d( +XNN_DEPRECATED enum xnn_status xnn_define_global_sum_pooling_2d( xnn_subgraph_t subgraph, float output_min, float output_max, @@ -4557,188 +4557,6 @@ enum xnn_status xnn_run_gelu_nc_f32( uint32_t flags, pthreadpool_t threadpool); -enum xnn_status xnn_create_global_average_pooling_ncw_f16( - float output_min, - float output_max, - uint32_t flags, - xnn_operator_t* global_average_pooling_op_out); - -enum xnn_status xnn_reshape_global_average_pooling_ncw_f16( - xnn_operator_t global_average_pooling_op, - size_t batch_size, - size_t width, - size_t channels, - pthreadpool_t threadpool); - -enum xnn_status xnn_setup_global_average_pooling_ncw_f16( - xnn_operator_t global_average_pooling_op, - const void* input, - void* output); - -enum xnn_status xnn_create_global_average_pooling_ncw_f32( - float output_min, - float output_max, - uint32_t flags, - xnn_operator_t* global_average_pooling_op_out); - -enum xnn_status xnn_reshape_global_average_pooling_ncw_f32( - xnn_operator_t global_average_pooling_op, - size_t batch_size, - size_t width, - size_t channels, - pthreadpool_t threadpool); - -enum xnn_status xnn_setup_global_average_pooling_ncw_f32( - xnn_operator_t global_average_pooling_op, - const float* input, - float* output); - -enum xnn_status xnn_create_global_average_pooling_nwc_f16( - float output_min, - float output_max, - uint32_t flags, - xnn_operator_t* global_average_pooling_op_out); - -enum xnn_status xnn_reshape_global_average_pooling_nwc_f16( - xnn_operator_t global_average_pooling_op, - size_t batch_size, - size_t width, - size_t channels, - size_t input_stride, - size_t output_stride, - size_t* workspace_size, - size_t* workspace_alignment, - pthreadpool_t threadpool); - -enum xnn_status xnn_setup_global_average_pooling_nwc_f16( - xnn_operator_t global_average_pooling_op, - void* workspace, - const void* input, - void* output); - -enum xnn_status xnn_create_global_average_pooling_nwc_f32( - float output_min, - float output_max, - uint32_t flags, - xnn_operator_t* global_average_pooling_op_out); - -enum xnn_status xnn_reshape_global_average_pooling_nwc_f32( - xnn_operator_t global_average_pooling_op, - size_t batch_size, - size_t width, - size_t channels, - size_t input_stride, - size_t output_stride, - size_t* workspace_size, - size_t* workspace_alignment, - pthreadpool_t threadpool); - -enum xnn_status xnn_setup_global_average_pooling_nwc_f32( - xnn_operator_t global_average_pooling_op, - void* workspace, - const float* input, - float* output); - -enum xnn_status xnn_create_global_average_pooling_nwc_qs8( - int8_t input_zero_point, - float input_scale, - int8_t output_zero_point, - float output_scale, - int8_t output_min, - int8_t output_max, - uint32_t flags, - xnn_operator_t* global_average_pooling_op_out); - -enum xnn_status xnn_reshape_global_average_pooling_nwc_qs8( - xnn_operator_t global_average_pooling_op, - size_t batch_size, - size_t width, - size_t channels, - size_t input_stride, - size_t output_stride, - size_t* workspace_size, - size_t* workspace_alignment, - pthreadpool_t threadpool); - -enum xnn_status xnn_setup_global_average_pooling_nwc_qs8( - xnn_operator_t global_average_pooling_op, - void* workspace, - const int8_t* input, - int8_t* output); - -enum xnn_status xnn_create_global_average_pooling_nwc_qu8( - uint8_t input_zero_point, - float input_scale, - uint8_t output_zero_point, - float output_scale, - uint8_t output_min, - uint8_t output_max, - uint32_t flags, - xnn_operator_t* global_average_pooling_op_out); - -enum xnn_status xnn_reshape_global_average_pooling_nwc_qu8( - xnn_operator_t global_average_pooling_op, - size_t batch_size, - size_t width, - size_t channels, - size_t input_stride, - size_t output_stride, - size_t* workspace_size, - size_t* workspace_alignment, - pthreadpool_t threadpool); - -enum xnn_status xnn_setup_global_average_pooling_nwc_qu8( - xnn_operator_t global_average_pooling_op, - void* workspace, - const uint8_t* input, - uint8_t* output); - -enum xnn_status xnn_create_global_sum_pooling_nwc_f16( - float output_min, - float output_max, - uint32_t flags, - xnn_operator_t* global_sum_pooling_op_out); - -enum xnn_status xnn_reshape_global_sum_pooling_nwc_f16( - xnn_operator_t global_sum_pooling_op, - size_t batch_size, - size_t width, - size_t channels, - size_t input_stride, - size_t output_stride, - size_t* workspace_size, - size_t* workspace_alignment, - pthreadpool_t threadpool); - -enum xnn_status xnn_setup_global_sum_pooling_nwc_f16( - xnn_operator_t global_sum_pooling_op, - void* workspace, - const void* input, - void* output); - -enum xnn_status xnn_create_global_sum_pooling_nwc_f32( - float output_min, - float output_max, - uint32_t flags, - xnn_operator_t* global_sum_pooling_op_out); - -enum xnn_status xnn_reshape_global_sum_pooling_nwc_f32( - xnn_operator_t global_sum_pooling_op, - size_t batch_size, - size_t width, - size_t channels, - size_t input_stride, - size_t output_stride, - size_t* workspace_size, - size_t* workspace_alignment, - pthreadpool_t threadpool); - -enum xnn_status xnn_setup_global_sum_pooling_nwc_f32( - xnn_operator_t global_sum_pooling_op, - void* workspace, - const float* input, - float* output); - enum xnn_status xnn_create_hardswish_nc_f16( uint32_t flags, xnn_operator_t* hardswish_op_out); diff --git a/scripts/generate-tests.sh b/scripts/generate-tests.sh index 75fc425003e..3ffc01f4221 100755 --- a/scripts/generate-tests.sh +++ b/scripts/generate-tests.sh @@ -12,10 +12,6 @@ tools/generate-gavgpool-test.py --spec test/qs8-gavgpool-minmax-rndnu.yaml --out tools/generate-gavgpool-test.py --spec test/qu8-gavgpool-minmax-fp32.yaml --output test/qu8-gavgpool-minmax-fp32.cc & tools/generate-gavgpool-test.py --spec test/qu8-gavgpool-minmax-rndnu.yaml --output test/qu8-gavgpool-minmax-rndnu.cc & -### Tests for GAvgPool CW layout micro-kernels -tools/generate-gavgpool-cw-test.py --spec test/f16-gavgpool-cw.yaml --output test/f16-gavgpool-cw.cc & -tools/generate-gavgpool-cw-test.py --spec test/f32-gavgpool-cw.yaml --output test/f32-gavgpool-cw.cc & - ### Tests for ArgMaxPool micro-kernels tools/generate-argmaxpool-test.py --spec test/f32-argmaxpool.yaml --output test/f32-argmaxpool.cc & diff --git a/src/configs/gavgpool-cw-config.c b/src/configs/gavgpool-cw-config.c deleted file mode 100644 index 6c56c70ec63..00000000000 --- a/src/configs/gavgpool-cw-config.c +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright 2023 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. - -#include -#include - -#include "xnnpack/common.h" -#include "xnnpack/config.h" -#include "xnnpack/gavgpool.h" -#include "xnnpack/init-once.h" -#include "xnnpack/microfnptr.h" -#include "xnnpack/microparams-init.h" - -static struct xnn_gavgpool_cw_config f16_gavgpool_cw_config = {0}; -static struct xnn_gavgpool_cw_config f32_gavgpool_cw_config = {0}; - -XNN_INIT_ONCE_GUARD(f16_gavgpool_cw); -XNN_INIT_ONCE_GUARD(f32_gavgpool_cw); - -static void init_f16_gavgpool_cw_config(void) { - #if XNN_ARCH_ARM && XNN_ENABLE_ARM_FP16_VECTOR && XNN_ENABLE_ARM_FP16_SCALAR - const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); - assert(hardware_config != NULL); - if (hardware_config->use_arm_neon_fp16_arith) { - f16_gavgpool_cw_config.ukernel = (xnn_gavgpool_cw_ukernel_fn) xnn_f16_gavgpool_cw_ukernel__neonfp16arith_u8; - f16_gavgpool_cw_config.init.f16 = xnn_init_f16_gavgpool_scalar_params; - f16_gavgpool_cw_config.update.f16 = xnn_update_f16_gavgpool_scalar_params; - f16_gavgpool_cw_config.pixel_tile = 8; - } - #elif XNN_ARCH_ARM64 && XNN_ENABLE_ARM_FP16_VECTOR - const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); - assert(hardware_config != NULL); - if (hardware_config->use_arm_neon_fp16_arith) { - f16_gavgpool_cw_config.ukernel = (xnn_gavgpool_cw_ukernel_fn) xnn_f16_gavgpool_cw_ukernel__neonfp16arith_u8; - f16_gavgpool_cw_config.init.f16 = xnn_init_f16_gavgpool_scalar_params; - f16_gavgpool_cw_config.update.f16 = xnn_update_f16_gavgpool_scalar_params; - f16_gavgpool_cw_config.pixel_tile = 8; - } - #endif -} - -static void init_f32_gavgpool_cw_config(void) { - #if XNN_ARCH_ARM - const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); - assert(hardware_config != NULL); - if (hardware_config->use_arm_neon) { - f32_gavgpool_cw_config.ukernel = (xnn_gavgpool_cw_ukernel_fn) xnn_f32_gavgpool_cw_ukernel__neon_u4; - f32_gavgpool_cw_config.pixel_tile = 4; - } else if (!XNN_PLATFORM_MOBILE) { - f32_gavgpool_cw_config.ukernel = (xnn_gavgpool_cw_ukernel_fn) xnn_f32_gavgpool_cw_ukernel__scalar_u1; - f32_gavgpool_cw_config.pixel_tile = 1; - } - f32_gavgpool_cw_config.init.f32 = xnn_init_f32_gavgpool_neon_params; - f32_gavgpool_cw_config.update.f32 = xnn_update_f32_gavgpool_params; - #elif XNN_ARCH_ARM64 - f32_gavgpool_cw_config.ukernel = (xnn_gavgpool_cw_ukernel_fn) xnn_f32_gavgpool_cw_ukernel__neon_u4; - f32_gavgpool_cw_config.pixel_tile = 4; - f32_gavgpool_cw_config.init.f32 = xnn_init_f32_gavgpool_neon_params; - f32_gavgpool_cw_config.update.f32 = xnn_update_f32_gavgpool_params; - #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 - f32_gavgpool_cw_config.ukernel = (xnn_gavgpool_cw_ukernel_fn) xnn_f32_gavgpool_cw_ukernel__sse_u4; - f32_gavgpool_cw_config.pixel_tile = 4; - f32_gavgpool_cw_config.init.f32 = xnn_init_f32_gavgpool_sse_params; - f32_gavgpool_cw_config.update.f32 = xnn_update_f32_gavgpool_params; - #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); - assert(hardware_config != NULL); - if (hardware_config->is_x86) { - f32_gavgpool_cw_config.ukernel = (xnn_gavgpool_cw_ukernel_fn) xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_u4; - f32_gavgpool_cw_config.pixel_tile = 4; - } else { - f32_gavgpool_cw_config.ukernel = (xnn_gavgpool_cw_ukernel_fn) xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_u4; - f32_gavgpool_cw_config.pixel_tile = 4; - } - f32_gavgpool_cw_config.init.f32 = xnn_init_f32_gavgpool_scalar_params; - f32_gavgpool_cw_config.update.f32 = xnn_update_f32_gavgpool_params; - #else - f32_gavgpool_cw_config.ukernel = (xnn_gavgpool_cw_ukernel_fn) xnn_f32_gavgpool_cw_ukernel__scalar_u1; - f32_gavgpool_cw_config.pixel_tile = 1; - f32_gavgpool_cw_config.init.f32 = xnn_init_f32_gavgpool_scalar_params; - f32_gavgpool_cw_config.update.f32 = xnn_update_f32_gavgpool_params; - #endif -} - -const struct xnn_gavgpool_cw_config* xnn_init_f16_gavgpool_cw_config() { - const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); - if (hardware_config == NULL || !xnn_is_f16_chw_compatible_config(hardware_config)) { - return NULL; - } - XNN_INIT_ONCE(f16_gavgpool_cw); - return &f16_gavgpool_cw_config; -} - -const struct xnn_gavgpool_cw_config* xnn_init_f32_gavgpool_cw_config() { - const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); - if (hardware_config == NULL || !xnn_is_chw_compatible_config(hardware_config)) { - return NULL; - } - XNN_INIT_ONCE(f32_gavgpool_cw); - return &f32_gavgpool_cw_config; -} diff --git a/src/f16-gavgpool-cw/f16-gavgpool-cw-neonfp16arith-u8.c b/src/f16-gavgpool-cw/f16-gavgpool-cw-neonfp16arith-u8.c deleted file mode 100644 index 07a6722e0bd..00000000000 --- a/src/f16-gavgpool-cw/f16-gavgpool-cw-neonfp16arith-u8.c +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright 2019 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. - -#include - -#include - -#include "xnnpack/gavgpool.h" -#include "xnnpack/math.h" - - -void xnn_f16_gavgpool_cw_ukernel__neonfp16arith_u8( - size_t elements, - size_t channels, - const xnn_float16* input, - xnn_float16* output, - const union xnn_f16_gavgpool_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS -{ - assert(elements != 0); - assert(elements % sizeof(uint16_t) == 0); - assert(channels != 0); - - const uint16x8_t vmask = vld1q_u16(params->scalar.mask); - const float16x4_t vmultiplier = vreinterpret_f16_u16(vld1_dup_u16(¶ms->scalar.multiplier)); - const float16x4_t voutput_min = vreinterpret_f16_u16(vld1_dup_u16(¶ms->scalar.output_min)); - const float16x4_t voutput_max = vreinterpret_f16_u16(vld1_dup_u16(¶ms->scalar.output_max)); - - uint16_t* o = (uint16_t*) output; - const uint16_t* i = (const uint16_t*) input; - do { - float16x8_t vsum0 = vreinterpretq_f16_u16(vmovq_n_u16(0)); - float16x8_t vsum1 = vreinterpretq_f16_u16(vmovq_n_u16(0)); - size_t n = elements; - if (n >= 32 * sizeof(uint16_t)) { - do { - const float16x8_t vi0 = vreinterpretq_f16_u16(vld1q_u16(i)); - const float16x8_t vi1 = vreinterpretq_f16_u16(vld1q_u16(i + 8)); - const float16x8_t vi2 = vreinterpretq_f16_u16(vld1q_u16(i + 16)); - const float16x8_t vi3 = vreinterpretq_f16_u16(vld1q_u16(i + 24)); - i += 32; - const float16x8_t acc0 = vaddq_f16(vi0, vi1); - const float16x8_t acc1 = vaddq_f16(vi2, vi3); - vsum0 = vaddq_f16(vsum0, acc0); - vsum1 = vaddq_f16(vsum1, acc1); - n -= 32 * sizeof(uint16_t); - } while (n >= 32 * sizeof(uint16_t)); - } - vsum0 = vaddq_f16(vsum0, vsum1); - - while (n >= 8 * sizeof(uint16_t)) { - const float16x8_t vi0 = vreinterpretq_f16_u16(vld1q_u16(i)); - i += 8; - vsum0 = vaddq_f16(vsum0, vi0); - n -= 8 * sizeof(uint16_t); - } - - if XNN_UNLIKELY(n != 0) { - float16x8_t vi0 = vreinterpretq_f16_u16(vld1q_u16(i)); i = (const uint16_t*) ((uintptr_t) i + n); - - vi0 = vreinterpretq_f16_u16(vandq_u16(vmask, vreinterpretq_u16_f16(vi0))); - - vsum0 = vaddq_f16(vsum0, vi0); - } - - const float16x4_t vout4 = vpadd_f16(vget_low_f16(vsum0), vget_high_f16(vsum0)); - const float16x4_t vout2 = vpadd_f16(vout4, vout4); - const float16x4_t vout1 = vpadd_f16(vout2, vout2); - - float16x4_t vout = vmul_f16(vout1, vmultiplier); - - vout = vmax_f16(vout, voutput_min); - vout = vmin_f16(vout, voutput_max); - - vst1_lane_u16(o, vreinterpret_u16_f16(vout), 0); o += 1; - } while (--channels != 0); -} diff --git a/src/f32-gavgpool-cw/f32-gavgpool-cw-neon-u4.c b/src/f32-gavgpool-cw/f32-gavgpool-cw-neon-u4.c deleted file mode 100644 index bf1f1d53b47..00000000000 --- a/src/f32-gavgpool-cw/f32-gavgpool-cw-neon-u4.c +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright 2019 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. - -#include - -#include - -#include "xnnpack/gavgpool.h" -#include "xnnpack/math.h" - - -void xnn_f32_gavgpool_cw_ukernel__neon_u4( - size_t elements, - size_t channels, - const float* input, - float* output, - const union xnn_f32_gavgpool_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS -{ - assert(elements != 0); - assert(elements % sizeof(float) == 0); - assert(channels != 0); - - const uint32x4_t vmask = vld1q_u32(params->neon.mask); - const float32x2_t vmultiplier = vld1_dup_f32(¶ms->neon.multiplier); - const float32x2_t voutput_min = vld1_dup_f32(¶ms->neon.output_min); - const float32x2_t voutput_max = vld1_dup_f32(¶ms->neon.output_max); - - do { - float32x4_t vsum0 = vmovq_n_f32(0.0f); - size_t n = elements; - if (n >= 16 * sizeof(float)) { - float32x4_t vsum1 = vmovq_n_f32(0.0f); - do { - const float32x4_t vi0 = vld1q_f32(input); - const float32x4_t vi1 = vld1q_f32(input + 4); - const float32x4_t vi2 = vld1q_f32(input + 8); - const float32x4_t vi3 = vld1q_f32(input + 12); - input += 16; - const float32x4_t acc0 = vaddq_f32(vi0, vi1); - const float32x4_t acc1 = vaddq_f32(vi2, vi3); - vsum0 = vaddq_f32(vsum0, acc0); - vsum1 = vaddq_f32(vsum1, acc1); - n -= 16 * sizeof(float); - } while (n >= 32 * sizeof(float)); - vsum0 = vaddq_f32(vsum0, vsum1); - } - - while (n >= 4 * sizeof(float)) { - const float32x4_t vi0 = vld1q_f32(input); - input += 4; - vsum0 = vaddq_f32(vsum0, vi0); - n -= 4 * sizeof(float); - } - - if XNN_UNLIKELY(n != 0) { - float32x4_t vi0 = vld1q_f32(input); input = (const float*) ((uintptr_t) input + n); - - vi0 = vreinterpretq_f32_u32(vandq_u32(vmask, vreinterpretq_u32_f32(vi0))); - - vsum0 = vaddq_f32(vsum0, vi0); - } - - const float32x2_t vout2 = vpadd_f32(vget_low_f32(vsum0), vget_high_f32(vsum0)); - const float32x2_t vout1 = vpadd_f32(vout2, vout2); - - float32x2_t vout = vmul_f32(vout1, vmultiplier); - vout = vmax_f32(vout, voutput_min); - vout = vmin_f32(vout, voutput_max); - - vst1_lane_f32(output, vout, 0); output += 1; - } while (--channels != 0); -} diff --git a/src/f32-gavgpool-cw/f32-gavgpool-cw-scalar-u1.c b/src/f32-gavgpool-cw/f32-gavgpool-cw-scalar-u1.c deleted file mode 100644 index 3340aeb42ff..00000000000 --- a/src/f32-gavgpool-cw/f32-gavgpool-cw-scalar-u1.c +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2019 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. - -#include - -#include "xnnpack/gavgpool.h" -#include "xnnpack/math.h" - - -void xnn_f32_gavgpool_cw_ukernel__scalar_u1( - size_t elements, - size_t channels, - const float* input, - float* output, - const union xnn_f32_gavgpool_params params[restrict XNN_MIN_ELEMENTS(1)]) -{ - assert(elements != 0); - assert(elements % sizeof(float) == 0); - assert(channels != 0); - - const float* i0 = input; - - const float vmultiplier = params->scalar.multiplier; - const float voutput_max = params->scalar.output_max; - const float voutput_min = params->scalar.output_min; - - while (channels != 0) { - float vsum0 = 0.f; - float vsum1 = 0.f; - float vsum2 = 0.f; - float vsum3 = 0.f; - size_t n = elements; - while (n >= 4 * sizeof(float)) { - vsum0 += i0[0]; - vsum1 += i0[1]; - vsum2 += i0[2]; - vsum3 += i0[3]; - - i0 += 4; - n -= 4 * sizeof(float); - } - - while (n != 0) { - vsum0 += *i0++; - n -= sizeof(float); - } - - float vout = ( (vsum0 + vsum1) + (vsum2 + vsum3) ) * vmultiplier; - - vout = math_min_f32(vout, voutput_max); - vout = math_max_f32(vout, voutput_min); - - *output++ = vout; - channels -= 1; - } -} diff --git a/src/f32-gavgpool-cw/f32-gavgpool-cw-sse-u4.c b/src/f32-gavgpool-cw/f32-gavgpool-cw-sse-u4.c deleted file mode 100644 index c2f6fc4a593..00000000000 --- a/src/f32-gavgpool-cw/f32-gavgpool-cw-sse-u4.c +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright 2019 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. - -#include - -#include - -#include "xnnpack/gavgpool.h" -#include "xnnpack/math.h" - - -void xnn_f32_gavgpool_cw_ukernel__sse_u4( - size_t elements, - size_t channels, - const float* input, - float* output, - const union xnn_f32_gavgpool_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS -{ - assert(elements != 0); - assert(elements % sizeof(float) == 0); - assert(channels != 0); - - const float* i0 = input; - const float* i1 = (const float*) ((uintptr_t) i0 + elements); - const float* i2 = (const float*) ((uintptr_t) i1 + elements); - const float* i3 = (const float*) ((uintptr_t) i2 + elements); - - const __m128 vmask = _mm_load_ps((const float*) params->sse.mask); - const __m128 vmultiplier = _mm_load_ps(params->sse.multiplier); - const __m128 voutput_min = _mm_load_ps(params->sse.output_min); - const __m128 voutput_max = _mm_load_ps(params->sse.output_max); - - while (channels >= 4) { - __m128 vsum0 = _mm_setzero_ps(); - __m128 vsum1 = _mm_setzero_ps(); - __m128 vsum2 = _mm_setzero_ps(); - __m128 vsum3 = _mm_setzero_ps(); - size_t n = elements; - while (n >= 4 * sizeof(float)) { - const __m128 vi0 = _mm_loadu_ps(i0); - i0 += 4; - const __m128 vi1 = _mm_loadu_ps(i1); - i1 += 4; - const __m128 vi2 = _mm_loadu_ps(i2); - i2 += 4; - const __m128 vi3 = _mm_loadu_ps(i3); - i3 += 4; - - vsum0 = _mm_add_ps(vsum0, vi0); - vsum1 = _mm_add_ps(vsum1, vi1); - vsum2 = _mm_add_ps(vsum2, vi2); - vsum3 = _mm_add_ps(vsum3, vi3); - n -= 4 * sizeof(float); - } - - if XNN_UNLIKELY(n != 0) { - const __m128 vi0 = _mm_and_ps(_mm_loadu_ps(i0), vmask); - i0 = (const float*) ((uintptr_t) i0 + n); - const __m128 vi1 = _mm_and_ps(_mm_loadu_ps(i1), vmask); - i1 = (const float*) ((uintptr_t) i1 + n); - const __m128 vi2 = _mm_and_ps(_mm_loadu_ps(i2), vmask); - i2 = (const float*) ((uintptr_t) i2 + n); - const __m128 vi3 = _mm_and_ps(_mm_loadu_ps(i3), vmask); - i3 = (const float*) ((uintptr_t) i3 + n); - - vsum0 = _mm_add_ps(vsum0, vi0); - vsum1 = _mm_add_ps(vsum1, vi1); - vsum2 = _mm_add_ps(vsum2, vi2); - vsum3 = _mm_add_ps(vsum3, vi3); - } - - // Having exactly 4 rows makes this work out nicely as we end up with - // the 4 totals in 4 different lanes of the same vector. - const __m128 vsum01 = _mm_add_ps(_mm_unpacklo_ps(vsum0, vsum1), _mm_unpackhi_ps(vsum0, vsum1)); - const __m128 vsum23 = _mm_add_ps(_mm_unpacklo_ps(vsum2, vsum3), _mm_unpackhi_ps(vsum2, vsum3)); - const __m128 vsum = _mm_add_ps(_mm_movelh_ps(vsum01, vsum23), _mm_movehl_ps(vsum23, vsum01)); - __m128 vout = _mm_mul_ps(vsum, vmultiplier); - - vout = _mm_max_ps(vout, voutput_min); - vout = _mm_min_ps(vout, voutput_max); - - _mm_storeu_ps(output, vout); - output += 4; - i0 = i3; - i1 = (const float*) ((uintptr_t) i0 + elements); - i2 = (const float*) ((uintptr_t) i1 + elements); - i3 = (const float*) ((uintptr_t) i2 + elements); - channels -= 4; - } - - while (channels != 0) { - __m128 vsum = _mm_setzero_ps(); - size_t n = elements; - while (n >= 4 * sizeof(float)) { - const __m128 vi0 = _mm_loadu_ps(i0); - i0 += 4; - vsum = _mm_add_ps(vsum, vi0); - n -= 4 * sizeof(float); - } - - if XNN_UNLIKELY(n != 0) { - __m128 vi0 = _mm_and_ps(_mm_loadu_ps(i0), vmask); - i0 = (const float*) ((uintptr_t) i0 + n); - vsum = _mm_add_ps(vsum, vi0); - } - - vsum = _mm_add_ps(vsum, _mm_movehl_ps(vsum, vsum)); - vsum = _mm_add_ss(vsum, _mm_shuffle_ps(vsum, vsum, _MM_SHUFFLE(3, 2, 1, 1))); - - __m128 vout = _mm_mul_ss(vsum, vmultiplier); - - vout = _mm_max_ss(vout, voutput_min); - vout = _mm_min_ss(vout, voutput_max); - - _mm_store_ss(output, vout); - output += 1; - channels -= 1; - } -} diff --git a/src/f32-gavgpool-cw/f32-gavgpool-cw-wasmsimd-arm-u4.c b/src/f32-gavgpool-cw/f32-gavgpool-cw-wasmsimd-arm-u4.c deleted file mode 100644 index 7f0bd6319a4..00000000000 --- a/src/f32-gavgpool-cw/f32-gavgpool-cw-wasmsimd-arm-u4.c +++ /dev/null @@ -1,120 +0,0 @@ -// Copyright 2020 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. - -#include - -#include - -#include "xnnpack/gavgpool.h" -#include "xnnpack/math.h" - - -void xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_u4( - size_t elements, - size_t channels, - const float* input, - float* output, - const union xnn_f32_gavgpool_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS -{ - assert(elements != 0); - assert(elements % sizeof(float) == 0); - assert(channels != 0); - - const float* i0 = input; - const float* i1 = (const float*) ((uintptr_t) i0 + elements); - const float* i2 = (const float*) ((uintptr_t) i1 + elements); - const float* i3 = (const float*) ((uintptr_t) i2 + elements); - - const v128_t vmask = wasm_v128_load(params->scalar.mask); - const v128_t vmultiplier = wasm_v128_load32_splat(¶ms->scalar.multiplier); - const v128_t vmin = wasm_v128_load32_splat(¶ms->scalar.output_min); - const v128_t vmax = wasm_v128_load32_splat(¶ms->scalar.output_max); - - while (channels >= 4) { - v128_t vsum0 = wasm_f32x4_const_splat(0.0f); - v128_t vsum1 = vsum0; - v128_t vsum2 = vsum0; - v128_t vsum3 = vsum0; - size_t n = elements; - while (n >= 4 * sizeof(float)) { - const v128_t vi0 = wasm_v128_load(i0); - i0 += 4; - const v128_t vi1 = wasm_v128_load(i1); - i1 += 4; - const v128_t vi2 = wasm_v128_load(i2); - i2 += 4; - const v128_t vi3 = wasm_v128_load(i3); - i3 += 4; - - vsum0 = wasm_f32x4_add(vsum0, vi0); - vsum1 = wasm_f32x4_add(vsum1, vi1); - vsum2 = wasm_f32x4_add(vsum2, vi2); - vsum3 = wasm_f32x4_add(vsum3, vi3); - n -= 4 * sizeof(float); - } - - if XNN_UNLIKELY(n != 0) { - const v128_t vi0 = wasm_v128_and(wasm_v128_load(i0), vmask); - i0 = (const float*) ((uintptr_t) i0 + n); - const v128_t vi1 = wasm_v128_and(wasm_v128_load(i1), vmask); - i1 = (const float*) ((uintptr_t) i1 + n); - const v128_t vi2 = wasm_v128_and(wasm_v128_load(i2), vmask); - i2 = (const float*) ((uintptr_t) i2 + n); - const v128_t vi3 = wasm_v128_and(wasm_v128_load(i3), vmask); - i3 = (const float*) ((uintptr_t) i3 + n); - - vsum0 = wasm_f32x4_add(vsum0, vi0); - vsum1 = wasm_f32x4_add(vsum1, vi1); - vsum2 = wasm_f32x4_add(vsum2, vi2); - vsum3 = wasm_f32x4_add(vsum3, vi3); - } - - // Having exactly 4 rows makes this work out nicely as we end up with - // the 4 totals in 4 different lanes of the same vector. - const v128_t vsum01 = wasm_f32x4_add(wasm_v32x4_shuffle(vsum0, vsum1, 0, 2, 4, 6), wasm_v32x4_shuffle(vsum0, vsum1, 1, 3, 5, 7)); - const v128_t vsum23 = wasm_f32x4_add(wasm_v32x4_shuffle(vsum2, vsum3, 0, 2, 4, 6), wasm_v32x4_shuffle(vsum2, vsum3, 1, 3, 5, 7)); - const v128_t vsum = wasm_f32x4_add(wasm_v32x4_shuffle(vsum01, vsum23, 0, 2, 4, 6), wasm_v32x4_shuffle(vsum01, vsum23, 1, 3, 5, 7)); - v128_t vout = wasm_f32x4_mul(vsum, vmultiplier); - - vout = wasm_f32x4_max(vout, vmin); - vout = wasm_f32x4_min(vout, vmax); - - wasm_v128_store(output, vout); - output += 4; - i0 = i3; - i1 = (const float*) ((uintptr_t) i0 + elements); - i2 = (const float*) ((uintptr_t) i1 + elements); - i3 = (const float*) ((uintptr_t) i2 + elements); - channels -= 4; - } - - while (channels != 0) { - v128_t vsum = wasm_f32x4_const_splat(0.0f); - size_t n = elements; - while (n >= 4 * sizeof(float)) { - const v128_t vi0 = wasm_v128_load(i0); - i0 += 4; - vsum = wasm_f32x4_add(vsum, vi0); - n -= 4 * sizeof(float); - } - - if XNN_UNLIKELY(n != 0) { - v128_t vi0 = wasm_v128_and(vmask, wasm_v128_load(i0)); - i0 = (const float*) ((uintptr_t) i0 + n); - vsum = wasm_f32x4_add(vsum, vi0); - } - - vsum = wasm_f32x4_add(wasm_v32x4_shuffle(vsum, vsum, 0, 2, 4, 6), wasm_v32x4_shuffle(vsum, vsum, 1, 3, 5, 7)); - vsum = wasm_f32x4_add(wasm_v32x4_shuffle(vsum, vsum, 0, 2, 4, 6), wasm_v32x4_shuffle(vsum, vsum, 1, 3, 5, 7)); - - v128_t vout = wasm_f32x4_mul(vsum, vmultiplier); - - vout = wasm_f32x4_max(vout, vmin); - vout = wasm_f32x4_min(vout, vmax); - - *output++ = wasm_f32x4_extract_lane(vout, 0); - channels -= 1; - } -} diff --git a/src/f32-gavgpool-cw/f32-gavgpool-cw-wasmsimd-x86-u4.c b/src/f32-gavgpool-cw/f32-gavgpool-cw-wasmsimd-x86-u4.c deleted file mode 100644 index b1d276eecf9..00000000000 --- a/src/f32-gavgpool-cw/f32-gavgpool-cw-wasmsimd-x86-u4.c +++ /dev/null @@ -1,120 +0,0 @@ -// Copyright 2020 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. - -#include - -#include - -#include "xnnpack/gavgpool.h" -#include "xnnpack/math.h" - - -void xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_u4( - size_t elements, - size_t channels, - const float* input, - float* output, - const union xnn_f32_gavgpool_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS -{ - assert(elements != 0); - assert(elements % sizeof(float) == 0); - assert(channels != 0); - - const float* i0 = input; - const float* i1 = (const float*) ((uintptr_t) i0 + elements); - const float* i2 = (const float*) ((uintptr_t) i1 + elements); - const float* i3 = (const float*) ((uintptr_t) i2 + elements); - - const v128_t vmask = wasm_v128_load(params->scalar.mask); - const v128_t vmultiplier = wasm_v128_load32_splat(¶ms->scalar.multiplier); - const v128_t vmin = wasm_v128_load32_splat(¶ms->scalar.output_min); - const v128_t vmax = wasm_v128_load32_splat(¶ms->scalar.output_max); - - while (channels >= 4) { - v128_t vsum0 = wasm_f32x4_const_splat(0.0f); - v128_t vsum1 = vsum0; - v128_t vsum2 = vsum0; - v128_t vsum3 = vsum0; - size_t n = elements; - while (n >= 4 * sizeof(float)) { - const v128_t vi0 = wasm_v128_load(i0); - i0 += 4; - const v128_t vi1 = wasm_v128_load(i1); - i1 += 4; - const v128_t vi2 = wasm_v128_load(i2); - i2 += 4; - const v128_t vi3 = wasm_v128_load(i3); - i3 += 4; - - vsum0 = wasm_f32x4_add(vsum0, vi0); - vsum1 = wasm_f32x4_add(vsum1, vi1); - vsum2 = wasm_f32x4_add(vsum2, vi2); - vsum3 = wasm_f32x4_add(vsum3, vi3); - n -= 4 * sizeof(float); - } - - if XNN_UNLIKELY(n != 0) { - const v128_t vi0 = wasm_v128_and(wasm_v128_load(i0), vmask); - i0 = (const float*) ((uintptr_t) i0 + n); - const v128_t vi1 = wasm_v128_and(wasm_v128_load(i1), vmask); - i1 = (const float*) ((uintptr_t) i1 + n); - const v128_t vi2 = wasm_v128_and(wasm_v128_load(i2), vmask); - i2 = (const float*) ((uintptr_t) i2 + n); - const v128_t vi3 = wasm_v128_and(wasm_v128_load(i3), vmask); - i3 = (const float*) ((uintptr_t) i3 + n); - - vsum0 = wasm_f32x4_add(vsum0, vi0); - vsum1 = wasm_f32x4_add(vsum1, vi1); - vsum2 = wasm_f32x4_add(vsum2, vi2); - vsum3 = wasm_f32x4_add(vsum3, vi3); - } - - // Having exactly 4 rows makes this work out nicely as we end up with - // the 4 totals in 4 different lanes of the same vector. - const v128_t vsum01 = wasm_f32x4_add(wasm_v32x4_shuffle(vsum0, vsum1, 0, 2, 4, 6), wasm_v32x4_shuffle(vsum0, vsum1, 1, 3, 5, 7)); - const v128_t vsum23 = wasm_f32x4_add(wasm_v32x4_shuffle(vsum2, vsum3, 0, 2, 4, 6), wasm_v32x4_shuffle(vsum2, vsum3, 1, 3, 5, 7)); - const v128_t vsum = wasm_f32x4_add(wasm_v32x4_shuffle(vsum01, vsum23, 0, 2, 4, 6), wasm_v32x4_shuffle(vsum01, vsum23, 1, 3, 5, 7)); - v128_t vout = wasm_f32x4_mul(vsum, vmultiplier); - - vout = wasm_f32x4_pmin(vmax, vout); - vout = wasm_f32x4_pmax(vmin, vout); - - wasm_v128_store(output, vout); - output += 4; - i0 = i3; - i1 = (const float*) ((uintptr_t) i0 + elements); - i2 = (const float*) ((uintptr_t) i1 + elements); - i3 = (const float*) ((uintptr_t) i2 + elements); - channels -= 4; - } - - while (channels != 0) { - v128_t vsum = wasm_f32x4_const_splat(0.0f); - size_t n = elements; - while (n >= 4 * sizeof(float)) { - const v128_t vi0 = wasm_v128_load(i0); - i0 += 4; - vsum = wasm_f32x4_add(vsum, vi0); - n -= 4 * sizeof(float); - } - - if XNN_UNLIKELY(n != 0) { - v128_t vi0 = wasm_v128_and(vmask, wasm_v128_load(i0)); - i0 = (const float*) ((uintptr_t) i0 + n); - vsum = wasm_f32x4_add(vsum, vi0); - } - - vsum = wasm_f32x4_add(wasm_v32x4_shuffle(vsum, vsum, 0, 2, 4, 6), wasm_v32x4_shuffle(vsum, vsum, 1, 3, 5, 7)); - vsum = wasm_f32x4_add(wasm_v32x4_shuffle(vsum, vsum, 0, 2, 4, 6), wasm_v32x4_shuffle(vsum, vsum, 1, 3, 5, 7)); - - v128_t vout = wasm_f32x4_mul(vsum, vmultiplier); - - vout = wasm_f32x4_pmin(vmax, vout); - vout = wasm_f32x4_pmax(vmin, vout); - - *output++ = wasm_f32x4_extract_lane(vout, 0); - channels -= 1; - } -} diff --git a/src/operators/global-average-pooling-ncw.c b/src/operators/global-average-pooling-ncw.c deleted file mode 100644 index 6e7c3700200..00000000000 --- a/src/operators/global-average-pooling-ncw.c +++ /dev/null @@ -1,373 +0,0 @@ -// Copyright 2019 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. - -#include -#include -#include -#include -#include -#include - -#include "xnnpack.h" -#include "xnnpack/allocator.h" -#include "xnnpack/common.h" -#include "xnnpack/compute.h" -#include "xnnpack/config-types.h" -#include "xnnpack/config.h" -#include "xnnpack/fp16.h" -#include "xnnpack/log.h" -#include "xnnpack/math.h" -#include "xnnpack/microparams-init.h" -#include "xnnpack/microparams.h" -#include "xnnpack/operator-type.h" -#include "xnnpack/operator.h" -#include "xnnpack/params.h" -#include "pthreadpool.h" - -static enum xnn_status create_global_average_pooling_ncw( - uint32_t flags, - uint32_t log2_element_size, - size_t params_offset, - const void* params, - size_t params_size, - enum xnn_operator_type operator_type, - const struct xnn_gavgpool_cw_config* gavgpool_cw_config, - xnn_operator_t* global_average_pooling_op_out) -{ - xnn_operator_t global_average_pooling_op = NULL; - enum xnn_status status = xnn_status_uninitialized; - - if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) { - xnn_log_error("failed to create %s operator: XNNPACK is not initialized", - xnn_operator_type_to_string(operator_type)); - goto error; - } - - status = xnn_status_out_of_memory; - - global_average_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator)); - if (global_average_pooling_op == NULL) { - xnn_log_error( - "failed to allocate %zu bytes for %s operator descriptor", - sizeof(struct xnn_operator), xnn_operator_type_to_string(operator_type)); - goto error; - } - - memcpy((void*) ((uintptr_t) global_average_pooling_op + params_offset), params, params_size); - - global_average_pooling_op->type = operator_type; - global_average_pooling_op->flags = flags; - - global_average_pooling_op->state = xnn_run_state_invalid; - global_average_pooling_op->gavgpool_cw_config = gavgpool_cw_config; - - *global_average_pooling_op_out = global_average_pooling_op; - return xnn_status_success; - -error: - xnn_delete_operator(global_average_pooling_op); - return status; -} - -enum xnn_status xnn_create_global_average_pooling_ncw_f16( - float output_min, - float output_max, - uint32_t flags, - xnn_operator_t* global_average_pooling_op_out) -{ - if (isnan(output_min)) { - xnn_log_error( - "failed to create %s operator with NaN output lower bound: lower bound must be non-NaN", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_ncw_f16)); - return xnn_status_invalid_parameter; - } - - if (isnan(output_max)) { - xnn_log_error( - "failed to create %s operator with NaN output upper bound: upper bound must be non-NaN", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_ncw_f16)); - return xnn_status_invalid_parameter; - } - - if (fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_min)) >= fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_max))) { - xnn_log_error( - "failed to create %s operator with [%.7g, %.7g] output range: lower bound must be below upper bound", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_ncw_f16), - fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_min)), - fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_max))); - return xnn_status_invalid_parameter; - } - - const struct xnn_gavgpool_cw_config* gavgpool_cw_config = xnn_init_f16_gavgpool_cw_config(); - if (gavgpool_cw_config == NULL) { - xnn_log_error("failed to create %s operator: unsupported hardware configuration", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_ncw_f16)); - return xnn_status_unsupported_hardware; - } - - union xnn_f16_gavgpool_params params; - if (gavgpool_cw_config->init.f16 != NULL) { - gavgpool_cw_config->init.f16( - ¶ms, 0 /* scale */, fp16_ieee_from_fp32_value(output_min), fp16_ieee_from_fp32_value(output_max), 0); - } - - return create_global_average_pooling_ncw( - flags, /*log2_element_size=*/XNN_LOG2_SIZEOF_HALF, - offsetof(struct xnn_operator, params.f16_gavgpool), - ¶ms, sizeof(params), - xnn_operator_type_global_average_pooling_ncw_f16, - gavgpool_cw_config, - global_average_pooling_op_out); -} - -enum xnn_status xnn_create_global_average_pooling_ncw_f32( - float output_min, - float output_max, - uint32_t flags, - xnn_operator_t* global_average_pooling_op_out) -{ - if (isnan(output_min)) { - xnn_log_error( - "failed to create %s operator with NaN output lower bound: lower bound must be non-NaN", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_ncw_f32)); - return xnn_status_invalid_parameter; - } - - if (isnan(output_max)) { - xnn_log_error( - "failed to create %s operator with NaN output upper bound: upper bound must be non-NaN", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_ncw_f32)); - return xnn_status_invalid_parameter; - } - - if (output_min > output_max) { - xnn_log_error( - "failed to create %s operator with [%.7g, %.7g] output range: lower bound must be less than or equal to upper bound", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_ncw_f32), output_min, output_max); - return xnn_status_invalid_parameter; - } - - const struct xnn_gavgpool_cw_config* gavgpool_cw_config = xnn_init_f32_gavgpool_cw_config(); - if (gavgpool_cw_config == NULL) { - xnn_log_error("failed to create %s operator: unsupported hardware configuration", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_ncw_f32)); - return xnn_status_unsupported_hardware; - } - - union xnn_f32_gavgpool_params params; - assert(gavgpool_cw_config->init.f32 != NULL); - gavgpool_cw_config->init.f32(¶ms, nanf(""), output_min, output_max, 0); - - return create_global_average_pooling_ncw( - flags, /*log2_element_size=*/XNN_LOG2_SIZEOF_FLOAT, - offsetof(struct xnn_operator, params.f32_gavgpool), - ¶ms, sizeof(params), - xnn_operator_type_global_average_pooling_ncw_f32, - gavgpool_cw_config, - global_average_pooling_op_out); -} - -enum xnn_status xnn_reshape_global_average_pooling_ncw_f32( - xnn_operator_t global_average_pooling_op, - size_t batch_size, - size_t width, - size_t channels, - pthreadpool_t threadpool) -{ - if (global_average_pooling_op->type != xnn_operator_type_global_average_pooling_ncw_f32) { - xnn_log_error("failed to reshape operator: operator type mismatch (expected %s, got %s)", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_ncw_f32), - xnn_operator_type_to_string(global_average_pooling_op->type)); - return xnn_status_invalid_parameter; - } - global_average_pooling_op->state = xnn_run_state_invalid; - - if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) { - xnn_log_error("failed to reshape %s operator: XNNPACK is not initialized", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_ncw_f32)); - return xnn_status_uninitialized; - } - - if (width == 0) { - xnn_log_error( - "failed to reshape %s operator with width %zu: width must be non-zero", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_ncw_f32), width); - return xnn_status_invalid_parameter; - } - - if (channels == 0) { - xnn_log_error( - "failed to create %s operator with %zu channels: number of channels must be non-zero", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_ncw_f32), channels); - return xnn_status_invalid_parameter; - } - global_average_pooling_op->channels = channels; - - if (batch_size == 0) { - global_average_pooling_op->state = xnn_run_state_skip; - return xnn_status_success; - } - - xnn_update_f32_gavgpool_params(&global_average_pooling_op->params.f32_gavgpool, - 1.0f / (float) width, width); - - global_average_pooling_op->context.global_average_pooling_ncw = (struct global_average_pooling_ncw_context) { - .input_elements = width * sizeof(float), - .input_channel_stride = width * sizeof(float), - .input_batch_stride = channels * width * sizeof(float), - .output_channel_stride = sizeof(float), - .output_batch_stride = channels * sizeof(float), - .ukernel = global_average_pooling_op->gavgpool_cw_config->ukernel, - .params.f32 = global_average_pooling_op->params.f32_gavgpool, - }; - - global_average_pooling_op->compute[0].type = xnn_parallelization_type_2d_tile_1d; - global_average_pooling_op->compute[0].task_2d_tile_1d = - (pthreadpool_task_2d_tile_1d_t) xnn_compute_global_average_pooling_ncw; - global_average_pooling_op->compute[0].range[0] = batch_size; - global_average_pooling_op->compute[0].range[1] = channels; - - const size_t num_threads = pthreadpool_get_threads_count(threadpool); - if (num_threads > 1) { - const size_t target_channels_per_thread = 8; - global_average_pooling_op->compute[0].tile[0] = - divide_round_up(channels, num_threads * target_channels_per_thread); - } else { - global_average_pooling_op->compute[0].tile[0] = channels; - } - - global_average_pooling_op->state = xnn_run_state_needs_setup; - - return xnn_status_success; -} - -enum xnn_status xnn_reshape_global_average_pooling_ncw_f16( - xnn_operator_t global_average_pooling_op, - size_t batch_size, - size_t width, - size_t channels, - pthreadpool_t threadpool) -{ - if (global_average_pooling_op->type != xnn_operator_type_global_average_pooling_ncw_f16) { - xnn_log_error("failed to reshape operator: operator type mismatch (expected %s, got %s)", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_ncw_f16), - xnn_operator_type_to_string(global_average_pooling_op->type)); - return xnn_status_invalid_parameter; - } - global_average_pooling_op->state = xnn_run_state_invalid; - - if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) { - xnn_log_error("failed to reshape %s operator: XNNPACK is not initialized", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_ncw_f16)); - return xnn_status_uninitialized; - } - - if (width == 0) { - xnn_log_error( - "failed to reshape %s operator with width %zu: width must be non-zero", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_ncw_f16), width); - return xnn_status_invalid_parameter; - } - - if (channels == 0) { - xnn_log_error( - "failed to create %s operator with %zu channels: number of channels must be non-zero", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_ncw_f16), channels); - return xnn_status_invalid_parameter; - } - global_average_pooling_op->channels = channels; - if (batch_size == 0) { - global_average_pooling_op->state = xnn_run_state_skip; - return xnn_status_success; - } - - if (global_average_pooling_op->gavgpool_cw_config->update.f16 != NULL) { - global_average_pooling_op->gavgpool_cw_config->update.f16( - &global_average_pooling_op->params.f16_gavgpool, fp16_ieee_from_fp32_value(1.0f / (float) width), width); - } - - global_average_pooling_op->context.global_average_pooling_ncw = (struct global_average_pooling_ncw_context) { - .input_elements = width * sizeof(uint16_t), - .input_channel_stride = width * sizeof(uint16_t), - .input_batch_stride = channels * width * sizeof(uint16_t), - .output_channel_stride = sizeof(uint16_t), - .output_batch_stride = channels * sizeof(uint16_t), - .ukernel = global_average_pooling_op->gavgpool_cw_config->ukernel, - .params.f16 = global_average_pooling_op->params.f16_gavgpool, - }; - - global_average_pooling_op->compute[0].type = xnn_parallelization_type_2d_tile_1d; - global_average_pooling_op->compute[0].task_2d_tile_1d = - (pthreadpool_task_2d_tile_1d_t) xnn_compute_global_average_pooling_ncw; - global_average_pooling_op->compute[0].range[0] = batch_size; - global_average_pooling_op->compute[0].range[1] = channels; - - const size_t num_threads = pthreadpool_get_threads_count(threadpool); - if (num_threads > 1) { - const size_t target_channels_per_thread = 8; - global_average_pooling_op->compute[0].tile[0] = - divide_round_up(channels, num_threads * target_channels_per_thread); - } else { - global_average_pooling_op->compute[0].tile[0] = channels; - } - - global_average_pooling_op->state = xnn_run_state_needs_setup; - - return xnn_status_success; -} - -static enum xnn_status setup_global_average_pooling_ncw( - xnn_operator_t global_average_pooling_op, - enum xnn_operator_type expected_operator_type, - const float* input, - float* output) -{ - if (global_average_pooling_op->type != expected_operator_type) { - xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)", - xnn_operator_type_to_string(expected_operator_type), - xnn_operator_type_to_string(global_average_pooling_op->type)); - return xnn_status_invalid_parameter; - } - - switch (global_average_pooling_op->state) { - case xnn_run_state_skip: - return xnn_status_success; - case xnn_run_state_invalid: - xnn_log_error( - "failed to setup %s operator: operator has not been reshaped yet", - xnn_operator_type_to_string(global_average_pooling_op->type)); - return xnn_status_invalid_state; - case xnn_run_state_needs_setup: - // Operator has been reshaped, but not setup, continue with setup. - case xnn_run_state_ready: - // Operator has been reshaped, and we are setting up with different pointers. - break; - } - - global_average_pooling_op->context.global_average_pooling_ncw.input = input; - global_average_pooling_op->context.global_average_pooling_ncw.output = output; - - global_average_pooling_op->state = xnn_run_state_ready; - - return xnn_status_success; -} - -enum xnn_status xnn_setup_global_average_pooling_ncw_f32( - xnn_operator_t global_average_pooling_op, - const float* input, - float* output) -{ - return setup_global_average_pooling_ncw( - global_average_pooling_op, xnn_operator_type_global_average_pooling_ncw_f32, input, output); -} - -enum xnn_status xnn_setup_global_average_pooling_ncw_f16( - xnn_operator_t global_average_pooling_op, - const void* input, - void* output) -{ - return setup_global_average_pooling_ncw( - global_average_pooling_op, xnn_operator_type_global_average_pooling_ncw_f16, input, output); -} diff --git a/src/operators/global-average-pooling-nwc.c b/src/operators/global-average-pooling-nwc.c deleted file mode 100644 index e04b7943258..00000000000 --- a/src/operators/global-average-pooling-nwc.c +++ /dev/null @@ -1,851 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -// All rights reserved. -// -// Copyright 2019 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. - -#include -#include -#include -#include -#include -#include -#include - -#include "xnnpack.h" -#include "xnnpack/allocator.h" -#include "xnnpack/common.h" -#include "xnnpack/compute.h" -#include "xnnpack/config-types.h" -#include "xnnpack/config.h" -#include "xnnpack/fp16.h" -#include "xnnpack/log.h" -#include "xnnpack/math.h" -#include "xnnpack/microparams.h" -#include "xnnpack/operator-type.h" -#include "xnnpack/operator.h" -#include "xnnpack/params.h" -#include "pthreadpool.h" - -static enum xnn_status create_global_average_pooling_nwc( - uint32_t flags, - uint32_t log2_element_size, - size_t params_offset, - const void* params, - size_t params_size, - enum xnn_operator_type operator_type, - const struct xnn_gavgpool_config* gavgpool_config, - xnn_operator_t* global_average_pooling_op_out) -{ - xnn_operator_t global_average_pooling_op = NULL; - enum xnn_status status = xnn_status_uninitialized; - - if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) { - xnn_log_error("failed to create %s operator: XNNPACK is not initialized", - xnn_operator_type_to_string(operator_type)); - goto error; - } - - status = xnn_status_out_of_memory; - - global_average_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator)); - if (global_average_pooling_op == NULL) { - xnn_log_error( - "failed to allocate %zu bytes for %s operator descriptor", - sizeof(struct xnn_operator), xnn_operator_type_to_string(operator_type)); - goto error; - } - - memcpy((void*) ((uintptr_t) global_average_pooling_op + params_offset), params, params_size); - - global_average_pooling_op->type = operator_type; - global_average_pooling_op->flags = flags; - global_average_pooling_op->gavgpool_config = gavgpool_config; - - global_average_pooling_op->state = xnn_run_state_invalid; - - *global_average_pooling_op_out = global_average_pooling_op; - return xnn_status_success; - -error: - xnn_delete_operator(global_average_pooling_op); - return status; -} - -enum xnn_status xnn_create_global_average_pooling_nwc_qu8( - uint8_t input_zero_point, - float input_scale, - uint8_t output_zero_point, - float output_scale, - uint8_t output_min, - uint8_t output_max, - uint32_t flags, - xnn_operator_t* global_average_pooling_op_out) -{ - if (input_scale <= 0.0f || !isnormal(input_scale)) { - xnn_log_error( - "failed to create %s operator with %.7g input scale: scale must be finite, normalized, and positive", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_nwc_qu8), input_scale); - return xnn_status_invalid_parameter; - } - - if (output_scale <= 0.0f || !isnormal(output_scale)) { - xnn_log_error( - "failed to create %s operator with %.7g output scale: scale must be finite, normalized, and positive", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_nwc_qu8), output_scale); - return xnn_status_invalid_parameter; - } - - if (output_min > output_max) { - xnn_log_error( - "failed to create %s operator with [%" PRIu8 ", %" PRIu8 "] output range: lower bound must be less than or equal to upper bound", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_nwc_qu8), output_min, output_max); - return xnn_status_invalid_parameter; - } - - const float input_output_scale = input_scale / output_scale; - if (input_output_scale < 0x1.0p-8f || input_output_scale >= 0x1.0p+8f) { - xnn_log_error( - "failed to create %s operator with %.7g input-to-output scale ratio: scale ratio must be in [2**-8, 2**8) range", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_nwc_qu8), input_output_scale); - return xnn_status_unsupported_parameter; - } - - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qu8_gavgpool_config(); - assert(gavgpool_config != NULL); - - union xnn_qu8_avgpool_minmax_params params; - if (gavgpool_config->init.qu8 != NULL) { - gavgpool_config->init.qu8(¶ms, 0 /* bias */, 1.0f /* scale */, output_zero_point, output_min, output_max); - } - const enum xnn_status status = create_global_average_pooling_nwc( - flags, /*log2_element_size=*/XNN_LOG2_SIZEOF_UINT8_T, - offsetof(struct xnn_operator, params.qu8_gavgpool), - ¶ms, sizeof(params), - xnn_operator_type_global_average_pooling_nwc_qu8, - gavgpool_config, - global_average_pooling_op_out); - if (status == xnn_status_success) { - xnn_operator_t global_average_pooling_op = *global_average_pooling_op_out; - global_average_pooling_op->input_zero_point = (int32_t) (uint32_t) input_zero_point; - global_average_pooling_op->input_scale = input_scale; - global_average_pooling_op->output_scale = output_scale; - } - return status; -} - -enum xnn_status xnn_create_global_average_pooling_nwc_qs8( - int8_t input_zero_point, - float input_scale, - int8_t output_zero_point, - float output_scale, - int8_t output_min, - int8_t output_max, - uint32_t flags, - xnn_operator_t* global_average_pooling_op_out) -{ - if (input_scale <= 0.0f || !isnormal(input_scale)) { - xnn_log_error( - "failed to create %s operator with %.7g input scale: scale must be finite, normalized, and positive", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_nwc_qs8), input_scale); - return xnn_status_invalid_parameter; - } - - if (output_scale <= 0.0f || !isnormal(output_scale)) { - xnn_log_error( - "failed to create %s operator with %.7g output scale: scale must be finite, normalized, and positive", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_nwc_qs8), output_scale); - return xnn_status_invalid_parameter; - } - - if (output_min > output_max) { - xnn_log_error( - "failed to create %s operator with [%" PRId8 ", %" PRId8 "] output range: lower bound must be less than or equal to upper bound", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_nwc_qs8), output_min, output_max); - return xnn_status_invalid_parameter; - } - - const float input_output_scale = input_scale / output_scale; - if (input_output_scale < 0x1.0p-8f || input_output_scale >= 0x1.0p+8f) { - xnn_log_error( - "failed to create %s operator with %.7g input-to-output scale ratio: scale ratio must be in [2**-8, 2**8) range", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_nwc_qs8), input_output_scale); - return xnn_status_unsupported_parameter; - } - - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qs8_gavgpool_config(); - assert(gavgpool_config != NULL); - - union xnn_qs8_avgpool_minmax_params params; - if (gavgpool_config->init.qs8 != NULL) { - gavgpool_config->init.qs8(¶ms, 0 /* bias */, 1.0f /* scale */, output_zero_point, output_min, output_max); - } - const enum xnn_status status = create_global_average_pooling_nwc( - flags, /*log2_element_size=*/XNN_LOG2_SIZEOF_INT8_T, - offsetof(struct xnn_operator, params.qs8_gavgpool), - ¶ms, sizeof(params), - xnn_operator_type_global_average_pooling_nwc_qs8, - gavgpool_config, - global_average_pooling_op_out); - if (status == xnn_status_success) { - xnn_operator_t global_average_pooling_op = *global_average_pooling_op_out; - global_average_pooling_op->input_zero_point = (int32_t) input_zero_point; - global_average_pooling_op->input_scale = input_scale; - global_average_pooling_op->output_scale = output_scale; - } - return status; -} - -enum xnn_status xnn_create_global_average_pooling_nwc_f16( - float output_min, - float output_max, - uint32_t flags, - xnn_operator_t* global_average_pooling_op_out) -{ - if (isnan(output_min)) { - xnn_log_error( - "failed to create %s operator with NaN output lower bound: lower bound must be non-NaN", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_nwc_f16)); - return xnn_status_invalid_parameter; - } - - if (isnan(output_max)) { - xnn_log_error( - "failed to create %s operator with NaN output upper bound: upper bound must be non-NaN", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_nwc_f16)); - return xnn_status_invalid_parameter; - } - - if (fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_min)) >= fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_max))) { - xnn_log_error( - "failed to create %s operator with [%.7g, %.7g] output range: lower bound must be below upper bound", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_nwc_f16), - fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_min)), - fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_max))); - return xnn_status_invalid_parameter; - } - - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == NULL) { - xnn_log_error("failed to create %s operator: unsupported hardware configuration", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_nwc_f16)); - return xnn_status_unsupported_hardware; - } - - struct xnn_f16_scaleminmax_params params; - if (gavgpool_config->init.f16 != NULL) { - gavgpool_config->init.f16( - ¶ms, /*scale=*/xnn_float16_from_float(0.0f), xnn_float16_from_float(output_min), xnn_float16_from_float(output_max)); - } - return create_global_average_pooling_nwc( - flags, /*log2_element_size=*/XNN_LOG2_SIZEOF_HALF, - offsetof(struct xnn_operator, params.f16_scaleminmax), - ¶ms, sizeof(params), - xnn_operator_type_global_average_pooling_nwc_f16, - gavgpool_config, - global_average_pooling_op_out); -} - -enum xnn_status xnn_create_global_average_pooling_nwc_f32( - float output_min, - float output_max, - uint32_t flags, - xnn_operator_t* global_average_pooling_op_out) -{ - if (isnan(output_min)) { - xnn_log_error( - "failed to create %s operator with NaN output lower bound: lower bound must be non-NaN", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_nwc_f32)); - return xnn_status_invalid_parameter; - } - - if (isnan(output_max)) { - xnn_log_error( - "failed to create %s operator with NaN output upper bound: upper bound must be non-NaN", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_nwc_f32)); - return xnn_status_invalid_parameter; - } - - if (output_min > output_max) { - xnn_log_error( - "failed to create %s operator with [%.7g, %.7g] output range: lower bound must be less than or equal to upper bound", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_nwc_f32), output_min, output_max); - return xnn_status_invalid_parameter; - } - - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - if (gavgpool_config == NULL) { - xnn_log_error("failed to create %s operator: unsupported hardware configuration", - xnn_operator_type_to_string(xnn_operator_type_global_average_pooling_nwc_f32)); - return xnn_status_unsupported_hardware; - } - - struct xnn_f32_scaleminmax_params params; - if (gavgpool_config->init.f32 != NULL) { - gavgpool_config->init.f32(¶ms, 0.0f /* scale */, output_min, output_max); - } - return create_global_average_pooling_nwc( - flags, /*log2_element_size=*/XNN_LOG2_SIZEOF_FLOAT, - offsetof(struct xnn_operator, params.f32_scaleminmax), - ¶ms, sizeof(params), - xnn_operator_type_global_average_pooling_nwc_f32, - gavgpool_config, - global_average_pooling_op_out); -} - -enum xnn_status xnn_create_global_sum_pooling_nwc_f16( - float output_min, - float output_max, - uint32_t flags, - xnn_operator_t* global_sum_pooling_op_out) -{ - if (isnan(output_min)) { - xnn_log_error( - "failed to create %s operator with NaN output lower bound: lower bound must be non-NaN", - xnn_operator_type_to_string(xnn_operator_type_global_sum_pooling_nwc_f16)); - return xnn_status_invalid_parameter; - } - - if (isnan(output_max)) { - xnn_log_error( - "failed to create %s operator with NaN output upper bound: upper bound must be non-NaN", - xnn_operator_type_to_string(xnn_operator_type_global_sum_pooling_nwc_f16)); - return xnn_status_invalid_parameter; - } - - if (fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_min)) >= fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_max))) { - xnn_log_error( - "failed to create %s operator with [%.7g, %.7g] output range: lower bound must be below upper bound", - xnn_operator_type_to_string(xnn_operator_type_global_sum_pooling_nwc_f16), - fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_min)), - fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_max))); - return xnn_status_invalid_parameter; - } - - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == NULL) { - xnn_log_error("failed to create %s operator: unsupported hardware configuration", - xnn_operator_type_to_string(xnn_operator_type_global_sum_pooling_nwc_f16)); - return xnn_status_unsupported_hardware; - } - - struct xnn_f16_scaleminmax_params params; - if (gavgpool_config->init.f16 != NULL) { - gavgpool_config->init.f16( - ¶ms, - /*scale=*/xnn_float16_from_float(1.0f), - xnn_float16_from_float(output_min), - xnn_float16_from_float(output_max)); - } - return create_global_average_pooling_nwc( - flags, /*log2_element_size=*/XNN_LOG2_SIZEOF_HALF, - offsetof(struct xnn_operator, params.f16_scaleminmax), - ¶ms, sizeof(params), - xnn_operator_type_global_sum_pooling_nwc_f16, - gavgpool_config, - global_sum_pooling_op_out); -} - -enum xnn_status xnn_create_global_sum_pooling_nwc_f32( - float output_min, - float output_max, - uint32_t flags, - xnn_operator_t* global_sum_pooling_op_out) -{ - if (isnan(output_min)) { - xnn_log_error( - "failed to create %s operator with NaN output lower bound: lower bound must be non-NaN", - xnn_operator_type_to_string(xnn_operator_type_global_sum_pooling_nwc_f32)); - return xnn_status_invalid_parameter; - } - - if (isnan(output_max)) { - xnn_log_error( - "failed to create %s operator with NaN output upper bound: upper bound must be non-NaN", - xnn_operator_type_to_string(xnn_operator_type_global_sum_pooling_nwc_f32)); - return xnn_status_invalid_parameter; - } - - if (output_min > output_max) { - xnn_log_error( - "failed to create %s operator with [%.7g, %.7g] output range: lower bound must be less than or equal to upper bound", - xnn_operator_type_to_string(xnn_operator_type_global_sum_pooling_nwc_f32), output_min, output_max); - return xnn_status_invalid_parameter; - } - - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - if (gavgpool_config == NULL) { - xnn_log_error("failed to create %s operator: unsupported hardware configuration", - xnn_operator_type_to_string(xnn_operator_type_global_sum_pooling_nwc_f32)); - return xnn_status_unsupported_hardware; - } - - struct xnn_f32_scaleminmax_params params; - if (gavgpool_config->init.f32 != NULL) { - gavgpool_config->init.f32(¶ms, /*scale=*/1.0f, output_min, output_max); - } - return create_global_average_pooling_nwc( - flags, /*log2_element_size=*/XNN_LOG2_SIZEOF_FLOAT, - offsetof(struct xnn_operator, params.f32_scaleminmax), - ¶ms, sizeof(params), - xnn_operator_type_global_sum_pooling_nwc_f32, - gavgpool_config, - global_sum_pooling_op_out); -} - -static enum xnn_status reshape_global_average_pooling_nwc( - xnn_operator_t global_average_pooling_op, - size_t batch_size, - size_t width, - size_t channels, - size_t input_stride, - size_t output_stride, - size_t* workspace_size, - size_t* workspace_alignment, - size_t log2_data_element_size, - size_t log2_accumulator_element_size, - const struct xnn_gavgpool_config gavgpool[restrict XNN_MIN_ELEMENTS(1)], - enum xnn_operator_type expected_operator_type, - const void* params, - size_t params_size, - void (*update_params)(xnn_operator_t, size_t), - pthreadpool_t threadpool) -{ - if (global_average_pooling_op->type != expected_operator_type) { - xnn_log_error("failed to reshape operator: operator type mismatch (expected %s, got %s)", - xnn_operator_type_to_string(expected_operator_type), - xnn_operator_type_to_string(global_average_pooling_op->type)); - return xnn_status_invalid_parameter; - } - global_average_pooling_op->state = xnn_run_state_invalid; - - if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) { - xnn_log_error("failed to reshape %s operator: XNNPACK is not initialized", - xnn_operator_type_to_string(global_average_pooling_op->type)); - return xnn_status_uninitialized; - } - - if (channels == 0) { - xnn_log_error( - "failed to create %s operator with %zu channels: number of channels must be non-zero", - xnn_operator_type_to_string(expected_operator_type), channels); - return xnn_status_invalid_parameter; - } - - if (input_stride < channels) { - xnn_log_error( - "failed to create %s operator with input element stride of %zu: " - "stride must be at least as large as the number of channels (%zu)", - xnn_operator_type_to_string(expected_operator_type), input_stride, channels); - return xnn_status_invalid_parameter; - } - - if (output_stride < channels) { - xnn_log_error( - "failed to create %s operator with output element stride of %zu: " - "stride must be at least as large as the number of channels (%zu)", - xnn_operator_type_to_string(expected_operator_type), output_stride, channels); - return xnn_status_invalid_parameter; - } - - global_average_pooling_op->channels = channels; - global_average_pooling_op->input_pixel_stride = input_stride; - global_average_pooling_op->output_pixel_stride = output_stride; - - if (width == 0) { - xnn_log_error("failed to reshape %s operator with width %zu: width must be non-zero", - xnn_operator_type_to_string(global_average_pooling_op->type), width); - return xnn_status_invalid_parameter; - } - - if (batch_size == 0) { - global_average_pooling_op->state = xnn_run_state_skip; - return xnn_status_success; - } - - global_average_pooling_op->batch_size = batch_size; - global_average_pooling_op->input_width = width; - - if (update_params != NULL) { - update_params(global_average_pooling_op, width); - } - - const bool input_size_changed = (channels != global_average_pooling_op->last_input_channels); - if (input_size_changed) { - const size_t zero_bytes = (channels << log2_data_element_size) + XNN_EXTRA_BYTES; - void* zero_buffer = global_average_pooling_op->zero_buffer; - xnn_release_simd_memory(zero_buffer); - zero_buffer = - (void*) xnn_allocate_zero_simd_memory(zero_bytes); - global_average_pooling_op->zero_buffer = zero_buffer; - if (zero_buffer == NULL) { - xnn_log_error( - "failed to allocate %zu bytes for %s operator zero padding", - zero_bytes, xnn_operator_type_to_string(expected_operator_type)); - return xnn_status_out_of_memory; - } - global_average_pooling_op->zero_buffer = zero_buffer; - global_average_pooling_op->last_input_channels = channels; - } - - assert(gavgpool->row_tile != 0); - - const size_t input_stride_in_bytes = global_average_pooling_op->input_pixel_stride << log2_data_element_size; - global_average_pooling_op->context.global_average_pooling_nwc = (struct global_average_pooling_nwc_context) { - .zero = global_average_pooling_op->zero_buffer, - .input_pixel_stride = input_stride_in_bytes, - .input_batch_stride = input_stride_in_bytes * width, - .input_elements = width, - .channels = channels, - .output_batch_stride = (global_average_pooling_op->output_pixel_stride << log2_data_element_size), - }; - memcpy(&global_average_pooling_op->context.global_average_pooling_nwc.params, params, params_size); - global_average_pooling_op->compute[0].range[0] = batch_size; - - - if (width <= gavgpool->row_tile) { - *workspace_size = 0; - *workspace_alignment = 1; - global_average_pooling_op->compute[0].type = xnn_parallelization_type_1d; - global_average_pooling_op->compute[0].task_1d = (pthreadpool_task_1d_t) xnn_compute_global_average_pooling_nwc_unipass; - global_average_pooling_op->context.global_average_pooling_nwc.unipass_ukernel = gavgpool->unipass; - } else { - const size_t multipass_batch_stride = - round_up_po2( - (channels + (XNN_MULTIPASS_EXTRA_BYTES >> log2_data_element_size)) << log2_accumulator_element_size, - XNN_ALLOCATION_ALIGNMENT); - global_average_pooling_op->context.global_average_pooling_nwc.multipass_batch_stride = multipass_batch_stride; - - const size_t num_threads = pthreadpool_get_threads_count(threadpool); - const bool use_threads_workspace_size = num_threads < batch_size; - if (use_threads_workspace_size) { - *workspace_size = num_threads * multipass_batch_stride; - *workspace_alignment = XNN_ALLOCATION_ALIGNMENT; - global_average_pooling_op->compute[0].type = xnn_parallelization_type_1d_with_thread; - global_average_pooling_op->compute[0].task_1d_with_thread = - (pthreadpool_task_1d_with_thread_t) xnn_compute_global_average_pooling_nwc_multipass_with_thread; - } else { - *workspace_size = batch_size * multipass_batch_stride; - *workspace_alignment = XNN_ALLOCATION_ALIGNMENT; - global_average_pooling_op->compute[0].type = xnn_parallelization_type_1d; - global_average_pooling_op->compute[0].task_1d = - (pthreadpool_task_1d_t) xnn_compute_global_average_pooling_nwc_multipass; - } - - global_average_pooling_op->context.global_average_pooling_nwc.multipass_ukernel = gavgpool->multipass; - } - global_average_pooling_op->state = xnn_run_state_needs_setup; - - return xnn_status_success; -} - -static void update_params_qu8( - xnn_operator_t global_average_pooling_op, - size_t width) -{ - const int32_t bias = -((int32_t) width * global_average_pooling_op->input_zero_point); - const float scale = global_average_pooling_op->input_scale / (global_average_pooling_op->output_scale * (float) width); - global_average_pooling_op->gavgpool_config->update.qu8(&global_average_pooling_op->params.qu8_gavgpool, bias, scale); -} - -enum xnn_status xnn_reshape_global_average_pooling_nwc_qu8( - xnn_operator_t global_average_pooling_op, - size_t batch_size, - size_t width, - size_t channels, - size_t input_stride, - size_t output_stride, - size_t* workspace_size, - size_t* workspace_alignment, - pthreadpool_t threadpool) -{ - return reshape_global_average_pooling_nwc( - global_average_pooling_op, - batch_size, width, channels, input_stride, output_stride, - workspace_size, workspace_alignment, - /*log2_data_element_size=*/XNN_LOG2_SIZEOF_UINT8_T, - /*log2_accumulator_element_size=*/XNN_LOG2_SIZEOF_INT32_T, - global_average_pooling_op->gavgpool_config, - xnn_operator_type_global_average_pooling_nwc_qu8, - &global_average_pooling_op->params.qu8_gavgpool, - sizeof(global_average_pooling_op->params.qu8_gavgpool), - update_params_qu8, - threadpool); -} - -static void update_params_qs8( - xnn_operator_t global_average_pooling_op, - size_t width) -{ - const int32_t bias = -((int32_t) width * global_average_pooling_op->input_zero_point); - const float scale = global_average_pooling_op->input_scale / (global_average_pooling_op->output_scale * (float) width); - global_average_pooling_op->gavgpool_config->update.qs8(&global_average_pooling_op->params.qs8_gavgpool, bias, scale); -} - -enum xnn_status xnn_reshape_global_average_pooling_nwc_qs8( - xnn_operator_t global_average_pooling_op, - size_t batch_size, - size_t width, - size_t channels, - size_t input_stride, - size_t output_stride, - size_t* workspace_size, - size_t* workspace_alignment, - pthreadpool_t threadpool) -{ - return reshape_global_average_pooling_nwc( - global_average_pooling_op, - batch_size, width, channels, input_stride, output_stride, - workspace_size, workspace_alignment, - /*log2_data_element_size=*/XNN_LOG2_SIZEOF_INT8_T, - /*log2_accumulator_element_size=*/XNN_LOG2_SIZEOF_INT32_T, - global_average_pooling_op->gavgpool_config, - xnn_operator_type_global_average_pooling_nwc_qs8, - &global_average_pooling_op->params.qs8_gavgpool, - sizeof(global_average_pooling_op->params.qs8_gavgpool), - update_params_qs8, - threadpool); -} - -static void update_params_f16( - xnn_operator_t global_average_pooling_op, - size_t width) -{ - global_average_pooling_op->gavgpool_config->update.f16( - &global_average_pooling_op->params.f16_scaleminmax, - xnn_float16_from_float(1.0f / (float) width)); -} - -enum xnn_status xnn_reshape_global_average_pooling_nwc_f16( - xnn_operator_t global_average_pooling_op, - size_t batch_size, - size_t width, - size_t channels, - size_t input_stride, - size_t output_stride, - size_t* workspace_size, - size_t* workspace_alignment, - pthreadpool_t threadpool) -{ - return reshape_global_average_pooling_nwc( - global_average_pooling_op, - batch_size, width, channels, input_stride, output_stride, - workspace_size, workspace_alignment, - /*log2_data_element_size=*/XNN_LOG2_SIZEOF_HALF, - /*log2_accumulator_element_size=*/XNN_LOG2_SIZEOF_HALF, - global_average_pooling_op->gavgpool_config, - xnn_operator_type_global_average_pooling_nwc_f16, - &global_average_pooling_op->params.f16_scaleminmax, - sizeof(global_average_pooling_op->params.f16_scaleminmax), - update_params_f16, - threadpool); -} - -static void update_params_f32( - xnn_operator_t global_average_pooling_op, - size_t width) -{ - global_average_pooling_op->gavgpool_config->update.f32( - &global_average_pooling_op->params.f32_scaleminmax, 1.0f / (float) width); -} - -enum xnn_status xnn_reshape_global_average_pooling_nwc_f32( - xnn_operator_t global_average_pooling_op, - size_t batch_size, - size_t width, - size_t channels, - size_t input_stride, - size_t output_stride, - size_t* workspace_size, - size_t* workspace_alignment, - pthreadpool_t threadpool) -{ - return reshape_global_average_pooling_nwc( - global_average_pooling_op, - batch_size, width, channels, input_stride, output_stride, - workspace_size, workspace_alignment, - /*log2_data_element_size=*/XNN_LOG2_SIZEOF_FLOAT, - /*log2_accumulator_element_size=*/XNN_LOG2_SIZEOF_FLOAT, - global_average_pooling_op->gavgpool_config, - xnn_operator_type_global_average_pooling_nwc_f32, - &global_average_pooling_op->params.f32_scaleminmax, - sizeof(global_average_pooling_op->params.f32_scaleminmax), - update_params_f32, - threadpool); -} - -static enum xnn_status setup_global_average_pooling_nwc( - xnn_operator_t global_average_pooling_op, - enum xnn_operator_type expected_operator_type, - void* workspace, - const void* input, - void* output) -{ - if (global_average_pooling_op->type != expected_operator_type) { - xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)", - xnn_operator_type_to_string(expected_operator_type), - xnn_operator_type_to_string(global_average_pooling_op->type)); - return xnn_status_invalid_parameter; - } - - switch (global_average_pooling_op->state) { - case xnn_run_state_skip: - return xnn_status_success; - case xnn_run_state_invalid: - xnn_log_error( - "failed to setup %s operator: operator has not been reshaped yet", - xnn_operator_type_to_string(global_average_pooling_op->type)); - return xnn_status_invalid_state; - case xnn_run_state_needs_setup: - // Operator has been reshaped, but not setup, continue with setup. - case xnn_run_state_ready: - // Operator has been reshaped, and we are setting up with different pointers. - break; - } - - struct global_average_pooling_nwc_context* context = &global_average_pooling_op->context.global_average_pooling_nwc; - if (context->multipass_batch_stride != 0 && workspace == NULL) { - xnn_log_error( - "failed to setup %s operator: workspace of size %zu required but workspace is NULL", - xnn_operator_type_to_string(global_average_pooling_op->type), context->multipass_batch_stride); - return xnn_status_invalid_state; - } - - context->input = input; - context->output = output; - context->multipass_buffer = workspace; - - global_average_pooling_op->state = xnn_run_state_ready; - - return xnn_status_success; -} - -enum xnn_status xnn_setup_global_average_pooling_nwc_qu8( - xnn_operator_t global_average_pooling_op, - void* workspace, - const uint8_t* input, - uint8_t* output) -{ - return setup_global_average_pooling_nwc( - global_average_pooling_op, - xnn_operator_type_global_average_pooling_nwc_qu8, - workspace, - input, output); -} - -enum xnn_status xnn_setup_global_average_pooling_nwc_qs8( - xnn_operator_t global_average_pooling_op, - void* workspace, - const int8_t* input, - int8_t* output) -{ - return setup_global_average_pooling_nwc( - global_average_pooling_op, - xnn_operator_type_global_average_pooling_nwc_qs8, - workspace, - input, output); -} - -enum xnn_status xnn_setup_global_average_pooling_nwc_f16( - xnn_operator_t global_average_pooling_op, - void* workspace, - const void* input, - void* output) -{ - return setup_global_average_pooling_nwc( - global_average_pooling_op, - xnn_operator_type_global_average_pooling_nwc_f16, - workspace, - input, output); -} - -enum xnn_status xnn_setup_global_average_pooling_nwc_f32( - xnn_operator_t global_average_pooling_op, - void* workspace, - const float* input, - float* output) -{ - return setup_global_average_pooling_nwc( - global_average_pooling_op, - xnn_operator_type_global_average_pooling_nwc_f32, - workspace, - input, output); -} - -enum xnn_status xnn_reshape_global_sum_pooling_nwc_f16( - xnn_operator_t global_sum_pooling_op, - size_t batch_size, - size_t width, - size_t channels, - size_t input_stride, - size_t output_stride, - size_t* workspace_size, - size_t* workspace_alignment, - pthreadpool_t threadpool) -{ - return reshape_global_average_pooling_nwc( - global_sum_pooling_op, - batch_size, width, channels, input_stride, output_stride, - workspace_size, workspace_alignment, - /*log2_data_element_size=*/XNN_LOG2_SIZEOF_HALF, - /*log2_accumulator_element_size=*/XNN_LOG2_SIZEOF_HALF, - global_sum_pooling_op->gavgpool_config, - xnn_operator_type_global_sum_pooling_nwc_f16, - &global_sum_pooling_op->params.f16_scaleminmax, - sizeof(global_sum_pooling_op->params.f16_scaleminmax), - /*update_params=*/NULL, - threadpool); -} - -enum xnn_status xnn_reshape_global_sum_pooling_nwc_f32( - xnn_operator_t global_sum_pooling_op, - size_t batch_size, - size_t width, - size_t channels, - size_t input_stride, - size_t output_stride, - size_t* workspace_size, - size_t* workspace_alignment, - pthreadpool_t threadpool) -{ - return reshape_global_average_pooling_nwc( - global_sum_pooling_op, - batch_size, width, channels, input_stride, output_stride, - workspace_size, workspace_alignment, - /*log2_data_element_size=*/XNN_LOG2_SIZEOF_FLOAT, - /*log2_accumulator_element_size=*/XNN_LOG2_SIZEOF_FLOAT, - global_sum_pooling_op->gavgpool_config, - xnn_operator_type_global_sum_pooling_nwc_f32, - &global_sum_pooling_op->params.f32_scaleminmax, - sizeof(global_sum_pooling_op->params.f32_scaleminmax), - /*update_params=*/NULL, - threadpool); -} - -enum xnn_status xnn_setup_global_sum_pooling_nwc_f16( - xnn_operator_t global_sum_pooling_op, - void* workspace, - const void* input, - void* output) -{ - return setup_global_average_pooling_nwc( - global_sum_pooling_op, - xnn_operator_type_global_sum_pooling_nwc_f16, - workspace, - input, output); -} - -enum xnn_status xnn_setup_global_sum_pooling_nwc_f32( - xnn_operator_t global_sum_pooling_op, - void* workspace, - const float* input, - float* output) -{ - return setup_global_average_pooling_nwc( - global_sum_pooling_op, - xnn_operator_type_global_sum_pooling_nwc_f32, - workspace, - input, output); -} diff --git a/src/subgraph/deprecated.c b/src/subgraph/deprecated.c index b5165d691cd..a86e5a68206 100644 --- a/src/subgraph/deprecated.c +++ b/src/subgraph/deprecated.c @@ -1,7 +1,9 @@ +#include #include #include #include "xnnpack.h" +#include "xnnpack/subgraph.h" enum xnn_status xnn_define_add2(xnn_subgraph_t subgraph, float output_min, float output_max, uint32_t input1_id, @@ -92,3 +94,121 @@ enum xnn_status xnn_define_static_mean(xnn_subgraph_t subgraph, return xnn_define_static_reduce(subgraph, xnn_reduce_mean, num_reduction_axes, reduction_axes, input_id, output_id, flags); } + +enum xnn_status xnn_define_global_average_pooling_1d( + xnn_subgraph_t subgraph, + float output_min, + float output_max, + uint32_t input_id, + uint32_t output_id, + uint32_t flags) +{ + const struct xnn_value* input_value = &subgraph->values[input_id]; + + size_t reduction_axes[XNN_MAX_TENSOR_DIMS]; + + reduction_axes[0] = input_value->shape.num_dims - 2; + + enum xnn_status status = (xnn_define_static_reduce( + subgraph, xnn_reduce_mean, 1, reduction_axes, input_id, + output_id, flags)); + + if (status != xnn_status_success) { + return status; + } + + if (output_min != -INFINITY || output_max != INFINITY) { + return xnn_insert_clamp_node(subgraph, output_min, output_max, + &subgraph->nodes[subgraph->num_nodes - 1]); + } + + return xnn_status_success; +} + +enum xnn_status xnn_define_global_average_pooling_2d( + xnn_subgraph_t subgraph, + float output_min, + float output_max, + uint32_t input_id, + uint32_t output_id, + uint32_t flags) +{ + const struct xnn_value* input_value = &subgraph->values[input_id]; + + size_t reduction_axes[XNN_MAX_TENSOR_DIMS]; + + reduction_axes[0] = input_value->shape.num_dims - 3; + reduction_axes[1] = input_value->shape.num_dims - 2; + + enum xnn_status status = xnn_define_static_reduce( + subgraph, xnn_reduce_mean, 2, reduction_axes, input_id, + output_id, flags); + + if (status != xnn_status_success) { + return status; + } + + if (output_min != -INFINITY || output_max != INFINITY) { + return xnn_insert_clamp_node(subgraph, output_min, output_max, + &subgraph->nodes[subgraph->num_nodes - 1]); + } + + return xnn_status_success; +} + +enum xnn_status xnn_define_global_sum_pooling_1d( + xnn_subgraph_t subgraph, + float output_min, + float output_max, + uint32_t input_id, + uint32_t output_id, + uint32_t flags) +{ + const struct xnn_value* input_value = &subgraph->values[input_id]; + size_t reduction_axes[XNN_MAX_TENSOR_DIMS]; + reduction_axes[0] = input_value->shape.num_dims - 2; + + enum xnn_status status = xnn_define_static_reduce( + subgraph, xnn_reduce_sum, 1, reduction_axes, input_id, + output_id, flags); + + if (status != xnn_status_success) { + return status; + } + + if (output_min != -INFINITY || output_max != INFINITY) { + return xnn_insert_clamp_node(subgraph, output_min, output_max, + &subgraph->nodes[subgraph->num_nodes - 1]); + } + + return xnn_status_success; +} + +enum xnn_status xnn_define_global_sum_pooling_2d( + xnn_subgraph_t subgraph, + float output_min, + float output_max, + uint32_t input_id, + uint32_t output_id, + uint32_t flags) +{ + const struct xnn_value* input_value = &subgraph->values[input_id]; + size_t reduction_axes[XNN_MAX_TENSOR_DIMS]; + reduction_axes[0] = input_value->shape.num_dims - 3; + reduction_axes[1] = input_value->shape.num_dims - 2; + + enum xnn_status status = xnn_define_static_reduce( + subgraph, xnn_reduce_sum, 2, reduction_axes, input_id, + output_id, flags); + + if (status != xnn_status_success) { + return status; + } + + if (output_min != -INFINITY || output_max != INFINITY) { + return xnn_insert_clamp_node(subgraph, output_min, output_max, + &subgraph->nodes[subgraph->num_nodes - 1]); + } + + return xnn_status_success; +} diff --git a/src/subgraph/global-average-pooling.c b/src/subgraph/global-average-pooling.c deleted file mode 100644 index 4674b099306..00000000000 --- a/src/subgraph/global-average-pooling.c +++ /dev/null @@ -1,442 +0,0 @@ -// Copyright 2020 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. - -#include -#include -#include -#include -#include - -#include "xnnpack.h" -#include "xnnpack/common.h" -#include "xnnpack/log.h" -#include "xnnpack/node-type.h" -#include "xnnpack/operator-type.h" -#include "xnnpack/operator.h" -#include "xnnpack/requantization.h" -#include "xnnpack/subgraph-validation.h" -#include "xnnpack/subgraph.h" -#include "pthreadpool.h" - -static enum xnn_status create_global_average_pooling_operator( - const struct xnn_node* node, - const struct xnn_value* values, - size_t num_values, - struct xnn_operator_data* opdata, - struct xnn_code_cache* code_cache, - xnn_weights_cache_t weights_cache) -{ - assert(node->num_inputs == 1); - const uint32_t input_id = node->inputs[0]; - assert(input_id != XNN_INVALID_VALUE_ID); - assert(input_id < num_values); - - assert(node->num_outputs == 1); - const uint32_t output_id = node->outputs[0]; - assert(output_id != XNN_INVALID_VALUE_ID); - assert(output_id < num_values); - - enum xnn_status status; - const struct xnn_value *input_value = &values[input_id]; - const enum xnn_datatype datatype = input_value->datatype; - if (input_value->layout == xnn_layout_type_nchw) { - assert(datatype == xnn_datatype_fp32 || datatype == xnn_datatype_fp16); - switch (datatype) { - case xnn_datatype_fp32: - status = xnn_create_global_average_pooling_ncw_f32( - node->activation.output_min, - node->activation.output_max, - node->flags, - &opdata->operator_objects[0]); - break; - case xnn_datatype_fp16: - status = xnn_create_global_average_pooling_ncw_f16( - node->activation.output_min, - node->activation.output_max, - node->flags, - &opdata->operator_objects[0]); - break; - default: - XNN_UNREACHABLE; - } - } else { - assert(values[node->inputs[0]].layout == xnn_layout_type_nhwc); - assert(values[node->outputs[0]].layout == xnn_layout_type_nhwc); - switch (datatype) { - case xnn_datatype_fp32: - status = xnn_create_global_average_pooling_nwc_f32( - node->activation.output_min, - node->activation.output_max, - node->flags, - &opdata->operator_objects[0]); - break; - case xnn_datatype_fp16: - status = xnn_create_global_average_pooling_nwc_f16( - node->activation.output_min, - node->activation.output_max, - node->flags, - &opdata->operator_objects[0]); - break; - case xnn_datatype_qint8: - { - const float output_scale = values[output_id].quantization.scale; - const int32_t output_zero_point = values[output_id].quantization.zero_point; - const int8_t output_min = xnn_qs8_quantize(node->activation.output_min, output_scale, output_zero_point); - const int8_t output_max = xnn_qs8_quantize(node->activation.output_max, output_scale, output_zero_point); - status = xnn_create_global_average_pooling_nwc_qs8( - (int8_t) values[input_id].quantization.zero_point, values[input_id].quantization.scale, - (int8_t) values[output_id].quantization.zero_point, values[output_id].quantization.scale, - output_min, - output_max, - node->flags, - &opdata->operator_objects[0]); - break; - } - case xnn_datatype_quint8: - { - const float output_scale = values[output_id].quantization.scale; - const int32_t output_zero_point = values[output_id].quantization.zero_point; - const uint8_t output_min = xnn_qu8_quantize(node->activation.output_min, output_scale, output_zero_point); - const uint8_t output_max = xnn_qu8_quantize(node->activation.output_max, output_scale, output_zero_point); - status = xnn_create_global_average_pooling_nwc_qu8( - (uint8_t) values[input_id].quantization.zero_point, values[input_id].quantization.scale, - (uint8_t) values[output_id].quantization.zero_point, values[output_id].quantization.scale, - output_min, - output_max, - node->flags, - &opdata->operator_objects[0]); - break; - } - default: - XNN_UNREACHABLE; - } - } - return status; -} - -static enum xnn_status reshape_global_average_pooling_operator( - struct xnn_operator_data* opdata, - struct xnn_value* values, - size_t num_values, - pthreadpool_t threadpool) -{ - const uint32_t input_id = opdata->inputs[0]; - assert(input_id < num_values); - const size_t num_input_dims = values[input_id].shape.num_dims; - assert(num_input_dims >= 1); - size_t batch_size, input_width, num_batch_dims; - switch (opdata->type) { - - case xnn_node_type_global_average_pooling_1d: - num_batch_dims = num_input_dims - 2; - batch_size = xnn_shape_multiply_batch_dims(&values[input_id].shape, 2); - input_width = values[input_id].shape.dim[num_input_dims - 2]; - break; - case xnn_node_type_global_average_pooling_2d: - num_batch_dims = num_input_dims - 3; - batch_size = xnn_shape_multiply_batch_dims(&values[input_id].shape, 3); - input_width = values[input_id].shape.dim[num_input_dims - 3] * values[input_id].shape.dim[num_input_dims - 2]; - break; - default: - XNN_UNREACHABLE; - } - const size_t channel_dim = values[input_id].shape.dim[num_input_dims - 1]; - enum xnn_status status = xnn_status_invalid_state; - const size_t old_workspace_size = opdata->workspace_size; - switch (opdata->operator_objects[0]->type) { - case xnn_operator_type_global_average_pooling_ncw_f32: - status = xnn_reshape_global_average_pooling_ncw_f32( - opdata->operator_objects[0], - batch_size, - input_width, - channel_dim, - threadpool); - break; - case xnn_operator_type_global_average_pooling_ncw_f16: - status = xnn_reshape_global_average_pooling_ncw_f16( - opdata->operator_objects[0], - batch_size, - input_width, - channel_dim, - threadpool); - break; - case xnn_operator_type_global_average_pooling_nwc_f32: - status = xnn_reshape_global_average_pooling_nwc_f32( - opdata->operator_objects[0], - batch_size, - input_width, - /*channels=*/channel_dim, - /*input_stride=*/channel_dim, - /*output_stride=*/channel_dim, - &opdata->workspace_size, &opdata->workspace_alignment, - threadpool); - break; - case xnn_operator_type_global_average_pooling_nwc_f16: - status = xnn_reshape_global_average_pooling_nwc_f16( - opdata->operator_objects[0], - batch_size, - input_width, - /*channels=*/channel_dim, - /*input_stride=*/channel_dim, - /*output_stride=*/channel_dim, - &opdata->workspace_size, &opdata->workspace_alignment, - threadpool); - break; - case xnn_operator_type_global_average_pooling_nwc_qs8: - status = xnn_reshape_global_average_pooling_nwc_qs8( - opdata->operator_objects[0], - batch_size, - input_width, - /*channels=*/channel_dim, - /*input_stride=*/channel_dim, - /*output_stride=*/channel_dim, - &opdata->workspace_size, &opdata->workspace_alignment, - threadpool); - break; - case xnn_operator_type_global_average_pooling_nwc_qu8: - status = xnn_reshape_global_average_pooling_nwc_qu8( - opdata->operator_objects[0], - batch_size, - input_width, - /*channels=*/channel_dim, - /*input_stride=*/channel_dim, - /*output_stride=*/channel_dim, - &opdata->workspace_size, &opdata->workspace_alignment, - threadpool); - break; - default: - XNN_UNREACHABLE; - } - if (status != xnn_status_success) { - return status; - } - const uint32_t output_id = opdata->outputs[0]; - assert(output_id != XNN_INVALID_VALUE_ID); - assert(output_id < num_values); - struct xnn_value* output_value = values + output_id; - - memcpy(&output_value->shape.dim[0], &values[input_id].shape.dim[0], num_batch_dims); - if (opdata->operator_objects[0]->flags & XNN_FLAG_KEEP_DIMS) { - output_value->shape.num_dims = num_input_dims; - output_value->shape.dim[num_input_dims - 1] = channel_dim; - switch (opdata->type) { - case xnn_node_type_global_average_pooling_1d: - output_value->shape.dim[num_batch_dims] = 1; - break; - case xnn_node_type_global_average_pooling_2d: - output_value->shape.dim[num_batch_dims] = 1; - output_value->shape.dim[num_batch_dims + 1] = 1; - break; - default: - XNN_UNREACHABLE; - } - } else { - output_value->shape.dim[num_batch_dims] = channel_dim; - output_value->shape.num_dims = num_batch_dims + 1; - } - const size_t new_size = xnn_tensor_get_size(output_value); - if (new_size > output_value->size || opdata->workspace_size > old_workspace_size) { - output_value->size = new_size; - return xnn_status_reallocation_required; - } - return xnn_status_success; -} - -static enum xnn_status setup_global_average_pooling_operator( - const struct xnn_operator_data* opdata, - const struct xnn_value* values, - size_t num_values, - pthreadpool_t threadpool) -{ - const uint32_t input_id = opdata->inputs[0]; - assert(input_id != XNN_INVALID_VALUE_ID); - assert(input_id < num_values); - - const uint32_t output_id = opdata->outputs[0]; - assert(output_id != XNN_INVALID_VALUE_ID); - assert(output_id < num_values); - - const struct xnn_value* input_value = values + input_id; - const void* input_data = input_value->data; - assert(input_data != NULL); - - const struct xnn_value* output_value = values + output_id; - void* output_data = output_value->data; - assert(output_data != NULL); - - switch (opdata->operator_objects[0]->type) { - case xnn_operator_type_global_average_pooling_ncw_f32: - return xnn_setup_global_average_pooling_ncw_f32( - opdata->operator_objects[0], - input_data, - output_data); - break; - case xnn_operator_type_global_average_pooling_ncw_f16: - return xnn_setup_global_average_pooling_ncw_f16( - opdata->operator_objects[0], - input_data, - output_data); - break; - case xnn_operator_type_global_average_pooling_nwc_f32: - return xnn_setup_global_average_pooling_nwc_f32( - opdata->operator_objects[0], - opdata->workspace, - input_data, - output_data); - break; - case xnn_operator_type_global_average_pooling_nwc_f16: - return xnn_setup_global_average_pooling_nwc_f16( - opdata->operator_objects[0], - opdata->workspace, - input_data, - output_data); - break; - case xnn_operator_type_global_average_pooling_nwc_qs8: - return xnn_setup_global_average_pooling_nwc_qs8( - opdata->operator_objects[0], - opdata->workspace, - input_data, - output_data); - break; - case xnn_operator_type_global_average_pooling_nwc_qu8: - return xnn_setup_global_average_pooling_nwc_qu8( - opdata->operator_objects[0], - opdata->workspace, - input_data, - output_data); - break; - default: - XNN_UNREACHABLE; - } -} - -static enum xnn_status define_global_average_pooling_nd( - xnn_subgraph_t subgraph, - enum xnn_node_type node_type, - float output_min, - float output_max, - uint32_t input_id, - uint32_t output_id, - uint32_t flags) -{ - enum xnn_status status; - if ((status = xnn_subgraph_check_xnnpack_initialized(node_type)) != xnn_status_success) { - return status; - } - - status = xnn_subgraph_check_output_min_max(node_type, output_min, output_max); - if (status != xnn_status_success) { - return status; - } - - status = xnn_subgraph_check_input_node_id(node_type, input_id, subgraph->num_values); - if (status != xnn_status_success) { - return status; - } - - const struct xnn_value* input_value = &subgraph->values[input_id]; - status = xnn_subgraph_check_input_type_dense(node_type, input_id, input_value); - if (status != xnn_status_success) { - return status; - } - - switch (input_value->datatype) { - case xnn_datatype_fp16: - case xnn_datatype_fp32: - case xnn_datatype_qint8: - case xnn_datatype_quint8: - break; - default: - xnn_log_error( - "failed to define %s operator with input ID #%" PRIu32 ": unsupported Value datatype %s (%d)", - xnn_node_type_to_string(node_type), input_id, - xnn_datatype_to_string(input_value->datatype), input_value->datatype); - return xnn_status_invalid_parameter; - } - - status = xnn_subgraph_check_output_node_id(node_type, output_id, subgraph->num_values); - if (status != xnn_status_success) { - return status; - } - - const struct xnn_value* output_value = &subgraph->values[output_id]; - status = xnn_subgraph_check_output_type_dense(node_type, output_id, output_value); - if (status != xnn_status_success) { - return status; - } - - enum xnn_compute_type compute_type = xnn_compute_type_invalid; - switch (output_value->datatype) { - case xnn_datatype_fp16: - compute_type = xnn_compute_type_fp16; - break; - case xnn_datatype_fp32: - compute_type = xnn_compute_type_fp32; - break; - case xnn_datatype_qint8: - compute_type = xnn_compute_type_qs8; - break; - case xnn_datatype_quint8: - compute_type = xnn_compute_type_qu8; - break; - default: - xnn_log_error( - "failed to define %s operator with output ID #%" PRIu32 ": unsupported Value datatype %s (%d)", - xnn_node_type_to_string(node_type), output_id, - xnn_datatype_to_string(output_value->datatype), output_value->datatype); - return xnn_status_invalid_parameter; - } - - status = xnn_subgraph_check_datatype_matches( - node_type, input_id, input_value, output_id, output_value); - if (status != xnn_status_success) { - return status; - } - - struct xnn_node* node = xnn_subgraph_new_node(subgraph); - if (node == NULL) { - return xnn_status_out_of_memory; - } - - node->type = node_type; - node->compute_type = compute_type; - node->activation.output_min = output_min; - node->activation.output_max = output_max; - node->num_inputs = 1; - node->inputs[0] = input_id; - node->num_outputs = 1; - node->outputs[0] = output_id; - node->flags = flags; - - node->create = create_global_average_pooling_operator; - node->reshape = reshape_global_average_pooling_operator; - node->setup = setup_global_average_pooling_operator; - - return xnn_status_success; -} - -enum xnn_status xnn_define_global_average_pooling_1d( - xnn_subgraph_t subgraph, - float output_min, - float output_max, - uint32_t input_id, - uint32_t output_id, - uint32_t flags) -{ - return define_global_average_pooling_nd( - subgraph, xnn_node_type_global_average_pooling_1d, output_min, output_max, input_id, output_id, flags); -} - -enum xnn_status xnn_define_global_average_pooling_2d( - xnn_subgraph_t subgraph, - float output_min, - float output_max, - uint32_t input_id, - uint32_t output_id, - uint32_t flags) -{ - return define_global_average_pooling_nd( - subgraph, xnn_node_type_global_average_pooling_2d, output_min, output_max, input_id, output_id, flags); -} diff --git a/src/subgraph/global-sum-pooling.c b/src/subgraph/global-sum-pooling.c deleted file mode 100644 index e60533a047b..00000000000 --- a/src/subgraph/global-sum-pooling.c +++ /dev/null @@ -1,315 +0,0 @@ -// Copyright 2020 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. - -#include -#include -#include -#include -#include - -#include "xnnpack.h" -#include "xnnpack/common.h" -#include "xnnpack/log.h" -#include "xnnpack/node-type.h" -#include "xnnpack/operator-type.h" -#include "xnnpack/operator.h" -#include "xnnpack/subgraph-validation.h" -#include "xnnpack/subgraph.h" -#include "pthreadpool.h" - -static enum xnn_status create_global_sum_pooling_operator( - const struct xnn_node* node, - const struct xnn_value* values, - size_t num_values, - struct xnn_operator_data* opdata, - struct xnn_code_cache* code_cache, - xnn_weights_cache_t weights_cache) -{ - assert(node->num_inputs == 1); - assert(node->num_outputs == 1); - - enum xnn_status status; - const uint32_t input_id = node->inputs[0]; - assert(input_id != XNN_INVALID_VALUE_ID); - assert(input_id < num_values); - - const struct xnn_value *input_value = &values[input_id]; - const enum xnn_datatype datatype = input_value->datatype; - - assert(input_value->layout == xnn_layout_type_nhwc); - assert(values[node->outputs[0]].layout == xnn_layout_type_nhwc); - switch (datatype) { - case xnn_datatype_fp32: - status = xnn_create_global_sum_pooling_nwc_f32( - node->activation.output_min, - node->activation.output_max, - node->flags, - &opdata->operator_objects[0]); - break; - case xnn_datatype_fp16: - status = xnn_create_global_sum_pooling_nwc_f16( - node->activation.output_min, - node->activation.output_max, - node->flags, - &opdata->operator_objects[0]); - break; - default: - XNN_UNREACHABLE; - } - return status; -} - -static enum xnn_status reshape_global_sum_pooling_operator( - struct xnn_operator_data* opdata, - struct xnn_value* values, - size_t num_values, - pthreadpool_t threadpool) -{ - const uint32_t input_id = opdata->inputs[0]; - assert(input_id < num_values); - const size_t num_input_dims = values[input_id].shape.num_dims; - assert(num_input_dims >= 1); - size_t batch_size, input_width, num_batch_dims; - switch (opdata->type) { - case xnn_node_type_global_sum_pooling_1d: - num_batch_dims = num_input_dims - 2; - batch_size = xnn_shape_multiply_batch_dims(&values[input_id].shape, 2); - input_width = values[input_id].shape.dim[num_input_dims - 2]; - break; - case xnn_node_type_global_sum_pooling_2d: - num_batch_dims = num_input_dims - 3; - batch_size = xnn_shape_multiply_batch_dims(&values[input_id].shape, 3); - input_width = values[input_id].shape.dim[num_input_dims - 3] * values[input_id].shape.dim[num_input_dims - 2]; - break; - default: - XNN_UNREACHABLE; - } - const size_t channel_dim = values[input_id].shape.dim[num_input_dims - 1]; - enum xnn_status status = xnn_status_invalid_state; - const size_t old_workspace_size = opdata->workspace_size; - switch (opdata->operator_objects[0]->type) { - case xnn_operator_type_global_sum_pooling_nwc_f32: - status = xnn_reshape_global_sum_pooling_nwc_f32( - opdata->operator_objects[0], - batch_size, - input_width, - /*channels=*/channel_dim, - /*input_stride=*/channel_dim, - /*output_stride=*/channel_dim, - &opdata->workspace_size, &opdata->workspace_alignment, - threadpool); - break; - case xnn_operator_type_global_sum_pooling_nwc_f16: - status = xnn_reshape_global_sum_pooling_nwc_f16( - opdata->operator_objects[0], - batch_size, - input_width, - /*channels=*/channel_dim, - /*input_stride=*/channel_dim, - /*output_stride=*/channel_dim, - &opdata->workspace_size, &opdata->workspace_alignment, - threadpool); - break; - default: - XNN_UNREACHABLE; - } - if (status != xnn_status_success) { - return status; - } - const uint32_t output_id = opdata->outputs[0]; - assert(output_id != XNN_INVALID_VALUE_ID); - assert(output_id < num_values); - struct xnn_value* output_value = values + output_id; - - memcpy(&output_value->shape.dim[0], &values[input_id].shape.dim[0], num_batch_dims); - if (opdata->operator_objects[0]->flags & XNN_FLAG_KEEP_DIMS) { - output_value->shape.num_dims = num_input_dims; - output_value->shape.dim[num_input_dims - 1] = channel_dim; - switch (opdata->type) { - case xnn_node_type_global_sum_pooling_1d: - output_value->shape.dim[num_batch_dims] = 1; - break; - case xnn_node_type_global_sum_pooling_2d: - output_value->shape.dim[num_batch_dims] = 1; - output_value->shape.dim[num_batch_dims + 1] = 1; - break; - default: - XNN_UNREACHABLE; - } - } else { - output_value->shape.dim[num_batch_dims] = channel_dim; - output_value->shape.num_dims = num_batch_dims + 1; - } - const size_t new_size = xnn_tensor_get_size(output_value); - if (new_size > output_value->size || opdata->workspace_size > old_workspace_size) { - output_value->size = new_size; - return xnn_status_reallocation_required; - } - return xnn_status_success; -} - -static enum xnn_status setup_global_sum_pooling_operator( - const struct xnn_operator_data* opdata, - const struct xnn_value* values, - size_t num_values, - pthreadpool_t threadpool) -{ - const uint32_t input_id = opdata->inputs[0]; - assert(input_id != XNN_INVALID_VALUE_ID); - assert(input_id < num_values); - - const uint32_t output_id = opdata->outputs[0]; - assert(output_id != XNN_INVALID_VALUE_ID); - assert(output_id < num_values); - - const struct xnn_value* input_value = values + input_id; - const void* input_data = input_value->data; - assert(input_data != NULL); - - const struct xnn_value* output_value = values + output_id; - void* output_data = output_value->data; - assert(output_data != NULL); - - switch (opdata->operator_objects[0]->type) { - case xnn_operator_type_global_sum_pooling_nwc_f32: - return xnn_setup_global_sum_pooling_nwc_f32( - opdata->operator_objects[0], - opdata->workspace, - input_data, - output_data); - break; - case xnn_operator_type_global_sum_pooling_nwc_f16: - return xnn_setup_global_sum_pooling_nwc_f16( - opdata->operator_objects[0], - opdata->workspace, - input_data, - output_data); - break; - default: - XNN_UNREACHABLE; - } -} - -static enum xnn_status define_global_sum_pooling_nd( - xnn_subgraph_t subgraph, - enum xnn_node_type node_type, - float output_min, - float output_max, - uint32_t input_id, - uint32_t output_id, - uint32_t flags) -{ - enum xnn_status status; - if ((status = xnn_subgraph_check_xnnpack_initialized(node_type)) != xnn_status_success) { - return status; - } - - status = xnn_subgraph_check_output_min_max(node_type, output_min, output_max); - if (status != xnn_status_success) { - return status; - } - - status = xnn_subgraph_check_input_node_id(node_type, input_id, subgraph->num_values); - if (status != xnn_status_success) { - return status; - } - - const struct xnn_value* input_value = &subgraph->values[input_id]; - status = xnn_subgraph_check_input_type_dense(node_type, input_id, input_value); - if (status != xnn_status_success) { - return status; - } - - switch (input_value->datatype) { - case xnn_datatype_fp16: - break; - case xnn_datatype_fp32: - break; - default: - xnn_log_error( - "failed to define %s operator with input ID #%" PRIu32 ": unsupported Value datatype %s (%d)", - xnn_node_type_to_string(node_type), input_id, - xnn_datatype_to_string(input_value->datatype), input_value->datatype); - return xnn_status_invalid_parameter; - } - - status = xnn_subgraph_check_output_node_id(node_type, output_id, subgraph->num_values); - if (status != xnn_status_success) { - return status; - } - - const struct xnn_value* output_value = &subgraph->values[output_id]; - status = xnn_subgraph_check_output_type_dense(node_type, output_id, output_value); - if (status != xnn_status_success) { - return status; - } - - enum xnn_compute_type compute_type = xnn_compute_type_invalid; - switch (output_value->datatype) { - case xnn_datatype_fp16: - compute_type = xnn_compute_type_fp16; - break; - case xnn_datatype_fp32: - compute_type = xnn_compute_type_fp32; - break; - default: - xnn_log_error( - "failed to define %s operator with output ID #%" PRIu32 ": unsupported Value datatype %s (%d)", - xnn_node_type_to_string(node_type), output_id, - xnn_datatype_to_string(output_value->datatype), output_value->datatype); - return xnn_status_invalid_parameter; - } - - status = xnn_subgraph_check_datatype_matches( - node_type, input_id, input_value, output_id, output_value); - if (status != xnn_status_success) { - return status; - } - - struct xnn_node* node = xnn_subgraph_new_node(subgraph); - if (node == NULL) { - return xnn_status_out_of_memory; - } - - node->type = node_type; - node->compute_type = compute_type; - node->activation.output_min = output_min; - node->activation.output_max = output_max; - node->num_inputs = 1; - node->inputs[0] = input_id; - node->num_outputs = 1; - node->outputs[0] = output_id; - node->flags = flags; - - node->create = create_global_sum_pooling_operator; - node->reshape = reshape_global_sum_pooling_operator; - node->setup = setup_global_sum_pooling_operator; - - return xnn_status_success; -} - -enum xnn_status xnn_define_global_sum_pooling_1d( - xnn_subgraph_t subgraph, - float output_min, - float output_max, - uint32_t input_id, - uint32_t output_id, - uint32_t flags) -{ - return define_global_sum_pooling_nd( - subgraph, xnn_node_type_global_sum_pooling_1d, output_min, output_max, input_id, output_id, flags); -} - -enum xnn_status xnn_define_global_sum_pooling_2d( - xnn_subgraph_t subgraph, - float output_min, - float output_max, - uint32_t input_id, - uint32_t output_id, - uint32_t flags) -{ - return define_global_sum_pooling_nd( - subgraph, xnn_node_type_global_sum_pooling_2d, output_min, output_max, input_id, output_id, flags); -} diff --git a/src/xnnpack/config.h b/src/xnnpack/config.h index abd90052087..d23151572a8 100644 --- a/src/xnnpack/config.h +++ b/src/xnnpack/config.h @@ -134,9 +134,6 @@ XNN_INTERNAL const struct xnn_gavgpool_config* xnn_init_f32_gavgpool_config(); XNN_INTERNAL const struct xnn_gavgpool_config* xnn_init_qs8_gavgpool_config(); XNN_INTERNAL const struct xnn_gavgpool_config* xnn_init_qu8_gavgpool_config(); -XNN_INTERNAL const struct xnn_gavgpool_cw_config* xnn_init_f16_gavgpool_cw_config(); -XNN_INTERNAL const struct xnn_gavgpool_cw_config* xnn_init_f32_gavgpool_cw_config(); - #define XNN_MAX_F16_DWCONV_UKERNELS 4 #define XNN_MAX_F32_DWCONV_UKERNELS 4 #define XNN_MAX_QC8_DWCONV_UKERNELS 3 diff --git a/src/xnnpack/operator-type-defs.h b/src/xnnpack/operator-type-defs.h index d170d25e5ea..91e35150ba5 100644 --- a/src/xnnpack/operator-type-defs.h +++ b/src/xnnpack/operator-type-defs.h @@ -96,14 +96,6 @@ XNN_ENUM_ITEM(xnn_operator_type_fully_connected_nc_qs8, "Fully Connected (NC, QS XNN_ENUM_ITEM(xnn_operator_type_fully_connected_nc_qs8_qc8w, "Fully Connected (NC, QS8, QC8W)") XNN_ENUM_ITEM(xnn_operator_type_fully_connected_nc_qu8, "Fully Connected (NC, QU8)") XNN_ENUM_ITEM(xnn_operator_type_gelu_nc_f32, "GELU (NC, F32)") -XNN_ENUM_ITEM(xnn_operator_type_global_average_pooling_ncw_f16, "Global Average Pooling (NCW, F16)") -XNN_ENUM_ITEM(xnn_operator_type_global_average_pooling_ncw_f32, "Global Average Pooling (NCW, F32)") -XNN_ENUM_ITEM(xnn_operator_type_global_average_pooling_nwc_f16, "Global Average Pooling (NWC, F16)") -XNN_ENUM_ITEM(xnn_operator_type_global_average_pooling_nwc_f32, "Global Average Pooling (NWC, F32)") -XNN_ENUM_ITEM(xnn_operator_type_global_average_pooling_nwc_qs8, "Global Average Pooling (NWC, QS8)") -XNN_ENUM_ITEM(xnn_operator_type_global_average_pooling_nwc_qu8, "Global Average Pooling (NWC, QU8)") -XNN_ENUM_ITEM(xnn_operator_type_global_sum_pooling_nwc_f16, "Global Sum Pooling (NWC, F16)") -XNN_ENUM_ITEM(xnn_operator_type_global_sum_pooling_nwc_f32, "Global Sum Pooling (NWC, F32)") XNN_ENUM_ITEM(xnn_operator_type_hardswish_nc_f16, "HardSwish (NC, F16)") XNN_ENUM_ITEM(xnn_operator_type_hardswish_nc_f32, "HardSwish (NC, F32)") XNN_ENUM_ITEM(xnn_operator_type_leaky_relu_nc_f16, "Leaky ReLU (NC, F16)") diff --git a/test/BUILD.bazel b/test/BUILD.bazel index 4060b1197e3..8ffa3659ce6 100644 --- a/test/BUILD.bazel +++ b/test/BUILD.bazel @@ -503,15 +503,6 @@ xnnpack_unit_test( deps = MICROKERNEL_TEST_DEPS, ) -xnnpack_unit_test( - name = "f16_gavgpool_cw_test", - srcs = [ - "f16-gavgpool-cw.cc", - "gavgpool-cw-microkernel-tester.h", - ], - deps = MICROKERNEL_TEST_DEPS, -) - xnnpack_unit_test( name = "f16_gavgpool_minmax_test", srcs = [ @@ -684,15 +675,6 @@ xnnpack_unit_test( deps = MICROKERNEL_TEST_DEPS, ) -xnnpack_unit_test( - name = "f32_gavgpool_cw_test", - srcs = [ - "f32-gavgpool-cw.cc", - "gavgpool-cw-microkernel-tester.h", - ], - deps = MICROKERNEL_TEST_DEPS, -) - xnnpack_unit_test( name = "f32_gemm_test", srcs = [ @@ -1505,35 +1487,6 @@ xnnpack_unit_test( ], ) -xnnpack_unit_test( - name = "global_average_pooling_nwc_test", - timeout = "moderate", - srcs = [ - "global-average-pooling-nwc.cc", - "global-average-pooling-operator-tester.h", - ], - deps = OPERATOR_TEST_DEPS, -) - -xnnpack_unit_test( - name = "global_average_pooling_ncw_test", - srcs = [ - "global-average-pooling-ncw.cc", - "global-average-pooling-operator-tester.h", - ], - deps = OPERATOR_TEST_DEPS, -) - -xnnpack_unit_test( - name = "global_sum_pooling_nwc_test", - srcs = [ - "global-sum-pooling-nwc.cc", - "global-sum-pooling-operator-tester.h", - ], - shard_count = 10, - deps = OPERATOR_TEST_DEPS, -) - xnnpack_unit_test( name = "max_pooling_nhwc_test", timeout = "moderate", diff --git a/test/f16-gavgpool-cw.cc b/test/f16-gavgpool-cw.cc deleted file mode 100644 index 2715556b0e4..00000000000 --- a/test/f16-gavgpool-cw.cc +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright 2023 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. -// -// Auto-generated file. Do not edit! -// Specification: test/f16-gavgpool-cw.yaml -// Generator: tools/generate-gavgpool-cw-test.py - - -#include -#include "xnnpack/common.h" -#include "xnnpack/gavgpool.h" -#include "xnnpack/isa-checks.h" -#include "xnnpack/microparams-init.h" -#include "gavgpool-cw-microkernel-tester.h" - - -#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - TEST(F16_GAVGPOOL_CW__NEONFP16ARITH_U8, elements_eq_8) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - GAvgPoolCWMicrokernelTester() - .elements(8) - .channels(1) - .Test(xnn_f16_gavgpool_cw_ukernel__neonfp16arith_u8, xnn_init_f16_gavgpool_scalar_params); - } - - TEST(F16_GAVGPOOL_CW__NEONFP16ARITH_U8, elements_gt_8) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 9; elements < 16; elements++) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(1) - .Test(xnn_f16_gavgpool_cw_ukernel__neonfp16arith_u8, xnn_init_f16_gavgpool_scalar_params); - } - } - - TEST(F16_GAVGPOOL_CW__NEONFP16ARITH_U8, elements_lt_8) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 1; elements < 8; elements++) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(1) - .Test(xnn_f16_gavgpool_cw_ukernel__neonfp16arith_u8, xnn_init_f16_gavgpool_scalar_params); - } - } - - TEST(F16_GAVGPOOL_CW__NEONFP16ARITH_U8, elements_div_8) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 16; elements < 40; elements += 8) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(1) - .Test(xnn_f16_gavgpool_cw_ukernel__neonfp16arith_u8, xnn_init_f16_gavgpool_scalar_params); - } - } - - TEST(F16_GAVGPOOL_CW__NEONFP16ARITH_U8, channels_gt_1) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t channels = 2; channels < 4; channels++) { - GAvgPoolCWMicrokernelTester() - .elements(8) - .channels(channels) - .Test(xnn_f16_gavgpool_cw_ukernel__neonfp16arith_u8, xnn_init_f16_gavgpool_scalar_params); - } - } - - TEST(F16_GAVGPOOL_CW__NEONFP16ARITH_U8, qmin) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 1; elements < 16; elements += 3) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(4) - .qmin(128) - .Test(xnn_f16_gavgpool_cw_ukernel__neonfp16arith_u8, xnn_init_f16_gavgpool_scalar_params); - } - } - - TEST(F16_GAVGPOOL_CW__NEONFP16ARITH_U8, qmax) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 1; elements < 16; elements += 3) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(4) - .qmax(128) - .Test(xnn_f16_gavgpool_cw_ukernel__neonfp16arith_u8, xnn_init_f16_gavgpool_scalar_params); - } - } -#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) diff --git a/test/f16-gavgpool-cw.yaml b/test/f16-gavgpool-cw.yaml deleted file mode 100644 index b550df2a7c4..00000000000 --- a/test/f16-gavgpool-cw.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright 2023 Google LLC -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# ARM NEON+FP16ARITH -- name: xnn_f16_gavgpool_cw_ukernel__neonfp16arith_u8 - init: xnn_init_f16_gavgpool_scalar_params diff --git a/test/f32-gavgpool-cw.cc b/test/f32-gavgpool-cw.cc deleted file mode 100644 index 5363fc6d7b4..00000000000 --- a/test/f32-gavgpool-cw.cc +++ /dev/null @@ -1,349 +0,0 @@ -// Copyright 2023 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. -// -// Auto-generated file. Do not edit! -// Specification: test/f32-gavgpool-cw.yaml -// Generator: tools/generate-gavgpool-cw-test.py - - -#include -#include "xnnpack/common.h" -#include "xnnpack/gavgpool.h" -#include "xnnpack/isa-checks.h" -#include "xnnpack/microparams-init.h" -#include "gavgpool-cw-microkernel-tester.h" - - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(F32_GAVGPOOL_CW__NEON_U4, elements_eq_4) { - TEST_REQUIRES_ARM_NEON; - GAvgPoolCWMicrokernelTester() - .elements(4) - .channels(1) - .Test(xnn_f32_gavgpool_cw_ukernel__neon_u4, xnn_init_f32_gavgpool_neon_params); - } - - TEST(F32_GAVGPOOL_CW__NEON_U4, elements_gt_4) { - TEST_REQUIRES_ARM_NEON; - for (size_t elements = 5; elements < 8; elements++) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(1) - .Test(xnn_f32_gavgpool_cw_ukernel__neon_u4, xnn_init_f32_gavgpool_neon_params); - } - } - - TEST(F32_GAVGPOOL_CW__NEON_U4, elements_lt_4) { - TEST_REQUIRES_ARM_NEON; - for (size_t elements = 1; elements < 4; elements++) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(1) - .Test(xnn_f32_gavgpool_cw_ukernel__neon_u4, xnn_init_f32_gavgpool_neon_params); - } - } - - TEST(F32_GAVGPOOL_CW__NEON_U4, elements_div_4) { - TEST_REQUIRES_ARM_NEON; - for (size_t elements = 8; elements < 20; elements += 4) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(1) - .Test(xnn_f32_gavgpool_cw_ukernel__neon_u4, xnn_init_f32_gavgpool_neon_params); - } - } - - TEST(F32_GAVGPOOL_CW__NEON_U4, channels_gt_1) { - TEST_REQUIRES_ARM_NEON; - for (size_t channels = 2; channels < 4; channels++) { - GAvgPoolCWMicrokernelTester() - .elements(4) - .channels(channels) - .Test(xnn_f32_gavgpool_cw_ukernel__neon_u4, xnn_init_f32_gavgpool_neon_params); - } - } - - TEST(F32_GAVGPOOL_CW__NEON_U4, qmin) { - TEST_REQUIRES_ARM_NEON; - for (size_t elements = 1; elements < 8; elements += 3) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(4) - .qmin(128) - .Test(xnn_f32_gavgpool_cw_ukernel__neon_u4, xnn_init_f32_gavgpool_neon_params); - } - } - - TEST(F32_GAVGPOOL_CW__NEON_U4, qmax) { - TEST_REQUIRES_ARM_NEON; - for (size_t elements = 1; elements < 8; elements += 3) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(4) - .qmax(128) - .Test(xnn_f32_gavgpool_cw_ukernel__neon_u4, xnn_init_f32_gavgpool_neon_params); - } - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_GAVGPOOL_CW__SSE_U4, elements_eq_4) { - TEST_REQUIRES_X86_SSE; - GAvgPoolCWMicrokernelTester() - .elements(4) - .channels(1) - .Test(xnn_f32_gavgpool_cw_ukernel__sse_u4, xnn_init_f32_gavgpool_sse_params); - } - - TEST(F32_GAVGPOOL_CW__SSE_U4, elements_gt_4) { - TEST_REQUIRES_X86_SSE; - for (size_t elements = 5; elements < 8; elements++) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(1) - .Test(xnn_f32_gavgpool_cw_ukernel__sse_u4, xnn_init_f32_gavgpool_sse_params); - } - } - - TEST(F32_GAVGPOOL_CW__SSE_U4, elements_lt_4) { - TEST_REQUIRES_X86_SSE; - for (size_t elements = 1; elements < 4; elements++) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(1) - .Test(xnn_f32_gavgpool_cw_ukernel__sse_u4, xnn_init_f32_gavgpool_sse_params); - } - } - - TEST(F32_GAVGPOOL_CW__SSE_U4, elements_div_4) { - TEST_REQUIRES_X86_SSE; - for (size_t elements = 8; elements < 20; elements += 4) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(1) - .Test(xnn_f32_gavgpool_cw_ukernel__sse_u4, xnn_init_f32_gavgpool_sse_params); - } - } - - TEST(F32_GAVGPOOL_CW__SSE_U4, channels_gt_1) { - TEST_REQUIRES_X86_SSE; - for (size_t channels = 2; channels < 4; channels++) { - GAvgPoolCWMicrokernelTester() - .elements(4) - .channels(channels) - .Test(xnn_f32_gavgpool_cw_ukernel__sse_u4, xnn_init_f32_gavgpool_sse_params); - } - } - - TEST(F32_GAVGPOOL_CW__SSE_U4, qmin) { - TEST_REQUIRES_X86_SSE; - for (size_t elements = 1; elements < 8; elements += 3) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(4) - .qmin(128) - .Test(xnn_f32_gavgpool_cw_ukernel__sse_u4, xnn_init_f32_gavgpool_sse_params); - } - } - - TEST(F32_GAVGPOOL_CW__SSE_U4, qmax) { - TEST_REQUIRES_X86_SSE; - for (size_t elements = 1; elements < 8; elements += 3) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(4) - .qmax(128) - .Test(xnn_f32_gavgpool_cw_ukernel__sse_u4, xnn_init_f32_gavgpool_sse_params); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - TEST(F32_GAVGPOOL_CW__WASMSIMD_ARM_U4, elements_eq_4) { - GAvgPoolCWMicrokernelTester() - .elements(4) - .channels(1) - .Test(xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_u4, xnn_init_f32_gavgpool_scalar_params); - } - - TEST(F32_GAVGPOOL_CW__WASMSIMD_ARM_U4, elements_gt_4) { - for (size_t elements = 5; elements < 8; elements++) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(1) - .Test(xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_u4, xnn_init_f32_gavgpool_scalar_params); - } - } - - TEST(F32_GAVGPOOL_CW__WASMSIMD_ARM_U4, elements_lt_4) { - for (size_t elements = 1; elements < 4; elements++) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(1) - .Test(xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_u4, xnn_init_f32_gavgpool_scalar_params); - } - } - - TEST(F32_GAVGPOOL_CW__WASMSIMD_ARM_U4, elements_div_4) { - for (size_t elements = 8; elements < 20; elements += 4) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(1) - .Test(xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_u4, xnn_init_f32_gavgpool_scalar_params); - } - } - - TEST(F32_GAVGPOOL_CW__WASMSIMD_ARM_U4, channels_gt_1) { - for (size_t channels = 2; channels < 4; channels++) { - GAvgPoolCWMicrokernelTester() - .elements(4) - .channels(channels) - .Test(xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_u4, xnn_init_f32_gavgpool_scalar_params); - } - } - - TEST(F32_GAVGPOOL_CW__WASMSIMD_ARM_U4, qmin) { - for (size_t elements = 1; elements < 8; elements += 3) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(4) - .qmin(128) - .Test(xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_u4, xnn_init_f32_gavgpool_scalar_params); - } - } - - TEST(F32_GAVGPOOL_CW__WASMSIMD_ARM_U4, qmax) { - for (size_t elements = 1; elements < 8; elements += 3) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(4) - .qmax(128) - .Test(xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_u4, xnn_init_f32_gavgpool_scalar_params); - } - } -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - - -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - TEST(F32_GAVGPOOL_CW__WASMSIMD_X86_U4, elements_eq_4) { - GAvgPoolCWMicrokernelTester() - .elements(4) - .channels(1) - .Test(xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_u4, xnn_init_f32_gavgpool_scalar_params); - } - - TEST(F32_GAVGPOOL_CW__WASMSIMD_X86_U4, elements_gt_4) { - for (size_t elements = 5; elements < 8; elements++) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(1) - .Test(xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_u4, xnn_init_f32_gavgpool_scalar_params); - } - } - - TEST(F32_GAVGPOOL_CW__WASMSIMD_X86_U4, elements_lt_4) { - for (size_t elements = 1; elements < 4; elements++) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(1) - .Test(xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_u4, xnn_init_f32_gavgpool_scalar_params); - } - } - - TEST(F32_GAVGPOOL_CW__WASMSIMD_X86_U4, elements_div_4) { - for (size_t elements = 8; elements < 20; elements += 4) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(1) - .Test(xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_u4, xnn_init_f32_gavgpool_scalar_params); - } - } - - TEST(F32_GAVGPOOL_CW__WASMSIMD_X86_U4, channels_gt_1) { - for (size_t channels = 2; channels < 4; channels++) { - GAvgPoolCWMicrokernelTester() - .elements(4) - .channels(channels) - .Test(xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_u4, xnn_init_f32_gavgpool_scalar_params); - } - } - - TEST(F32_GAVGPOOL_CW__WASMSIMD_X86_U4, qmin) { - for (size_t elements = 1; elements < 8; elements += 3) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(4) - .qmin(128) - .Test(xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_u4, xnn_init_f32_gavgpool_scalar_params); - } - } - - TEST(F32_GAVGPOOL_CW__WASMSIMD_X86_U4, qmax) { - for (size_t elements = 1; elements < 8; elements += 3) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(4) - .qmax(128) - .Test(xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_u4, xnn_init_f32_gavgpool_scalar_params); - } - } -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - - -TEST(F32_GAVGPOOL_CW__SCALAR_U1, elements_eq_1) { - GAvgPoolCWMicrokernelTester() - .elements(1) - .channels(1) - .Test(xnn_f32_gavgpool_cw_ukernel__scalar_u1, xnn_init_f32_gavgpool_scalar_params); -} - -TEST(F32_GAVGPOOL_CW__SCALAR_U1, elements_gt_1) { - for (size_t elements = 2; elements < 2; elements++) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(1) - .Test(xnn_f32_gavgpool_cw_ukernel__scalar_u1, xnn_init_f32_gavgpool_scalar_params); - } -} - -TEST(F32_GAVGPOOL_CW__SCALAR_U1, elements_div_1) { - for (size_t elements = 2; elements < 5; elements += 1) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(1) - .Test(xnn_f32_gavgpool_cw_ukernel__scalar_u1, xnn_init_f32_gavgpool_scalar_params); - } -} - -TEST(F32_GAVGPOOL_CW__SCALAR_U1, channels_gt_1) { - for (size_t channels = 2; channels < 4; channels++) { - GAvgPoolCWMicrokernelTester() - .elements(1) - .channels(channels) - .Test(xnn_f32_gavgpool_cw_ukernel__scalar_u1, xnn_init_f32_gavgpool_scalar_params); - } -} - -TEST(F32_GAVGPOOL_CW__SCALAR_U1, qmin) { - for (size_t elements = 1; elements < 2; elements += 1) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(4) - .qmin(128) - .Test(xnn_f32_gavgpool_cw_ukernel__scalar_u1, xnn_init_f32_gavgpool_scalar_params); - } -} - -TEST(F32_GAVGPOOL_CW__SCALAR_U1, qmax) { - for (size_t elements = 1; elements < 2; elements += 1) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(4) - .qmax(128) - .Test(xnn_f32_gavgpool_cw_ukernel__scalar_u1, xnn_init_f32_gavgpool_scalar_params); - } -} \ No newline at end of file diff --git a/test/f32-gavgpool-cw.yaml b/test/f32-gavgpool-cw.yaml deleted file mode 100644 index 4f1e7814ae4..00000000000 --- a/test/f32-gavgpool-cw.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright 2023 Google LLC -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# ARM NEON -- name: xnn_f32_gavgpool_cw_ukernel__neon_u4 - init: xnn_init_f32_gavgpool_neon_params - -# SSE -- name: xnn_f32_gavgpool_cw_ukernel__sse_u4 - init: xnn_init_f32_gavgpool_sse_params - -# Wasm SIMD -- name: xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_u4 - init: xnn_init_f32_gavgpool_scalar_params -- name: xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_u4 - init: xnn_init_f32_gavgpool_scalar_params - -# Scalar -- name: xnn_f32_gavgpool_cw_ukernel__scalar_u1 - init: xnn_init_f32_gavgpool_scalar_params diff --git a/test/gavgpool-cw-microkernel-tester.h b/test/gavgpool-cw-microkernel-tester.h deleted file mode 100644 index d9450a0f671..00000000000 --- a/test/gavgpool-cw-microkernel-tester.h +++ /dev/null @@ -1,196 +0,0 @@ -// Copyright 2019 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include "xnnpack.h" -#include "xnnpack/fp16.h" -#include "xnnpack/math.h" -#include "xnnpack/microfnptr.h" -#include "xnnpack/microparams.h" -#include "replicable_random_device.h" - -class GAvgPoolCWMicrokernelTester { - public: - GAvgPoolCWMicrokernelTester& elements(size_t elements) { - assert(elements != 0); - this->elements_ = elements; - return *this; - } - - size_t elements() const { - return this->elements_; - } - - GAvgPoolCWMicrokernelTester& channels(size_t channels) { - assert(channels != 0); - this->channels_ = channels; - return *this; - } - - size_t channels() const { - return this->channels_; - } - - GAvgPoolCWMicrokernelTester& qmin(uint8_t qmin) { - this->qmin_ = qmin; - return *this; - } - - uint8_t qmin() const { - return this->qmin_; - } - - GAvgPoolCWMicrokernelTester& qmax(uint8_t qmax) { - this->qmax_ = qmax; - return *this; - } - - uint8_t qmax() const { - return this->qmax_; - } - - GAvgPoolCWMicrokernelTester& iterations(size_t iterations) { - this->iterations_ = iterations; - return *this; - } - - size_t iterations() const { - return this->iterations_; - } - - - void Test(xnn_f32_gavgpool_cw_ukernel_fn gavgpool, xnn_init_f32_gavgpool_params_fn init_params) const { - xnnpack::ReplicableRandomDevice rng; - std::uniform_real_distribution f32dist; - - std::vector x(elements() * channels() + XNN_EXTRA_BYTES / sizeof(float)); - std::vector y(channels()); - std::vector y_ref(channels()); - for (size_t iteration = 0; iteration < iterations(); iteration++) { - std::generate(x.begin(), x.end(), [&]() { return f32dist(rng); }); - std::fill(y.begin(), y.end(), std::nanf("")); - - // Compute reference results, without clamping. - for (size_t i = 0; i < channels(); i++) { - float acc = 0.0f; - for (size_t j = 0; j < elements(); j++) { - acc += x[i * elements() + j]; - } - y_ref[i] = acc / float(elements()); - } - - // Compute clamping parameters. - const float accumulated_min = *std::min_element(y_ref.cbegin(), y_ref.cend()); - const float accumulated_max = *std::max_element(y_ref.cbegin(), y_ref.cend()); - const float accumulated_range = accumulated_max - accumulated_min; - const float y_min = accumulated_min + float(qmin()) / 255.0f * accumulated_range; - const float y_max = accumulated_max - float(255 - qmax()) / 255.0f * accumulated_range; - - // Prepare parameters. - union xnn_f32_gavgpool_params params; - init_params(¶ms, 1.0f / float(elements()), y_min, y_max, elements()); - - // Clamp reference results. - for (float& y_value : y_ref) { - y_value = std::max(std::min(y_value, y_max), y_min); - } - - // Call optimized micro-kernel. - gavgpool(elements() * sizeof(float), channels(), x.data(), y.data(), ¶ms); - - // Verify results. - for (size_t i = 0; i < channels(); i++) { - EXPECT_LE(y[i], y_max) - << "at position " << i << ", elements = " << elements() << ", channels = " << channels(); - EXPECT_GE(y[i], y_min) - << "at position " << i << ", elements = " << elements() << ", channels = " << channels(); - EXPECT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f) - << "at position " << i << ", elements = " << elements() << ", channels = " << channels(); - } - } - } - -void Test(xnn_f16_gavgpool_cw_ukernel_fn gavgpool, xnn_init_f16_gavgpool_neon_params_fn init_params) const { - xnnpack::ReplicableRandomDevice rng; - std::uniform_real_distribution f32dist(0.1f, 10.0f); - - std::vector x(elements() * channels() + - XNN_EXTRA_BYTES / sizeof(xnn_float16)); - std::vector y(channels()); - std::vector y_ref(channels()); - for (size_t iteration = 0; iteration < iterations(); iteration++) { - std::generate(x.begin(), x.end(), - [&]() { return f32dist(rng); }); - std::fill(y.begin(), y.end(), std::nanf("")); - - // Compute reference results, without clamping. - for (size_t i = 0; i < channels(); i++) { - float acc = 0.0f; - for (size_t j = 0; j < elements(); j++) { - acc += x[i * elements() + j]; - } - y_ref[i] = acc / float(elements()); - } - - // Compute clamping parameters. - const float accumulated_min = - *std::min_element(y_ref.cbegin(), y_ref.cend()); - const float accumulated_max = - *std::max_element(y_ref.cbegin(), y_ref.cend()); - const float accumulated_range = accumulated_max - accumulated_min; - const float y_min = xnn_float16( - accumulated_min + accumulated_range / 255.0f * float(qmin())); - const float y_max = xnn_float16( - accumulated_max - accumulated_range / 255.0f * float(255 - qmax())); - - // Prepare parameters. - union xnn_f16_gavgpool_params params; - init_params(¶ms, fp16_ieee_from_fp32_value(1.0f / float(elements())), - fp16_ieee_from_fp32_value(y_min), - fp16_ieee_from_fp32_value(y_max), elements()); - - // Clamp reference results. - for (float& y_value : y_ref) { - y_value = std::max(std::min(y_value, y_max), y_min); - } - - // Call optimized micro-kernel. - gavgpool(elements() * sizeof(xnn_float16), channels(), x.data(), y.data(), - ¶ms); - - // Verify results. - for (size_t i = 0; i < channels(); i++) { - EXPECT_LE(y[i], y_max) - << "at position " << i << ", elements = " << elements() - << ", channels = " << channels(); - EXPECT_GE(y[i], y_min) - << "at position " << i << ", elements = " << elements() - << ", channels = " << channels(); - EXPECT_NEAR(y[i], y_ref[i], - 1.0e-2f * std::abs(y_ref[i])) - << "at position " << i << ", elements = " << elements() - << ", channels = " << channels(); - } - } -} - - private: - size_t elements_{1}; - size_t channels_{1}; - uint8_t qmin_{0}; - uint8_t qmax_{255}; - size_t iterations_{15}; -}; diff --git a/test/global-average-pooling-1d.cc b/test/global-average-pooling-1d.cc index ec5ab308cb7..0281d75a68b 100644 --- a/test/global-average-pooling-1d.cc +++ b/test/global-average-pooling-1d.cc @@ -4,8 +4,6 @@ // LICENSE file in the root directory of this source tree. #include -#include -#include #include #include #include @@ -17,25 +15,24 @@ #include #include "xnnpack.h" -#include "xnnpack/aligned-allocator.h" -#include "xnnpack/common.h" #include "xnnpack/math.h" #include "xnnpack/node-type.h" -#include "xnnpack/operator.h" -#include "xnnpack/requantization.h" #include "xnnpack/subgraph.h" #include "replicable_random_device.h" -template class GlobalAveragePooling1DTest : public ::testing::Test { +template +class GlobalAveragePooling1DTest : public ::testing::Test { protected: GlobalAveragePooling1DTest() { shape_dist = std::uniform_int_distribution(2, XNN_MAX_TENSOR_DIMS); dim_dist = std::uniform_int_distribution(1, 9); f32dist = std::uniform_real_distribution(); - i8dist = - std::uniform_int_distribution(std::numeric_limits::min(), std::numeric_limits::max()); - u8dist = - std::uniform_int_distribution(std::numeric_limits::min(), std::numeric_limits::max()); + i8dist = std::uniform_int_distribution( + std::numeric_limits::min(), + std::numeric_limits::max()); + u8dist = std::uniform_int_distribution( + std::numeric_limits::min(), + std::numeric_limits::max()); scale_dist = std::uniform_real_distribution(0.1f, 5.0f); input_dims = RandomShape(); @@ -48,10 +45,6 @@ template class GlobalAveragePooling1DTest : public ::testing::Test } input_width = input_dims[input_dims.size() - 2]; channels = input_dims[input_dims.size() - 1]; - - input = std::vector(XNN_EXTRA_BYTES / sizeof(T) + NumElements(input_dims)); - operator_output = std::vector(NumElements(output_dims)); - subgraph_output = std::vector(operator_output.size()); } std::vector RandomShape() @@ -64,7 +57,8 @@ template class GlobalAveragePooling1DTest : public ::testing::Test size_t NumElements(std::vector& dims) { - return std::accumulate(dims.begin(), dims.end(), size_t(1), std::multiplies()); + return std::accumulate( + dims.begin(), dims.end(), size_t(1), std::multiplies()); } xnnpack::ReplicableRandomDevice rng; @@ -83,10 +77,6 @@ template class GlobalAveragePooling1DTest : public ::testing::Test std::vector input_dims; std::vector output_dims; - - std::vector input; - std::vector operator_output; - std::vector subgraph_output; }; using GlobalAveragePooling1DTestQS8 = GlobalAveragePooling1DTest; @@ -122,14 +112,14 @@ TEST_F(GlobalAveragePooling1DTestQS8, define) ASSERT_EQ(subgraph->num_nodes, 1); const struct xnn_node* node = &subgraph->nodes[0]; - ASSERT_EQ(node->type, xnn_node_type_global_average_pooling_1d); + ASSERT_EQ(node->type, xnn_node_type_static_mean); ASSERT_EQ(node->compute_type, xnn_compute_type_qs8); - ASSERT_EQ(node->activation.output_min, output_min); - ASSERT_EQ(node->activation.output_max, output_max); ASSERT_EQ(node->num_inputs, 1); ASSERT_EQ(node->inputs[0], input_id); ASSERT_EQ(node->num_outputs, 1); ASSERT_EQ(node->outputs[0], output_id); + ASSERT_EQ(node->params.reduce.num_reduction_axes, 1); + ASSERT_EQ(node->params.reduce.reduction_axes[0], input_dims.size() - 2); ASSERT_EQ(node->flags, 0); } @@ -161,14 +151,14 @@ TEST_F(GlobalAveragePooling1DTestQU8, define) ASSERT_EQ(subgraph->num_nodes, 1); const struct xnn_node* node = &subgraph->nodes[0]; - ASSERT_EQ(node->type, xnn_node_type_global_average_pooling_1d); + ASSERT_EQ(node->type, xnn_node_type_static_mean); ASSERT_EQ(node->compute_type, xnn_compute_type_qu8); - ASSERT_EQ(node->activation.output_min, output_min); - ASSERT_EQ(node->activation.output_max, output_max); ASSERT_EQ(node->num_inputs, 1); ASSERT_EQ(node->inputs[0], input_id); ASSERT_EQ(node->num_outputs, 1); ASSERT_EQ(node->outputs[0], output_id); + ASSERT_EQ(node->params.reduce.num_reduction_axes, 1); + ASSERT_EQ(node->params.reduce.reduction_axes[0], input_dims.size() - 2); ASSERT_EQ(node->flags, 0); } @@ -200,14 +190,14 @@ TEST_F(GlobalAveragePooling1DTestF16, define) ASSERT_EQ(subgraph->num_nodes, 1); const struct xnn_node* node = &subgraph->nodes[0]; - ASSERT_EQ(node->type, xnn_node_type_global_average_pooling_1d); + ASSERT_EQ(node->type, xnn_node_type_static_mean); ASSERT_EQ(node->compute_type, xnn_compute_type_fp16); - ASSERT_EQ(node->activation.output_min, output_min); - ASSERT_EQ(node->activation.output_max, output_max); ASSERT_EQ(node->num_inputs, 1); ASSERT_EQ(node->inputs[0], input_id); ASSERT_EQ(node->num_outputs, 1); ASSERT_EQ(node->outputs[0], output_id); + ASSERT_EQ(node->params.reduce.num_reduction_axes, 1); + ASSERT_EQ(node->params.reduce.reduction_axes[0], input_dims.size() - 2); ASSERT_EQ(node->flags, 0); } @@ -239,440 +229,13 @@ TEST_F(GlobalAveragePooling1DTestF32, define) ASSERT_EQ(subgraph->num_nodes, 1); const struct xnn_node* node = &subgraph->nodes[0]; - ASSERT_EQ(node->type, xnn_node_type_global_average_pooling_1d); + ASSERT_EQ(node->type, xnn_node_type_static_mean); ASSERT_EQ(node->compute_type, xnn_compute_type_fp32); - ASSERT_EQ(node->activation.output_min, output_min); - ASSERT_EQ(node->activation.output_max, output_max); ASSERT_EQ(node->num_inputs, 1); ASSERT_EQ(node->inputs[0], input_id); ASSERT_EQ(node->num_outputs, 1); ASSERT_EQ(node->outputs[0], output_id); + ASSERT_EQ(node->params.reduce.num_reduction_axes, 1); + ASSERT_EQ(node->params.reduce.reduction_axes[0], input_dims.size() - 2); ASSERT_EQ(node->flags, 0); } - -TEST_F(GlobalAveragePooling1DTestQS8, matches_operator_api) -{ - const int32_t input_zero_point = i8dist(rng); - const int32_t output_zero_point = i8dist(rng); - const float input_scale = scale_dist(rng); - const float output_scale = scale_dist(rng); - const int8_t quantized_output_min = xnn_qs8_quantize(output_min, output_scale, output_zero_point); - const int8_t quantized_output_max = xnn_qs8_quantize(output_max, output_scale, output_zero_point); - - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - xnn_operator_t op = nullptr; - const xnn_status status = xnn_create_global_average_pooling_nwc_qs8( - input_zero_point, input_scale, output_zero_point, output_scale, quantized_output_min, - quantized_output_max, /*flags=*/0, &op); - std::unique_ptr auto_op(op, xnn_delete_operator); - - if (status == xnn_status_unsupported_hardware) { - GTEST_SKIP(); - } - - ASSERT_EQ(xnn_status_success, status); - ASSERT_NE(nullptr, op); - size_t workspace_size = 0; - size_t workspace_alignment = 0; - ASSERT_EQ( - xnn_status_success, xnn_reshape_global_average_pooling_nwc_qs8( - op, batch_size, input_width, channels, - /*input_stride=*/channels, - /*output_stride=*/channels, - &workspace_size, &workspace_alignment, /*threadpool=*/nullptr)); - ASSERT_LE(workspace_alignment, XNN_ALLOCATION_ALIGNMENT); - - std::vector> workspace(workspace_size); - ASSERT_EQ( - xnn_status_success, - xnn_setup_global_average_pooling_nwc_qs8(op, workspace.data(), input.data(), operator_output.data())); - - ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr)); - - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - uint32_t input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, - xnn_define_quantized_tensor_value( - subgraph, xnn_datatype_qint8, input_zero_point, input_scale, input_dims.size(), input_dims.data(), nullptr, - /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - uint32_t output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, - xnn_define_quantized_tensor_value( - subgraph, xnn_datatype_qint8, output_zero_point, output_scale, output_dims.size(), output_dims.data(), nullptr, - /*external_id=*/1, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - - ASSERT_EQ( - xnn_status_success, - xnn_define_global_average_pooling_1d(subgraph, output_min, output_max, input_id, output_id, /*flags=*/0)); - - xnn_runtime_t runtime = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime)); - ASSERT_NE(nullptr, runtime); - std::unique_ptr auto_runtime(runtime, xnn_delete_runtime); - std::array external = { - xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}}; - ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data())); - ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime)); - - ASSERT_EQ(subgraph_output, operator_output); -} - -TEST_F(GlobalAveragePooling1DTestQU8, matches_operator_api) -{ - const int32_t input_zero_point = u8dist(rng); - const int32_t output_zero_point = u8dist(rng); - const float input_scale = scale_dist(rng); - const float output_scale = scale_dist(rng); - const uint8_t quantized_output_min = xnn_qu8_quantize(output_min, output_scale, output_zero_point); - const uint8_t quantized_output_max = xnn_qu8_quantize(output_max, output_scale, output_zero_point); - - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - xnn_operator_t op = nullptr; - const xnn_status status = xnn_create_global_average_pooling_nwc_qu8( - input_zero_point, input_scale, output_zero_point, output_scale, quantized_output_min, - quantized_output_max, /*flags=*/0, &op); - std::unique_ptr auto_op(op, xnn_delete_operator); - - if (status == xnn_status_unsupported_hardware) { - GTEST_SKIP(); - } - - ASSERT_EQ(xnn_status_success, status); - ASSERT_NE(nullptr, op); - size_t workspace_size = 0; - size_t workspace_alignment = 0; - ASSERT_EQ( - xnn_status_success, xnn_reshape_global_average_pooling_nwc_qu8( - op, batch_size, input_width, channels, - /*input_stride=*/channels, - /*output_stride=*/channels, - &workspace_size, &workspace_alignment, /*threadpool=*/nullptr)); - ASSERT_LE(workspace_alignment, XNN_ALLOCATION_ALIGNMENT); - - std::vector> workspace(workspace_size); - ASSERT_EQ( - xnn_status_success, - xnn_setup_global_average_pooling_nwc_qu8(op, workspace.data(), input.data(), operator_output.data())); - - ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr)); - - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - uint32_t input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, - xnn_define_quantized_tensor_value( - subgraph, xnn_datatype_quint8, input_zero_point, input_scale, input_dims.size(), input_dims.data(), nullptr, - /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - uint32_t output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, - xnn_define_quantized_tensor_value( - subgraph, xnn_datatype_quint8, output_zero_point, output_scale, output_dims.size(), output_dims.data(), nullptr, - /*external_id=*/1, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - - ASSERT_EQ( - xnn_status_success, - xnn_define_global_average_pooling_1d(subgraph, output_min, output_max, input_id, output_id, /*flags=*/0)); - - xnn_runtime_t runtime = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime)); - ASSERT_NE(nullptr, runtime); - std::unique_ptr auto_runtime(runtime, xnn_delete_runtime); - std::array external = { - xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}}; - ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data())); - ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime)); - - ASSERT_EQ(subgraph_output, operator_output); -} - -TEST_F(GlobalAveragePooling1DTestF16, matches_operator_api) -{ - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - xnn_operator_t op = nullptr; - - std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); }); - - // Call operator API. - const xnn_status status = xnn_create_global_average_pooling_nwc_f16( - output_min, output_max, /*flags=*/0, &op); - std::unique_ptr auto_op(op, xnn_delete_operator); - - if (status == xnn_status_unsupported_hardware) { - GTEST_SKIP(); - } - - ASSERT_EQ(xnn_status_success, status); - ASSERT_NE(nullptr, op); - size_t workspace_size = 0; - size_t workspace_alignment = 0; - ASSERT_EQ( - xnn_status_success, xnn_reshape_global_average_pooling_nwc_f16( - op, batch_size, input_width, channels, - /*input_stride=*/channels, - /*output_stride=*/channels, - &workspace_size, &workspace_alignment, /*threadpool=*/nullptr)); - ASSERT_LE(workspace_alignment, XNN_ALLOCATION_ALIGNMENT); - - std::vector> workspace(workspace_size); - ASSERT_EQ( - xnn_status_success, - xnn_setup_global_average_pooling_nwc_f16(op, workspace.data(), input.data(), operator_output.data())); - - ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr)); - - // Call subgraph API. - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - uint32_t input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp16, input_dims.size(), input_dims.data(), nullptr, - /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - uint32_t output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp16, output_dims.size(), output_dims.data(), nullptr, - /*external_id=*/1, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - ASSERT_EQ( - xnn_status_success, - xnn_define_global_average_pooling_1d(subgraph, output_min, output_max, input_id, output_id, /*flags=*/0)); - - xnn_runtime_t runtime = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime)); - ASSERT_NE(nullptr, runtime); - std::unique_ptr auto_runtime(runtime, xnn_delete_runtime); - std::array external = { - xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}}; - ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data())); - ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime)); - - ASSERT_EQ(subgraph_output, operator_output); -} - -TEST_F(GlobalAveragePooling1DTestF32, matches_operator_api) -{ - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - xnn_operator_t op = nullptr; - - std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); }); - - // Call operator API. - const xnn_status status = xnn_create_global_average_pooling_nwc_f32( - output_min, output_max, /*flags=*/0, &op); - std::unique_ptr auto_op(op, xnn_delete_operator); - - if (status == xnn_status_unsupported_hardware) { - GTEST_SKIP(); - } - - ASSERT_EQ(xnn_status_success, status); - ASSERT_NE(nullptr, op); - size_t workspace_size = 0; - size_t workspace_alignment = 0; - ASSERT_EQ( - xnn_status_success, xnn_reshape_global_average_pooling_nwc_f32( - op, batch_size, input_width, channels, - /*input_stride=*/channels, - /*output_stride=*/channels, - &workspace_size, &workspace_alignment, /*threadpool=*/nullptr)); - ASSERT_LE(workspace_alignment, XNN_ALLOCATION_ALIGNMENT); - - std::vector> workspace(workspace_size); - ASSERT_EQ( - xnn_status_success, - xnn_setup_global_average_pooling_nwc_f32(op, workspace.data(), input.data(), operator_output.data())); - - ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr)); - - // Call subgraph API. - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - uint32_t input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, input_dims.size(), input_dims.data(), nullptr, - /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - uint32_t output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, output_dims.size(), output_dims.data(), nullptr, - /*external_id=*/1, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - ASSERT_EQ( - xnn_status_success, - xnn_define_global_average_pooling_1d(subgraph, output_min, output_max, input_id, output_id, /*flags=*/0)); - - xnn_runtime_t runtime = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime)); - ASSERT_NE(nullptr, runtime); - std::unique_ptr auto_runtime(runtime, xnn_delete_runtime); - std::array external = { - xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}}; - ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data())); - ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime)); - - ASSERT_EQ(subgraph_output, operator_output); -} - -TEST_F(GlobalAveragePooling1DTestF32, reshape_output_no_keep_dims) -{ - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - // Call subgraph API. - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - uint32_t input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, input_dims.size(), input_dims.data(), nullptr, - /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - uint32_t output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, output_dims.size(), output_dims.data(), nullptr, - /*external_id=*/1, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - ASSERT_EQ( - xnn_status_success, - xnn_define_global_average_pooling_1d(subgraph, output_min, output_max, input_id, output_id, /*flags=*/0)); - - xnn_runtime_t runtime = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime)); - ASSERT_NE(nullptr, runtime); - std::unique_ptr auto_runtime(runtime, xnn_delete_runtime); - std::array external = { - xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}}; - ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data())); - ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime)); - - const size_t num_input_dims = input_dims.size(); - const size_t num_batch_dims = num_input_dims - 2; - const struct xnn_node* node = &subgraph->nodes[0]; - const xnn_shape* output_shape = &runtime->values[node->outputs[0]].shape; - for (size_t i = 0; i < num_batch_dims; ++i) { - ASSERT_EQ(output_shape->dim[i], input_dims[i]); - } - ASSERT_EQ(output_shape->dim[num_input_dims - 2], input_dims[num_input_dims - 1]); - - input_dims[0] += 2; - input_dims[num_input_dims - 1] += 3; - ASSERT_EQ(xnn_status_success, xnn_reshape_external_value(runtime, input_id, input_dims.size(), input_dims.data())); - ASSERT_EQ(node->reshape(&runtime->opdata[0], runtime->values, runtime->num_values, /*threadpool=*/nullptr), xnn_status_reallocation_required); - for (size_t i = 0; i < num_batch_dims; ++i) { - ASSERT_EQ(output_shape->dim[i], input_dims[i]); - } - ASSERT_EQ(output_shape->dim[num_input_dims - 2], input_dims[num_input_dims - 1]); - - input_dims[0] -= 2; - input_dims[num_input_dims - 1] -= 3; - ASSERT_EQ(xnn_status_success, xnn_reshape_external_value(runtime, input_id, input_dims.size(), input_dims.data())); - ASSERT_EQ(node->reshape(&runtime->opdata[0], runtime->values, runtime->num_values, /*threadpool=*/nullptr), xnn_status_success); - for (size_t i = 0; i < num_batch_dims; ++i) { - ASSERT_EQ(output_shape->dim[i], input_dims[i]); - } - ASSERT_EQ(output_shape->dim[num_input_dims - 2], input_dims[num_input_dims - 1]); -} - -TEST_F(GlobalAveragePooling1DTestF32, reshape_output_keep_dims) -{ - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - // Call subgraph API. - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - uint32_t input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, input_dims.size(), input_dims.data(), nullptr, - /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - uint32_t output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, output_dims.size(), output_dims.data(), nullptr, - /*external_id=*/1, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - ASSERT_EQ( - xnn_status_success, - xnn_define_global_average_pooling_1d(subgraph, output_min, output_max, input_id, output_id, XNN_FLAG_KEEP_DIMS)); - - xnn_runtime_t runtime = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime)); - ASSERT_NE(nullptr, runtime); - std::unique_ptr auto_runtime(runtime, xnn_delete_runtime); - std::array external = { - xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}}; - ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data())); - ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime)); - - const size_t num_input_dims = input_dims.size(); - const size_t num_batch_dims = num_input_dims - 2; - const struct xnn_node* node = &subgraph->nodes[0]; - const xnn_shape* output_shape = &runtime->values[node->outputs[0]].shape; - for (size_t i = 0; i < num_batch_dims; ++i) { - ASSERT_EQ(output_shape->dim[i], input_dims[i]); - } - for (size_t i = num_batch_dims; i < num_input_dims - 1; ++i) { - ASSERT_EQ(output_shape->dim[i], 1); - } - ASSERT_EQ(output_shape->dim[num_input_dims - 1], input_dims[num_input_dims - 1]); - input_dims[0] += 2; - input_dims[num_input_dims - 1] += 3; - ASSERT_EQ(xnn_status_success, xnn_reshape_external_value(runtime, input_id, input_dims.size(), input_dims.data())); - ASSERT_EQ(node->reshape(&runtime->opdata[0], runtime->values, runtime->num_values, /*threadpool=*/nullptr), xnn_status_reallocation_required); - - for (size_t i = 0; i < num_batch_dims; ++i) { - ASSERT_EQ(output_shape->dim[i], input_dims[i]); - } - for (size_t i = num_batch_dims; i < num_input_dims - 1; ++i) { - ASSERT_EQ(output_shape->dim[i], 1); - } - ASSERT_EQ(output_shape->dim[num_input_dims - 1], input_dims[num_input_dims - 1]); - input_dims[0] -= 2; - input_dims[num_input_dims - 1] -= 3; - ASSERT_EQ(xnn_status_success, xnn_reshape_external_value(runtime, input_id, input_dims.size(), input_dims.data())); - ASSERT_EQ(node->reshape(&runtime->opdata[0], runtime->values, runtime->num_values, /*threadpool=*/nullptr), xnn_status_success); - - for (size_t i = 0; i < num_batch_dims; ++i) { - ASSERT_EQ(output_shape->dim[i], input_dims[i]); - } - for (size_t i = num_batch_dims; i < num_input_dims - 1; ++i) { - ASSERT_EQ(output_shape->dim[i], 1); - } - ASSERT_EQ(output_shape->dim[num_input_dims - 1], input_dims[num_input_dims - 1]); -} diff --git a/test/global-average-pooling-2d.cc b/test/global-average-pooling-2d.cc index c489b6f9a4f..0fb9765d61f 100644 --- a/test/global-average-pooling-2d.cc +++ b/test/global-average-pooling-2d.cc @@ -4,8 +4,6 @@ // LICENSE file in the root directory of this source tree. #include -#include -#include #include #include #include @@ -17,39 +15,36 @@ #include #include "xnnpack.h" -#include "xnnpack/aligned-allocator.h" -#include "xnnpack/common.h" #include "xnnpack/math.h" #include "xnnpack/node-type.h" -#include "xnnpack/operator.h" -#include "xnnpack/requantization.h" #include "xnnpack/subgraph.h" #include "replicable_random_device.h" -template class GlobalAveragePooling2DTest : public ::testing::Test { +template +class GlobalAveragePooling2DTest : public ::testing::Test { protected: GlobalAveragePooling2DTest() { shape_dist = std::uniform_int_distribution(3, XNN_MAX_TENSOR_DIMS); dim_dist = std::uniform_int_distribution(1, 9); f32dist = std::uniform_real_distribution(); - i8dist = - std::uniform_int_distribution(std::numeric_limits::min(), std::numeric_limits::max()); - u8dist = - std::uniform_int_distribution(std::numeric_limits::min(), std::numeric_limits::max()); + i8dist = std::uniform_int_distribution( + std::numeric_limits::min(), + std::numeric_limits::max()); + u8dist = std::uniform_int_distribution( + std::numeric_limits::min(), + std::numeric_limits::max()); scale_dist = std::uniform_real_distribution(0.1f, 5.0f); input_dims = RandomShape(); output_dims = input_dims; output_dims[output_dims.size() - 3] = 1; output_dims[output_dims.size() - 2] = 1; - input = std::vector(XNN_EXTRA_BYTES / sizeof(T) + NumElements(input_dims)); - operator_output = std::vector(NumElements(output_dims)); - subgraph_output = std::vector(operator_output.size()); batch_size = 1; for (size_t i = 0; i < input_dims.size() - 3; i++) { batch_size *= input_dims[i]; } - input_width = input_dims[input_dims.size() - 3] * input_dims[input_dims.size() - 2]; + input_width = + input_dims[input_dims.size() - 3] * input_dims[input_dims.size() - 2]; channels = input_dims[input_dims.size() - 1]; } @@ -62,7 +57,8 @@ template class GlobalAveragePooling2DTest : public ::testing::Test size_t NumElements(std::vector& dims) { - return std::accumulate(dims.begin(), dims.end(), size_t(1), std::multiplies()); + return std::accumulate( + dims.begin(), dims.end(), size_t(1), std::multiplies()); } xnnpack::ReplicableRandomDevice rng; @@ -81,10 +77,6 @@ template class GlobalAveragePooling2DTest : public ::testing::Test std::vector input_dims; std::vector output_dims; - - std::vector input; - std::vector operator_output; - std::vector subgraph_output; }; using GlobalAveragePooling2DTestQS8 = GlobalAveragePooling2DTest; @@ -120,14 +112,15 @@ TEST_F(GlobalAveragePooling2DTestQS8, define) ASSERT_EQ(subgraph->num_nodes, 1); const struct xnn_node* node = &subgraph->nodes[0]; - ASSERT_EQ(node->type, xnn_node_type_global_average_pooling_2d); + ASSERT_EQ(node->type, xnn_node_type_static_mean); ASSERT_EQ(node->compute_type, xnn_compute_type_qs8); - ASSERT_EQ(node->activation.output_min, output_min); - ASSERT_EQ(node->activation.output_max, output_max); ASSERT_EQ(node->num_inputs, 1); ASSERT_EQ(node->inputs[0], input_id); ASSERT_EQ(node->num_outputs, 1); ASSERT_EQ(node->outputs[0], output_id); + ASSERT_EQ(node->params.reduce.num_reduction_axes, 2); + ASSERT_EQ(node->params.reduce.reduction_axes[0], input_dims.size() - 3); + ASSERT_EQ(node->params.reduce.reduction_axes[1], input_dims.size() - 2); ASSERT_EQ(node->flags, 0); } @@ -159,14 +152,15 @@ TEST_F(GlobalAveragePooling2DTestQU8, define) ASSERT_EQ(subgraph->num_nodes, 1); const struct xnn_node* node = &subgraph->nodes[0]; - ASSERT_EQ(node->type, xnn_node_type_global_average_pooling_2d); + ASSERT_EQ(node->type, xnn_node_type_static_mean); ASSERT_EQ(node->compute_type, xnn_compute_type_qu8); - ASSERT_EQ(node->activation.output_min, output_min); - ASSERT_EQ(node->activation.output_max, output_max); ASSERT_EQ(node->num_inputs, 1); ASSERT_EQ(node->inputs[0], input_id); ASSERT_EQ(node->num_outputs, 1); ASSERT_EQ(node->outputs[0], output_id); + ASSERT_EQ(node->params.reduce.num_reduction_axes, 2); + ASSERT_EQ(node->params.reduce.reduction_axes[0], input_dims.size() - 3); + ASSERT_EQ(node->params.reduce.reduction_axes[1], input_dims.size() - 2); ASSERT_EQ(node->flags, 0); } @@ -198,14 +192,15 @@ TEST_F(GlobalAveragePooling2DTestF16, define) ASSERT_EQ(subgraph->num_nodes, 1); const struct xnn_node* node = &subgraph->nodes[0]; - ASSERT_EQ(node->type, xnn_node_type_global_average_pooling_2d); + ASSERT_EQ(node->type, xnn_node_type_static_mean); ASSERT_EQ(node->compute_type, xnn_compute_type_fp16); - ASSERT_EQ(node->activation.output_min, output_min); - ASSERT_EQ(node->activation.output_max, output_max); ASSERT_EQ(node->num_inputs, 1); ASSERT_EQ(node->inputs[0], input_id); ASSERT_EQ(node->num_outputs, 1); ASSERT_EQ(node->outputs[0], output_id); + ASSERT_EQ(node->params.reduce.num_reduction_axes, 2); + ASSERT_EQ(node->params.reduce.reduction_axes[0], input_dims.size() - 3); + ASSERT_EQ(node->params.reduce.reduction_axes[1], input_dims.size() - 2); ASSERT_EQ(node->flags, 0); } @@ -237,440 +232,14 @@ TEST_F(GlobalAveragePooling2DTestF32, define) ASSERT_EQ(subgraph->num_nodes, 1); const struct xnn_node* node = &subgraph->nodes[0]; - ASSERT_EQ(node->type, xnn_node_type_global_average_pooling_2d); + ASSERT_EQ(node->type, xnn_node_type_static_mean); ASSERT_EQ(node->compute_type, xnn_compute_type_fp32); - ASSERT_EQ(node->activation.output_min, output_min); - ASSERT_EQ(node->activation.output_max, output_max); ASSERT_EQ(node->num_inputs, 1); ASSERT_EQ(node->inputs[0], input_id); ASSERT_EQ(node->num_outputs, 1); ASSERT_EQ(node->outputs[0], output_id); + ASSERT_EQ(node->params.reduce.num_reduction_axes, 2); + ASSERT_EQ(node->params.reduce.reduction_axes[0], input_dims.size() - 3); + ASSERT_EQ(node->params.reduce.reduction_axes[1], input_dims.size() - 2); ASSERT_EQ(node->flags, 0); } - -TEST_F(GlobalAveragePooling2DTestQS8, matches_operator_api) -{ - const int32_t input_zero_point = i8dist(rng); - const int32_t output_zero_point = i8dist(rng); - const float input_scale = scale_dist(rng); - const float output_scale = scale_dist(rng); - const int8_t quantized_output_min = xnn_qs8_quantize(output_min, output_scale, output_zero_point); - const int8_t quantized_output_max = xnn_qs8_quantize(output_max, output_scale, output_zero_point); - - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - xnn_operator_t op = nullptr; - const xnn_status status = xnn_create_global_average_pooling_nwc_qs8( - input_zero_point, input_scale, output_zero_point, output_scale, quantized_output_min, - quantized_output_max, /*flags=*/0, &op); - std::unique_ptr auto_op(op, xnn_delete_operator); - - if (status == xnn_status_unsupported_hardware) { - GTEST_SKIP(); - } - - ASSERT_EQ(xnn_status_success, status); - ASSERT_NE(nullptr, op); - size_t workspace_size = 0; - size_t workspace_alignment = 0; - ASSERT_EQ( - xnn_status_success, xnn_reshape_global_average_pooling_nwc_qs8( - op, batch_size, input_width, channels, - /*input_stride=*/channels, - /*output_stride=*/channels, - &workspace_size, &workspace_alignment, /*threadpool=*/nullptr)); - ASSERT_LE(workspace_alignment, XNN_ALLOCATION_ALIGNMENT); - - std::vector> workspace(workspace_size); - ASSERT_EQ( - xnn_status_success, - xnn_setup_global_average_pooling_nwc_qs8(op, workspace.data(), input.data(), operator_output.data())); - - ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr)); - - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - uint32_t input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, - xnn_define_quantized_tensor_value( - subgraph, xnn_datatype_qint8, input_zero_point, input_scale, input_dims.size(), input_dims.data(), nullptr, - /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - uint32_t output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, - xnn_define_quantized_tensor_value( - subgraph, xnn_datatype_qint8, output_zero_point, output_scale, output_dims.size(), output_dims.data(), nullptr, - /*external_id=*/1, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - - ASSERT_EQ( - xnn_status_success, - xnn_define_global_average_pooling_2d(subgraph, output_min, output_max, input_id, output_id, /*flags=*/0)); - - xnn_runtime_t runtime = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime)); - ASSERT_NE(nullptr, runtime); - std::unique_ptr auto_runtime(runtime, xnn_delete_runtime); - std::array external = { - xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}}; - ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data())); - ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime)); - - ASSERT_EQ(subgraph_output, operator_output); -} - -TEST_F(GlobalAveragePooling2DTestQU8, matches_operator_api) -{ - const int32_t input_zero_point = u8dist(rng); - const int32_t output_zero_point = u8dist(rng); - const float input_scale = scale_dist(rng); - const float output_scale = scale_dist(rng); - const uint8_t quantized_output_min = xnn_qu8_quantize(output_min, output_scale, output_zero_point); - const uint8_t quantized_output_max = xnn_qu8_quantize(output_max, output_scale, output_zero_point); - - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - xnn_operator_t op = nullptr; - const xnn_status status = xnn_create_global_average_pooling_nwc_qu8( - input_zero_point, input_scale, output_zero_point, output_scale, quantized_output_min, - quantized_output_max, /*flags=*/0, &op); - std::unique_ptr auto_op(op, xnn_delete_operator); - - if (status == xnn_status_unsupported_hardware) { - GTEST_SKIP(); - } - - ASSERT_EQ(xnn_status_success, status); - ASSERT_NE(nullptr, op); - size_t workspace_size = 0; - size_t workspace_alignment = 0; - ASSERT_EQ( - xnn_status_success, xnn_reshape_global_average_pooling_nwc_qu8( - op, batch_size, input_width, channels, - /*input_stride=*/channels, - /*output_stride=*/channels, - &workspace_size, &workspace_alignment, /*threadpool=*/nullptr)); - ASSERT_LE(workspace_alignment, XNN_ALLOCATION_ALIGNMENT); - - std::vector> workspace(workspace_size); - ASSERT_EQ( - xnn_status_success, - xnn_setup_global_average_pooling_nwc_qu8(op, workspace.data(), input.data(), operator_output.data())); - - ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr)); - - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - uint32_t input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, - xnn_define_quantized_tensor_value( - subgraph, xnn_datatype_quint8, input_zero_point, input_scale, input_dims.size(), input_dims.data(), nullptr, - /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - uint32_t output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, - xnn_define_quantized_tensor_value( - subgraph, xnn_datatype_quint8, output_zero_point, output_scale, output_dims.size(), output_dims.data(), nullptr, - /*external_id=*/1, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - - ASSERT_EQ( - xnn_status_success, - xnn_define_global_average_pooling_2d(subgraph, output_min, output_max, input_id, output_id, /*flags=*/0)); - - xnn_runtime_t runtime = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime)); - ASSERT_NE(nullptr, runtime); - std::unique_ptr auto_runtime(runtime, xnn_delete_runtime); - std::array external = { - xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}}; - ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data())); - ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime)); - - ASSERT_EQ(subgraph_output, operator_output); -} - -TEST_F(GlobalAveragePooling2DTestF16, matches_operator_api) -{ - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - xnn_operator_t op = nullptr; - - std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); }); - - // Call operator API. - const xnn_status status = xnn_create_global_average_pooling_nwc_f16( - output_min, output_max, /*flags=*/0, &op); - std::unique_ptr auto_op(op, xnn_delete_operator); - - if (status == xnn_status_unsupported_hardware) { - GTEST_SKIP(); - } - - ASSERT_EQ(xnn_status_success, status); - ASSERT_NE(nullptr, op); - size_t workspace_size = 0; - size_t workspace_alignment = 0; - ASSERT_EQ( - xnn_status_success, xnn_reshape_global_average_pooling_nwc_f16( - op, batch_size, input_width, channels, - /*input_stride=*/channels, - /*output_stride=*/channels, - &workspace_size, &workspace_alignment, /*threadpool=*/nullptr)); - ASSERT_LE(workspace_alignment, XNN_ALLOCATION_ALIGNMENT); - - std::vector> workspace(workspace_size); - ASSERT_EQ( - xnn_status_success, - xnn_setup_global_average_pooling_nwc_f16(op, workspace.data(), input.data(), operator_output.data())); - - ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr)); - - // Call subgraph API. - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - uint32_t input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp16, input_dims.size(), input_dims.data(), nullptr, - /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - uint32_t output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp16, output_dims.size(), output_dims.data(), nullptr, - /*external_id=*/1, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - ASSERT_EQ( - xnn_status_success, - xnn_define_global_average_pooling_2d(subgraph, output_min, output_max, input_id, output_id, /*flags=*/0)); - - xnn_runtime_t runtime = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime)); - ASSERT_NE(nullptr, runtime); - std::unique_ptr auto_runtime(runtime, xnn_delete_runtime); - std::array external = { - xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}}; - ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data())); - ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime)); - - ASSERT_EQ(subgraph_output, operator_output); -} - -TEST_F(GlobalAveragePooling2DTestF32, matches_operator_api) -{ - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - xnn_operator_t op = nullptr; - - std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); }); - - // Call operator API. - const xnn_status status = xnn_create_global_average_pooling_nwc_f32( - output_min, output_max, /*flags=*/0, &op); - std::unique_ptr auto_op(op, xnn_delete_operator); - - if (status == xnn_status_unsupported_hardware) { - GTEST_SKIP(); - } - - ASSERT_EQ(xnn_status_success, status); - ASSERT_NE(nullptr, op); - size_t workspace_size = 0; - size_t workspace_alignment = 0; - ASSERT_EQ( - xnn_status_success, xnn_reshape_global_average_pooling_nwc_f32( - op, batch_size, input_width, channels, - /*input_stride=*/channels, - /*output_stride=*/channels, - &workspace_size, &workspace_alignment, /*threadpool=*/nullptr)); - ASSERT_LE(workspace_alignment, XNN_ALLOCATION_ALIGNMENT); - - std::vector> workspace(workspace_size); - ASSERT_EQ( - xnn_status_success, - xnn_setup_global_average_pooling_nwc_f32(op, workspace.data(), input.data(), operator_output.data())); - - ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr)); - - // Call subgraph API. - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - uint32_t input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, input_dims.size(), input_dims.data(), nullptr, - /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - uint32_t output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, output_dims.size(), output_dims.data(), nullptr, - /*external_id=*/1, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - ASSERT_EQ( - xnn_status_success, - xnn_define_global_average_pooling_2d(subgraph, output_min, output_max, input_id, output_id, /*flags=*/0)); - - xnn_runtime_t runtime = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime)); - ASSERT_NE(nullptr, runtime); - std::unique_ptr auto_runtime(runtime, xnn_delete_runtime); - std::array external = { - xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}}; - ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data())); - ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime)); - - ASSERT_EQ(subgraph_output, operator_output); -} - -TEST_F(GlobalAveragePooling2DTestF32, reshape_output_no_keep_dims) -{ - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - // Call subgraph API. - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - uint32_t input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, input_dims.size(), input_dims.data(), nullptr, - /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - uint32_t output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, output_dims.size(), output_dims.data(), nullptr, - /*external_id=*/1, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - ASSERT_EQ( - xnn_status_success, - xnn_define_global_average_pooling_2d(subgraph, output_min, output_max, input_id, output_id, /*flags=*/0)); - - xnn_runtime_t runtime = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime)); - ASSERT_NE(nullptr, runtime); - std::unique_ptr auto_runtime(runtime, xnn_delete_runtime); - std::array external = { - xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}}; - ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data())); - ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime)); - - const size_t num_input_dims = input_dims.size(); - const size_t num_batch_dims = num_input_dims - 3; - const struct xnn_node* node = &subgraph->nodes[0]; - const xnn_shape* output_shape = &runtime->values[node->outputs[0]].shape; - for (size_t i = 0; i < num_batch_dims; ++i) { - ASSERT_EQ(output_shape->dim[i], input_dims[i]); - } - ASSERT_EQ(output_shape->dim[num_input_dims - 3], input_dims[num_input_dims - 1]); - - input_dims[0] += 2; - input_dims[num_input_dims - 1] += 3; - ASSERT_EQ(xnn_status_success, xnn_reshape_external_value(runtime, input_id, input_dims.size(), input_dims.data())); - ASSERT_EQ(node->reshape(&runtime->opdata[0], runtime->values, runtime->num_values, /*threadpool=*/nullptr), xnn_status_reallocation_required); - for (size_t i = 0; i < num_batch_dims; ++i) { - ASSERT_EQ(output_shape->dim[i], input_dims[i]); - } - ASSERT_EQ(output_shape->dim[num_input_dims - 3], input_dims[num_input_dims - 1]); - - input_dims[0] -= 2; - input_dims[num_input_dims - 1] -= 3; - ASSERT_EQ(xnn_status_success, xnn_reshape_external_value(runtime, input_id, input_dims.size(), input_dims.data())); - ASSERT_EQ(node->reshape(&runtime->opdata[0], runtime->values, runtime->num_values, /*threadpool=*/nullptr), xnn_status_success); - for (size_t i = 0; i < num_batch_dims; ++i) { - ASSERT_EQ(output_shape->dim[i], input_dims[i]); - } - ASSERT_EQ(output_shape->dim[num_input_dims - 3], input_dims[num_input_dims - 1]); -} - -TEST_F(GlobalAveragePooling2DTestF32, reshape_output_keep_dims) -{ - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - // Call subgraph API. - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - uint32_t input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, input_dims.size(), input_dims.data(), nullptr, - /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - uint32_t output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, output_dims.size(), output_dims.data(), nullptr, - /*external_id=*/1, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - ASSERT_EQ( - xnn_status_success, - xnn_define_global_average_pooling_2d(subgraph, output_min, output_max, input_id, output_id, XNN_FLAG_KEEP_DIMS)); - - xnn_runtime_t runtime = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime)); - ASSERT_NE(nullptr, runtime); - std::unique_ptr auto_runtime(runtime, xnn_delete_runtime); - std::array external = { - xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}}; - ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data())); - ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime)); - - const size_t num_input_dims = input_dims.size(); - const size_t num_batch_dims = num_input_dims - 3; - const struct xnn_node* node = &subgraph->nodes[0]; - const xnn_shape* output_shape = &runtime->values[node->outputs[0]].shape; - for (size_t i = 0; i < num_batch_dims; ++i) { - ASSERT_EQ(output_shape->dim[i], input_dims[i]); - } - for (size_t i = num_batch_dims; i < num_input_dims - 1; ++i) { - ASSERT_EQ(output_shape->dim[i], 1); - } - ASSERT_EQ(output_shape->dim[num_input_dims - 1], input_dims[num_input_dims - 1]); - input_dims[0] += 2; - input_dims[num_input_dims - 1] += 3; - - ASSERT_EQ(xnn_status_success, xnn_reshape_external_value(runtime, input_id, input_dims.size(), input_dims.data())); - ASSERT_EQ(node->reshape(&runtime->opdata[0], runtime->values, runtime->num_values, /*threadpool=*/nullptr), xnn_status_reallocation_required); - for (size_t i = 0; i < num_batch_dims; ++i) { - ASSERT_EQ(output_shape->dim[i], input_dims[i]); - } - for (size_t i = num_batch_dims; i < num_input_dims - 1; ++i) { - ASSERT_EQ(output_shape->dim[i], 1); - } - ASSERT_EQ(output_shape->dim[num_input_dims - 1], input_dims[num_input_dims - 1]); - - input_dims[0] -= 2; - input_dims[num_input_dims - 1] -= 3; - ASSERT_EQ(xnn_status_success, xnn_reshape_external_value(runtime, input_id, input_dims.size(), input_dims.data())); - ASSERT_EQ(node->reshape(&runtime->opdata[0], runtime->values, runtime->num_values, /*threadpool=*/nullptr), xnn_status_success); - for (size_t i = 0; i < num_batch_dims; ++i) { - ASSERT_EQ(output_shape->dim[i], input_dims[i]); - } - for (size_t i = num_batch_dims; i < num_input_dims - 1; ++i) { - ASSERT_EQ(output_shape->dim[i], 1); - } - ASSERT_EQ(output_shape->dim[num_input_dims - 1], input_dims[num_input_dims - 1]); -} diff --git a/test/global-average-pooling-ncw.cc b/test/global-average-pooling-ncw.cc deleted file mode 100644 index c2cadb84019..00000000000 --- a/test/global-average-pooling-ncw.cc +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright 2019 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. - -#include - -#include -#include "xnnpack.h" -#include "global-average-pooling-operator-tester.h" - -TEST(GLOBAL_AVERAGE_POOLING_NCW_F16, single_channel) { - ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); - GlobalAveragePoolingOperatorTester() - .width(27) - .channels(1) - .TestNCWxF16(); -} - -TEST(GLOBAL_AVERAGE_POOLING_NCW_F16, varying_channels) { - ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); - for (size_t channels = 2; channels <= 16; channels += 3) { - GlobalAveragePoolingOperatorTester() - .width(27) - .channels(channels) - .TestNCWxF16(); - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NCW_F16, varying_width) { - ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); - for (size_t width = 25; width <= 31; width++) { - GlobalAveragePoolingOperatorTester() - .width(width) - .channels(19) - .TestNCWxF16(); - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NCW_F16, qmin) { - ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); - GlobalAveragePoolingOperatorTester() - .width(27) - .channels(19) - .qmin(128) - .TestNCWxF16(); -} - -TEST(GLOBAL_AVERAGE_POOLING_NCW_F16, qmax) { - ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); - GlobalAveragePoolingOperatorTester() - .width(27) - .channels(19) - .qmax(128) - .TestNCWxF16(); -} - -TEST(GLOBAL_AVERAGE_POOLING_NCW_F32, single_channel) { - ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); - GlobalAveragePoolingOperatorTester() - .width(27) - .channels(1) - .TestNCWxF32(); -} - -TEST(GLOBAL_AVERAGE_POOLING_NCW_F32, varying_channels) { - ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); - for (size_t channels = 2; channels <= 16; channels += 3) { - GlobalAveragePoolingOperatorTester() - .width(27) - .channels(channels) - .TestNCWxF32(); - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NCW_F32, varying_width) { - ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); - for (size_t width = 25; width <= 31; width++) { - GlobalAveragePoolingOperatorTester() - .width(width) - .channels(19) - .TestNCWxF32(); - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NCW_F32, qmin) { - ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); - GlobalAveragePoolingOperatorTester() - .width(27) - .channels(19) - .qmin(128) - .TestNCWxF32(); -} - -TEST(GLOBAL_AVERAGE_POOLING_NCW_F32, qmax) { - ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); - GlobalAveragePoolingOperatorTester() - .width(27) - .channels(19) - .qmax(128) - .TestNCWxF32(); -} diff --git a/test/global-average-pooling-nwc.cc b/test/global-average-pooling-nwc.cc deleted file mode 100644 index 25fdf22d77a..00000000000 --- a/test/global-average-pooling-nwc.cc +++ /dev/null @@ -1,1261 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -// All rights reserved. -// -// Copyright 2019 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. - -#include -#include -#include - -#include -#include "xnnpack/config.h" -#include "global-average-pooling-operator-tester.h" - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QU8, unit_batch_small_width) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qu8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .TestNWCxQU8(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QU8, unit_batch_small_width_with_input_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qu8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .input_stride(5 * channels) - .TestNWCxQU8(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QU8, unit_batch_small_width_with_input_scale) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qu8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - for (float input_scale = 0.01f; input_scale < 100.0f; input_scale *= 3.14159265f) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .input_scale(input_scale) - .TestNWCxQU8(); - } - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QU8, unit_batch_small_width_with_input_zero_point) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qu8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - for (int32_t input_zero_point = 0; input_zero_point <= 255; input_zero_point += 51) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .input_zero_point(uint8_t(input_zero_point)) - .TestNWCxQU8(); - } - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QU8, unit_batch_small_width_with_output_scale) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qu8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - for (float output_scale = 0.01f; output_scale < 100.0f; output_scale *= 3.14159265f) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .output_scale(output_scale) - .TestNWCxQU8(); - } - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QU8, unit_batch_small_width_with_output_zero_point) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qu8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - for (int32_t output_zero_point = 0; output_zero_point <= 255; output_zero_point += 51) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .output_zero_point(uint8_t(output_zero_point)) - .TestNWCxQU8(); - } - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QU8, unit_batch_small_width_with_qmin) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qu8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .qmin(128) - .TestNWCxQU8(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QU8, unit_batch_small_width_with_qmax) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qu8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .qmax(128) - .TestNWCxQU8(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QU8, unit_batch_large_width) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qu8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .TestNWCxQU8(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QU8, unit_batch_large_width_with_input_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qu8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .input_stride(5 * channels) - .TestNWCxQU8(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QU8, unit_batch_large_width_with_input_scale) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qu8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - for (float input_scale = 0.01f; input_scale < 100.0f; input_scale *= 3.14159265f) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .input_scale(input_scale) - .TestNWCxQU8(); - } - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QU8, unit_batch_large_width_with_input_zero_point) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qu8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - for (int32_t input_zero_point = 0; input_zero_point <= 255; input_zero_point += 51) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .input_zero_point(uint8_t(input_zero_point)) - .TestNWCxQU8(); - } - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QU8, unit_batch_large_width_with_output_scale) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qu8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - for (float output_scale = 0.01f; output_scale < 100.0f; output_scale *= 3.14159265f) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .output_scale(output_scale) - .TestNWCxQU8(); - } - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QU8, unit_batch_large_width_with_output_zero_point) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qu8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - for (int32_t output_zero_point = 0; output_zero_point <= 255; output_zero_point += 51) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .output_zero_point(uint8_t(output_zero_point)) - .TestNWCxQU8(); - } - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QU8, unit_batch_large_width_with_qmin) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qu8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .qmin(128) - .TestNWCxQU8(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QU8, unit_batch_large_width_with_qmax) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qu8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .qmax(128) - .TestNWCxQU8(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QU8, small_batch_small_width) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qu8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .TestNWCxQU8(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QU8, small_batch_small_width_with_input_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qu8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .input_stride(5 * channels) - .TestNWCxQU8(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QU8, small_batch_small_width_with_output_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qu8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .output_stride(5 * channels) - .TestNWCxQU8(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QU8, small_batch_large_width) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qu8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .TestNWCxQU8(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QU8, small_batch_large_width_with_input_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qu8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .input_stride(5 * channels) - .TestNWCxQU8(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QU8, small_batch_large_width_with_output_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qu8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .output_stride(5 * channels) - .TestNWCxQU8(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QU8, large_width_multithreaded) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qu8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - GlobalAveragePoolingOperatorTester() - .batch_size(5) - .width(spatial_tile * 3) - .channels(15) - .multithreaded(true) - .TestNWCxQU8(); -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QS8, unit_batch_small_width) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qs8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .TestNWCxQS8(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QS8, unit_batch_small_width_with_input_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qs8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .input_stride(5 * channels) - .TestNWCxQS8(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QS8, unit_batch_small_width_with_input_scale) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qs8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - for (float input_scale = 0.01f; input_scale < 100.0f; input_scale *= 3.14159265f) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .input_scale(input_scale) - .TestNWCxQS8(); - } - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QS8, unit_batch_small_width_with_input_zero_point) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qs8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - for (int32_t input_zero_point = 0; input_zero_point <= 255; input_zero_point += 51) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .input_zero_point(uint8_t(input_zero_point)) - .TestNWCxQS8(); - } - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QS8, unit_batch_small_width_with_output_scale) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qs8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - for (float output_scale = 0.01f; output_scale < 100.0f; output_scale *= 3.14159265f) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .output_scale(output_scale) - .TestNWCxQS8(); - } - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QS8, unit_batch_small_width_with_output_zero_point) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qs8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - for (int32_t output_zero_point = 0; output_zero_point <= 255; output_zero_point += 51) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .output_zero_point(uint8_t(output_zero_point)) - .TestNWCxQS8(); - } - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QS8, unit_batch_small_width_with_qmin) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qs8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .qmin(128) - .TestNWCxQS8(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QS8, unit_batch_small_width_with_qmax) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qs8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .qmax(128) - .TestNWCxQS8(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QS8, unit_batch_large_width) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qs8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .TestNWCxQS8(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QS8, unit_batch_large_width_with_input_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qs8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .input_stride(5 * channels) - .TestNWCxQS8(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QS8, unit_batch_large_width_with_input_scale) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qs8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - for (float input_scale = 0.01f; input_scale < 100.0f; input_scale *= 3.14159265f) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .input_scale(input_scale) - .TestNWCxQS8(); - } - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QS8, unit_batch_large_width_with_input_zero_point) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qs8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - for (int32_t input_zero_point = 0; input_zero_point <= 255; input_zero_point += 51) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .input_zero_point(uint8_t(input_zero_point)) - .TestNWCxQS8(); - } - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QS8, unit_batch_large_width_with_output_scale) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qs8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - for (float output_scale = 0.01f; output_scale < 100.0f; output_scale *= 3.14159265f) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .output_scale(output_scale) - .TestNWCxQS8(); - } - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QS8, unit_batch_large_width_with_output_zero_point) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qs8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - for (int32_t output_zero_point = 0; output_zero_point <= 255; output_zero_point += 51) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .output_zero_point(uint8_t(output_zero_point)) - .TestNWCxQS8(); - } - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QS8, unit_batch_large_width_with_qmin) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qs8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .qmin(128) - .TestNWCxQS8(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QS8, unit_batch_large_width_with_qmax) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qs8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .qmax(128) - .TestNWCxQS8(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QS8, small_batch_small_width) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qs8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .TestNWCxQS8(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QS8, small_batch_small_width_with_input_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qs8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .input_stride(5 * channels) - .TestNWCxQS8(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QS8, small_batch_small_width_with_output_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qs8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .output_stride(5 * channels) - .TestNWCxQS8(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QS8, small_batch_large_width) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qs8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .TestNWCxQS8(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QS8, small_batch_large_width_with_input_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qs8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .input_stride(5 * channels) - .TestNWCxQS8(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QS8, small_batch_large_width_with_output_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qs8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .output_stride(5 * channels) - .TestNWCxQS8(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_QS8, large_width_multithreaded) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_qs8_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - GlobalAveragePoolingOperatorTester() - .batch_size(5) - .width(spatial_tile * 3) - .channels(15) - .multithreaded(true) - .TestNWCxQS8(); -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F16, unit_batch_small_width) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == nullptr) { - GTEST_SKIP(); // F16 unsupported. - } - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .TestNWCxF16(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F16, unit_batch_small_width_with_input_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == nullptr) { - GTEST_SKIP(); // F16 unsupported. - } - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .input_stride(5 * channels) - .TestNWCxF16(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F16, unit_batch_small_width_with_qmin) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == nullptr) { - GTEST_SKIP(); // F16 unsupported. - } - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .qmin(128) - .TestNWCxF16(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F16, unit_batch_small_width_with_qmax) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == nullptr) { - GTEST_SKIP(); // F16 unsupported. - } - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .qmax(128) - .TestNWCxF16(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F16, unit_batch_large_width) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == nullptr) { - GTEST_SKIP(); // F16 unsupported. - } - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .TestNWCxF16(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F16, unit_batch_large_width_with_input_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == nullptr) { - GTEST_SKIP(); // F16 unsupported. - } - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .input_stride(5 * channels) - .TestNWCxF16(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F16, unit_batch_large_width_with_qmin) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == nullptr) { - GTEST_SKIP(); // F16 unsupported. - } - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .qmin(128) - .TestNWCxF16(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F16, unit_batch_large_width_with_qmax) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == nullptr) { - GTEST_SKIP(); // F16 unsupported. - } - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .qmax(128) - .TestNWCxF16(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F16, small_batch_small_width) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == nullptr) { - GTEST_SKIP(); // F16 unsupported. - } - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .TestNWCxF16(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F16, small_batch_small_width_with_input_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == nullptr) { - GTEST_SKIP(); // F16 unsupported. - } - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .input_stride(5 * channels) - .TestNWCxF16(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F16, small_batch_small_width_with_output_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == nullptr) { - GTEST_SKIP(); // F16 unsupported. - } - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .output_stride(5 * channels) - .TestNWCxF16(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F16, small_batch_large_width) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == nullptr) { - GTEST_SKIP(); // F16 unsupported. - } - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .TestNWCxF16(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F16, small_batch_large_width_with_input_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == nullptr) { - GTEST_SKIP(); // F16 unsupported. - } - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .input_stride(5 * channels) - .TestNWCxF16(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F16, small_batch_large_width_with_output_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == nullptr) { - GTEST_SKIP(); // F16 unsupported. - } - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .output_stride(5 * channels) - .TestNWCxF16(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F16, large_width_multithreaded) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == nullptr) { - GTEST_SKIP(); // F16 unsupported. - } - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - GlobalAveragePoolingOperatorTester() - .batch_size(5) - .width(spatial_tile * 3) - .channels(15) - .multithreaded(true) - .TestNWCxF16(); -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F32, unit_batch_small_width) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .TestNWCxF32(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F32, unit_batch_small_width_with_input_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .input_stride(5 * channels) - .TestNWCxF32(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F32, unit_batch_small_width_with_qmin) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .qmin(128) - .TestNWCxF32(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F32, unit_batch_small_width_with_qmax) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .qmax(128) - .TestNWCxF32(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F32, unit_batch_large_width) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .TestNWCxF32(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F32, unit_batch_large_width_with_input_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .input_stride(5 * channels) - .TestNWCxF32(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F32, unit_batch_large_width_with_qmin) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .qmin(128) - .TestNWCxF32(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F32, unit_batch_large_width_with_qmax) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .qmax(128) - .TestNWCxF32(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F32, small_batch_small_width) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .TestNWCxF32(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F32, small_batch_small_width_with_input_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .input_stride(5 * channels) - .TestNWCxF32(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F32, small_batch_small_width_with_output_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .output_stride(5 * channels) - .TestNWCxF32(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F32, small_batch_large_width) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .TestNWCxF32(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F32, small_batch_large_width_with_input_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .input_stride(5 * channels) - .TestNWCxF32(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F32, small_batch_large_width_with_output_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalAveragePoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .output_stride(5 * channels) - .TestNWCxF32(); - } - } -} - -TEST(GLOBAL_AVERAGE_POOLING_NWC_F32, large_width_multithreaded) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - GlobalAveragePoolingOperatorTester() - .batch_size(5) - .width(spatial_tile * 3) - .channels(15) - .multithreaded(true) - .TestNWCxF32(); -} diff --git a/test/global-average-pooling-operator-tester.h b/test/global-average-pooling-operator-tester.h deleted file mode 100644 index 4b36595f2f2..00000000000 --- a/test/global-average-pooling-operator-tester.h +++ /dev/null @@ -1,731 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -// All rights reserved. -// -// Copyright 2019 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include "xnnpack.h" -#include "xnnpack/aligned-allocator.h" -#include "xnnpack/common.h" -#include "xnnpack/math.h" -#include "replicable_random_device.h" -#include "pthreadpool.h" - -class GlobalAveragePoolingOperatorTester { - public: - GlobalAveragePoolingOperatorTester& channels(size_t channels) { - assert(channels != 0); - this->channels_ = channels; - return *this; - } - - size_t channels() const { - return this->channels_; - } - - GlobalAveragePoolingOperatorTester& width(size_t width) { - assert(width != 0); - this->width_ = width; - return *this; - } - - size_t width() const { - return this->width_; - } - - GlobalAveragePoolingOperatorTester& input_stride(size_t input_stride) { - assert(input_stride != 0); - this->input_stride_ = input_stride; - return *this; - } - - size_t input_stride() const { - if (this->input_stride_ == 0) { - return channels(); - } else { - assert(this->input_stride_ >= channels()); - return this->input_stride_; - } - } - - GlobalAveragePoolingOperatorTester& output_stride(size_t output_stride) { - assert(output_stride != 0); - this->output_stride_ = output_stride; - return *this; - } - - size_t output_stride() const { - if (this->output_stride_ == 0) { - return channels(); - } else { - assert(this->output_stride_ >= channels()); - return this->output_stride_; - } - } - - GlobalAveragePoolingOperatorTester& batch_size(size_t batch_size) { - assert(batch_size != 0); - this->batch_size_ = batch_size; - return *this; - } - - size_t batch_size() const { - return this->batch_size_; - } - - GlobalAveragePoolingOperatorTester& input_scale(float input_scale) { - assert(input_scale > 0.0f); - assert(std::isnormal(input_scale)); - this->input_scale_ = input_scale; - return *this; - } - - float input_scale() const { - return this->input_scale_; - } - - GlobalAveragePoolingOperatorTester& input_zero_point(uint8_t input_zero_point) { - this->input_zero_point_ = input_zero_point; - return *this; - } - - uint8_t input_zero_point() const { - return this->input_zero_point_; - } - - GlobalAveragePoolingOperatorTester& output_scale(float output_scale) { - assert(output_scale > 0.0f); - assert(std::isnormal(output_scale)); - this->output_scale_ = output_scale; - return *this; - } - - float output_scale() const { - return this->output_scale_; - } - - GlobalAveragePoolingOperatorTester& output_zero_point(uint8_t output_zero_point) { - this->output_zero_point_ = output_zero_point; - return *this; - } - - uint8_t output_zero_point() const { - return this->output_zero_point_; - } - - GlobalAveragePoolingOperatorTester& qmin(uint8_t qmin) { - this->qmin_ = qmin; - return *this; - } - - uint8_t qmin() const { - return this->qmin_; - } - - GlobalAveragePoolingOperatorTester& qmax(uint8_t qmax) { - this->qmax_ = qmax; - return *this; - } - - uint8_t qmax() const { - return this->qmax_; - } - - GlobalAveragePoolingOperatorTester& multithreaded(size_t multithreaded) { - this->multithreaded_ = multithreaded; - return *this; - } - - size_t multithreaded() const { - return this->multithreaded_; - } - - size_t num_threads() const { - // Do not spin up excessive number of threads for tests. - return multithreaded() ? 5 : 1; - } - - GlobalAveragePoolingOperatorTester& iterations(size_t iterations) { - this->iterations_ = iterations; - return *this; - } - - size_t iterations() const { - return this->iterations_; - } - - void TestNWCxQU8() const { - xnnpack::ReplicableRandomDevice rng; - std::uniform_int_distribution u8dist( - std::numeric_limits::min(), std::numeric_limits::max()); - - std::vector input((batch_size() * width() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint8_t)); - std::vector output(batch_size() * output_stride()); - std::vector output_ref(batch_size() * channels()); - for (size_t iteration = 0; iteration < iterations(); iteration++) { - std::unique_ptr auto_threadpool{nullptr, pthreadpool_destroy}; - if (multithreaded()) { - const pthreadpool_t threadpool = pthreadpool_create(num_threads()); - if (pthreadpool_get_threads_count(threadpool) <= 1) { - GTEST_SKIP(); - } else { - auto_threadpool.reset(threadpool); - } - } - - std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); }); - - // Compute reference results. - const double scale = double(input_scale()) / (double(width()) * double(output_scale())); - for (size_t i = 0; i < batch_size(); i++) { - for (size_t j = 0; j < channels(); j++) { - double acc = 0.0f; - for (size_t k = 0; k < width(); k++) { - acc += double(int32_t(input[(i * width() + k) * input_stride() + j]) - int32_t(input_zero_point())); - } - output_ref[i * channels() + j] = float(acc * scale + double(output_zero_point())); - output_ref[i * channels() + j] = std::min(output_ref[i * channels() + j], float(qmax())); - output_ref[i * channels() + j] = std::max(output_ref[i * channels() + j], float(qmin())); - } - } - - // Create, setup, run, and destroy Global Average Pooling operator. - ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); - xnn_operator_t global_average_pooling_op = nullptr; - - xnn_status status = xnn_create_global_average_pooling_nwc_qu8( - input_zero_point(), input_scale(), - output_zero_point(), output_scale(), - qmin(), qmax(), - 0, &global_average_pooling_op); - if (status == xnn_status_unsupported_hardware) { - GTEST_SKIP(); - } - ASSERT_EQ(xnn_status_success, status); - ASSERT_NE(nullptr, global_average_pooling_op); - - // Smart pointer to automatically delete global_average_pooling_op. - std::unique_ptr auto_global_average_pooling_op(global_average_pooling_op, xnn_delete_operator); - - size_t workspace_size = 0; - size_t workspace_alignment = 0; - ASSERT_EQ(xnn_status_success, - xnn_reshape_global_average_pooling_nwc_qu8( - global_average_pooling_op, - batch_size(), width(), - channels(), input_stride(), output_stride(), - &workspace_size, &workspace_alignment, - auto_threadpool.get())); - - ASSERT_LE(workspace_alignment, XNN_ALLOCATION_ALIGNMENT); - std::vector> workspace(workspace_size); - - ASSERT_EQ(xnn_status_success, - xnn_setup_global_average_pooling_nwc_qu8( - global_average_pooling_op, - workspace.data(), - input.data(), output.data())); - - ASSERT_EQ(xnn_status_success, - xnn_run_operator(global_average_pooling_op, auto_threadpool.get())); - - // Verify results. - for (size_t i = 0; i < batch_size(); i++) { - for (size_t c = 0; c < channels(); c++) { - EXPECT_LE(uint32_t(output[i * output_stride() + c]), uint32_t(qmax())); - EXPECT_GE(uint32_t(output[i * output_stride() + c]), uint32_t(qmin())); - EXPECT_NEAR(float(int32_t(output[i * output_stride() + c])), output_ref[i * channels() + c], 0.80f) - << "at batch index " << i << " / " << batch_size() - << ", channel " << c << " / " << channels(); - } - } - } - } - - void TestNWCxQS8() const { - xnnpack::ReplicableRandomDevice rng; - std::uniform_int_distribution i8dist( - std::numeric_limits::min(), std::numeric_limits::max()); - - std::vector input((batch_size() * width() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(int8_t)); - std::vector output(batch_size() * output_stride()); - std::vector output_ref(batch_size() * channels()); - for (size_t iteration = 0; iteration < iterations(); iteration++) { - std::unique_ptr auto_threadpool{nullptr, pthreadpool_destroy}; - if (multithreaded()) { - const pthreadpool_t threadpool = pthreadpool_create(num_threads()); - if (pthreadpool_get_threads_count(threadpool) <= 1) { - GTEST_SKIP(); - } else { - auto_threadpool.reset(threadpool); - } - } - - std::generate(input.begin(), input.end(), [&]() { return i8dist(rng); }); - - // Compute reference results. - const double scale = double(input_scale()) / (double(width()) * double(output_scale())); - for (size_t i = 0; i < batch_size(); i++) { - for (size_t j = 0; j < channels(); j++) { - double acc = 0.0f; - for (size_t k = 0; k < width(); k++) { - acc += double(int32_t(input[(i * width() + k) * input_stride() + j]) - int32_t(input_zero_point() - 0x80)); - } - output_ref[i * channels() + j] = float(acc * scale + double(output_zero_point() - 0x80)); - output_ref[i * channels() + j] = std::min(output_ref[i * channels() + j], float(qmax() - 0x80)); - output_ref[i * channels() + j] = std::max(output_ref[i * channels() + j], float(qmin() - 0x80)); - } - } - - // Create, setup, run, and destroy Global Average Pooling operator. - ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); - xnn_operator_t global_average_pooling_op = nullptr; - - xnn_status status = xnn_create_global_average_pooling_nwc_qs8( - int8_t(input_zero_point() - 0x80), input_scale(), - int8_t(output_zero_point() - 0x80), output_scale(), - int8_t(qmin() - 0x80), int8_t(qmax() - 0x80), - 0, &global_average_pooling_op); - if (status == xnn_status_unsupported_hardware) { - GTEST_SKIP(); - } - ASSERT_EQ(xnn_status_success, status); - ASSERT_NE(nullptr, global_average_pooling_op); - - // Smart pointer to automatically delete global_average_pooling_op. - std::unique_ptr auto_global_average_pooling_op(global_average_pooling_op, xnn_delete_operator); - - size_t workspace_size = 0; - size_t workspace_alignment = 0; - ASSERT_EQ(xnn_status_success, - xnn_reshape_global_average_pooling_nwc_qs8( - global_average_pooling_op, - batch_size(), width(), - channels(), input_stride(), output_stride(), - &workspace_size, &workspace_alignment, - auto_threadpool.get())); - - ASSERT_LE(workspace_alignment, XNN_ALLOCATION_ALIGNMENT); - std::vector> workspace(workspace_size); - - ASSERT_EQ(xnn_status_success, - xnn_setup_global_average_pooling_nwc_qs8( - global_average_pooling_op, - workspace.data(), - input.data(), output.data())); - - ASSERT_EQ(xnn_status_success, - xnn_run_operator(global_average_pooling_op, auto_threadpool.get())); - - // Verify results. - for (size_t i = 0; i < batch_size(); i++) { - for (size_t c = 0; c < channels(); c++) { - EXPECT_LE(int32_t(output[i * output_stride() + c]), int32_t(qmax() - 0x80)); - EXPECT_GE(int32_t(output[i * output_stride() + c]), int32_t(qmin() - 0x80)); - EXPECT_NEAR(float(int32_t(output[i * output_stride() + c])), output_ref[i * channels() + c], 0.80f) - << "at batch index " << i << " / " << batch_size() - << ", channel " << c << " / " << channels(); - } - } - } - } - - void TestNWCxF16() const { - xnnpack::ReplicableRandomDevice rng; - std::uniform_real_distribution f32dist(1.0e-3f, 1.0f); - - std::vector input((batch_size() * width() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(xnn_float16)); - std::vector output(batch_size() * output_stride()); - std::vector output_ref(batch_size() * channels()); - for (size_t iteration = 0; iteration < iterations(); iteration++) { - std::unique_ptr auto_threadpool{nullptr, pthreadpool_destroy}; - if (multithreaded()) { - const pthreadpool_t threadpool = pthreadpool_create(num_threads()); - if (pthreadpool_get_threads_count(threadpool) <= 1) { - GTEST_SKIP(); - } else { - auto_threadpool.reset(threadpool); - } - } - - std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); }); - - // Compute reference results, without clamping. - for (size_t i = 0; i < batch_size(); i++) { - for (size_t j = 0; j < channels(); j++) { - float acc = 0.0f; - for (size_t k = 0; k < width(); k++) { - acc += input[(i * width() + k) * input_stride() + j]; - } - output_ref[i * channels() + j] = acc / float(width()); - } - } - - // Compute clamping parameters. - const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend()); - const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend()); - const float accumulated_range = accumulated_max - accumulated_min; - const float scaled_min = xnn_float16(accumulated_min + accumulated_range / 255.0f * float(qmin())); - const float scaled_max = xnn_float16(accumulated_max - accumulated_range / 255.0f * float(255 - qmax())); - const float output_min = scaled_min == scaled_max ? -std::numeric_limits::infinity() : scaled_min; - const float output_max = scaled_min == scaled_max ? +std::numeric_limits::infinity() : scaled_max; - - // Clamp reference results. - for (float& value : output_ref) { - value = std::max(std::min(value, output_max), output_min); - } - - // Create, setup, run, and destroy Global Average Pooling operator. - ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); - xnn_operator_t global_average_pooling_op = nullptr; - - xnn_status status = xnn_create_global_average_pooling_nwc_f16( - output_min, output_max, - 0, &global_average_pooling_op); - if (status == xnn_status_unsupported_hardware) { - GTEST_SKIP(); - } - ASSERT_EQ(xnn_status_success, status); - ASSERT_NE(nullptr, global_average_pooling_op); - - // Smart pointer to automatically delete global_average_pooling_op. - std::unique_ptr auto_global_average_pooling_op(global_average_pooling_op, xnn_delete_operator); - - size_t workspace_size = 0; - size_t workspace_alignment = 0; - ASSERT_EQ(xnn_status_success, - xnn_reshape_global_average_pooling_nwc_f16( - global_average_pooling_op, - batch_size(), width(), - channels(), input_stride(), output_stride(), - &workspace_size, &workspace_alignment, - auto_threadpool.get())); - - ASSERT_LE(workspace_alignment, XNN_ALLOCATION_ALIGNMENT); - std::vector> workspace(workspace_size); - - ASSERT_EQ(xnn_status_success, - xnn_setup_global_average_pooling_nwc_f16( - global_average_pooling_op, - workspace.data(), - input.data(), output.data())); - - ASSERT_EQ(xnn_status_success, - xnn_run_operator(global_average_pooling_op, auto_threadpool.get())); - - // Verify results. - for (size_t i = 0; i < batch_size(); i++) { - for (size_t c = 0; c < channels(); c++) { - EXPECT_LE(output[i * output_stride() + c], output_max); - EXPECT_GE(output[i * output_stride() + c], output_min); - EXPECT_NEAR(output[i * output_stride() + c], output_ref[i * channels() + c], std::max(1.0e-4f, std::abs(output_ref[i * channels() + c]) * 1.0e-2f)) - << "at batch index " << i << " / " << batch_size() - << ", channel " << c << " / " << channels(); - } - } - } - } - - void TestNWCxF32() const { - xnnpack::ReplicableRandomDevice rng; - std::uniform_real_distribution f32dist; - - std::vector input((batch_size() * width() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(float)); - std::vector output(batch_size() * output_stride()); - std::vector output_ref(batch_size() * channels()); - for (size_t iteration = 0; iteration < iterations(); iteration++) { - std::unique_ptr auto_threadpool{nullptr, pthreadpool_destroy}; - if (multithreaded()) { - const pthreadpool_t threadpool = pthreadpool_create(num_threads()); - if (pthreadpool_get_threads_count(threadpool) <= 1) { - GTEST_SKIP(); - } else { - auto_threadpool.reset(threadpool); - } - } - - std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); }); - - // Compute reference results, without clamping. - for (size_t i = 0; i < batch_size(); i++) { - for (size_t j = 0; j < channels(); j++) { - float acc = 0.0f; - for (size_t k = 0; k < width(); k++) { - acc += input[(i * width() + k) * input_stride() + j]; - } - output_ref[i * channels() + j] = acc / float(width()); - } - } - - // Compute clamping parameters. - const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend()); - const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend()); - const float accumulated_range = accumulated_max - accumulated_min; - const float output_min = accumulated_range == 0.0f ? - -std::numeric_limits::infinity() : - accumulated_min + accumulated_range / 255.0f * float(qmin()); - const float output_max = accumulated_range == 0.0f ? - +std::numeric_limits::infinity() : - accumulated_max - accumulated_range / 255.0f * float(255 - qmax()); - - // Clamp reference results. - for (float& value : output_ref) { - value = std::max(std::min(value, output_max), output_min); - } - - // Create, setup, run, and destroy Global Average Pooling operator. - ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); - xnn_operator_t global_average_pooling_op = nullptr; - - xnn_status status = xnn_create_global_average_pooling_nwc_f32( - output_min, output_max, - 0, &global_average_pooling_op); - if (status == xnn_status_unsupported_hardware) { - GTEST_SKIP(); - } - ASSERT_EQ(xnn_status_success, status); - ASSERT_NE(nullptr, global_average_pooling_op); - - // Smart pointer to automatically delete global_average_pooling_op. - std::unique_ptr auto_global_average_pooling_op(global_average_pooling_op, xnn_delete_operator); - - size_t workspace_size = 0; - size_t workspace_alignment = 0; - ASSERT_EQ(xnn_status_success, - xnn_reshape_global_average_pooling_nwc_f32( - global_average_pooling_op, - batch_size(), width(), - channels(), input_stride(), output_stride(), - &workspace_size, &workspace_alignment, - auto_threadpool.get())); - - ASSERT_LE(workspace_alignment, XNN_ALLOCATION_ALIGNMENT); - std::vector> workspace(workspace_size); - - ASSERT_EQ(xnn_status_success, - xnn_setup_global_average_pooling_nwc_f32( - global_average_pooling_op, - workspace.data(), - input.data(), output.data())); - - ASSERT_EQ(xnn_status_success, - xnn_run_operator(global_average_pooling_op, auto_threadpool.get())); - - // Verify results. - for (size_t i = 0; i < batch_size(); i++) { - for (size_t c = 0; c < channels(); c++) { - EXPECT_LE(output[i * output_stride() + c], output_max); - EXPECT_GE(output[i * output_stride() + c], output_min); - EXPECT_NEAR(output[i * output_stride() + c], output_ref[i * channels() + c], std::abs(output_ref[i * channels() + c]) * 1.0e-6f) - << "at batch index " << i << " / " << batch_size() - << ", channel " << c << " / " << channels(); - } - } - } - } - - void TestNCWxF16() const { - xnnpack::ReplicableRandomDevice rng; - std::uniform_real_distribution f32dist(1.0e-3f, 1.0f); - - std::vector input(batch_size() * channels() * width() + XNN_EXTRA_BYTES / sizeof(xnn_float16)); - std::vector output(batch_size() * channels()); - std::vector output_ref(batch_size() * channels()); - for (size_t iteration = 0; iteration < iterations(); iteration++) { - std::unique_ptr auto_threadpool{nullptr, pthreadpool_destroy}; - if (multithreaded()) { - const pthreadpool_t threadpool = pthreadpool_create(num_threads()); - if (pthreadpool_get_threads_count(threadpool) <= 1) { - GTEST_SKIP(); - } else { - auto_threadpool.reset(threadpool); - } - } - - std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); }); - - // Compute reference results, without clamping. - for (size_t i = 0; i < batch_size(); i++) { - for (size_t j = 0; j < channels(); j++) { - float acc = 0.0f; - for (size_t k = 0; k < width(); k++) { - acc += input[(i * channels() + j) * width() + k]; - } - output_ref[i * channels() + j] = acc / float(width()); - } - } - - // Compute clamping parameters. - const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend()); - const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend()); - const float accumulated_range = accumulated_max - accumulated_min; - const float scaled_min = xnn_float16(accumulated_min + accumulated_range / 255.0f * float(qmin())); - const float scaled_max = xnn_float16(accumulated_max - accumulated_range / 255.0f * float(255 - qmax())); - const float output_min = scaled_min == scaled_max ? -std::numeric_limits::infinity() : scaled_min; - const float output_max = scaled_min == scaled_max ? +std::numeric_limits::infinity() : scaled_max; - - // Clamp reference results. - for (float& value : output_ref) { - value = std::max(std::min(value, output_max), output_min); - } - - // Create, setup, run, and destroy Global Average Pooling operator. - ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); - xnn_operator_t global_average_pooling_op = nullptr; - - xnn_status status = xnn_create_global_average_pooling_ncw_f16( - output_min, output_max, 0, &global_average_pooling_op); - if (status == xnn_status_unsupported_hardware) { - GTEST_SKIP(); - } - ASSERT_EQ(xnn_status_success, status); - - // Smart pointer to automatically delete global_average_pooling_op. - std::unique_ptr auto_global_average_pooling_op(global_average_pooling_op, xnn_delete_operator); - - ASSERT_EQ(xnn_status_success, - xnn_reshape_global_average_pooling_ncw_f16( - global_average_pooling_op, - batch_size(), width(), channels(), - auto_threadpool.get())); - - ASSERT_EQ(xnn_status_success, - xnn_setup_global_average_pooling_ncw_f16( - global_average_pooling_op, - input.data(), output.data())); - - ASSERT_EQ(xnn_status_success, - xnn_run_operator(global_average_pooling_op, auto_threadpool.get())); - - // Verify results. - for (size_t i = 0; i < batch_size(); i++) { - for (size_t c = 0; c < channels(); c++) { - EXPECT_LE(output[i * channels() + c], output_max); - EXPECT_GE(output[i * channels() + c], output_min); - EXPECT_NEAR(output[i * channels() + c], output_ref[i * channels() + c], std::max(1.0e-4f, std::abs(output_ref[i * channels() + c]) * 1.0e-2f)) - << "at batch index " << i << " / " << batch_size() - << ", channel " << c << " / " << channels(); - } - } - } - } - - void TestNCWxF32() const { - xnnpack::ReplicableRandomDevice rng; - std::uniform_real_distribution f32dist; - - std::vector input(batch_size() * channels() * width() + XNN_EXTRA_BYTES / sizeof(float)); - std::vector output(batch_size() * channels()); - std::vector output_ref(batch_size() * channels()); - for (size_t iteration = 0; iteration < iterations(); iteration++) { - std::unique_ptr auto_threadpool{nullptr, pthreadpool_destroy}; - if (multithreaded()) { - const pthreadpool_t threadpool = pthreadpool_create(num_threads()); - if (pthreadpool_get_threads_count(threadpool) <= 1) { - GTEST_SKIP(); - } else { - auto_threadpool.reset(threadpool); - } - } - - std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); }); - - // Compute reference results, without clamping. - for (size_t i = 0; i < batch_size(); i++) { - for (size_t j = 0; j < channels(); j++) { - float acc = 0.0f; - for (size_t k = 0; k < width(); k++) { - acc += input[(i * channels() + j) * width() + k]; - } - output_ref[i * channels() + j] = acc / float(width()); - } - } - - // Compute clamping parameters. - const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend()); - const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend()); - const float accumulated_range = accumulated_max - accumulated_min; - const float output_min = accumulated_range == 0.0f ? - -std::numeric_limits::infinity() : - accumulated_min + accumulated_range / 255.0f * float(qmin()); - const float output_max = accumulated_range == 0.0f ? - +std::numeric_limits::infinity() : - accumulated_max - accumulated_range / 255.0f * float(255 - qmax()); - - // Clamp reference results. - for (float& value : output_ref) { - value = std::max(std::min(value, output_max), output_min); - } - - // Create, setup, run, and destroy Global Average Pooling operator. - ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); - xnn_operator_t global_average_pooling_op = nullptr; - - xnn_status status = xnn_create_global_average_pooling_ncw_f32( - output_min, output_max, 0, &global_average_pooling_op); - if (status == xnn_status_unsupported_hardware) { - GTEST_SKIP(); - } - ASSERT_EQ(xnn_status_success, status); - - // Smart pointer to automatically delete global_average_pooling_op. - std::unique_ptr auto_global_average_pooling_op(global_average_pooling_op, xnn_delete_operator); - - ASSERT_EQ(xnn_status_success, - xnn_reshape_global_average_pooling_ncw_f32( - global_average_pooling_op, - batch_size(), width(), channels(), - auto_threadpool.get())); - - ASSERT_EQ(xnn_status_success, - xnn_setup_global_average_pooling_ncw_f32( - global_average_pooling_op, - input.data(), output.data())); - - ASSERT_EQ(xnn_status_success, - xnn_run_operator(global_average_pooling_op, auto_threadpool.get())); - - // Verify results. - for (size_t i = 0; i < batch_size(); i++) { - for (size_t c = 0; c < channels(); c++) { - EXPECT_LE(output[i * channels() + c], output_max); - EXPECT_GE(output[i * channels() + c], output_min); - EXPECT_NEAR(output[i * channels() + c], output_ref[i * channels() + c], std::abs(output_ref[i * channels() + c]) * 1.0e-5f) - << "at batch index " << i << " / " << batch_size() - << ", channel " << c << " / " << channels(); - } - } - } - } - - private: - size_t batch_size_{1}; - size_t width_{1}; - size_t channels_{1}; - size_t input_stride_{0}; - size_t output_stride_{0}; - float input_scale_{1.0f}; - float output_scale_{1.0f}; - uint8_t input_zero_point_{121}; - uint8_t output_zero_point_{133}; - uint8_t qmin_{0}; - uint8_t qmax_{255}; - bool multithreaded_{false}; - size_t iterations_{1}; -}; diff --git a/test/global-sum-pooling-1d.cc b/test/global-sum-pooling-1d.cc index bd7c813c762..ac2a09bbfd9 100644 --- a/test/global-sum-pooling-1d.cc +++ b/test/global-sum-pooling-1d.cc @@ -4,8 +4,6 @@ // LICENSE file in the root directory of this source tree. #include -#include -#include #include #include #include @@ -17,11 +15,8 @@ #include #include "xnnpack.h" -#include "xnnpack/aligned-allocator.h" -#include "xnnpack/common.h" #include "xnnpack/math.h" #include "xnnpack/node-type.h" -#include "xnnpack/operator.h" #include "xnnpack/subgraph.h" #include "replicable_random_device.h" @@ -43,7 +38,8 @@ template class GlobalSumPooling1DTest : public ::testing::Test { input_width = input_dims[input_dims.size() - 2]; channels = input_dims[input_dims.size() - 1]; - input = std::vector(XNN_EXTRA_BYTES / sizeof(T) + NumElements(input_dims)); + input = + std::vector(XNN_EXTRA_BYTES / sizeof(T) + NumElements(input_dims)); operator_output = std::vector(NumElements(output_dims)); subgraph_output = std::vector(operator_output.size()); } @@ -58,7 +54,8 @@ template class GlobalSumPooling1DTest : public ::testing::Test { size_t NumElements(std::vector& dims) { - return std::accumulate(dims.begin(), dims.end(), size_t(1), std::multiplies()); + return std::accumulate( + dims.begin(), dims.end(), size_t(1), std::multiplies()); } xnnpack::ReplicableRandomDevice rng; @@ -111,14 +108,14 @@ TEST_F(GlobalSumPooling1DTestF16, define) ASSERT_EQ(subgraph->num_nodes, 1); const struct xnn_node* node = &subgraph->nodes[0]; - ASSERT_EQ(node->type, xnn_node_type_global_sum_pooling_1d); + ASSERT_EQ(node->type, xnn_node_type_static_sum); ASSERT_EQ(node->compute_type, xnn_compute_type_fp16); - ASSERT_EQ(node->activation.output_min, output_min); - ASSERT_EQ(node->activation.output_max, output_max); ASSERT_EQ(node->num_inputs, 1); ASSERT_EQ(node->inputs[0], input_id); ASSERT_EQ(node->num_outputs, 1); ASSERT_EQ(node->outputs[0], output_id); + ASSERT_EQ(node->params.reduce.num_reduction_axes, 1); + ASSERT_EQ(node->params.reduce.reduction_axes[0], input_dims.size() - 2); ASSERT_EQ(node->flags, 0); } @@ -150,284 +147,13 @@ TEST_F(GlobalSumPooling1DTestF32, define) ASSERT_EQ(subgraph->num_nodes, 1); const struct xnn_node* node = &subgraph->nodes[0]; - ASSERT_EQ(node->type, xnn_node_type_global_sum_pooling_1d); + ASSERT_EQ(node->type, xnn_node_type_static_sum); ASSERT_EQ(node->compute_type, xnn_compute_type_fp32); - ASSERT_EQ(node->activation.output_min, output_min); - ASSERT_EQ(node->activation.output_max, output_max); ASSERT_EQ(node->num_inputs, 1); ASSERT_EQ(node->inputs[0], input_id); ASSERT_EQ(node->num_outputs, 1); ASSERT_EQ(node->outputs[0], output_id); + ASSERT_EQ(node->params.reduce.num_reduction_axes, 1); + ASSERT_EQ(node->params.reduce.reduction_axes[0], input_dims.size() - 2); ASSERT_EQ(node->flags, 0); } - -TEST_F(GlobalSumPooling1DTestF16, matches_operator_api) -{ - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - xnn_operator_t op = nullptr; - - std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); }); - - // Call operator API. - const xnn_status status = xnn_create_global_sum_pooling_nwc_f16( - output_min, output_max, /*flags=*/0, &op); - std::unique_ptr auto_op(op, xnn_delete_operator); - - if (status == xnn_status_unsupported_hardware) { - GTEST_SKIP(); - } - - ASSERT_EQ(xnn_status_success, status); - ASSERT_NE(nullptr, op); - size_t workspace_size = 0; - size_t workspace_alignment = 0; - ASSERT_EQ( - xnn_status_success, xnn_reshape_global_sum_pooling_nwc_f16( - op, batch_size, input_width, channels, channels, channels, - &workspace_size, &workspace_alignment, /*threadpool=*/nullptr)); - ASSERT_LE(workspace_alignment, XNN_ALLOCATION_ALIGNMENT); - - std::vector> workspace(workspace_size); - ASSERT_EQ( - xnn_status_success, - xnn_setup_global_sum_pooling_nwc_f16(op, workspace.data(), input.data(), operator_output.data())); - - ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr)); - - // Call subgraph API. - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - uint32_t input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp16, input_dims.size(), input_dims.data(), nullptr, - /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - uint32_t output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp16, output_dims.size(), output_dims.data(), nullptr, - /*external_id=*/1, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - ASSERT_EQ( - xnn_status_success, - xnn_define_global_sum_pooling_1d(subgraph, output_min, output_max, input_id, output_id, /*flags=*/0)); - - xnn_runtime_t runtime = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime)); - ASSERT_NE(nullptr, runtime); - std::unique_ptr auto_runtime(runtime, xnn_delete_runtime); - std::array external = { - xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}}; - ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data())); - ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime)); - - ASSERT_EQ(subgraph_output, operator_output); -} - -TEST_F(GlobalSumPooling1DTestF32, matches_operator_api) -{ - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - xnn_operator_t op = nullptr; - - std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); }); - - // Call operator API. - const xnn_status status = xnn_create_global_sum_pooling_nwc_f32( - output_min, output_max, /*flags=*/0, &op); - std::unique_ptr auto_op(op, xnn_delete_operator); - - if (status == xnn_status_unsupported_hardware) { - GTEST_SKIP(); - } - - ASSERT_EQ(xnn_status_success, status); - ASSERT_NE(nullptr, op); - size_t workspace_size = 0; - size_t workspace_alignment = 0; - ASSERT_EQ( - xnn_status_success, xnn_reshape_global_sum_pooling_nwc_f32( - op, batch_size, input_width, channels, channels, channels, - &workspace_size, &workspace_alignment, /*threadpool=*/nullptr)); - ASSERT_LE(workspace_alignment, XNN_ALLOCATION_ALIGNMENT); - - std::vector> workspace(workspace_size); - ASSERT_EQ( - xnn_status_success, - xnn_setup_global_sum_pooling_nwc_f32(op, workspace.data(), input.data(), operator_output.data())); - - ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr)); - - // Call subgraph API. - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - uint32_t input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, input_dims.size(), input_dims.data(), nullptr, - /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - uint32_t output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, output_dims.size(), output_dims.data(), nullptr, - /*external_id=*/1, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - ASSERT_EQ( - xnn_status_success, - xnn_define_global_sum_pooling_1d(subgraph, output_min, output_max, input_id, output_id, /*flags=*/0)); - - xnn_runtime_t runtime = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime)); - ASSERT_NE(nullptr, runtime); - std::unique_ptr auto_runtime(runtime, xnn_delete_runtime); - std::array external = { - xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}}; - ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data())); - ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime)); - - ASSERT_EQ(subgraph_output, operator_output); -} - -TEST_F(GlobalSumPooling1DTestF32, reshape_output_no_keep_dims) -{ - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - // Call subgraph API. - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - uint32_t input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, input_dims.size(), input_dims.data(), nullptr, - /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - uint32_t output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, output_dims.size(), output_dims.data(), nullptr, - /*external_id=*/1, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - ASSERT_EQ( - xnn_status_success, - xnn_define_global_sum_pooling_1d(subgraph, output_min, output_max, input_id, output_id, /*flags=*/0)); - - xnn_runtime_t runtime = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime)); - ASSERT_NE(nullptr, runtime); - std::unique_ptr auto_runtime(runtime, xnn_delete_runtime); - std::array external = { - xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}}; - ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data())); - ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime)); - - const size_t num_input_dims = input_dims.size(); - const size_t num_batch_dims = num_input_dims - 2; - const struct xnn_node* node = &subgraph->nodes[0]; - const xnn_shape* output_shape = &runtime->values[node->outputs[0]].shape; - for (size_t i = 0; i < num_batch_dims; ++i) { - ASSERT_EQ(output_shape->dim[i], input_dims[i]); - } - ASSERT_EQ(output_shape->dim[num_input_dims - 2], input_dims[num_input_dims - 1]); - - input_dims[0] += 2; - input_dims[num_input_dims - 1] += 3; - ASSERT_EQ(xnn_status_success, xnn_reshape_external_value(runtime, input_id, input_dims.size(), input_dims.data())); - ASSERT_EQ(node->reshape(&runtime->opdata[0], runtime->values, runtime->num_values, /*threadpool=*/nullptr), xnn_status_reallocation_required); - for (size_t i = 0; i < num_batch_dims; ++i) { - ASSERT_EQ(output_shape->dim[i], input_dims[i]); - } - ASSERT_EQ(output_shape->dim[num_input_dims - 2], input_dims[num_input_dims - 1]); - - input_dims[0] -= 2; - input_dims[num_input_dims - 1] -= 3; - ASSERT_EQ(xnn_status_success, xnn_reshape_external_value(runtime, input_id, input_dims.size(), input_dims.data())); - ASSERT_EQ(node->reshape(&runtime->opdata[0], runtime->values, runtime->num_values, /*threadpool=*/nullptr), xnn_status_success); - for (size_t i = 0; i < num_batch_dims; ++i) { - ASSERT_EQ(output_shape->dim[i], input_dims[i]); - } - ASSERT_EQ(output_shape->dim[num_input_dims - 2], input_dims[num_input_dims - 1]); -} - -TEST_F(GlobalSumPooling1DTestF32, reshape_output_keep_dims) -{ - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - // Call subgraph API. - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - uint32_t input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, input_dims.size(), input_dims.data(), nullptr, - /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - uint32_t output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, output_dims.size(), output_dims.data(), nullptr, - /*external_id=*/1, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - ASSERT_EQ( - xnn_status_success, - xnn_define_global_sum_pooling_1d(subgraph, output_min, output_max, input_id, output_id, XNN_FLAG_KEEP_DIMS)); - - xnn_runtime_t runtime = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime)); - ASSERT_NE(nullptr, runtime); - std::unique_ptr auto_runtime(runtime, xnn_delete_runtime); - std::array external = { - xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}}; - ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data())); - ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime)); - - const size_t num_input_dims = input_dims.size(); - const size_t num_batch_dims = num_input_dims - 2; - const struct xnn_node* node = &subgraph->nodes[0]; - const xnn_shape* output_shape = &runtime->values[node->outputs[0]].shape; - for (size_t i = 0; i < num_batch_dims; ++i) { - ASSERT_EQ(output_shape->dim[i], input_dims[i]); - } - for (size_t i = num_batch_dims; i < num_input_dims - 1; ++i) { - ASSERT_EQ(output_shape->dim[i], 1); - } - ASSERT_EQ(output_shape->dim[num_input_dims - 1], input_dims[num_input_dims - 1]); - input_dims[0] += 2; - input_dims[num_input_dims - 1] += 3; - ASSERT_EQ(xnn_status_success, xnn_reshape_external_value(runtime, input_id, input_dims.size(), input_dims.data())); - ASSERT_EQ(node->reshape(&runtime->opdata[0], runtime->values, runtime->num_values, /*threadpool=*/nullptr), xnn_status_reallocation_required); - - for (size_t i = 0; i < num_batch_dims; ++i) { - ASSERT_EQ(output_shape->dim[i], input_dims[i]); - } - for (size_t i = num_batch_dims; i < num_input_dims - 1; ++i) { - ASSERT_EQ(output_shape->dim[i], 1); - } - ASSERT_EQ(output_shape->dim[num_input_dims - 1], input_dims[num_input_dims - 1]); - input_dims[0] -= 2; - input_dims[num_input_dims - 1] -= 3; - ASSERT_EQ(xnn_status_success, xnn_reshape_external_value(runtime, input_id, input_dims.size(), input_dims.data())); - ASSERT_EQ(node->reshape(&runtime->opdata[0], runtime->values, runtime->num_values, /*threadpool=*/nullptr), xnn_status_success); - - for (size_t i = 0; i < num_batch_dims; ++i) { - ASSERT_EQ(output_shape->dim[i], input_dims[i]); - } - for (size_t i = num_batch_dims; i < num_input_dims - 1; ++i) { - ASSERT_EQ(output_shape->dim[i], 1); - } - ASSERT_EQ(output_shape->dim[num_input_dims - 1], input_dims[num_input_dims - 1]); -} diff --git a/test/global-sum-pooling-2d.cc b/test/global-sum-pooling-2d.cc index d23a0eff24d..cfdd2bad94c 100644 --- a/test/global-sum-pooling-2d.cc +++ b/test/global-sum-pooling-2d.cc @@ -4,8 +4,6 @@ // LICENSE file in the root directory of this source tree. #include -#include -#include #include #include #include @@ -17,11 +15,8 @@ #include #include "xnnpack.h" -#include "xnnpack/aligned-allocator.h" -#include "xnnpack/common.h" #include "xnnpack/math.h" #include "xnnpack/node-type.h" -#include "xnnpack/operator.h" #include "xnnpack/subgraph.h" #include "replicable_random_device.h" @@ -36,14 +31,16 @@ template class GlobalSumPooling2DTest : public ::testing::Test { output_dims = input_dims; output_dims[output_dims.size() - 3] = 1; output_dims[output_dims.size() - 2] = 1; - input = std::vector(XNN_EXTRA_BYTES / sizeof(T) + NumElements(input_dims)); + input = + std::vector(XNN_EXTRA_BYTES / sizeof(T) + NumElements(input_dims)); operator_output = std::vector(NumElements(output_dims)); subgraph_output = std::vector(operator_output.size()); batch_size = 1; for (size_t i = 0; i < input_dims.size() - 3; i++) { batch_size *= input_dims[i]; } - input_width = input_dims[input_dims.size() - 3] * input_dims[input_dims.size() - 2]; + input_width = + input_dims[input_dims.size() - 3] * input_dims[input_dims.size() - 2]; channels = input_dims[input_dims.size() - 1]; } @@ -55,7 +52,8 @@ template class GlobalSumPooling2DTest : public ::testing::Test { size_t NumElements(std::vector& dims) { - return std::accumulate(dims.begin(), dims.end(), size_t(1), std::multiplies()); + return std::accumulate( + dims.begin(), dims.end(), size_t(1), std::multiplies()); } xnnpack::ReplicableRandomDevice rng; @@ -108,14 +106,15 @@ TEST_F(GlobalSumPooling2DTestF16, define) ASSERT_EQ(subgraph->num_nodes, 1); const struct xnn_node* node = &subgraph->nodes[0]; - ASSERT_EQ(node->type, xnn_node_type_global_sum_pooling_2d); + ASSERT_EQ(node->type, xnn_node_type_static_sum); ASSERT_EQ(node->compute_type, xnn_compute_type_fp16); - ASSERT_EQ(node->activation.output_min, output_min); - ASSERT_EQ(node->activation.output_max, output_max); ASSERT_EQ(node->num_inputs, 1); ASSERT_EQ(node->inputs[0], input_id); ASSERT_EQ(node->num_outputs, 1); ASSERT_EQ(node->outputs[0], output_id); + ASSERT_EQ(node->params.reduce.num_reduction_axes, 2); + ASSERT_EQ(node->params.reduce.reduction_axes[0], input_dims.size() - 3); + ASSERT_EQ(node->params.reduce.reduction_axes[1], input_dims.size() - 2); ASSERT_EQ(node->flags, 0); } @@ -147,286 +146,14 @@ TEST_F(GlobalSumPooling2DTestF32, define) ASSERT_EQ(subgraph->num_nodes, 1); const struct xnn_node* node = &subgraph->nodes[0]; - ASSERT_EQ(node->type, xnn_node_type_global_sum_pooling_2d); + ASSERT_EQ(node->type, xnn_node_type_static_sum); ASSERT_EQ(node->compute_type, xnn_compute_type_fp32); - ASSERT_EQ(node->activation.output_min, output_min); - ASSERT_EQ(node->activation.output_max, output_max); ASSERT_EQ(node->num_inputs, 1); ASSERT_EQ(node->inputs[0], input_id); ASSERT_EQ(node->num_outputs, 1); ASSERT_EQ(node->outputs[0], output_id); + ASSERT_EQ(node->params.reduce.num_reduction_axes, 2); + ASSERT_EQ(node->params.reduce.reduction_axes[0], input_dims.size() - 3); + ASSERT_EQ(node->params.reduce.reduction_axes[1], input_dims.size() - 2); ASSERT_EQ(node->flags, 0); } - -TEST_F(GlobalSumPooling2DTestF16, matches_operator_api) -{ - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - xnn_operator_t op = nullptr; - - std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); }); - - // Call operator API. - const xnn_status status = xnn_create_global_sum_pooling_nwc_f16( - output_min, output_max, /*flags=*/0, &op); - std::unique_ptr auto_op(op, xnn_delete_operator); - - if (status == xnn_status_unsupported_hardware) { - GTEST_SKIP(); - } - - ASSERT_EQ(xnn_status_success, status); - ASSERT_NE(nullptr, op); - size_t workspace_size = 0; - size_t workspace_alignment = 0; - ASSERT_EQ( - xnn_status_success, xnn_reshape_global_sum_pooling_nwc_f16( - op, batch_size, input_width, channels, - /*input_stride=*/channels, /*output_stride=*/ channels, - &workspace_size, &workspace_alignment, /*threadpool=*/nullptr)); - ASSERT_LE(workspace_alignment, XNN_ALLOCATION_ALIGNMENT); - - std::vector> workspace(workspace_size); - ASSERT_EQ( - xnn_status_success, - xnn_setup_global_sum_pooling_nwc_f16(op, workspace.data(), input.data(), operator_output.data())); - - ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr)); - - // Call subgraph API. - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - uint32_t input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp16, input_dims.size(), input_dims.data(), nullptr, - /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - uint32_t output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp16, output_dims.size(), output_dims.data(), nullptr, - /*external_id=*/1, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - ASSERT_EQ( - xnn_status_success, - xnn_define_global_sum_pooling_2d(subgraph, output_min, output_max, input_id, output_id, /*flags=*/0)); - - xnn_runtime_t runtime = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime)); - ASSERT_NE(nullptr, runtime); - std::unique_ptr auto_runtime(runtime, xnn_delete_runtime); - std::array external = { - xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}}; - ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data())); - ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime)); - - ASSERT_EQ(subgraph_output, operator_output); -} - -TEST_F(GlobalSumPooling2DTestF32, matches_operator_api) -{ - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - xnn_operator_t op = nullptr; - - std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); }); - - // Call operator API. - const xnn_status status = xnn_create_global_sum_pooling_nwc_f32( - output_min, output_max, /*flags=*/0, &op); - std::unique_ptr auto_op(op, xnn_delete_operator); - - if (status == xnn_status_unsupported_hardware) { - GTEST_SKIP(); - } - - ASSERT_EQ(xnn_status_success, status); - ASSERT_NE(nullptr, op); - size_t workspace_size = 0; - size_t workspace_alignment = 0; - ASSERT_EQ( - xnn_status_success, xnn_reshape_global_sum_pooling_nwc_f32( - op, batch_size, input_width, channels, - /*input_stride=*/channels, /*output_stride=*/ channels, - &workspace_size, &workspace_alignment, /*threadpool=*/nullptr)); - ASSERT_LE(workspace_alignment, XNN_ALLOCATION_ALIGNMENT); - - std::vector> workspace(workspace_size); - ASSERT_EQ( - xnn_status_success, - xnn_setup_global_sum_pooling_nwc_f32(op, workspace.data(), input.data(), operator_output.data())); - - ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr)); - - // Call subgraph API. - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - uint32_t input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, input_dims.size(), input_dims.data(), nullptr, - /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - uint32_t output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, output_dims.size(), output_dims.data(), nullptr, - /*external_id=*/1, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - ASSERT_EQ( - xnn_status_success, - xnn_define_global_sum_pooling_2d(subgraph, output_min, output_max, input_id, output_id, /*flags=*/0)); - - xnn_runtime_t runtime = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime)); - ASSERT_NE(nullptr, runtime); - std::unique_ptr auto_runtime(runtime, xnn_delete_runtime); - std::array external = { - xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}}; - ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data())); - ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime)); - - ASSERT_EQ(subgraph_output, operator_output); -} - -TEST_F(GlobalSumPooling2DTestF32, reshape_output_no_keep_dims) -{ - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - // Call subgraph API. - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - uint32_t input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, input_dims.size(), input_dims.data(), nullptr, - /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - uint32_t output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, output_dims.size(), output_dims.data(), nullptr, - /*external_id=*/1, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - ASSERT_EQ( - xnn_status_success, - xnn_define_global_sum_pooling_2d(subgraph, output_min, output_max, input_id, output_id, /*flags=*/0)); - - xnn_runtime_t runtime = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime)); - ASSERT_NE(nullptr, runtime); - std::unique_ptr auto_runtime(runtime, xnn_delete_runtime); - std::array external = { - xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}}; - ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data())); - ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime)); - - const size_t num_input_dims = input_dims.size(); - const size_t num_batch_dims = num_input_dims - 3; - const struct xnn_node* node = &subgraph->nodes[0]; - const xnn_shape* output_shape = &runtime->values[node->outputs[0]].shape; - for (size_t i = 0; i < num_batch_dims; ++i) { - ASSERT_EQ(output_shape->dim[i], input_dims[i]); - } - ASSERT_EQ(output_shape->dim[num_input_dims - 3], input_dims[num_input_dims - 1]); - - input_dims[0] += 2; - input_dims[num_input_dims - 1] += 3; - ASSERT_EQ(xnn_status_success, xnn_reshape_external_value(runtime, input_id, input_dims.size(), input_dims.data())); - ASSERT_EQ(node->reshape(&runtime->opdata[0], runtime->values, runtime->num_values, /*threadpool=*/nullptr), xnn_status_reallocation_required); - for (size_t i = 0; i < num_batch_dims; ++i) { - ASSERT_EQ(output_shape->dim[i], input_dims[i]); - } - ASSERT_EQ(output_shape->dim[num_input_dims - 3], input_dims[num_input_dims - 1]); - - input_dims[0] -= 2; - input_dims[num_input_dims - 1] -= 3; - ASSERT_EQ(xnn_status_success, xnn_reshape_external_value(runtime, input_id, input_dims.size(), input_dims.data())); - ASSERT_EQ(node->reshape(&runtime->opdata[0], runtime->values, runtime->num_values, /*threadpool=*/nullptr), xnn_status_success); - for (size_t i = 0; i < num_batch_dims; ++i) { - ASSERT_EQ(output_shape->dim[i], input_dims[i]); - } - ASSERT_EQ(output_shape->dim[num_input_dims - 3], input_dims[num_input_dims - 1]); -} - -TEST_F(GlobalSumPooling2DTestF32, reshape_output_keep_dims) -{ - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - // Call subgraph API. - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - uint32_t input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, input_dims.size(), input_dims.data(), nullptr, - /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - uint32_t output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, output_dims.size(), output_dims.data(), nullptr, - /*external_id=*/1, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - ASSERT_EQ( - xnn_status_success, - xnn_define_global_sum_pooling_2d(subgraph, output_min, output_max, input_id, output_id, XNN_FLAG_KEEP_DIMS)); - - xnn_runtime_t runtime = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime)); - ASSERT_NE(nullptr, runtime); - std::unique_ptr auto_runtime(runtime, xnn_delete_runtime); - std::array external = { - xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}}; - ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data())); - ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime)); - - const size_t num_input_dims = input_dims.size(); - const size_t num_batch_dims = num_input_dims - 3; - const struct xnn_node* node = &subgraph->nodes[0]; - const xnn_shape* output_shape = &runtime->values[node->outputs[0]].shape; - for (size_t i = 0; i < num_batch_dims; ++i) { - ASSERT_EQ(output_shape->dim[i], input_dims[i]); - } - for (size_t i = num_batch_dims; i < num_input_dims - 1; ++i) { - ASSERT_EQ(output_shape->dim[i], 1); - } - ASSERT_EQ(output_shape->dim[num_input_dims - 1], input_dims[num_input_dims - 1]); - input_dims[0] += 2; - input_dims[num_input_dims - 1] += 3; - - ASSERT_EQ(xnn_status_success, xnn_reshape_external_value(runtime, input_id, input_dims.size(), input_dims.data())); - ASSERT_EQ(node->reshape(&runtime->opdata[0], runtime->values, runtime->num_values, /*threadpool=*/nullptr), xnn_status_reallocation_required); - for (size_t i = 0; i < num_batch_dims; ++i) { - ASSERT_EQ(output_shape->dim[i], input_dims[i]); - } - for (size_t i = num_batch_dims; i < num_input_dims - 1; ++i) { - ASSERT_EQ(output_shape->dim[i], 1); - } - ASSERT_EQ(output_shape->dim[num_input_dims - 1], input_dims[num_input_dims - 1]); - - input_dims[0] -= 2; - input_dims[num_input_dims - 1] -= 3; - ASSERT_EQ(xnn_status_success, xnn_reshape_external_value(runtime, input_id, input_dims.size(), input_dims.data())); - ASSERT_EQ(node->reshape(&runtime->opdata[0], runtime->values, runtime->num_values, /*threadpool=*/nullptr), xnn_status_success); - for (size_t i = 0; i < num_batch_dims; ++i) { - ASSERT_EQ(output_shape->dim[i], input_dims[i]); - } - for (size_t i = num_batch_dims; i < num_input_dims - 1; ++i) { - ASSERT_EQ(output_shape->dim[i], 1); - } - ASSERT_EQ(output_shape->dim[num_input_dims - 1], input_dims[num_input_dims - 1]); -} diff --git a/test/global-sum-pooling-nwc.cc b/test/global-sum-pooling-nwc.cc deleted file mode 100644 index 8b56eb44bff..00000000000 --- a/test/global-sum-pooling-nwc.cc +++ /dev/null @@ -1,483 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -// All rights reserved. -// -// Copyright 2019 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. - -#include -#include -#include - -#include -#include "xnnpack/config.h" -#include "global-sum-pooling-operator-tester.h" - -TEST(GLOBAL_SUM_POOLING_NWC_F16, unit_batch_small_width) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == nullptr) { - GTEST_SKIP(); // F16 unsupported. - } - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalSumPoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .TestNWCxF16(); - } - } -} - -TEST(GLOBAL_SUM_POOLING_NWC_F16, unit_batch_small_width_with_input_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == nullptr) { - GTEST_SKIP(); // F16 unsupported. - } - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalSumPoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .input_stride(5 * channels) - .TestNWCxF16(); - } - } -} - -TEST(GLOBAL_SUM_POOLING_NWC_F16, unit_batch_small_width_with_qmin) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == nullptr) { - GTEST_SKIP(); // F16 unsupported. - } - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalSumPoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .qmin(128) - .TestNWCxF16(); - } - } -} - -TEST(GLOBAL_SUM_POOLING_NWC_F16, unit_batch_small_width_with_qmax) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == nullptr) { - GTEST_SKIP(); // F16 unsupported. - } - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalSumPoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .qmax(128) - .TestNWCxF16(); - } - } -} - -TEST(GLOBAL_SUM_POOLING_NWC_F16, unit_batch_large_width) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == nullptr) { - GTEST_SKIP(); // F16 unsupported. - } - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalSumPoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .TestNWCxF16(); - } - } -} - -TEST(GLOBAL_SUM_POOLING_NWC_F16, unit_batch_large_width_with_input_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == nullptr) { - GTEST_SKIP(); // F16 unsupported. - } - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalSumPoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .input_stride(5 * channels) - .TestNWCxF16(); - } - } -} - -TEST(GLOBAL_SUM_POOLING_NWC_F16, unit_batch_large_width_with_qmin) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == nullptr) { - GTEST_SKIP(); // F16 unsupported. - } - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalSumPoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .qmin(128) - .TestNWCxF16(); - } - } -} - -TEST(GLOBAL_SUM_POOLING_NWC_F16, unit_batch_large_width_with_qmax) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == nullptr) { - GTEST_SKIP(); // F16 unsupported. - } - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalSumPoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .qmax(128) - .TestNWCxF16(); - } - } -} - -TEST(GLOBAL_SUM_POOLING_NWC_F16, small_batch_small_width) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == nullptr) { - GTEST_SKIP(); // F16 unsupported. - } - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalSumPoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .TestNWCxF16(); - } - } -} - -TEST(GLOBAL_SUM_POOLING_NWC_F16, small_batch_small_width_with_input_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == nullptr) { - GTEST_SKIP(); // F16 unsupported. - } - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalSumPoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .input_stride(5 * channels) - .TestNWCxF16(); - } - } -} - -TEST(GLOBAL_SUM_POOLING_NWC_F16, small_batch_small_width_with_output_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == nullptr) { - GTEST_SKIP(); // F16 unsupported. - } - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalSumPoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .output_stride(5 * channels) - .TestNWCxF16(); - } - } -} - -TEST(GLOBAL_SUM_POOLING_NWC_F16, small_batch_large_width) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == nullptr) { - GTEST_SKIP(); // F16 unsupported. - } - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalSumPoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .TestNWCxF16(); - } - } -} - -TEST(GLOBAL_SUM_POOLING_NWC_F16, small_batch_large_width_with_input_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == nullptr) { - GTEST_SKIP(); // F16 unsupported. - } - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalSumPoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .input_stride(5 * channels) - .TestNWCxF16(); - } - } -} - -TEST(GLOBAL_SUM_POOLING_NWC_F16, small_batch_large_width_with_output_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f16_gavgpool_config(); - if (gavgpool_config == nullptr) { - GTEST_SKIP(); // F16 unsupported. - } - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalSumPoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .output_stride(5 * channels) - .TestNWCxF16(); - } - } -} - -TEST(GLOBAL_SUM_POOLING_NWC_F32, unit_batch_small_width) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalSumPoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .TestNWCxF32(); - } - } -} - -TEST(GLOBAL_SUM_POOLING_NWC_F32, unit_batch_small_width_with_input_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalSumPoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .input_stride(5 * channels) - .TestNWCxF32(); - } - } -} - -TEST(GLOBAL_SUM_POOLING_NWC_F32, unit_batch_small_width_with_qmin) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalSumPoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .qmin(128) - .TestNWCxF32(); - } - } -} - -TEST(GLOBAL_SUM_POOLING_NWC_F32, unit_batch_small_width_with_qmax) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalSumPoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .qmax(128) - .TestNWCxF32(); - } - } -} - -TEST(GLOBAL_SUM_POOLING_NWC_F32, unit_batch_large_width) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalSumPoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .TestNWCxF32(); - } - } -} - -TEST(GLOBAL_SUM_POOLING_NWC_F32, unit_batch_large_width_with_input_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalSumPoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .input_stride(5 * channels) - .TestNWCxF32(); - } - } -} - -TEST(GLOBAL_SUM_POOLING_NWC_F32, unit_batch_large_width_with_qmin) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalSumPoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .qmin(128) - .TestNWCxF32(); - } - } -} - -TEST(GLOBAL_SUM_POOLING_NWC_F32, unit_batch_large_width_with_qmax) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalSumPoolingOperatorTester() - .batch_size(1) - .width(width) - .channels(channels) - .qmax(128) - .TestNWCxF32(); - } - } -} - -TEST(GLOBAL_SUM_POOLING_NWC_F32, small_batch_small_width) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalSumPoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .TestNWCxF32(); - } - } -} - -TEST(GLOBAL_SUM_POOLING_NWC_F32, small_batch_small_width_with_input_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalSumPoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .input_stride(5 * channels) - .TestNWCxF32(); - } - } -} - -TEST(GLOBAL_SUM_POOLING_NWC_F32, small_batch_small_width_with_output_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = 1; width <= spatial_tile; width++) { - GlobalSumPoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .output_stride(5 * channels) - .TestNWCxF32(); - } - } -} - -TEST(GLOBAL_SUM_POOLING_NWC_F32, small_batch_large_width) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalSumPoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .TestNWCxF32(); - } - } -} - -TEST(GLOBAL_SUM_POOLING_NWC_F32, small_batch_large_width_with_input_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalSumPoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .input_stride(5 * channels) - .TestNWCxF32(); - } - } -} - -TEST(GLOBAL_SUM_POOLING_NWC_F32, small_batch_large_width_with_output_stride) { - const struct xnn_gavgpool_config* gavgpool_config = xnn_init_f32_gavgpool_config(); - ASSERT_NE(gavgpool_config, nullptr); - const uint32_t spatial_tile = std::max(gavgpool_config->row_tile, 1); - for (size_t channels = 1; channels <= 100; channels += 15) { - for (size_t width = spatial_tile; width <= 4 * spatial_tile; width++) { - GlobalSumPoolingOperatorTester() - .batch_size(3) - .width(width) - .channels(channels) - .output_stride(5 * channels) - .TestNWCxF32(); - } - } -} diff --git a/test/global-sum-pooling-operator-tester.h b/test/global-sum-pooling-operator-tester.h deleted file mode 100644 index 1bf0bfd0612..00000000000 --- a/test/global-sum-pooling-operator-tester.h +++ /dev/null @@ -1,301 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -// All rights reserved. -// -// Copyright 2019 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include "xnnpack.h" -#include "xnnpack/aligned-allocator.h" -#include "xnnpack/common.h" -#include "xnnpack/math.h" -#include "replicable_random_device.h" - -class GlobalSumPoolingOperatorTester { - public: - GlobalSumPoolingOperatorTester& channels(size_t channels) { - assert(channels != 0); - this->channels_ = channels; - return *this; - } - - size_t channels() const { - return this->channels_; - } - - GlobalSumPoolingOperatorTester& width(size_t width) { - assert(width != 0); - this->width_ = width; - return *this; - } - - size_t width() const { - return this->width_; - } - - GlobalSumPoolingOperatorTester& input_stride(size_t input_stride) { - assert(input_stride != 0); - this->input_stride_ = input_stride; - return *this; - } - - size_t input_stride() const { - if (this->input_stride_ == 0) { - return channels(); - } else { - assert(this->input_stride_ >= channels()); - return this->input_stride_; - } - } - - GlobalSumPoolingOperatorTester& output_stride(size_t output_stride) { - assert(output_stride != 0); - this->output_stride_ = output_stride; - return *this; - } - - size_t output_stride() const { - if (this->output_stride_ == 0) { - return channels(); - } else { - assert(this->output_stride_ >= channels()); - return this->output_stride_; - } - } - - GlobalSumPoolingOperatorTester& batch_size(size_t batch_size) { - assert(batch_size != 0); - this->batch_size_ = batch_size; - return *this; - } - - size_t batch_size() const { - return this->batch_size_; - } - - GlobalSumPoolingOperatorTester& qmin(uint8_t qmin) { - this->qmin_ = qmin; - return *this; - } - - uint8_t qmin() const { - return this->qmin_; - } - - GlobalSumPoolingOperatorTester& qmax(uint8_t qmax) { - this->qmax_ = qmax; - return *this; - } - - uint8_t qmax() const { - return this->qmax_; - } - - GlobalSumPoolingOperatorTester& iterations(size_t iterations) { - this->iterations_ = iterations; - return *this; - } - - size_t iterations() const { - return this->iterations_; - } - - void TestNWCxF16() const { - xnnpack::ReplicableRandomDevice rng; - std::uniform_real_distribution f32dist(1.0e-3f, 1.0f); - - std::vector input((batch_size() * width() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(xnn_float16)); - std::vector output(batch_size() * output_stride()); - std::vector output_ref(batch_size() * channels()); - for (size_t iteration = 0; iteration < iterations(); iteration++) { - std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); }); - - // Compute reference results, without clamping. - for (size_t i = 0; i < batch_size(); i++) { - for (size_t j = 0; j < channels(); j++) { - float acc = 0.0f; - for (size_t k = 0; k < width(); k++) { - acc += input[(i * width() + k) * input_stride() + j]; - } - output_ref[i * channels() + j] = acc; - } - } - - // Compute clamping parameters. - const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend()); - const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend()); - const float accumulated_range = accumulated_max - accumulated_min; - const float scaled_min = xnn_float16(accumulated_min + accumulated_range / 255.0f * float(qmin())); - const float scaled_max = xnn_float16(accumulated_max - accumulated_range / 255.0f * float(255 - qmax())); - const float output_min = scaled_min == scaled_max ? -std::numeric_limits::infinity() : scaled_min; - const float output_max = scaled_min == scaled_max ? +std::numeric_limits::infinity() : scaled_max; - - // Clamp reference results. - for (float& value : output_ref) { - value = std::max(std::min(value, output_max), output_min); - } - - // Create, setup, run, and destroy Global Sum Pooling operator. - ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); - xnn_operator_t global_sum_pooling_op = nullptr; - - xnn_status status = xnn_create_global_sum_pooling_nwc_f16( - output_min, output_max, - 0, &global_sum_pooling_op); - if (status == xnn_status_unsupported_hardware) { - GTEST_SKIP(); - } - ASSERT_EQ(xnn_status_success, status); - ASSERT_NE(nullptr, global_sum_pooling_op); - - // Smart pointer to automatically delete global_sum_pooling_op. - std::unique_ptr auto_global_sum_pooling_op(global_sum_pooling_op, xnn_delete_operator); - - size_t workspace_size = 0; - size_t workspace_alignment = 0; - ASSERT_EQ(xnn_status_success, - xnn_reshape_global_sum_pooling_nwc_f16( - global_sum_pooling_op, - batch_size(), width(), - channels(), input_stride(), output_stride(), - &workspace_size, &workspace_alignment, - /*threadpool=*/nullptr)); - - ASSERT_LE(workspace_alignment, XNN_ALLOCATION_ALIGNMENT); - std::vector> workspace(workspace_size); - - ASSERT_EQ(xnn_status_success, - xnn_setup_global_sum_pooling_nwc_f16( - global_sum_pooling_op, - workspace.data(), - input.data(), output.data())); - - ASSERT_EQ(xnn_status_success, - xnn_run_operator(global_sum_pooling_op, /*threadpool=*/nullptr)); - - // Verify results. - for (size_t i = 0; i < batch_size(); i++) { - for (size_t c = 0; c < channels(); c++) { - EXPECT_LE(output[i * output_stride() + c], output_max); - EXPECT_GE(output[i * output_stride() + c], output_min); - EXPECT_NEAR(output[i * output_stride() + c], output_ref[i * channels() + c], std::max(1.0e-4f, std::abs(output_ref[i * channels() + c]) * 1.0e-2f)) - << "at batch index " << i << " / " << batch_size() - << ", channel " << c << " / " << channels(); - } - } - } - } - - void TestNWCxF32() const { - xnnpack::ReplicableRandomDevice rng; - std::uniform_real_distribution f32dist; - - std::vector input((batch_size() * width() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(float)); - std::vector output(batch_size() * output_stride()); - std::vector output_ref(batch_size() * channels()); - for (size_t iteration = 0; iteration < iterations(); iteration++) { - std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); }); - - // Compute reference results, without clamping. - for (size_t i = 0; i < batch_size(); i++) { - for (size_t j = 0; j < channels(); j++) { - float acc = 0.0f; - for (size_t k = 0; k < width(); k++) { - acc += input[(i * width() + k) * input_stride() + j]; - } - output_ref[i * channels() + j] = acc; - } - } - - // Compute clamping parameters. - const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend()); - const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend()); - const float accumulated_range = accumulated_max - accumulated_min; - const float output_min = accumulated_range == 0.0f ? - -std::numeric_limits::infinity() : - accumulated_min + accumulated_range / 255.0f * float(qmin()); - const float output_max = accumulated_range == 0.0f ? - +std::numeric_limits::infinity() : - accumulated_max - accumulated_range / 255.0f * float(255 - qmax()); - - // Clamp reference results. - for (float& value : output_ref) { - value = std::max(std::min(value, output_max), output_min); - } - - // Create, setup, run, and destroy Global Sum Pooling operator. - ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); - xnn_operator_t global_sum_pooling_op = nullptr; - - xnn_status status = xnn_create_global_sum_pooling_nwc_f32( - output_min, output_max, - 0, &global_sum_pooling_op); - if (status == xnn_status_unsupported_hardware) { - GTEST_SKIP(); - } - ASSERT_EQ(xnn_status_success, status); - ASSERT_NE(nullptr, global_sum_pooling_op); - - // Smart pointer to automatically delete global_sum_pooling_op. - std::unique_ptr auto_global_sum_pooling_op(global_sum_pooling_op, xnn_delete_operator); - - size_t workspace_size = 0; - size_t workspace_alignment = 0; - ASSERT_EQ(xnn_status_success, - xnn_reshape_global_sum_pooling_nwc_f32( - global_sum_pooling_op, - batch_size(), width(), - channels(), input_stride(), output_stride(), - &workspace_size, &workspace_alignment, - /*threadpool=*/nullptr)); - - ASSERT_LE(workspace_alignment, XNN_ALLOCATION_ALIGNMENT); - std::vector> workspace(workspace_size); - - ASSERT_EQ(xnn_status_success, - xnn_setup_global_sum_pooling_nwc_f32( - global_sum_pooling_op, - workspace.data(), - input.data(), output.data())); - - ASSERT_EQ(xnn_status_success, - xnn_run_operator(global_sum_pooling_op, /*threadpool=*/nullptr)); - - // Verify results. - for (size_t i = 0; i < batch_size(); i++) { - for (size_t c = 0; c < channels(); c++) { - EXPECT_LE(output[i * output_stride() + c], output_max); - EXPECT_GE(output[i * output_stride() + c], output_min); - EXPECT_NEAR(output[i * output_stride() + c], output_ref[i * channels() + c], std::abs(output_ref[i * channels() + c]) * 1.0e-6f) - << "at batch index " << i << " / " << batch_size() - << ", channel " << c << " / " << channels(); - } - } - } - } - - private: - size_t batch_size_{1}; - size_t width_{1}; - size_t channels_{1}; - size_t input_stride_{0}; - size_t output_stride_{0}; - uint8_t qmin_{0}; - uint8_t qmax_{255}; - size_t iterations_{1}; -}; diff --git a/tools/generate-gavgpool-cw-test.py b/tools/generate-gavgpool-cw-test.py deleted file mode 100755 index bc0378a4580..00000000000 --- a/tools/generate-gavgpool-cw-test.py +++ /dev/null @@ -1,188 +0,0 @@ -#!/usr/bin/env python -# Copyright 2023 Google LLC -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import argparse -import codecs -import math -import os -import re -import sys -import yaml - -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) -from primes import next_prime -import xngen -import xnncommon - - -parser = argparse.ArgumentParser(description='GAvgPoolCW microkernel test generator') -parser.add_argument("-s", "--spec", metavar="FILE", required=True, - help="Specification (YAML) file") -parser.add_argument("-o", "--output", metavar="FILE", required=True, - help='Output (C++ source) file') -parser.set_defaults(defines=list()) - - -def split_ukernel_name(name): - match = re.fullmatch(r"xnn_(f16|f32)_gavgpool_cw_ukernel__(.+)_u(\d+)(v)?", name) - if match is None: - raise ValueError("Unexpected microkernel name: " + name) - - element_tile = int(match.group(3)) - - arch, isa, assembly = xnncommon.parse_target_name(target_name=match.group(2)) - return element_tile, arch, isa - - -AVGPOOL_TEST_TEMPLATE = """\ -TEST(${TEST_NAME}, elements_eq_${ELEMENT_TILE}) { - $if ISA_CHECK: - ${ISA_CHECK}; - GAvgPoolCWMicrokernelTester() - .elements(${ELEMENT_TILE}) - .channels(${CHANNEL_TILE}) - .Test(${", ".join(TEST_ARGS)}); -} - -TEST(${TEST_NAME}, elements_gt_${ELEMENT_TILE}) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t elements = ${ELEMENT_TILE + 1}; elements < ${ELEMENT_TILE * 2}; elements++) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(${CHANNEL_TILE}) - .Test(${", ".join(TEST_ARGS)}); - } -} - -$if ELEMENT_TILE > 1: - TEST(${TEST_NAME}, elements_lt_${ELEMENT_TILE}) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t elements = 1; elements < ${ELEMENT_TILE}; elements++) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(${CHANNEL_TILE}) - .Test(${", ".join(TEST_ARGS)}); - } - } - -TEST(${TEST_NAME}, elements_div_${ELEMENT_TILE}) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t elements = ${ELEMENT_TILE * 2}; elements < ${ELEMENT_TILE * 5}; elements += ${ELEMENT_TILE}) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(${CHANNEL_TILE}) - .Test(${", ".join(TEST_ARGS)}); - } -} - -TEST(${TEST_NAME}, channels_gt_${CHANNEL_TILE}) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t channels = ${CHANNEL_TILE + 1}; channels < ${CHANNEL_TILE * 4}; channels++) { - GAvgPoolCWMicrokernelTester() - .elements(${ELEMENT_TILE}) - .channels(channels) - .Test(${", ".join(TEST_ARGS)}); - } -} - -TEST(${TEST_NAME}, qmin) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t elements = 1; elements < ${ELEMENT_TILE * 2}; elements += ${1 if ELEMENT_TILE < 4 else 3}) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(${CHANNEL_TILE * 4}) - .qmin(128) - .Test(${", ".join(TEST_ARGS)}); - } -} - -TEST(${TEST_NAME}, qmax) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t elements = 1; elements < ${ELEMENT_TILE * 2}; elements += ${1 if ELEMENT_TILE < 4 else 3}) { - GAvgPoolCWMicrokernelTester() - .elements(elements) - .channels(${CHANNEL_TILE * 4}) - .qmax(128) - .Test(${", ".join(TEST_ARGS)}); - } -} -""" - - -def generate_test_cases(ukernel, init_fn, element_tile, isa): - """Generates all tests cases for a GAVGPOOL micro-kernel. - - Args: - ukernel: C name of the micro-kernel function. - init_fn: C name of the function to initialize microkernel parameters. - element_tile: Number of elements/pixels processed per one iteration of the inner - loops of the micro-kernel. - isa: instruction set required to run the micro-kernel. Generated unit test - will skip execution if the host processor doesn't support this ISA. - - Returns: - Code for the test case. - """ - _, test_name = ukernel.split("_", 1) - _, datatype, ukernel_type, _ = ukernel.split("_", 3) - test_args = [ukernel, init_fn] - return xngen.preprocess(AVGPOOL_TEST_TEMPLATE, { - "TEST_NAME": test_name.upper().replace("UKERNEL_", ""), - "TEST_ARGS": test_args, - "DATATYPE": datatype, - "CHANNEL_TILE": 1, # All microkernels process one channel at a time. - "ELEMENT_TILE": element_tile, - "ISA_CHECK": xnncommon.generate_isa_check_macro(isa), - "next_prime": next_prime, - }) - - -def main(args): - options = parser.parse_args(args) - - with codecs.open(options.spec, "r", encoding="utf-8") as spec_file: - spec_yaml = yaml.safe_load(spec_file) - if not isinstance(spec_yaml, list): - raise ValueError("expected a list of micro-kernels in the spec") - - tests = """\ -// Copyright 2023 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. -// -// Auto-generated file. Do not edit! -// Specification: {specification} -// Generator: {generator} - - -#include -#include "xnnpack/common.h" -#include "xnnpack/gavgpool.h" -#include "xnnpack/isa-checks.h" -#include "xnnpack/microparams-init.h" -#include "gavgpool-cw-microkernel-tester.h" -""".format(specification=options.spec, generator=sys.argv[0]) - - for ukernel_spec in spec_yaml: - name = ukernel_spec["name"] - init_fn = ukernel_spec.get("init") - element_tile, arch, isa = split_ukernel_name(name) - - test_case = generate_test_cases(name, init_fn, element_tile, isa) - tests += "\n\n" + xnncommon.postprocess_test_case(test_case, arch, isa) - - xnncommon.overwrite_if_changed(options.output, tests) - - -if __name__ == "__main__": - main(sys.argv[1:])