From 0e576bd7e965298f32f2b2de203c3da4877fda15 Mon Sep 17 00:00:00 2001 From: Dillon Sharlet Date: Tue, 15 Oct 2024 18:30:24 -0700 Subject: [PATCH] Add `fill_uniform_random_bits` and use it to speed up tests/benchmarks. PiperOrigin-RevId: 686310094 --- bench/BUILD.bazel | 15 +---- bench/average-pooling.cc | 3 +- bench/channel-shuffle.cc | 3 +- bench/convolution.cc | 11 ++- bench/deconvolution.cc | 7 +- bench/f32-qc4w-gemm.cc | 6 +- bench/gemm-benchmark.cc | 97 ++++++--------------------- bench/max-pooling.cc | 3 +- bench/packw-benchmark.h | 13 +--- bench/qs8-gemm.cc | 6 +- bench/qu8-gemm.cc | 15 ++--- bench/softmax.cc | 3 +- bench/x8-lut.cc | 6 +- src/xnnpack/buffer.h | 20 ++++++ test/depth-to-space-operator-tester.h | 5 +- test/gemm-microkernel-tester.cc | 32 +++------ test/lut-microkernel-tester.h | 8 +-- test/lut-norm-microkernel-tester.h | 8 +-- test/tanh-operator-tester.h | 8 +-- test/vunary-microkernel-tester.cc | 11 +-- test/xx-fill.cc | 2 +- test/xx-pad.cc | 4 +- test/zip-microkernel-tester.h | 12 +--- 23 files changed, 92 insertions(+), 206 deletions(-) diff --git a/bench/BUILD.bazel b/bench/BUILD.bazel index 2bb53c48823..9e0a6862e39 100644 --- a/bench/BUILD.bazel +++ b/bench/BUILD.bazel @@ -265,7 +265,6 @@ xnnpack_benchmark( srcs = [ "%s.cc" % kernel.replace("_", "-"), ], - tags = xnnpack_slow_benchmark_tags(), deps = MICROKERNEL_BENCHMARK_DEPS, ) for kernel in [ "xx_transposev", @@ -293,7 +292,6 @@ xnnpack_benchmark( srcs = [ "qs8-dwconv.cc", ], - tags = xnnpack_slow_benchmark_tags(), deps = MICROKERNEL_BENCHMARK_DEPS + [ ":dwconv", "//:indirection", @@ -487,7 +485,6 @@ xnnpack_benchmark( "bgemm.h", "x8-packq.cc", ], - tags = xnnpack_slow_benchmark_tags(), deps = MICROKERNEL_BENCHMARK_DEPS + [ ":packq_benchmark", "//:allocator", @@ -501,7 +498,6 @@ xnnpack_benchmark( "packw-benchmark.h", "x8-packw.cc", ], - tags = xnnpack_slow_benchmark_tags(), deps = MICROKERNEL_BENCHMARK_DEPS + [ "//:allocator", ], @@ -514,7 +510,6 @@ xnnpack_benchmark( 
"packw-benchmark.h", "qs8-packw.cc", ], - tags = xnnpack_slow_benchmark_tags(), deps = MICROKERNEL_BENCHMARK_DEPS + [ "//:allocator", ], @@ -527,7 +522,6 @@ xnnpack_benchmark( "packw-benchmark.h", "x16-packw.cc", ], - tags = xnnpack_slow_benchmark_tags(), deps = MICROKERNEL_BENCHMARK_DEPS + [ "//:allocator", ], @@ -540,7 +534,6 @@ xnnpack_benchmark( "packw-benchmark.h", "x32-packw.cc", ], - tags = xnnpack_slow_benchmark_tags(), deps = MICROKERNEL_BENCHMARK_DEPS + [ "//:allocator", ], @@ -602,9 +595,7 @@ xnnpack_benchmark( name = "convolution_bench", srcs = ["convolution.cc"], copts = xnnpack_optional_tflite_copts(), - tags = xnnpack_slow_benchmark_tags() + [ - "nowin32", - ], + tags = xnnpack_slow_benchmark_tags() + ["nowin32"], deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps(), ) @@ -649,8 +640,6 @@ xnnpack_benchmark( name = "scaled_dot_product_attention_bench", srcs = ["scaled-dot-product-attention.cc"], copts = xnnpack_optional_tflite_copts(), - tags = xnnpack_slow_benchmark_tags() + [ - "nowin32", - ], + tags = xnnpack_slow_benchmark_tags() + ["nowin32"], deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps(), ) diff --git a/bench/average-pooling.cc b/bench/average-pooling.cc index c09e1b56bd4..fa939165fc3 100644 --- a/bench/average-pooling.cc +++ b/bench/average-pooling.cc @@ -40,13 +40,12 @@ static void xnnpack_average_pooling_qu8(benchmark::State& state, const char* net std::random_device random_device; auto rng = std::mt19937(random_device()); - auto u8rng = std::bind(std::uniform_int_distribution(0, std::numeric_limits::max()), std::ref(rng)); const size_t output_height = (2 * padding_size + input_height - pooling_size) / stride + 1; const size_t output_width = (2 * padding_size + input_width - pooling_size) / stride + 1; xnnpack::Buffer input(batch_size * input_height * input_width * channels + XNN_EXTRA_BYTES / sizeof(uint8_t)); - std::generate(input.begin(), input.end(), std::ref(u8rng)); + 
xnnpack::fill_uniform_random_bits(input.data(), input.size(), rng); xnnpack::Buffer output(batch_size * output_height * output_width * channels); xnn_status status = xnn_initialize(nullptr /* allocator */); diff --git a/bench/channel-shuffle.cc b/bench/channel-shuffle.cc index e41daca7453..61d138849c2 100644 --- a/bench/channel-shuffle.cc +++ b/bench/channel-shuffle.cc @@ -27,11 +27,10 @@ static void channel_shuffle_x8(benchmark::State& state, const char* net) { std::random_device random_device; auto rng = std::mt19937(random_device()); - auto u8rng = std::bind(std::uniform_int_distribution(0, std::numeric_limits::max()), std::ref(rng)); xnnpack::Buffer input(XNN_EXTRA_BYTES / sizeof(uint8_t) + batch_size * groups * group_channels); xnnpack::Buffer output(batch_size * groups * group_channels); - std::generate(input.begin(), input.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(input.data(), input.size(), rng); xnn_status status = xnn_initialize(nullptr /* allocator */); if (status != xnn_status_success) { diff --git a/bench/convolution.cc b/bench/convolution.cc index c75b059089e..0011ed3545c 100644 --- a/bench/convolution.cc +++ b/bench/convolution.cc @@ -48,7 +48,6 @@ void xnnpack_convolution_qu8(benchmark::State& state, const char* net) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto i32rng = std::bind(std::uniform_int_distribution(-10000, 10000), std::ref(rng)); - auto u8rng = std::bind(std::uniform_int_distribution(0, std::numeric_limits::max()), std::ref(rng)); const size_t output_pixel_stride = groups * group_output_channels; const size_t input_pixel_stride = groups * group_input_channels; @@ -62,9 +61,9 @@ void xnnpack_convolution_qu8(benchmark::State& state, const char* net) { const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1; xnnpack::Buffer input(batch_size * input_height * input_width * input_pixel_stride + XNN_EXTRA_BYTES / sizeof(uint8_t)); - 
std::generate(input.begin(), input.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(input.data(), input.size(), rng); xnnpack::Buffer kernel(groups * group_output_channels * kernel_height * kernel_width * group_input_channels); - std::generate(kernel.begin(), kernel.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(kernel.data(), kernel.size(), rng); xnnpack::Buffer bias(groups * group_output_channels); std::generate(bias.begin(), bias.end(), std::ref(i32rng)); const size_t output_elements = batch_size * output_height * output_width * output_pixel_stride; @@ -187,8 +186,6 @@ void xnnpack_convolution_qs8(benchmark::State& state, const char* net) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto i32rng = std::bind(std::uniform_int_distribution(-10000, 10000), std::ref(rng)); - auto i8rng = std::bind( - std::uniform_int_distribution(std::numeric_limits::min(), std::numeric_limits::max()), std::ref(rng)); const size_t output_pixel_stride = groups * group_output_channels; const size_t input_pixel_stride = groups * group_input_channels; @@ -202,9 +199,9 @@ void xnnpack_convolution_qs8(benchmark::State& state, const char* net) { const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1; xnnpack::Buffer input(batch_size * input_height * input_width * input_pixel_stride + XNN_EXTRA_BYTES / sizeof(int8_t)); - std::generate(input.begin(), input.end(), std::ref(i8rng)); + xnnpack::fill_uniform_random_bits(input.data(), input.size(), rng); xnnpack::Buffer kernel(groups * group_output_channels * kernel_height * kernel_width * group_input_channels); - std::generate(kernel.begin(), kernel.end(), std::ref(i8rng)); + xnnpack::fill_uniform_random_bits(kernel.data(), kernel.size(), rng); xnnpack::Buffer bias(groups * group_output_channels); std::generate(bias.begin(), bias.end(), std::ref(i32rng)); const size_t output_elements = batch_size * output_height * output_width * 
output_pixel_stride; diff --git a/bench/deconvolution.cc b/bench/deconvolution.cc index 243c910dd78..63e3f02b78e 100644 --- a/bench/deconvolution.cc +++ b/bench/deconvolution.cc @@ -46,9 +46,6 @@ void xnnpack_deconvolution_qu8(benchmark::State& state, const char* net) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto i32rng = std::bind(std::uniform_int_distribution(-10000, 10000), std::ref(rng)); - auto u8rng = std::bind( - std::uniform_int_distribution(std::numeric_limits::min(), std::numeric_limits::max()), - std::ref(rng)); const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1; const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1; @@ -60,9 +57,9 @@ void xnnpack_deconvolution_qu8(benchmark::State& state, const char* net) { const size_t output_width = std::max(stride_width * (input_width - 1) + adjustment + effective_kernel_width, padding_width) - padding_width; xnnpack::Buffer input(XNN_EXTRA_BYTES + batch_size * input_height * input_width * input_channels); - std::generate(input.begin(), input.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(input.data(), input.size(), rng); xnnpack::Buffer kernel(output_channels * kernel_height * kernel_width * input_channels); - std::generate(kernel.begin(), kernel.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(kernel.data(), kernel.size(), rng); xnnpack::Buffer bias(output_channels); std::generate(bias.begin(), bias.end(), std::ref(i32rng)); const size_t output_elements = batch_size * output_height * output_width * output_channels; diff --git a/bench/f32-qc4w-gemm.cc b/bench/f32-qc4w-gemm.cc index 289081fac5a..70d76a784b1 100644 --- a/bench/f32-qc4w-gemm.cc +++ b/bench/f32-qc4w-gemm.cc @@ -46,15 +46,11 @@ static void GEMMBenchmark(benchmark::State& state, std::random_device random_device; auto rng = std::mt19937(random_device()); auto f32rng = std::bind(std::uniform_real_distribution(), std::ref(rng)); - auto u8rng = std::bind( - 
std::uniform_int_distribution( - std::numeric_limits::min(), std::numeric_limits::max()), - std::ref(rng)); xnnpack::Buffer a(mc * kc + XNN_EXTRA_BYTES / sizeof(float)); std::generate(a.begin(), a.end(), std::ref(f32rng)); xnnpack::Buffer k(nc * kc * sizeof(uint8_t) / 2 /* int4_t */); - std::generate(k.begin(), k.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(k.data(), k.size(), rng); xnnpack::Buffer b(nc); std::generate(b.begin(), b.end(), std::ref(f32rng)); diff --git a/bench/gemm-benchmark.cc b/bench/gemm-benchmark.cc index f5e3b65184e..dee22586271 100644 --- a/bench/gemm-benchmark.cc +++ b/bench/gemm-benchmark.cc @@ -46,15 +46,11 @@ void GEMMBenchmark(benchmark::State& state, xnn_qs8_gemm_minmax_ukernel_fn gemm, auto rng = std::mt19937(random_device()); auto i32rng = std::bind(std::uniform_int_distribution(-10000, 10000), std::ref(rng)); - auto i8rng = std::bind(std::uniform_int_distribution( - -std::numeric_limits::max(), - std::numeric_limits::max()), - std::ref(rng)); xnnpack::Buffer a(mc * kc + XNN_EXTRA_BYTES / sizeof(int8_t)); - std::generate(a.begin(), a.end(), std::ref(i8rng)); + xnnpack::fill_uniform_random_bits(a.data(), a.size(), rng); xnnpack::Buffer k(nc * kc); - std::generate(k.begin(), k.end(), std::ref(i8rng)); + xnnpack::fill_uniform_random_bits(k.data(), k.size(), rng); xnnpack::Buffer b(nc); std::generate(b.begin(), b.end(), std::ref(i32rng)); @@ -136,15 +132,11 @@ void GEMMBenchmark(benchmark::State& state, auto rng = std::mt19937(random_device()); auto i32rng = std::bind(std::uniform_int_distribution(-10000, 10000), std::ref(rng)); - auto i8rng = std::bind(std::uniform_int_distribution( - -std::numeric_limits::max(), - std::numeric_limits::max()), - std::ref(rng)); xnnpack::Buffer a(mc * kc + XNN_EXTRA_BYTES / sizeof(int8_t)); - std::generate(a.begin(), a.end(), std::ref(i8rng)); + xnnpack::fill_uniform_random_bits(a.data(), a.size(), rng); xnnpack::Buffer k(nc * kc); - std::generate(k.begin(), k.end(), std::ref(i8rng)); + 
xnnpack::fill_uniform_random_bits(k.data(), k.size(), rng); xnnpack::Buffer b(nc); std::generate(b.begin(), b.end(), std::ref(i32rng)); @@ -222,15 +214,11 @@ void GEMMBenchmark(benchmark::State& state, std::random_device random_device; auto rng = std::mt19937(random_device()); - auto i8rng = std::bind(std::uniform_int_distribution( - -std::numeric_limits::max(), - std::numeric_limits::max()), - std::ref(rng)); xnnpack::Buffer a(mc * kc + XNN_EXTRA_BYTES / sizeof(int8_t)); - std::generate(a.begin(), a.end(), std::ref(i8rng)); + xnnpack::fill_uniform_random_bits(a.data(), a.size(), rng); xnnpack::Buffer k(nc * kc); - std::generate(k.begin(), k.end(), std::ref(i8rng)); + xnnpack::fill_uniform_random_bits(k.data(), k.size(), rng); xnnpack::Buffer quantization_params( mc + XNN_EXTRA_QUANTIZATION_PARAMS); @@ -304,15 +292,11 @@ void GEMMBenchmark(benchmark::State& state, std::random_device random_device; auto rng = std::mt19937(random_device()); - auto i8rng = std::bind(std::uniform_int_distribution( - -std::numeric_limits::max(), - std::numeric_limits::max()), - std::ref(rng)); xnnpack::Buffer a(mc * kc + XNN_EXTRA_BYTES / sizeof(int8_t)); - std::generate(a.begin(), a.end(), std::ref(i8rng)); + xnnpack::fill_uniform_random_bits(a.data(), a.size(), rng); xnnpack::Buffer k(nc * kc); - std::generate(k.begin(), k.end(), std::ref(i8rng)); + xnnpack::fill_uniform_random_bits(k.data(), k.size(), rng); xnnpack::Buffer quantization_params( mc + XNN_EXTRA_QUANTIZATION_PARAMS); @@ -384,13 +368,6 @@ void GEMMBenchmark(benchmark::State& state, std::random_device random_device; auto rng = std::mt19937(random_device()); - auto i8rng = std::bind(std::uniform_int_distribution( - -std::numeric_limits::max(), - std::numeric_limits::max()), - std::ref(rng)); - auto u8rng = std::bind(std::uniform_int_distribution( - 0, std::numeric_limits::max()), - std::ref(rng)); auto scalerng = std::bind(std::uniform_real_distribution(0.5f, 2.f), std::ref(rng)); @@ -404,11 +381,11 @@ void 
GEMMBenchmark(benchmark::State& state, const size_t packed_n = round_up_po2(nc, nr); xnnpack::Buffer a(mc * kc + XNN_EXTRA_BYTES); - std::generate(a.begin(), a.end(), std::ref(i8rng)); + xnnpack::fill_uniform_random_bits(a.data(), a.size(), rng); xnnpack::Buffer k(nc * kc / 2); - std::generate(k.begin(), k.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(k.data(), k.size(), rng); xnnpack::Buffer kernel_scale2d(nc * k2 / bl); - std::generate(k.begin(), k.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(k.data(), k.size(), rng); std::generate(kernel_scale2d.begin(), kernel_scale2d.end(), [&]() { return scalerng(); }); @@ -489,18 +466,11 @@ void GEMMBenchmark(benchmark::State& state, std::random_device random_device; auto rng = std::mt19937(random_device()); - auto i8rng = std::bind(std::uniform_int_distribution( - -std::numeric_limits::max(), - std::numeric_limits::max()), - std::ref(rng)); - auto u8rng = std::bind(std::uniform_int_distribution( - 0, std::numeric_limits::max()), - std::ref(rng)); xnnpack::Buffer a(mc * kc + XNN_EXTRA_BYTES); - std::generate(a.begin(), a.end(), std::ref(i8rng)); + xnnpack::fill_uniform_random_bits(a.data(), a.size(), rng); xnnpack::Buffer k(nc * kc / 2); - std::generate(k.begin(), k.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(k.data(), k.size(), rng); xnnpack::Buffer quantization_params( mc + XNN_EXTRA_QUANTIZATION_PARAMS); @@ -574,13 +544,6 @@ void GEMMBenchmark(benchmark::State& state, std::random_device random_device; auto rng = std::mt19937(random_device()); - auto i8rng = std::bind(std::uniform_int_distribution( - -std::numeric_limits::max(), - std::numeric_limits::max()), - std::ref(rng)); - auto u8rng = std::bind(std::uniform_int_distribution( - 0, std::numeric_limits::max()), - std::ref(rng)); auto scalerng = std::bind(std::uniform_real_distribution(0.5f, 2.f), std::ref(rng)); @@ -594,11 +557,11 @@ void GEMMBenchmark(benchmark::State& state, const size_t packed_n = round_up_po2(nc, nr); 
xnnpack::Buffer a(mc * kc + XNN_EXTRA_BYTES); - std::generate(a.begin(), a.end(), std::ref(i8rng)); + xnnpack::fill_uniform_random_bits(a.data(), a.size(), rng); xnnpack::Buffer k(nc * kc / 2); - std::generate(k.begin(), k.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(k.data(), k.size(), rng); xnnpack::Buffer kernel_scale2d(nc * k2 / bl); - std::generate(k.begin(), k.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(k.data(), k.size(), rng); std::generate(kernel_scale2d.begin(), kernel_scale2d.end(), [&]() { return scalerng(); }); @@ -678,18 +641,11 @@ void GEMMBenchmark(benchmark::State& state, std::random_device random_device; auto rng = std::mt19937(random_device()); - auto i8rng = std::bind(std::uniform_int_distribution( - -std::numeric_limits::max(), - std::numeric_limits::max()), - std::ref(rng)); - auto u8rng = std::bind(std::uniform_int_distribution( - 0, std::numeric_limits::max()), - std::ref(rng)); xnnpack::Buffer a(mc * kc + XNN_EXTRA_BYTES); - std::generate(a.begin(), a.end(), std::ref(i8rng)); + xnnpack::fill_uniform_random_bits(a.data(), a.size(), rng); xnnpack::Buffer k(nc * kc / 2); - std::generate(k.begin(), k.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(k.data(), k.size(), rng); xnnpack::Buffer quantization_params( mc + XNN_EXTRA_QUANTIZATION_PARAMS); @@ -764,14 +720,11 @@ void GEMMBenchmark(benchmark::State& state, auto rng = std::mt19937(random_device()); auto f32rng = std::bind(std::uniform_real_distribution(-10.0f, 10.0f), std::ref(rng)); - auto u8rng = std::bind(std::uniform_int_distribution( - 0, std::numeric_limits::max()), - std::ref(rng)); xnnpack::Buffer a(mc * kc + XNN_EXTRA_BYTES); std::generate(a.begin(), a.end(), std::ref(f32rng)); xnnpack::Buffer k(nc * kc / 2); - std::generate(k.begin(), k.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(k.data(), k.size(), rng); // Create a fake `gemm_config` for the packing functions. 
struct xnn_gemm_config gemm_config; @@ -880,9 +833,6 @@ void GEMMBenchmark(benchmark::State& state, auto rng = std::mt19937(random_device()); auto f32rng = std::bind(std::uniform_real_distribution(-10.0f, 10.0f), std::ref(rng)); - auto u8rng = std::bind(std::uniform_int_distribution( - 0, std::numeric_limits::max()), - std::ref(rng)); auto scalerng = std::bind(std::uniform_real_distribution(0.5f, 2.f), std::ref(rng)); @@ -891,7 +841,7 @@ void GEMMBenchmark(benchmark::State& state, xnnpack::Buffer a(mc * k2); std::generate(a.begin(), a.end(), std::ref(f32rng)); xnnpack::Buffer k(nc * k2 / 2); - std::generate(k.begin(), k.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(k.data(), k.size(), rng); // Create a fake `gemm_config` for the packing functions. struct xnn_gemm_config gemm_config; @@ -1001,14 +951,11 @@ void GEMMBenchmark(benchmark::State& state, xnn_qu8_gemm_minmax_ukernel_fn gemm, auto rng = std::mt19937(random_device()); auto i32rng = std::bind(std::uniform_int_distribution(-10000, 10000), std::ref(rng)); - auto u8rng = std::bind(std::uniform_int_distribution( - 0, std::numeric_limits::max()), - std::ref(rng)); xnnpack::Buffer a(mc * kc + XNN_EXTRA_BYTES / sizeof(uint8_t)); - std::generate(a.begin(), a.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(a.data(), a.size(), rng); xnnpack::Buffer k(nc * kc); - std::generate(k.begin(), k.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(k.data(), k.size(), rng); xnnpack::Buffer b(nc); std::generate(b.begin(), b.end(), std::ref(i32rng)); diff --git a/bench/max-pooling.cc b/bench/max-pooling.cc index 56d28d53f8c..c281d5dc1a1 100644 --- a/bench/max-pooling.cc +++ b/bench/max-pooling.cc @@ -31,14 +31,13 @@ void max_pooling_u8(benchmark::State& state, const char* net) { std::random_device random_device; auto rng = std::mt19937(random_device()); - auto u8rng = std::bind(std::uniform_int_distribution(0, std::numeric_limits::max()), std::ref(rng)); const size_t output_height = (2 * 
padding_size + input_height - pooling_size) / stride + 1; const size_t output_width = (2 * padding_size + input_width - pooling_size) / stride + 1; xnnpack::Buffer input( batch_size * input_height * input_width * channels + XNN_EXTRA_BYTES); - std::generate(input.begin(), input.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(input.data(), input.size(), rng); xnnpack::Buffer output(batch_size * output_height * output_width * channels); xnn_status status = xnn_initialize(nullptr /* allocator */); diff --git a/bench/packw-benchmark.h b/bench/packw-benchmark.h index 0124cf10b2d..8204e51ccfc 100644 --- a/bench/packw-benchmark.h +++ b/bench/packw-benchmark.h @@ -34,9 +34,6 @@ static void x8_packw(benchmark::State& state, std::random_device random_device; auto rng = std::mt19937(random_device()); - auto i8rng = std::bind( - std::uniform_int_distribution(std::numeric_limits::min(), std::numeric_limits::max()), - std::ref(rng)); // Computer num_buffers that fit cache with source weights + packed_weights. const size_t num_buffers = 1 + @@ -45,7 +42,7 @@ static void x8_packw(benchmark::State& state, xnnpack::Buffer weights(num_buffers * batch * dim_n * dim_k); - std::generate(weights.begin(), weights.end(), std::ref(i8rng)); + xnnpack::fill_uniform_random_bits(weights.data(), weights.size(), rng); xnnpack::Buffer packed_weights( num_buffers * batch * (rounded_n * rounded_k + rounded_n * sizeof(uint32_t))); @@ -97,9 +94,6 @@ static void qs8_packw(benchmark::State& state, std::random_device random_device; auto rng = std::mt19937(random_device()); - auto i8rng = std::bind( - std::uniform_int_distribution(std::numeric_limits::min(), std::numeric_limits::max()), - std::ref(rng)); // Computer num_buffers that fit cache with source weights + packed_weights. 
const size_t num_buffers = 1 + @@ -108,7 +102,7 @@ static void qs8_packw(benchmark::State& state, xnnpack::Buffer weights(num_buffers * batch * dim_n * dim_k); - std::generate(weights.begin(), weights.end(), std::ref(i8rng)); + xnnpack::fill_uniform_random_bits(weights.data(), weights.size(), rng); xnnpack::Buffer packed_weights( num_buffers * batch * (rounded_n * rounded_k + rounded_n * sizeof(uint32_t))); @@ -160,7 +154,6 @@ static void x16_packw(benchmark::State& state, std::random_device random_device; auto rng = std::mt19937(random_device()); - auto u16rng = std::bind(std::uniform_int_distribution(), std::ref(rng)); // Computer num_buffers that fit cache with source weights + packed_weights. const size_t num_buffers = 1 + @@ -169,7 +162,7 @@ static void x16_packw(benchmark::State& state, xnnpack::Buffer weights(num_buffers * batch * dim_n * dim_k); - std::generate(weights.begin(), weights.end(), std::ref(u16rng)); + xnnpack::fill_uniform_random_bits(weights.data(), weights.size(), rng); xnnpack::Buffer packed_weights( num_buffers * batch * (rounded_n * rounded_k + rounded_n)); diff --git a/bench/qs8-gemm.cc b/bench/qs8-gemm.cc index 298929c537d..1590c078f59 100644 --- a/bench/qs8-gemm.cc +++ b/bench/qs8-gemm.cc @@ -20,6 +20,7 @@ #include "ruy/ruy.h" #endif // BENCHMARK_RUY +#include "xnnpack/buffer.h" #include "xnnpack/isa-checks.h" #include "xnnpack/gemm.h" #include "xnnpack/microfnptr.h" @@ -36,16 +37,15 @@ static void RuyBenchmark(benchmark::State& state, size_t threads) std::random_device random_device; auto rng = std::mt19937(random_device()); auto i32rng = std::bind(std::uniform_int_distribution(-10000, 10000), std::ref(rng)); - auto u8rng = std::bind(std::uniform_int_distribution(0, std::numeric_limits::max()), std::ref(rng)); const size_t num_buffers = 1 + benchmark::utils::DivideRoundUp(benchmark::utils::GetMaxCacheSize(), nc * (sizeof(int8_t) * (mc + kc) + sizeof(int32_t))); xnnpack::Buffer a(mc * kc); - std::generate(a.begin(), a.end(), 
std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(a.data(), a.size(), rng); xnnpack::Buffer k(num_buffers * nc * kc); - std::generate(k.begin(), k.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(k.data(), k.size(), rng); xnnpack::Buffer b(num_buffers * nc); std::generate(b.begin(), b.end(), std::ref(i32rng)); xnnpack::Buffer c(num_buffers * nc * mc); diff --git a/bench/qu8-gemm.cc b/bench/qu8-gemm.cc index 99e473329a0..89514b12aa0 100644 --- a/bench/qu8-gemm.cc +++ b/bench/qu8-gemm.cc @@ -55,12 +55,11 @@ static void GEMMBenchmark(benchmark::State& state, std::random_device random_device; auto rng = std::mt19937(random_device()); auto i32rng = std::bind(std::uniform_int_distribution(-10000, 10000), std::ref(rng)); - auto u8rng = std::bind(std::uniform_int_distribution(0, std::numeric_limits::max()), std::ref(rng)); xnnpack::Buffer a(mc * kc + XNN_EXTRA_BYTES / sizeof(uint8_t)); - std::generate(a.begin(), a.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(a.data(), a.size(), rng); xnnpack::Buffer k(nc * kc); - std::generate(k.begin(), k.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(k.data(), k.size(), rng); xnnpack::Buffer b(nc); std::generate(b.begin(), b.end(), std::ref(i32rng)); @@ -156,10 +155,9 @@ static void GemmlowpBenchmark(benchmark::State& state, uint32_t threads) std::random_device random_device; auto rng = std::mt19937(random_device()); auto i32rng = std::bind(std::uniform_int_distribution(-10000, 10000), std::ref(rng)); - auto u8rng = std::bind(std::uniform_int_distribution(0, std::numeric_limits::max()), std::ref(rng)); xnnpack::Buffer a(mc * kc); - std::generate(a.begin(), a.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(a.data(), a.size(), rng); const size_t kElements = nc * kc; const size_t bElements = nc; @@ -169,7 +167,7 @@ static void GemmlowpBenchmark(benchmark::State& state, uint32_t threads) kElements * sizeof(uint8_t) + bElements * sizeof(int32_t) + c_elements * sizeof(uint8_t)); 
xnnpack::Buffer k(kElements * num_buffers); - std::generate(k.begin(), k.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(k.data(), k.size(), rng); xnnpack::Buffer b(bElements * num_buffers); std::generate(b.begin(), b.end(), std::ref(i32rng)); xnnpack::Buffer c(c_elements * num_buffers); @@ -220,16 +218,15 @@ static void RuyBenchmark(benchmark::State& state, size_t threads) std::random_device random_device; auto rng = std::mt19937(random_device()); auto i32rng = std::bind(std::uniform_int_distribution(-10000, 10000), std::ref(rng)); - auto u8rng = std::bind(std::uniform_int_distribution(0, std::numeric_limits::max()), std::ref(rng)); const size_t num_buffers = 1 + benchmark::utils::DivideRoundUp(benchmark::utils::GetMaxCacheSize(), nc * (sizeof(uint8_t) * (mc + kc) + sizeof(int32_t))); xnnpack::Buffer a(mc * kc); - std::generate(a.begin(), a.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(a.data(), a.size(), rng); xnnpack::Buffer k(num_buffers * nc * kc); - std::generate(k.begin(), k.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(k.data(), k.size(), rng); xnnpack::Buffer b(num_buffers * nc); std::generate(b.begin(), b.end(), std::ref(i32rng)); xnnpack::Buffer c(num_buffers * nc * mc); diff --git a/bench/softmax.cc b/bench/softmax.cc index 47d4cf50d6e..5672efe6ab4 100644 --- a/bench/softmax.cc +++ b/bench/softmax.cc @@ -37,11 +37,10 @@ static void xnnpack_softmax_qu8(benchmark::State& state) { std::random_device random_device; auto rng = std::mt19937(random_device()); - auto u8rng = std::bind(std::uniform_int_distribution(0, std::numeric_limits::max()), std::ref(rng)); xnnpack::Buffer input(batch_size * channels); xnnpack::Buffer output(batch_size * channels); - std::generate(input.begin(), input.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(input.data(), input.size(), rng); xnn_status status = xnn_initialize(nullptr /* allocator */); if (status != xnn_status_success) { diff --git a/bench/x8-lut.cc 
b/bench/x8-lut.cc index d1030732717..29d1263e648 100644 --- a/bench/x8-lut.cc +++ b/bench/x8-lut.cc @@ -34,10 +34,8 @@ static void x8_lut( std::random_device random_device; auto rng = std::mt19937(random_device()); - auto u8rng = std::bind( - std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng)); - std::generate(input.begin(), input.end(), std::ref(u8rng)); - std::generate(table.begin(), table.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(input.data(), input.size(), rng); + xnnpack::fill_uniform_random_bits(table.data(), table.size(), rng); for (auto _ : state) { lut(num_elements * sizeof(uint8_t), input.data(), output.data(), table.data()); diff --git a/src/xnnpack/buffer.h b/src/xnnpack/buffer.h index 087cfdf13a5..d32deb65b79 100644 --- a/src/xnnpack/buffer.h +++ b/src/xnnpack/buffer.h @@ -111,6 +111,26 @@ class Buffer { } }; +// This is a faster way of generating random numbers, by generating as many +// random values as possible for each call to rng(). Assumes that rng() returns +// entirely random bits. +template <typename T, typename Rng> +void fill_uniform_random_bits(T* data, size_t size, Rng& rng) { + using RngT = decltype(rng()); + RngT* data_rng_t = reinterpret_cast<RngT*>(data); + size_t size_bytes = size * sizeof(T); + size_t i = 0; + // Fill with as many RngT as we can. + for (; i + sizeof(RngT) <= size_bytes; i += sizeof(RngT)) { + *data_rng_t++ = rng(); + } + // Fill the remaining bytes. 
+ char* data_char = reinterpret_cast<char*>(data_rng_t); + for (; i < size_bytes; ++i) { + *data_char++ = rng() & 0xff; + } +} + }; // namespace xnnpack #endif // __XNNPACK_TEST_BUFFER_H_ diff --git a/test/depth-to-space-operator-tester.h b/test/depth-to-space-operator-tester.h index 42fcb1ca809..ae2a6264d2f 100644 --- a/test/depth-to-space-operator-tester.h +++ b/test/depth-to-space-operator-tester.h @@ -122,16 +122,13 @@ class DepthToSpaceOperatorTester { void TestNHWCxX8() const { xnnpack::ReplicableRandomDevice rng; - auto i8rng = std::bind( - std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()), - std::ref(rng)); xnnpack::Buffer<int8_t> input( (batch_size() * input_height() * input_width() - 1) * input_channels_stride() + input_channels() + XNN_EXTRA_BYTES / sizeof(int8_t)); xnnpack::Buffer<int8_t> output( (batch_size() * output_height() * output_width() - 1) * output_channels_stride() + output_channels()); for (size_t iteration = 0; iteration < iterations(); iteration++) { - std::generate(input.begin(), input.end(), std::ref(i8rng)); + xnnpack::fill_uniform_random_bits(input.data(), input.size(), rng); // Create, setup, run, and destroy Depth To Space operator. 
ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); diff --git a/test/gemm-microkernel-tester.cc b/test/gemm-microkernel-tester.cc index e20a47ae85d..cb92bf8bc3b 100644 --- a/test/gemm-microkernel-tester.cc +++ b/test/gemm-microkernel-tester.cc @@ -401,8 +401,6 @@ void GemmMicrokernelTester::Test( xnnpack::ReplicableRandomDevice rng; auto i32rng = std::bind(std::uniform_int_distribution(-10000, 10000), std::ref(rng)); - auto u8rng = std::bind( - std::uniform_int_distribution(0, std::numeric_limits::max()), std::ref(rng)); xnnpack::Buffer a((m() - 1) * a_stride() + k() + XNN_EXTRA_BYTES / sizeof(uint8_t)); xnnpack::Buffer b(n() * k()); @@ -414,8 +412,8 @@ void GemmMicrokernelTester::Test( xnnpack::Buffer c_ref(m() * n()); for (size_t iteration = 0; iteration < iterations(); iteration++) { - std::generate(a.begin(), a.end(), std::ref(u8rng)); - std::generate(b.begin(), b.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(a.data(), a.size(), rng); + xnnpack::fill_uniform_random_bits(b.data(), b.size(), rng); std::generate(bias.begin(), bias.end(), std::ref(i32rng)); std::fill(packed_w.begin(), packed_w.end(), b_zero_point()); @@ -487,8 +485,6 @@ void GemmMicrokernelTester::Test( xnnpack::ReplicableRandomDevice rng; auto i32rng = std::bind(std::uniform_int_distribution(-10000, 10000), std::ref(rng)); - auto u8rng = std::bind( - std::uniform_int_distribution(0, std::numeric_limits::max()), std::ref(rng)); xnnpack::Buffer a((mr() - 1) * a_stride() + k() + XNN_EXTRA_BYTES / sizeof(uint8_t)); xnnpack::Buffer b(n() * ks() * k()); @@ -504,8 +500,8 @@ void GemmMicrokernelTester::Test( for (size_t iteration = 0; iteration < iterations(); iteration++) { - std::generate(a.begin(), a.end(), std::ref(u8rng)); - std::generate(b.begin(), b.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(a.data(), a.size(), rng); + xnnpack::fill_uniform_random_bits(b.data(), b.size(), rng); std::generate(bias.begin(), bias.end(), std::ref(i32rng)); 
std::fill(packed_w.begin(), packed_w.end(), b_zero_point()); @@ -604,9 +600,6 @@ void GemmMicrokernelTester::Test( xnnpack::ReplicableRandomDevice rng; auto i32rng = std::bind(std::uniform_int_distribution(-10000, 10000), std::ref(rng)); - auto i8rng = std::bind( - std::uniform_int_distribution(std::numeric_limits::min(), std::numeric_limits::max()), - std::ref(rng)); auto w8rng = std::bind( std::uniform_int_distribution(-std::numeric_limits::max(), std::numeric_limits::max()), std::ref(rng)); @@ -623,7 +616,7 @@ void GemmMicrokernelTester::Test( xnnpack::Buffer c_ref(m() * n()); for (size_t iteration = 0; iteration < iterations(); iteration++) { - std::generate(a.begin(), a.end(), std::ref(i8rng)); + xnnpack::fill_uniform_random_bits(a.data(), a.size(), rng); std::generate(b.begin(), b.end(), std::ref(w8rng)); std::generate(bias.begin(), bias.end(), std::ref(i32rng)); @@ -711,9 +704,6 @@ void GemmMicrokernelTester::Test( xnnpack::ReplicableRandomDevice rng; auto i32rng = std::bind(std::uniform_int_distribution(-10000, 10000), std::ref(rng)); - auto i8rng = std::bind( - std::uniform_int_distribution(std::numeric_limits::min(), std::numeric_limits::max()), - std::ref(rng)); auto w8rng = std::bind( std::uniform_int_distribution(-std::numeric_limits::max(), std::numeric_limits::max()), std::ref(rng)); @@ -733,7 +723,7 @@ void GemmMicrokernelTester::Test( for (size_t iteration = 0; iteration < iterations(); iteration++) { - std::generate(a.begin(), a.end(), std::ref(i8rng)); + xnnpack::fill_uniform_random_bits(a.data(), a.size(), rng); std::generate(b.begin(), b.end(), std::ref(w8rng)); std::generate(bias.begin(), bias.end(), std::ref(i32rng)); @@ -2015,9 +2005,6 @@ void GemmMicrokernelTester::Test( xnnpack::ReplicableRandomDevice rng; auto i32rng = std::bind(std::uniform_int_distribution(-10000, 10000), std::ref(rng)); - auto i8rng = std::bind( - std::uniform_int_distribution(std::numeric_limits::min(), std::numeric_limits::max()), - std::ref(rng)); auto w8rng = 
std::bind( std::uniform_int_distribution(-std::numeric_limits::max(), std::numeric_limits::max()), std::ref(rng)); @@ -2032,7 +2019,7 @@ void GemmMicrokernelTester::Test( xnnpack::Buffer c_ref(m() * n()); for (size_t iteration = 0; iteration < iterations(); iteration++) { - std::generate(a.begin(), a.end(), std::ref(i8rng)); + xnnpack::fill_uniform_random_bits(a.data(), a.size(), rng); std::generate(b.begin(), b.end(), std::ref(w8rng)); std::generate(bias.begin(), bias.end(), std::ref(i32rng)); @@ -2106,9 +2093,6 @@ void GemmMicrokernelTester::Test( xnnpack::ReplicableRandomDevice rng; auto i32rng = std::bind(std::uniform_int_distribution(-10000, 10000), std::ref(rng)); - auto i8rng = std::bind( - std::uniform_int_distribution(std::numeric_limits::min(), std::numeric_limits::max()), - std::ref(rng)); auto w8rng = std::bind( std::uniform_int_distribution(-std::numeric_limits::max(), std::numeric_limits::max()), std::ref(rng)); @@ -2127,7 +2111,7 @@ void GemmMicrokernelTester::Test( {//for (size_t iteration = 0; iteration < iterations(); iteration++) { - std::generate(a.begin(), a.end(), std::ref(i8rng)); + xnnpack::fill_uniform_random_bits(a.data(), a.size(), rng); std::generate(b.begin(), b.end(), std::ref(w8rng)); std::generate(bias.begin(), bias.end(), std::ref(i32rng)); diff --git a/test/lut-microkernel-tester.h b/test/lut-microkernel-tester.h index b93822f4f02..80d3f54cc14 100644 --- a/test/lut-microkernel-tester.h +++ b/test/lut-microkernel-tester.h @@ -58,18 +58,16 @@ class LUTMicrokernelTester { void Test(xnn_x8_lut_ukernel_fn lut) const { xnnpack::ReplicableRandomDevice rng; - auto u8rng = std::bind( - std::uniform_int_distribution(0, std::numeric_limits::max()), std::ref(rng)); xnnpack::Buffer x(batch_size() + XNN_EXTRA_BYTES / sizeof(uint8_t)); XNN_ALIGN(64) std::array t; xnnpack::Buffer y(batch_size() + (inplace() ? 
XNN_EXTRA_BYTES / sizeof(uint8_t) : 0)); xnnpack::Buffer y_ref(batch_size()); for (size_t iteration = 0; iteration < iterations(); iteration++) { - std::generate(x.begin(), x.end(), std::ref(u8rng)); - std::generate(t.begin(), t.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(x.data(), x.size(), rng); + xnnpack::fill_uniform_random_bits(t.data(), t.size(), rng); if (inplace()) { - std::generate(y.begin(), y.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(y.data(), y.size(), rng); } const uint8_t* x_data = x.data(); if (inplace()) { diff --git a/test/lut-norm-microkernel-tester.h b/test/lut-norm-microkernel-tester.h index 08647886f5e..a2ad539a1d5 100644 --- a/test/lut-norm-microkernel-tester.h +++ b/test/lut-norm-microkernel-tester.h @@ -55,10 +55,6 @@ class LUTNormMicrokernelTester { void Test(xnn_u8_lut32norm_ukernel_fn lutnorm) const { xnnpack::ReplicableRandomDevice rng; - auto u8rng = [&rng]() { - return std::uniform_int_distribution( - 0, std::numeric_limits::max())(rng); - }; auto u32rng = [&]() { return std::uniform_int_distribution( 1, std::numeric_limits::max() / (257 * n()))(rng); @@ -69,10 +65,10 @@ class LUTNormMicrokernelTester { xnnpack::Buffer y(n()); xnnpack::Buffer y_ref(n()); for (size_t iteration = 0; iteration < iterations(); iteration++) { - std::generate(x.begin(), x.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(x.data(), x.size(), rng); std::generate(t.begin(), t.end(), std::ref(u32rng)); if (inplace()) { - std::generate(y.begin(), y.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(y.data(), y.size(), rng); } const uint8_t* x_data = inplace() ? 
y.data() : x.data(); diff --git a/test/tanh-operator-tester.h b/test/tanh-operator-tester.h index 5d658989c21..017e4602cd0 100644 --- a/test/tanh-operator-tester.h +++ b/test/tanh-operator-tester.h @@ -265,15 +265,12 @@ class TanhOperatorTester { void TestQS8() const { xnnpack::ReplicableRandomDevice rng; - auto i8rng = std::bind( - std::uniform_int_distribution(std::numeric_limits::min(), std::numeric_limits::max()), - std::ref(rng)); xnnpack::Buffer input((batch_size() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(int8_t)); xnnpack::Buffer output((batch_size() - 1) * output_stride() + channels()); xnnpack::Buffer output_ref(batch_size() * channels()); for (size_t iteration = 0; iteration < iterations(); iteration++) { - std::generate(input.begin(), input.end(), std::ref(i8rng)); + xnnpack::fill_uniform_random_bits(input.data(), input.size(), rng); // Compute reference results. for (size_t i = 0; i < batch_size(); i++) { @@ -320,13 +317,12 @@ class TanhOperatorTester { void TestQU8() const { xnnpack::ReplicableRandomDevice rng; - auto u8rng = std::bind(std::uniform_int_distribution(0, std::numeric_limits::max()), rng); xnnpack::Buffer input((batch_size() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint8_t)); xnnpack::Buffer output((batch_size() - 1) * output_stride() + channels()); xnnpack::Buffer output_ref(batch_size() * channels()); for (size_t iteration = 0; iteration < iterations(); iteration++) { - std::generate(input.begin(), input.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(input.data(), input.size(), rng); // Compute reference results. 
for (size_t i = 0; i < batch_size(); i++) { diff --git a/test/vunary-microkernel-tester.cc b/test/vunary-microkernel-tester.cc index 59f1ac8b928..130053b82ee 100644 --- a/test/vunary-microkernel-tester.cc +++ b/test/vunary-microkernel-tester.cc @@ -355,17 +355,13 @@ void VUnaryMicrokernelTester::Test(xnn_s8_vclamp_ukernel_fn vclamp, xnn_init_s8_minmax_params_fn init_params, Default) const { xnnpack::ReplicableRandomDevice rng; - auto i8rng = std::bind(std::uniform_int_distribution( - std::numeric_limits::min(), - std::numeric_limits::max()), - std::ref(rng)); xnnpack::Buffer x(batch_size() + XNN_EXTRA_BYTES / sizeof(int8_t)); xnnpack::Buffer y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(int8_t) : 0)); xnnpack::Buffer y_ref(batch_size()); for (size_t iteration = 0; iteration < iterations(); iteration++) { - std::generate(x.begin(), x.end(), std::ref(i8rng)); + xnnpack::fill_uniform_random_bits(x.data(), x.size(), rng); if (inplace()) { std::copy(x.cbegin(), x.cend(), y.begin()); } @@ -399,16 +395,13 @@ void VUnaryMicrokernelTester::Test(xnn_u8_vclamp_ukernel_fn vclamp, xnn_init_u8_minmax_params_fn init_params, Default) const { xnnpack::ReplicableRandomDevice rng; - auto u8rng = std::bind(std::uniform_int_distribution( - 0, std::numeric_limits::max()), - std::ref(rng)); xnnpack::Buffer x(batch_size() + XNN_EXTRA_BYTES / sizeof(uint8_t)); xnnpack::Buffer y(batch_size() + (inplace() ? 
XNN_EXTRA_BYTES / sizeof(uint8_t) : 0)); xnnpack::Buffer y_ref(batch_size()); for (size_t iteration = 0; iteration < iterations(); iteration++) { - std::generate(x.begin(), x.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(x.data(), x.size(), rng); if (inplace()) { std::copy(x.cbegin(), x.cend(), y.begin()); } diff --git a/test/xx-fill.cc b/test/xx-fill.cc index c67a8ea6e67..c22c5c1fcb1 100644 --- a/test/xx-fill.cc +++ b/test/xx-fill.cc @@ -76,7 +76,7 @@ class FillMicrokernelTester { xnnpack::Buffer output((rows() - 1) * output_stride() + channels()); xnnpack::Buffer output_copy(output.size()); for (size_t iteration = 0; iteration < iterations(); iteration++) { - std::generate(output.begin(), output.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(output.data(), output.size(), rng); std::copy(output.cbegin(), output.cend(), output_copy.begin()); std::array fill_pattern; std::generate(fill_pattern.begin(), fill_pattern.end(), std::ref(u8rng)); diff --git a/test/xx-pad.cc b/test/xx-pad.cc index 5bae8e4c2c3..776bba6419c 100644 --- a/test/xx-pad.cc +++ b/test/xx-pad.cc @@ -114,8 +114,8 @@ class PadMicrokernelTester { (pre_padding() + input_channels() + post_padding()) + (rows() - 1) * output_stride()); for (size_t iteration = 0; iteration < iterations(); iteration++) { - std::generate(input.begin(), input.end(), std::ref(u8rng)); - std::generate(output.begin(), output.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(input.data(), input.size(), rng); + xnnpack::fill_uniform_random_bits(output.data(), output.size(), rng); std::array fill_pattern; std::generate(fill_pattern.begin(), fill_pattern.end(), std::ref(u8rng)); uint32_t fill_value = 0; diff --git a/test/zip-microkernel-tester.h b/test/zip-microkernel-tester.h index 3ccd308e5ce..ac0620b8266 100644 --- a/test/zip-microkernel-tester.h +++ b/test/zip-microkernel-tester.h @@ -56,16 +56,12 @@ class ZipMicrokernelTester { void Test(xnn_x8_zipc_ukernel_fn zip) const { 
xnnpack::ReplicableRandomDevice rng; - auto u8rng = [&rng]() { - return std::uniform_int_distribution( - 0, std::numeric_limits::max())(rng); - }; xnnpack::Buffer x(n() * g()); xnnpack::Buffer x_ref(g() * n()); for (size_t iteration = 0; iteration < iterations(); iteration++) { - std::generate(x.begin(), x.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(x.data(), x.size(), rng); // Call optimized micro-kernel. zip(n() * sizeof(uint8_t), x.data(), x_ref.data()); @@ -82,16 +78,12 @@ class ZipMicrokernelTester { void Test(xnn_x8_zipv_ukernel_fn zip) const { xnnpack::ReplicableRandomDevice rng; - auto u8rng = [&rng]() { - return std::uniform_int_distribution( - 0, std::numeric_limits::max())(rng); - }; xnnpack::Buffer x(n() * g()); xnnpack::Buffer x_ref(g() * n()); for (size_t iteration = 0; iteration < iterations(); iteration++) { - std::generate(x.begin(), x.end(), std::ref(u8rng)); + xnnpack::fill_uniform_random_bits(x.data(), x.size(), rng); // Call optimized micro-kernel. zip(n() * sizeof(uint8_t), g(), x.data(), x_ref.data());