diff --git a/driver/CMakeLists.txt b/driver/CMakeLists.txt index 60d6fe6ce6..02d231b9fb 100644 --- a/driver/CMakeLists.txt +++ b/driver/CMakeLists.txt @@ -31,6 +31,7 @@ add_executable(MIOpenDriver conv_common.cpp dm_activ.cpp dm_adam.cpp + dm_adaptiveavgpool.cpp dm_addlayernorm.cpp dm_bnorm.cpp dm_cat.cpp diff --git a/driver/adaptiveavgpool_driver.hpp b/driver/adaptiveavgpool_driver.hpp new file mode 100644 index 0000000000..062c56fdce --- /dev/null +++ b/driver/adaptiveavgpool_driver.hpp @@ -0,0 +1,486 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#pragma once + +#include "InputFlags.hpp" +#include "driver.hpp" +#include "mloAdaptiveAvgPoolHost.hpp" +#include "random.hpp" +#include "tensor_driver.hpp" +#include "timer.hpp" + +#include <../test/tensor_holder.hpp> +#include <../test/verify.hpp> + +#include <miopen/env.hpp> +#include <miopen/handle.hpp> +#include <miopen/miopen.h> +#include <miopen/tensor.hpp> +#include <vector> + +template <typename Tgpu, typename Tref> +class AdaptiveAvgPoolDriver : public Driver +{ +public: + AdaptiveAvgPoolDriver() : Driver() + { + miopenCreateTensorDescriptor(&inputDesc); + miopenCreateTensorDescriptor(&outputDesc); + miopenCreateTensorDescriptor(&inputGradDesc); + miopenCreateTensorDescriptor(&outputGradDesc); + + data_type = miopen_type<Tgpu>{}; + } + + std::vector<int> ComputeStrides(std::vector<int> input); + int AddCmdLineArgs() override; + int ParseCmdLineArgs(int argc, char* argv[]) override; + InputFlags& GetInputFlags() override { return inflags; } + + int GetandSetData() override; + + int AllocateBuffersAndCopy() override; + + int RunForwardGPU() override; + int RunForwardCPU(); + + int RunBackwardGPU() override; + int RunBackwardCPU(); + + Tref GetTolerance(); + int VerifyBackward() override; + int VerifyForward() override; + ~AdaptiveAvgPoolDriver() override + { + miopenDestroyTensorDescriptor(inputDesc); + miopenDestroyTensorDescriptor(outputDesc); + miopenDestroyTensorDescriptor(inputGradDesc); + miopenDestroyTensorDescriptor(outputGradDesc); + } + +private: + InputFlags inflags; + + int forw; + + miopenTensorDescriptor_t inputDesc; + miopenTensorDescriptor_t outputDesc; + miopenTensorDescriptor_t inputGradDesc; + miopenTensorDescriptor_t outputGradDesc; + + 
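// Device buffers for each tensor, with host-side mirrors (declared below) used for CPU verification. +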
std::unique_ptr<GPUMem> input_dev; + std::unique_ptr<GPUMem> output_dev; + std::unique_ptr<GPUMem> input_grad_dev; + std::unique_ptr<GPUMem> output_grad_dev; + + std::vector<Tgpu> input; + std::vector<Tgpu> output; + std::vector<Tref> output_host; + std::vector<Tgpu> input_grad; + std::vector<Tref> input_grad_host; + std::vector<Tgpu> output_grad; + + size_t N = 1, C = 1, D = 1, H = 1, W = 1, OD = 1, OH = 1, OW = 1; + + std::vector<int> in_dim; + std::vector<int> out_dim; + bool isContiguous; +}; + +template <typename Tgpu, typename Tref> +int AdaptiveAvgPoolDriver<Tgpu, Tref>::ParseCmdLineArgs(int argc, char* argv[]) +{ + inflags.Parse(argc, argv); + isContiguous = inflags.GetValueInt("is-contiguous") == 1; + + if(inflags.GetValueInt("time") == 1) + { + miopenEnableProfiling(GetHandle(), true); + } + return miopenStatusSuccess; +} + +template <typename Tgpu, typename Tref> +int AdaptiveAvgPoolDriver<Tgpu, Tref>::GetandSetData() +{ + in_dim = inflags.GetValueTensor("input_dims").lengths; + std::vector<int> in_stride = ComputeStrides(in_dim); + out_dim = inflags.GetValueTensor("output_dims").lengths; + if(in_dim.size() != out_dim.size() + 2) + { + MIOPEN_THROW(miopenStatusBadParm, + "AdaptiveAvgPool: Input tensor must have exactly two more dimensions (N and C) than the output spatial size."); + } + N = in_dim[0]; + C = in_dim[1]; + std::vector<int> out_dim_final = {static_cast<int>(N), static_cast<int>(C)}; + if(in_dim.size() == 3) + { + H = in_dim[2]; + + OH = out_dim[0]; + out_dim_final.push_back(OH); + } + else if(in_dim.size() == 4) + { + H = in_dim[2]; + W = in_dim[3]; + + OH = out_dim[0]; + OW = out_dim[1]; + out_dim_final.push_back(OH); + out_dim_final.push_back(OW); + } + else if(in_dim.size() == 5) + { + D = in_dim[2]; + H = in_dim[3]; + W = in_dim[4]; + + OD = out_dim[0]; + OH = out_dim[1]; + OW = out_dim[2]; + out_dim_final.push_back(OD); + out_dim_final.push_back(OH); + out_dim_final.push_back(OW); + } + std::vector<int> out_grad_stride = ComputeStrides(out_dim_final); + if(SetTensorNd(inputDesc, in_dim, in_stride, data_type) != miopenStatusSuccess) + MIOPEN_THROW("Error parsing input tensor: " + inflags.GetValueStr("input_dims") + "."); + if(SetTensorNd(outputDesc, out_dim_final, data_type) != miopenStatusSuccess) + MIOPEN_THROW("Error parsing output tensor: " + inflags.GetValueStr("output_dims") + "."); + if(SetTensorNd(outputGradDesc, out_dim_final, out_grad_stride, data_type) != + miopenStatusSuccess) + MIOPEN_THROW("Error parsing output grad tensor: " + inflags.GetValueStr("output_dims") + + "."); + if(SetTensorNd(inputGradDesc, in_dim, data_type) != miopenStatusSuccess) + MIOPEN_THROW("Error parsing input grad tensor: " + inflags.GetValueStr("input_dims") + "."); + + return miopenStatusSuccess; +} + +// Equivalent to: tensor.transpose(0, -1).contiguous().transpose(0, -1) in case contiguous == false +template <typename Tgpu, typename Tref> +std::vector<int> AdaptiveAvgPoolDriver<Tgpu, Tref>::ComputeStrides(std::vector<int> inputDim) +{ + if(!isContiguous) + std::swap(inputDim.front(), inputDim.back()); + std::vector<int> strides(inputDim.size()); + strides.back() = 1; + for(int i = inputDim.size() - 2; i >= 0; --i) + strides[i] = strides[i + 1] * inputDim[i + 1]; + if(!isContiguous) + std::swap(strides.front(), strides.back()); + return strides; +} + +template <typename Tgpu, typename Tref> +int AdaptiveAvgPoolDriver<Tgpu, Tref>::AddCmdLineArgs() +{ + inflags.AddInputFlag("forw", 'F', "1", "Run only Forward AdaptiveAvgPool (Default=1)", "int"); + inflags.AddTensorFlag( + "input_dims", + 'D', + "2x3x7x9x9", + "The dimensional lengths of the 
input tensor: N,C,D,H,W... Example: 2x3x7x9x9."); + inflags.AddTensorFlag( + "output_dims", + 'S', + "5x5x5", + "The dimensional lengths of the output tensor: OD,OH,OW,... Example: 5x5x5."); + inflags.AddInputFlag("is-contiguous", 'c', "1", "is-contiguous (Default=1)", "int"); + inflags.AddInputFlag("iter", 'i', "10", "Number of Iterations (Default=10)", "int"); + inflags.AddInputFlag("verify", 'V', "1", "Verify (Default=1)", "int"); + inflags.AddInputFlag("time", 't', "1", "Time (Default=1)", "int"); + inflags.AddInputFlag( + "wall", 'w', "0", "Wall-clock Time, Requires time == 1 (Default=0)", "int"); + + return miopenStatusSuccess; +} + +template <typename Tgpu, typename Tref> +int AdaptiveAvgPoolDriver<Tgpu, Tref>::AllocateBuffersAndCopy() +{ + size_t input_sz = GetTensorSize(inputDesc); + size_t output_sz = GetTensorSize(outputDesc); + + uint32_t ctx = 0; + + input_dev = std::unique_ptr<GPUMem>(new GPUMem(ctx, input_sz, sizeof(Tgpu))); + output_dev = std::unique_ptr<GPUMem>(new GPUMem(ctx, output_sz, sizeof(Tgpu))); + input_grad_dev = std::unique_ptr<GPUMem>(new GPUMem(ctx, input_sz, sizeof(Tgpu))); + output_grad_dev = std::unique_ptr<GPUMem>(new GPUMem(ctx, output_sz, sizeof(Tgpu))); + + input = std::vector<Tgpu>(input_sz, static_cast<Tgpu>(0)); + output = std::vector<Tgpu>(output_sz, static_cast<Tgpu>(0)); + output_host = std::vector<Tref>(output_sz, static_cast<Tref>(0)); + + input_grad = std::vector<Tgpu>(input_sz, static_cast<Tgpu>(0)); + input_grad_host = std::vector<Tref>(input_sz, static_cast<Tref>(0)); + output_grad = std::vector<Tgpu>(output_sz, static_cast<Tgpu>(0)); + + int status; + + for(int i = 0; i < input_sz; i++) + { + input[i] = prng::gen_A_to_B<Tgpu>(static_cast<Tgpu>(-10.0f), static_cast<Tgpu>(10.0f)); + } + status = input_dev->ToGPU(q, input.data()); + + status |= output_dev->ToGPU(q, output.data()); + + status |= input_grad_dev->ToGPU(q, input_grad.data()); + + for(int i = 0; i < output_sz; i++) + { + output_grad[i] = prng::gen_A_to_B<Tgpu>(static_cast<Tgpu>(-1.0), static_cast<Tgpu>(1.0)); + } + status |= output_grad_dev->ToGPU(q, output_grad.data()); + + if(status != 0) + { + std::cout << "Error copying data to GPU\n" << std::endl; + return miopenStatusAllocFailed; + } + + return miopenStatusSuccess; +} + +template <typename Tgpu, typename Tref> +int AdaptiveAvgPoolDriver<Tgpu, Tref>::RunForwardGPU() +{ + float kernel_total_time = 0.0; + float kernel_first_time = 0.0; + + Timer t; + START_TIME + + for(int i = 0; i < inflags.GetValueInt("iter"); i++) + { + auto status = miopenAdaptiveAvgPoolForward( + GetHandle(), inputDesc, input_dev->GetMem(), outputDesc, output_dev->GetMem()); + MIOPEN_THROW_IF(status != miopenStatusSuccess, "Error in miopenAdaptiveAvgPoolForward"); + + float time = 0.0; + miopenGetKernelTime(GetHandle(), &time); + kernel_total_time += time; + if(i == 0) + kernel_first_time = time; + } + + if(inflags.GetValueInt("time") == 1) + { + STOP_TIME + int iter = inflags.GetValueInt("iter"); + if(WALL_CLOCK) + std::cout << "Wall-clock Time Forward AdaptiveAvgPool Elapsed: " + << t.gettime_ms() / iter << " ms" << std::endl; + + float kernel_average_time = + iter > 1 ? 
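/* exclude the first launch, which typically carries kernel-compilation overhead, from the average */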
(kernel_total_time - kernel_first_time) / (iter - 1) : kernel_first_time; + std::cout << "GPU Kernel Time Forward AdaptiveAvgPool Elapsed: " << kernel_average_time + << " ms" << std::endl; + } + + if(output_dev->FromGPU(GetStream(), output.data()) != 0) + { + std::cerr << "Error copying (output_dev) from GPU, size: " << output_dev->GetSize() + << std::endl; + return miopenStatusInternalError; + } + + return miopenStatusSuccess; +} + +template <typename Tgpu, typename Tref> +int AdaptiveAvgPoolDriver<Tgpu, Tref>::RunForwardCPU() +{ + int status = miopenStatusSuccess; + + if(in_dim.size() == 3) + { + status = mloAdaptiveAvgPoolForward1dRunHost<Tgpu, Tref>( + inputDesc, outputDesc, input.data(), output_host.data(), C, H, OH); + MIOPEN_THROW_IF(status != miopenStatusSuccess, + "Error in mloAdaptiveAvgPoolForward1dRunHost"); + } + else if(in_dim.size() == 4) + { + status = mloAdaptiveAvgPoolForward2dRunHost<Tgpu, Tref>( + inputDesc, outputDesc, input.data(), output_host.data(), C, H, W, OH, OW); + MIOPEN_THROW_IF(status != miopenStatusSuccess, + "Error in mloAdaptiveAvgPoolForward2dRunHost"); + } + else if(in_dim.size() == 5) + { + status = mloAdaptiveAvgPoolForward3dRunHost<Tgpu, Tref>( + inputDesc, outputDesc, input.data(), output_host.data(), C, D, H, W, OD, OH, OW); + MIOPEN_THROW_IF(status != miopenStatusSuccess, + "Error in mloAdaptiveAvgPoolForward3dRunHost"); + } + return status; +} + +template <typename Tgpu, typename Tref> +int AdaptiveAvgPoolDriver<Tgpu, Tref>::RunBackwardGPU() +{ + float kernel_total_time = 0.0; + float kernel_first_time = 0.0; + + Timer t; + START_TIME + + for(int i = 0; i < inflags.GetValueInt("iter"); i++) + { + auto status = miopenAdaptiveAvgPoolBackward(GetHandle(), + outputGradDesc, + output_grad_dev->GetMem(), + inputGradDesc, + input_grad_dev->GetMem()); + MIOPEN_THROW_IF(status != miopenStatusSuccess, "Error in miopenAdaptiveAvgPoolBackward"); + + float time = 0.0; + miopenGetKernelTime(GetHandle(), &time); + kernel_total_time += time; + if(i == 0) + kernel_first_time = time; + } + + if(inflags.GetValueInt("time") == 1) + { + STOP_TIME + int iter = inflags.GetValueInt("iter"); + if(WALL_CLOCK) + std::cout << "Wall-clock Time Backward AdaptiveAvgPool Elapsed: " + << t.gettime_ms() / iter << " ms" << std::endl; + + float kernel_average_time = + iter > 1 ? 
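/* likewise exclude the warm-up launch from the backward average */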
(kernel_total_time - kernel_first_time) / (iter - 1) : kernel_first_time; + std::cout << "GPU Kernel Time Backward AdaptiveAvgPool Elapsed: " << kernel_average_time + << " ms" << std::endl; + } + + if(input_grad_dev->FromGPU(GetStream(), input_grad.data()) != 0) + { + std::cerr << "Error copying (input_grad_dev) from GPU, size: " << input_grad_dev->GetSize() + << std::endl; + return miopenStatusInternalError; + } + + return miopenStatusSuccess; +} + +template <typename Tgpu, typename Tref> +int AdaptiveAvgPoolDriver<Tgpu, Tref>::RunBackwardCPU() +{ + int status = miopenStatusSuccess; + + if(in_dim.size() == 3) + { + status = mloAdaptiveAvgPoolBackward1dRunHost<Tgpu, Tref>( + outputGradDesc, inputGradDesc, output_grad.data(), input_grad_host.data(), C, H, OH); + MIOPEN_THROW_IF(status != miopenStatusSuccess, + "Error in mloAdaptiveAvgPoolBackward1dRunHost"); + } + else if(in_dim.size() == 4) + { + status = mloAdaptiveAvgPoolBackward2dRunHost<Tgpu, Tref>(outputGradDesc, + inputGradDesc, + output_grad.data(), + input_grad_host.data(), + C, + H, + W, + OH, + OW); + MIOPEN_THROW_IF(status != miopenStatusSuccess, + "Error in mloAdaptiveAvgPoolBackward2dRunHost"); + } + else if(in_dim.size() == 5) + { + status = mloAdaptiveAvgPoolBackward3dRunHost<Tgpu, Tref>(outputGradDesc, + inputGradDesc, + output_grad.data(), + input_grad_host.data(), + C, + D, + H, + W, + OD, + OH, + OW); + MIOPEN_THROW_IF(status != miopenStatusSuccess, + "Error in mloAdaptiveAvgPoolBackward3dRunHost"); + } + return status; +} + +template <typename Tgpu, typename Tref> +Tref AdaptiveAvgPoolDriver<Tgpu, Tref>::GetTolerance() +{ + Tref tolerance = std::numeric_limits<Tgpu>::epsilon() * 10; + return tolerance; +} + +template <typename Tgpu, typename Tref> +int AdaptiveAvgPoolDriver<Tgpu, Tref>::VerifyForward() +{ + RunForwardCPU(); + const Tref tolerance = GetTolerance(); + auto error = miopen::rms_range(output_host, output); + + if(!std::isfinite(error) || error > tolerance) + { + std::cout << "Forward AdaptiveAvgPool FAILED: " << error << std::endl; + return EC_VerifyFwd; + } + else + { + std::cout << "Forward AdaptiveAvgPool Verifies on CPU and GPU (err=" << error << ")" + << std::endl; + } + return miopenStatusSuccess; +} + +template <typename Tgpu, typename Tref> +int AdaptiveAvgPoolDriver<Tgpu, Tref>::VerifyBackward() +{ + RunBackwardCPU(); + const Tref tolerance = GetTolerance(); + auto error = miopen::rms_range(input_grad_host, input_grad); + + if(!std::isfinite(error) || error > tolerance) + { + std::cout << "Backward AdaptiveAvgPool FAILED: " << error << std::endl; + return EC_VerifyBwd; + } + else + { + std::cout << "Backward AdaptiveAvgPool Verifies on CPU and GPU (err=" << error << ")" + << std::endl; + } + return miopenStatusSuccess; +} diff --git a/driver/dm_adaptiveavgpool.cpp b/driver/dm_adaptiveavgpool.cpp new file mode 100644 index 0000000000..b6e53ba17e --- /dev/null +++ b/driver/dm_adaptiveavgpool.cpp @@ -0,0 +1,40 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#include "registry_driver_maker.hpp" +#include "adaptiveavgpool_driver.hpp" + +static Driver* makeDriver(const std::string& base_arg) +{ + if(base_arg == "adaptiveavgpool") + return new AdaptiveAvgPoolDriver<float, float>(); + if(base_arg == "adaptiveavgpoolfp16") + return new AdaptiveAvgPoolDriver<float16, float>(); + if(base_arg == "adaptiveavgpoolbfp16") + return new AdaptiveAvgPoolDriver<bfloat16, float>(); + return nullptr; +} + +REGISTER_DRIVER_MAKER(makeDriver); diff --git a/driver/driver.hpp b/driver/driver.hpp index f3ae8fa039..6b21c3fe6e 100644 --- a/driver/driver.hpp +++ b/driver/driver.hpp @@ -314,7 +314,7 @@ inline void PadBufferSize(size_t& sz, int datatype_sz) "adamw[fp16], ampadamw, transformersadamw[fp16], transformersampadamw, " "getitem[bfp16|fp16], reducecalculation[bfp16|fp16], rope[bfp16|fp16], " "prelu[bfp16|fp16], kthvalue[bfp16|fp16], glu[bfp16|fp16], softmarginloss[bfp16|fp16], " - "multimarginloss[bfp16|fp16]\n"); + "multimarginloss[bfp16|fp16], adaptiveavgpool[bfp16|fp16]\n"); exit(0); // NOLINT (concurrency-mt-unsafe) } @@ -352,6 +352,7 @@ inline std::string ParseBaseArg(int argc, char* argv[]) arg != "kthvaluebfp16" && arg != "glu" && arg != "glufp16" && arg != "glubfp16" && arg != "softmarginloss" && arg != "softmarginlossfp16" && arg != "softmarginlossbfp16" && arg != "multimarginloss" && arg != "multimarginlossfp16" && arg != "multimarginlossbfp16" && + arg != "adaptiveavgpool" && arg != "adaptiveavgpoolfp16" && arg != "adaptiveavgpoolbfp16" && arg != "--version") { printf("FAILED: Invalid Base Input Argument\n"); diff --git a/driver/mloAdaptiveAvgPoolHost.hpp b/driver/mloAdaptiveAvgPoolHost.hpp new file mode 100644 index 0000000000..73848ca38f --- /dev/null +++ b/driver/mloAdaptiveAvgPoolHost.hpp @@ -0,0 +1,292 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#pragma once + +#include <cmath> +#include <miopen/tensor.hpp> +#include <miopen/tensor_view_utils.hpp> +#include <../test/ford.hpp> + +template <typename Tgpu, typename Tcheck> +int32_t mloAdaptiveAvgPoolForward1dRunHost(const miopenTensorDescriptor_t inputDesc, + const miopenTensorDescriptor_t outputDesc, + const Tgpu* input, + Tcheck* output, + const size_t C, + const size_t H, + const size_t OH) +{ + auto numel = miopen::deref(outputDesc).GetElementSize(); + + auto input_tv = miopen::get_inner_expanded_tv<3>(miopen::deref(inputDesc)); + auto output_tv = miopen::get_inner_expanded_tv<3>(miopen::deref(outputDesc)); + + par_ford(numel)([&](size_t gid) { + size_t nc = gid / OH, oh = gid % OH; + size_t n = nc / C, c = nc % C; + + size_t h = oh * H / OH; + size_t kh = (((oh + 1) * H + OH - 1) / OH) - h; + + float sum = 0; + for(size_t ih = h; ih < (h + kh); ++ih) + { + sum += static_cast<float>(input[input_tv.get_tensor_view_idx({n, c, ih})]); + } + + output[output_tv.get_tensor_view_idx({n, c, oh})] = static_cast<Tcheck>(sum / kh); + }); + return miopenStatusSuccess; +} + +template <typename Tgpu, typename Tcheck> +int32_t mloAdaptiveAvgPoolForward2dRunHost(const miopenTensorDescriptor_t inputDesc, + const miopenTensorDescriptor_t outputDesc, + const Tgpu* input, + Tcheck* output, + const size_t C, + const size_t H, + const size_t W, + const size_t OH, + const size_t OW) +{ + auto numel = miopen::deref(outputDesc).GetElementSize(); + + auto input_tv = miopen::get_inner_expanded_tv<4>(miopen::deref(inputDesc)); + auto output_tv = miopen::get_inner_expanded_tv<4>(miopen::deref(outputDesc)); + + par_ford(numel)([&](size_t gid) { + size_t ncoh = gid / OW, ow = gid % OW; + size_t nc = ncoh / OH, oh = ncoh % OH; + size_t n = nc / C, c = nc % C; + + size_t h = (oh * H) / OH; + size_t kh = (((oh + 1) * H + OH - 1) / OH) - h; + + size_t w = (ow * W) / OW; + size_t kw = (((ow + 1) * W + OW - 1) / OW) - w; + + float divider = static_cast<float>(kh * kw); + float sum = 0; + for(size_t ih = h; ih < (h + kh); ++ih) + { + for(size_t iw = w; iw < (w + kw); ++iw) + { + sum += static_cast<float>(input[input_tv.get_tensor_view_idx({n, c, ih, iw})]); + } + } + + output[output_tv.get_tensor_view_idx({n, c, oh, ow})] = static_cast<Tcheck>(sum / divider); + }); + return miopenStatusSuccess; +} + +template <typename Tgpu, typename Tcheck> +int32_t 
mloAdaptiveAvgPoolForward3dRunHost(const miopenTensorDescriptor_t inputDesc, + const miopenTensorDescriptor_t outputDesc, + const Tgpu* input, + Tcheck* output, + const size_t C, + const size_t D, + const size_t H, + const size_t W, + const size_t OD, + const size_t OH, + const size_t OW) +{ + auto numel = miopen::deref(outputDesc).GetElementSize(); + + auto input_tv = miopen::get_inner_expanded_tv<5>(miopen::deref(inputDesc)); + auto output_tv = miopen::get_inner_expanded_tv<5>(miopen::deref(outputDesc)); + + par_ford(numel)([&](size_t gid) { + size_t ncodoh = gid / OW, ow = gid % OW; + size_t ncod = ncodoh / OH, oh = ncodoh % OH; + size_t nc = ncod / OD, od = ncod % OD; + size_t n = nc / C, c = nc % C; + + size_t d = (od * D) / OD; + size_t kd = ((od + 1) * D + OD - 1) / OD - d; + + size_t h = (oh * H) / OH; + size_t kh = ((oh + 1) * H + OH - 1) / OH - h; + + size_t w = (ow * W) / OW; + size_t kw = ((ow + 1) * W + OW - 1) / OW - w; + + float sum = 0; + for(size_t id = d; id < (d + kd); ++id) + { + for(size_t ih = h; ih < (h + kh); ++ih) + { + for(size_t iw = w; iw < (w + kw); ++iw) + { + sum += + static_cast<float>(input[input_tv.get_tensor_view_idx({n, c, id, ih, iw})]); + } + } + } + + output[output_tv.get_tensor_view_idx({n, c, od, oh, ow})] = + static_cast<Tcheck>(sum / (kd * kh * kw)); + }); + return miopenStatusSuccess; +} + +template <typename Tgpu, typename Tcheck> +int32_t mloAdaptiveAvgPoolBackward1dRunHost(const miopenTensorDescriptor_t outputGradDesc, + const miopenTensorDescriptor_t inputGradDesc, + const Tgpu* output_grad, + Tcheck* input_grad, + const size_t C, + const size_t H, + const size_t OH) +{ + auto numel = miopen::deref(inputGradDesc).GetElementSize(); + + auto output_grad_tv = miopen::get_inner_expanded_tv<3>(miopen::deref(outputGradDesc)); + auto input_grad_tv = miopen::get_inner_expanded_tv<3>(miopen::deref(inputGradDesc)); + + par_ford(numel)([&](size_t gid) { + size_t nc = gid / H, h = gid % H; + size_t n = nc / C, c = nc % C; + + size_t oh = (h * OH) / H; + size_t koh = (((h + 1) * OH + H - 1) / H) - oh; + + float grad = 0; + for(size_t ih = oh; ih < (oh + koh); ++ih) + { + size_t kh = ((ih + 1) * H + OH - 1) / OH - (ih * H) / OH; + grad += + static_cast<float>(output_grad[output_grad_tv.get_tensor_view_idx({n, c, ih})]) / + kh; + } + input_grad[input_grad_tv.get_tensor_view_idx({n, c, h})] = static_cast<Tcheck>(grad); + }); + return miopenStatusSuccess; +} + +template <typename Tgpu, typename Tcheck> +int32_t mloAdaptiveAvgPoolBackward2dRunHost(const miopenTensorDescriptor_t outputGradDesc, + const miopenTensorDescriptor_t inputGradDesc, + const Tgpu* output_grad, + Tcheck* input_grad, + const size_t C, + const size_t H, + const size_t W, + const size_t OH, + const size_t OW) +{ + auto numel = miopen::deref(inputGradDesc).GetElementSize(); + + auto output_grad_tv = miopen::get_inner_expanded_tv<4>(miopen::deref(outputGradDesc)); + auto input_grad_tv = miopen::get_inner_expanded_tv<4>(miopen::deref(inputGradDesc)); + + par_ford(numel)([&](size_t gid) { + size_t nch = gid / W, w = gid % W; + size_t nc = nch / H, h = nch % H; + size_t n = nc / C, c = nc % C; + + size_t oh = (h * OH) / H; + size_t koh = ((h + 1) * OH + H - 1) / H - oh; + + size_t ow = (w * OW) / W; + size_t kow = ((w + 1) * OW + W - 1) / W - ow; + + float grad = 0; + for(size_t ih = oh; ih < (oh + koh); ++ih) + { + size_t kh = ((ih + 1) * H + OH - 1) / OH - (ih * H) / OH; + for(size_t iw = ow; iw < (ow + kow); ++iw) + { + size_t kw = ((iw + 1) * W + OW - 1) / OW - (iw * W) / OW; + grad += 
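/* each overlapping output cell contributes its gradient divided by its own window area (kh * kw) */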
static_cast<float>( + output_grad[output_grad_tv.get_tensor_view_idx({n, c, ih, iw})]) / + (kh * kw); + } + } + + input_grad[input_grad_tv.get_tensor_view_idx({n, c, h, w})] = static_cast<Tcheck>(grad); + }); + return miopenStatusSuccess; +} + +template <typename Tgpu, typename Tcheck> +int32_t mloAdaptiveAvgPoolBackward3dRunHost(const miopenTensorDescriptor_t outputGradDesc, + const miopenTensorDescriptor_t inputGradDesc, + const Tgpu* output_grad, + Tcheck* input_grad, + const size_t C, + const size_t D, + const size_t H, + const size_t W, + const size_t OD, + const size_t OH, + const size_t OW) +{ + auto numel = miopen::deref(inputGradDesc).GetElementSize(); + + auto output_grad_tv = miopen::get_inner_expanded_tv<5>(miopen::deref(outputGradDesc)); + auto input_grad_tv = miopen::get_inner_expanded_tv<5>(miopen::deref(inputGradDesc)); + + par_ford(numel)([&](size_t gid) { + size_t ncdh = gid / W, w = gid % W; + size_t ncd = ncdh / H, h = ncdh % H; + size_t nc = ncd / D, d = ncd % D; + size_t n = nc / C, c = nc % C; + + size_t od = (d * OD) / D; + size_t kod = ((d + 1) * OD + D - 1) / D - od; + + size_t oh = (h * OH) / H; + size_t koh = ((h + 1) * OH + H - 1) / H - oh; + + size_t ow = (w * OW) / W; + size_t kow = ((w + 1) * OW + W - 1) / W - ow; + + float grad = 0; + for(size_t id = od; id < (od + kod); ++id) + { + size_t kd = ((id + 1) * D + OD - 1) / OD - (id * D) / OD; + for(size_t ih = oh; ih < (oh + koh); ++ih) + { + size_t kh = ((ih + 1) * H + OH - 1) / OH - (ih * H) / OH; + for(size_t iw = ow; iw < (ow + kow); ++iw) + { + size_t kw = ((iw + 1) * W + OW - 1) / OW - (iw * W) / OW; + grad += + static_cast<float>( + output_grad[output_grad_tv.get_tensor_view_idx({n, c, id, ih, iw})]) / + (kd * kh * kw); + } + } + } + + input_grad[input_grad_tv.get_tensor_view_idx({n, c, d, h, w})] = static_cast<Tcheck>(grad); + }); + return miopenStatusSuccess; +} diff --git a/include/miopen/miopen.h b/include/miopen/miopen.h index 56a5fc4035..f67e61d87c 100644 --- a/include/miopen/miopen.h +++ b/include/miopen/miopen.h @@ -8226,6 +8226,47 @@ MIOPEN_EXPORT miopenStatus_t miopenMultiMarginLossForward(miopenHandle_t handle, // CLOSEOUT LossFunction DOXYGEN GROUP #endif // MIOPEN_BETA_API +#ifdef MIOPEN_BETA_API +// adaptiveavgpool APIs +/** @addtogroup pooling + * + * @{ + */ + +/*! @brief Execute an adaptiveavgpool forward layer + * + * @param handle MIOpen handle (input) + * @param inputDesc Tensor descriptor for input tensor (input) + * @param input Data tensor input (input) + * @param outputDesc Tensor descriptor for output tensor (input) + * @param output Data tensor output (output) + * @return miopenStatus_t + */ +MIOPEN_EXPORT miopenStatus_t miopenAdaptiveAvgPoolForward(miopenHandle_t handle, + const miopenTensorDescriptor_t inputDesc, + const void* input, + const miopenTensorDescriptor_t outputDesc, + void* output); + +/*! 
@brief Execute an adaptiveavgpool backward layer + * + * @param handle MIOpen handle (input) + * @param outputGradDesc Tensor descriptor for output grad tensor (input) + * @param output_grad Data tensor output grad (input) + * @param inputGradDesc Tensor descriptor for input grad tensor (input) + * @param input_grad Data tensor input grad (output) + * @return miopenStatus_t + */ +MIOPEN_EXPORT miopenStatus_t +miopenAdaptiveAvgPoolBackward(miopenHandle_t handle, + const miopenTensorDescriptor_t outputGradDesc, + const void* output_grad, + const miopenTensorDescriptor_t inputGradDesc, + void* input_grad); +/** @} */ +// CLOSEOUT pooling DOXYGEN GROUP +#endif // MIOPEN_BETA_API + #ifdef __cplusplus } #endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 36befe40ab..3c5d2acbd4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -84,6 +84,8 @@ set( MIOpen_Source activ_api.cpp adam/problem_description.cpp adam_api.cpp + adaptiveavgpool_api.cpp + adaptiveavgpool/problem_description.cpp addlayernorm_api.cpp api/find2_0_commons.cpp base64.cpp @@ -209,6 +211,12 @@ set( MIOpen_Source solver/activ/fwd_1.cpp solver/adam/adam.cpp solver/adam/transformers_adam_w.cpp + solver/adaptiveavgpool/backward_adaptiveavgpool_1d.cpp + solver/adaptiveavgpool/backward_adaptiveavgpool_2d.cpp + solver/adaptiveavgpool/backward_adaptiveavgpool_3d.cpp + solver/adaptiveavgpool/forward_adaptiveavgpool_1d.cpp + solver/adaptiveavgpool/forward_adaptiveavgpool_2d.cpp + solver/adaptiveavgpool/forward_adaptiveavgpool_3d.cpp solver/batchnorm/backward_ck.cpp solver/batchnorm/backward_per_activation.cpp solver/batchnorm/backward_per_activation_fused.cpp @@ -523,6 +531,7 @@ if( MIOPEN_BACKEND MATCHES "OpenCL" OR MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN ${GPU_BATCHED_TRANSPOSE_KERNEL_HIP} ${GPU_GENERAL_TENSOR_REORDER_KERNEL_HIP_SOURCE} kernels/MIOpenAdam.cpp + kernels/MIOpenAdaptiveAvgPool.cpp kernels/MIOpenCat.cpp kernels/MIOpenCheckNumerics.cpp kernels/MIOpenBatchNormActivBwdPerAct.cl @@ -674,6 +683,7 @@ if( MIOPEN_BACKEND MATCHES "OpenCL" OR MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN list(APPEND MIOpen_Source activ.cpp adam.cpp + adaptiveavgpool.cpp addlayernorm.cpp cat.cpp exec_utils.cpp diff --git a/src/adaptiveavgpool.cpp b/src/adaptiveavgpool.cpp new file mode 100644 index 0000000000..f5ff954740 --- /dev/null +++ b/src/adaptiveavgpool.cpp @@ -0,0 +1,98 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#include <miopen/adaptiveavgpool.hpp> +#include <miopen/kernel_cache.hpp> +#include <miopen/float_equal.hpp> +#include <miopen/tensor.hpp> +#include <miopen/adaptiveavgpool/invoke_params.hpp> +#include <miopen/adaptiveavgpool/solvers.hpp> +#include <miopen/find_solution.hpp> + +namespace miopen { + +namespace adaptiveavgpool { + +miopenStatus_t AdaptiveAvgPoolForward(Handle& handle, + const TensorDescriptor& inputDesc, + ConstData_t input, + const TensorDescriptor& outputDesc, + Data_t output) +{ + const auto problem = adaptiveavgpool::FwdProblemDescription{inputDesc, outputDesc}; + + const auto invoke_params = [&]() { + auto tmp = adaptiveavgpool::FwdInvokeParams{}; + tmp.inputDesc = &inputDesc; + tmp.outputDesc = &outputDesc; + + tmp.input = input; + tmp.output = output; + + return tmp; + }(); + const auto algo = AlgorithmName{"AdaptiveAvgPoolForward"}; + const auto solvers = + solver::SolverContainer<solver::adaptiveavgpool::AdaptiveAvgPoolForward1d, + solver::adaptiveavgpool::AdaptiveAvgPoolForward2d, + solver::adaptiveavgpool::AdaptiveAvgPoolForward3d>{}; + + solvers.ExecutePrimitive(handle, problem, algo, invoke_params); + + return miopenStatusSuccess; +} + +miopenStatus_t AdaptiveAvgPoolBackward(Handle& handle, + const TensorDescriptor& outputGradDesc, + ConstData_t output_grad, + const TensorDescriptor& inputGradDesc, + Data_t input_grad) +{ + const auto problem = adaptiveavgpool::BwdProblemDescription{outputGradDesc, inputGradDesc}; + + const auto invoke_params = [&]() { + auto tmp = adaptiveavgpool::BwdInvokeParams{}; + tmp.outputGradDesc = &outputGradDesc; + tmp.inputGradDesc = &inputGradDesc; + + tmp.output_grad = output_grad; + tmp.input_grad = input_grad; + + return tmp; + }(); + const auto algo = AlgorithmName{"AdaptiveAvgPoolBackward"}; + const auto solvers = + solver::SolverContainer<solver::adaptiveavgpool::AdaptiveAvgPoolBackward1d, + solver::adaptiveavgpool::AdaptiveAvgPoolBackward2d, + solver::adaptiveavgpool::AdaptiveAvgPoolBackward3d>{}; + + solvers.ExecutePrimitive(handle, problem, algo, invoke_params); + + return miopenStatusSuccess; +} + +} // namespace adaptiveavgpool + +} // namespace miopen diff --git a/src/adaptiveavgpool/problem_description.cpp b/src/adaptiveavgpool/problem_description.cpp new file mode 100644 index 0000000000..21800d4cd0 --- /dev/null +++ b/src/adaptiveavgpool/problem_description.cpp @@ -0,0 +1,86 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#include <miopen/adaptiveavgpool/problem_description.hpp> +#include <miopen/names.hpp> +#include <sstream> + +namespace miopen { + +namespace adaptiveavgpool { + +inline std::ostream& operator<<(std::ostream& os, const std::vector<size_t>& v) +{ + os << '{'; + for(size_t i = 0; i < v.size(); ++i) + { + if(i != 0) + os << ','; + os << v[i]; + } + os << '}'; + return os; +} + +NetworkConfig FwdProblemDescription::MakeNetworkConfig() const +{ + auto input_size = inputDesc.GetLengths(); + auto output_size = outputDesc.GetLengths(); + auto input_dtype = inputDesc.GetType(); + + std::ostringstream ss; + + ss << "adaptiveavgpool_fwd"; + ss << "-input_dtype" << input_dtype; + ss << "-Is" << input_size; + ss << "-Os" << output_size; + ss << "-Con" << IsAllContiguous(); + + return NetworkConfig{ss.str()}; +} + +NetworkConfig BwdProblemDescription::MakeNetworkConfig() const +{ + auto input_grad_size = inputGradDesc.GetLengths(); + auto output_grad_size = outputGradDesc.GetLengths(); + auto input_grad_stride = inputGradDesc.GetStrides(); + auto output_grad_stride = outputGradDesc.GetStrides(); + auto input_dtype = inputGradDesc.GetType(); + + std::ostringstream ss; + + ss << "adaptiveavgpool_bwd"; + ss << "-input_dtype" << input_dtype; + ss << "-dIs" << input_grad_size; + ss << "-dOs" << output_grad_size; + ss << "-Con" << IsAllContiguous(); + + return NetworkConfig{ss.str()}; +} + +} // namespace adaptiveavgpool + +} // namespace miopen diff --git a/src/adaptiveavgpool_api.cpp b/src/adaptiveavgpool_api.cpp new file mode 100644 index 0000000000..0f27507f46 --- /dev/null +++ b/src/adaptiveavgpool_api.cpp @@ -0,0 +1,113 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ + +#include <miopen/adaptiveavgpool.hpp> +#include <miopen/errors.hpp> +#include <miopen/handle.hpp> +#include <miopen/logger.hpp> +#include <miopen/tensor_ops.hpp> + +inline std::ostream& operator<<(std::ostream& os, const std::vector<size_t>& v) +{ + os << '{'; + for(size_t i = 0; i < v.size(); ++i) + { + if(i != 0) + os << ','; + os << v[i]; + } + os << '}'; + return os; +} + +static void LogCmdAdaptiveAvgPool(const miopenTensorDescriptor_t xDesc, + const miopenTensorDescriptor_t oDesc, + const bool is_fwd) +{ + if(miopen::IsLoggingCmd()) + { + std::stringstream ss; + auto dtype = miopen::deref(xDesc).GetType(); + if(dtype == miopenHalf) + { + ss << "adaptiveavgpoolfp16"; + } + else if(dtype == miopenFloat) + { + ss << "adaptiveavgpool"; + } + else if(dtype == miopenBFloat16) + { + ss << "adaptiveavgpoolbfp16"; + } + + MIOPEN_LOG_FUNCTION(xDesc, oDesc, is_fwd); + ss << " -Is " << miopen::deref(xDesc).GetLengths(); + ss << " -Os " << miopen::deref(oDesc).GetLengths(); + ss << " -Si " << miopen::deref(xDesc).GetStrides(); + ss << " -So " << miopen::deref(oDesc).GetStrides(); + ss << " -F " << ((is_fwd) ? "1" : "2"); + + MIOPEN_LOG_DRIVER_CMD(ss.str()); + } +} + +extern "C" miopenStatus_t miopenAdaptiveAvgPoolForward(miopenHandle_t handle, + const miopenTensorDescriptor_t inputDesc, + const void* input, + const miopenTensorDescriptor_t outputDesc, + void* output) +{ + MIOPEN_LOG_FUNCTION(handle, inputDesc, input, outputDesc, output); + + LogCmdAdaptiveAvgPool(inputDesc, outputDesc, true); + return miopen::try_([&] { + miopen::adaptiveavgpool::AdaptiveAvgPoolForward(miopen::deref(handle), + miopen::deref(inputDesc), + DataCast(input), + miopen::deref(outputDesc), + DataCast(output)); + }); +} + +extern "C" miopenStatus_t +miopenAdaptiveAvgPoolBackward(miopenHandle_t handle, + const miopenTensorDescriptor_t outputGradDesc, + const void* output_grad, + const miopenTensorDescriptor_t inputGradDesc, + void* input_grad) +{ + MIOPEN_LOG_FUNCTION(handle, outputGradDesc, output_grad, inputGradDesc, input_grad); + + LogCmdAdaptiveAvgPool(inputGradDesc, outputGradDesc, false); + return miopen::try_([&] { + miopen::adaptiveavgpool::AdaptiveAvgPoolBackward(miopen::deref(handle), + miopen::deref(outputGradDesc), + DataCast(output_grad), + miopen::deref(inputGradDesc), + DataCast(input_grad)); + }); +} diff --git a/src/include/miopen/adaptiveavgpool.hpp b/src/include/miopen/adaptiveavgpool.hpp new file mode 100644 index 0000000000..7f04af7b8d --- /dev/null +++ b/src/include/miopen/adaptiveavgpool.hpp @@ -0,0 +1,51 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#pragma once +#include <miopen/common.hpp> + +namespace miopen { + +struct Handle; +struct TensorDescriptor; + +namespace adaptiveavgpool { + +MIOPEN_INTERNALS_EXPORT miopenStatus_t AdaptiveAvgPoolForward(Handle& handle, + const TensorDescriptor& inputDesc, + ConstData_t input, + const TensorDescriptor& outputDesc, + Data_t output); + +MIOPEN_INTERNALS_EXPORT miopenStatus_t +AdaptiveAvgPoolBackward(Handle& handle, + const TensorDescriptor& outputGradDesc, + ConstData_t output_grad, + const TensorDescriptor& inputGradDesc, + Data_t input_grad); + +} // namespace adaptiveavgpool + +} // namespace miopen diff --git a/src/include/miopen/adaptiveavgpool/invoke_params.hpp b/src/include/miopen/adaptiveavgpool/invoke_params.hpp new file mode 100644 index 0000000000..b9a30f7236 --- /dev/null +++ b/src/include/miopen/adaptiveavgpool/invoke_params.hpp @@ -0,0 +1,67 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ + +#pragma once + +#include <miopen/common.hpp> +#include <miopen/invoke_params.hpp> +#include <miopen/tensor.hpp> + +namespace miopen { +namespace adaptiveavgpool { + +struct FwdInvokeParams : public miopen::InvokeParams +{ + + FwdInvokeParams() = default; + + const TensorDescriptor* inputDesc = nullptr; + const TensorDescriptor* outputDesc = nullptr; + + ConstData_t input = nullptr; + Data_t output = nullptr; + + std::size_t GetWorkspaceSize() const { return 0; } + Data_t GetWorkspace() const { return nullptr; } +}; + +struct BwdInvokeParams : public miopen::InvokeParams +{ + + BwdInvokeParams() = default; + + const TensorDescriptor* outputGradDesc = nullptr; + const TensorDescriptor* inputGradDesc = nullptr; + + ConstData_t output_grad = nullptr; + Data_t input_grad = nullptr; + + std::size_t GetWorkspaceSize() const { return 0; } + Data_t GetWorkspace() const { return nullptr; } +}; + +} // namespace adaptiveavgpool +} // namespace miopen diff --git a/src/include/miopen/adaptiveavgpool/problem_description.hpp b/src/include/miopen/adaptiveavgpool/problem_description.hpp new file mode 100644 index 0000000000..d8b112e46e --- /dev/null +++ b/src/include/miopen/adaptiveavgpool/problem_description.hpp @@ -0,0 +1,225 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ + +#pragma once + +#include <miopen/problem_description_base.hpp> +#include <miopen/activ.hpp> +#include <miopen/tensor.hpp> + +namespace miopen { + +struct NetworkConfig; + +namespace adaptiveavgpool { + +struct FwdProblemDescription : ProblemDescriptionBase +{ + FwdProblemDescription(const TensorDescriptor& inputDesc_, const TensorDescriptor& outputDesc_) + : inputDesc(inputDesc_), outputDesc(outputDesc_) + { + IsValidLength(); + IsValidDims(); + IsSameType(); + } + + auto GetInputDesc() const { return inputDesc; } + auto GetOutputDesc() const { return outputDesc; } + auto GetNtotal() const { return outputDesc.GetElementSize(); } + + bool IsValidLength() const + { + auto input_dims = inputDesc.GetLengths().size(); + if(outputDesc.GetLengths()[0] != inputDesc.GetLengths()[0] || + outputDesc.GetLengths()[1] != inputDesc.GetLengths()[1] || + outputDesc.GetLengths().size() != input_dims) + { + MIOPEN_THROW(miopenStatusBadParm, + "AdaptiveAvgPool: Input and output tensor sizes do not match."); + } + + if(input_dims == 3) + { + if(outputDesc.GetLengths()[2] > inputDesc.GetLengths()[2]) + { + MIOPEN_THROW(miopenStatusBadParm, + "AdaptiveAvgPool: Input tensor sizes are too small compared to output " + "tensor sizes."); + } + } + else if(input_dims == 4) + { + if(outputDesc.GetLengths()[2] > inputDesc.GetLengths()[2] || + outputDesc.GetLengths()[3] > inputDesc.GetLengths()[3]) + { + MIOPEN_THROW(miopenStatusBadParm, + "AdaptiveAvgPool: Input tensor sizes are too small compared to output " + "tensor sizes."); + } + } + else if(input_dims == 5) + { + if(outputDesc.GetLengths()[2] > inputDesc.GetLengths()[2] || + outputDesc.GetLengths()[3] > inputDesc.GetLengths()[3] || + outputDesc.GetLengths()[4] > inputDesc.GetLengths()[4]) + { + MIOPEN_THROW(miopenStatusBadParm, + "AdaptiveAvgPool: Input tensor sizes are too small compared to output " + "tensor sizes."); + } + } + + return true; + } + + bool IsValidDims() const + { + if(inputDesc.GetLengths().size() > 5 || inputDesc.GetLengths().size() < 3) + { + MIOPEN_THROW(miopenStatusBadParm, + "AdaptiveAvgPool: Only 3D, 4D and 5D tensors are supported."); + } + + return true; + } + + bool IsAllContiguous() const { return inputDesc.IsContiguous() && outputDesc.IsContiguous(); } + + bool IsSameType() const + { + if(inputDesc.GetType() != outputDesc.GetType()) + { + MIOPEN_THROW(miopenStatusBadParm, + "AdaptiveAvgPool: Input and output tensor types do not match."); + } + + return true; + } + + NetworkConfig MakeNetworkConfig() const override; + +protected: + TensorDescriptor inputDesc; + TensorDescriptor outputDesc; +}; + +struct BwdProblemDescription : ProblemDescriptionBase +{ + BwdProblemDescription(const TensorDescriptor& outputGradDesc_, + const TensorDescriptor& inputGradDesc_) + : outputGradDesc(outputGradDesc_), inputGradDesc(inputGradDesc_) + { + IsValidLength(); + IsValidDims(); + IsSameType(); + } + + auto GetOutputGradDesc() const { return outputGradDesc; } + auto GetInputGradDesc() const { return inputGradDesc; } + auto GetNtotal() const { return inputGradDesc.GetElementSize(); } + + bool IsValidLength() const + { + auto input_dims = inputGradDesc.GetLengths().size(); + if(outputGradDesc.GetLengths()[0] != inputGradDesc.GetLengths()[0] || + outputGradDesc.GetLengths()[1] != inputGradDesc.GetLengths()[1] || + outputGradDesc.GetLengths().size() != input_dims) + { + MIOPEN_THROW(miopenStatusBadParm, + "AdaptiveAvgPool: Input grad and output grad tensor sizes do not 
match."); + } + + if(input_dims == 3) + { + if(outputGradDesc.GetLengths()[2] > inputGradDesc.GetLengths()[2]) + { + MIOPEN_THROW(miopenStatusBadParm, + "AdaptiveAvgPool: Input grad tensor sizes are too small compare to " + "output grad tensor sizes."); + } + } + else if(input_dims == 4) + { + if(outputGradDesc.GetLengths()[2] > inputGradDesc.GetLengths()[2] || + outputGradDesc.GetLengths()[3] > inputGradDesc.GetLengths()[3]) + { + MIOPEN_THROW(miopenStatusBadParm, + "AdaptiveAvgPool: Input grad tensor sizes are too small compare to " + "output grad tensor sizes."); + } + } + else if(input_dims == 5) + { + if(outputGradDesc.GetLengths()[2] > inputGradDesc.GetLengths()[2] || + outputGradDesc.GetLengths()[3] > inputGradDesc.GetLengths()[3] || + outputGradDesc.GetLengths()[4] > inputGradDesc.GetLengths()[4]) + { + MIOPEN_THROW(miopenStatusBadParm, + "AdaptiveAvgPool: Input grad tensor sizes are too small compare to " + "output grad tensor sizes."); + } + } + + return true; + } + + bool IsValidDims() const + { + if(inputGradDesc.GetLengths().size() > 5 || inputGradDesc.GetLengths().size() < 3) + { + MIOPEN_THROW(miopenStatusBadParm, + "AdaptiveAvgPool: Only 3D, 4D and 5D tensors are supported."); + } + + return true; + } + + bool IsAllContiguous() const + { + return inputGradDesc.IsContiguous() && outputGradDesc.IsContiguous(); + } + + bool IsSameType() const + { + if(inputGradDesc.GetType() != outputGradDesc.GetType()) + { + MIOPEN_THROW(miopenStatusBadParm, + "AdaptiveAvgPool: Input grad and output grad tensor types do not match."); + } + + return true; + } + + NetworkConfig MakeNetworkConfig() const override; + +protected: + TensorDescriptor outputGradDesc; + TensorDescriptor inputGradDesc; +}; + +} // namespace adaptiveavgpool + +} // namespace miopen diff --git a/src/include/miopen/adaptiveavgpool/solvers.hpp b/src/include/miopen/adaptiveavgpool/solvers.hpp new file mode 100644 index 0000000000..980bb1a330 --- /dev/null +++ b/src/include/miopen/adaptiveavgpool/solvers.hpp @@ -0,0 +1,160 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ + +#pragma once + +#include <miopen/conv_solution.hpp> +#include <miopen/execution_context.hpp> +#include <miopen/solver.hpp> +#include <miopen/adaptiveavgpool/problem_description.hpp> +#include <miopen/kernel_build_params.hpp> +#include <miopen/kernel_info.hpp> +#include <miopen/mlo_internal.hpp> + +namespace miopen { + +namespace solver { + +namespace adaptiveavgpool { + +const auto make_hip_kernel = [](std::vector<size_t> localsize, + std::vector<size_t> gridsize, + std::string kernel_file, + std::string kernel_name, + KernelBuildParameters build_params) { + while(localsize.size() < 3) + localsize.push_back(1); + while(gridsize.size() < 3) + gridsize.push_back(1); + for(int i = 0; i < localsize.size(); ++i) + gridsize[i] = AlignUp(gridsize[i], localsize[i]); + return KernelInfo{ + build_params.GenerateFor(kbp::HIP{}), localsize, gridsize, kernel_file, kernel_name}; +}; + +using AdaptiveAvgPoolForward = + NonTunableSolverBase<ExecutionContext, miopen::adaptiveavgpool::FwdProblemDescription>; + +using AdaptiveAvgPoolBackward = + NonTunableSolverBase<ExecutionContext, miopen::adaptiveavgpool::BwdProblemDescription>; + +// FORWARD +struct AdaptiveAvgPoolForward1d final : AdaptiveAvgPoolForward +{ + const std::string& SolverDbId() const override + { + return GetSolverDbId<AdaptiveAvgPoolForward1d>(); + } + + bool IsApplicable(const ExecutionContext& context, + const miopen::adaptiveavgpool::FwdProblemDescription& problem) const override; + + ConvSolution + GetSolution(const ExecutionContext& context, + const miopen::adaptiveavgpool::FwdProblemDescription& problem) const override; +}; + +struct AdaptiveAvgPoolForward2d final : AdaptiveAvgPoolForward +{ + const std::string& SolverDbId() const override + { + return GetSolverDbId<AdaptiveAvgPoolForward2d>(); + } + + bool IsApplicable(const ExecutionContext& context, + const miopen::adaptiveavgpool::FwdProblemDescription& problem) const override; + + ConvSolution + GetSolution(const ExecutionContext& context, + const miopen::adaptiveavgpool::FwdProblemDescription& problem) const override; +}; + +struct AdaptiveAvgPoolForward3d final : AdaptiveAvgPoolForward +{ + const std::string& SolverDbId() const override + { + return GetSolverDbId<AdaptiveAvgPoolForward3d>(); + } + + bool IsApplicable(const ExecutionContext& context, + const miopen::adaptiveavgpool::FwdProblemDescription& problem) const override; + + ConvSolution + GetSolution(const ExecutionContext& context, + const miopen::adaptiveavgpool::FwdProblemDescription& problem) const override; +}; + +// BACKWARD +struct AdaptiveAvgPoolBackward1d final : AdaptiveAvgPoolBackward +{ + const std::string& SolverDbId() const override + { + return GetSolverDbId<AdaptiveAvgPoolBackward1d>(); + } + + bool IsApplicable(const ExecutionContext& context, + const miopen::adaptiveavgpool::BwdProblemDescription& problem) const override; + + ConvSolution + GetSolution(const ExecutionContext& context, + const miopen::adaptiveavgpool::BwdProblemDescription& problem) const override; +}; + +struct AdaptiveAvgPoolBackward2d final : AdaptiveAvgPoolBackward +{ + const std::string& SolverDbId() const override + { + return GetSolverDbId<AdaptiveAvgPoolBackward2d>(); + } + + bool IsApplicable(const ExecutionContext& context, + const miopen::adaptiveavgpool::BwdProblemDescription& problem) const override; + + ConvSolution + GetSolution(const ExecutionContext& context, + const miopen::adaptiveavgpool::BwdProblemDescription& problem) const 
override; +}; + +struct AdaptiveAvgPoolBackward3d final : AdaptiveAvgPoolBackward +{ + const std::string& SolverDbId() const override + { + return GetSolverDbId<AdaptiveAvgPoolBackward3d>(); + } + + bool IsApplicable(const ExecutionContext& context, + const miopen::adaptiveavgpool::BwdProblemDescription& problem) const override; + + ConvSolution + GetSolution(const ExecutionContext& context, + const miopen::adaptiveavgpool::BwdProblemDescription& problem) const override; +}; + +} // namespace adaptiveavgpool + +} // namespace solver + +} // namespace miopen diff --git a/src/include/miopen/solver_id.hpp b/src/include/miopen/solver_id.hpp index 76a13b051c..efc60720eb 100644 --- a/src/include/miopen/solver_id.hpp +++ b/src/include/miopen/solver_id.hpp @@ -64,7 +64,8 @@ enum class Primitive ReLU, Kthvalue, SoftMarginLoss, - MultiMarginLoss + MultiMarginLoss, + AdaptiveAvgPool, }; struct MIOPEN_INTERNALS_EXPORT Id diff --git a/src/kernels/MIOpenAdaptiveAvgPool.cpp b/src/kernels/MIOpenAdaptiveAvgPool.cpp new file mode 100644 index 0000000000..273ec99087 --- /dev/null +++ b/src/kernels/MIOpenAdaptiveAvgPool.cpp @@ -0,0 +1,355 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ +#ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS +#include <hip/hip_fp16.h> +#include <hip/hip_runtime.h> +#endif + +#include "float_types.h" +#include "tensor_view.hpp" + +template <typename TI, typename TO> +__device__ void adaptiveAvgPoolForward1d(const TI* __restrict__ input, + TO* __restrict__ output, + uint64_t N, + uint64_t C, + uint64_t H, + uint64_t OH, + tensor_view_t<3> input_tv, + tensor_view_t<3> output_tv) +{ + uint64_t gid = threadIdx.x + blockIdx.x * blockDim.x; + uint64_t nc = gid / OH, oh = gid % OH; + uint64_t n = nc / C, c = nc % C; + if(n >= N) + return; + + uint64_t h = oh * H / OH; + uint64_t kh = (((oh + 1) * H + OH - 1) / OH) - h; + + FLOAT_ACCUM sum = 0; + for(uint64_t ih = h; ih < (h + kh); ++ih) + { + sum += CVT_FLOAT2ACCUM(input[input_tv.get_tensor_view_idx({n, c, ih})]); + } + output[output_tv.get_tensor_view_idx({n, c, oh})] = CVT_ACCUM2FLOAT(sum / kh); +} +extern "C" __global__ void AdaptiveAvgPoolForward1d(const INPUT_TYPE* __restrict__ input, + OUTPUT_TYPE* __restrict__ output, + uint64_t N, + uint64_t C, + uint64_t H, + uint64_t OH, + tensor_view_t<3> input_tv, + tensor_view_t<3> output_tv) +{ + adaptiveAvgPoolForward1d<INPUT_TYPE, OUTPUT_TYPE>( + input, output, N, C, H, OH, input_tv, output_tv); +} + +template <typename TI, typename TO> +__device__ void adaptiveAvgPoolBackward1d(const TI* __restrict__ output_grad, + TO* __restrict__ input_grad, + uint64_t N, + uint64_t C, + uint64_t H, + uint64_t OH, + tensor_view_t<3> output_grad_tv, + tensor_view_t<3> input_grad_tv) +{ + uint64_t gid = threadIdx.x + blockIdx.x * blockDim.x; + uint64_t nc = gid / H, h = gid % H; + uint64_t n = nc / C, c = nc % C; + if(n >= N) + return; + + uint64_t oh = (h * OH) / H; + uint64_t koh = (((h + 1) * OH + H - 1) / H) - oh; + + FLOAT_ACCUM grad = 0; + for(uint64_t ih = oh; ih < (oh + koh); ++ih) + { + uint64_t kh = ((ih + 1) * H + OH - 1) / OH - (ih * H) / OH; + grad += CVT_FLOAT2ACCUM(output_grad[output_grad_tv.get_tensor_view_idx({n, c, ih})]) / kh; + } + input_grad[input_grad_tv.get_tensor_view_idx({n, c, h})] = CVT_ACCUM2FLOAT(grad); +} + +extern "C" __global__ void AdaptiveAvgPoolBackward1d(const INPUT_TYPE* __restrict__ output_grad, + OUTPUT_TYPE* __restrict__ input_grad, + uint64_t N, + uint64_t C, + uint64_t H, + uint64_t OH, + tensor_view_t<3> output_grad_tv, + tensor_view_t<3> input_grad_tv) +{ + adaptiveAvgPoolBackward1d<INPUT_TYPE, OUTPUT_TYPE>( + output_grad, input_grad, N, C, H, OH, output_grad_tv, input_grad_tv); +} + +template <typename TI, typename TO> +__device__ void adaptiveAvgPoolForward2d(const TI* __restrict__ input, + TO* __restrict__ output, + uint64_t N, + uint64_t C, + uint64_t H, + uint64_t W, + uint64_t OH, + uint64_t OW, + tensor_view_t<4> input_tv, + tensor_view_t<4> output_tv) +{ + uint64_t gid = threadIdx.x + blockIdx.x * blockDim.x; + uint64_t ncoh = gid / OW, ow = gid % OW; + uint64_t nc = ncoh / OH, oh = ncoh % OH; + uint64_t n = nc / C, c = nc % C; + + if(n >= N) + return; + + uint64_t h = (oh * H) / OH; + uint64_t kh = (((oh + 1) * H + OH - 1) / OH) - h; + + uint64_t w = (ow * W) / OW; + uint64_t kw = (((ow + 1) * W + OW - 1) / OW) - w; + + FLOAT_ACCUM divider = static_cast<FLOAT_ACCUM>(kh * kw); + FLOAT_ACCUM sum = 0; + for(uint64_t ih = h; ih < (h + kh); ++ih) + { + for(uint64_t iw = w; iw < (w + kw); ++iw) + { + sum += CVT_FLOAT2ACCUM(input[input_tv.get_tensor_view_idx({n, c, ih, iw})]); + } + } + output[output_tv.get_tensor_view_idx({n, c, oh, ow})] = 
CVT_ACCUM2FLOAT(sum / divider); +} + +extern "C" __global__ void AdaptiveAvgPoolForward2d(const INPUT_TYPE* __restrict__ input, + OUTPUT_TYPE* __restrict__ output, + uint64_t N, + uint64_t C, + uint64_t H, + uint64_t W, + uint64_t OH, + uint64_t OW, + tensor_view_t<4> input_tv, + tensor_view_t<4> output_tv) +{ + adaptiveAvgPoolForward2d<INPUT_TYPE, OUTPUT_TYPE>( + input, output, N, C, H, W, OH, OW, input_tv, output_tv); +} + +template <typename TI, typename TO> +__device__ void adaptiveAvgPoolBackward2d(const TI* __restrict__ output_grad, + TO* __restrict__ input_grad, + uint64_t N, + uint64_t C, + uint64_t H, + uint64_t W, + uint64_t OH, + uint64_t OW, + tensor_view_t<4> output_grad_tv, + tensor_view_t<4> input_grad_tv) +{ + uint64_t gid = threadIdx.x + blockIdx.x * blockDim.x; + uint64_t nch = gid / W, w = gid % W; + uint64_t nc = nch / H, h = nch % H; + uint64_t n = nc / C, c = nc % C; + + if(n >= N) + return; + + uint64_t oh = (h * OH) / H; + uint64_t koh = ((h + 1) * OH + H - 1) / H - oh; + + uint64_t ow = (w * OW) / W; + uint64_t kow = ((w + 1) * OW + W - 1) / W - ow; + + FLOAT_ACCUM grad = 0; + for(uint64_t ih = oh; ih < (oh + koh); ++ih) + { + uint64_t kh = ((ih + 1) * H + OH - 1) / OH - (ih * H) / OH; + for(uint64_t iw = ow; iw < (ow + kow); ++iw) + { + uint64_t kw = ((iw + 1) * W + OW - 1) / OW - (iw * W) / OW; + grad += + CVT_FLOAT2ACCUM(output_grad[output_grad_tv.get_tensor_view_idx({n, c, ih, iw})]) / + (kh * kw); + } + } + + input_grad[input_grad_tv.get_tensor_view_idx({n, c, h, w})] = CVT_ACCUM2FLOAT(grad); +} + +extern "C" __global__ void AdaptiveAvgPoolBackward2d(const INPUT_TYPE* __restrict__ output_grad, + OUTPUT_TYPE* __restrict__ input_grad, + uint64_t N, + uint64_t C, + uint64_t H, + uint64_t W, + uint64_t OH, + uint64_t OW, + tensor_view_t<4> output_grad_tv, + tensor_view_t<4> input_grad_tv) +{ + adaptiveAvgPoolBackward2d<INPUT_TYPE, OUTPUT_TYPE>( + output_grad, input_grad, N, C, H, W, OH, OW, output_grad_tv, input_grad_tv); +} + +template <typename TI, typename TO> +__device__ void adaptiveAvgPoolForward3d(const TI* __restrict__ input, + TO* __restrict__ output, + uint64_t N, + uint64_t C, + uint64_t D, + uint64_t H, + uint64_t W, + uint64_t OD, + uint64_t OH, + uint64_t OW, + tensor_view_t<5> input_tv, + tensor_view_t<5> output_tv) +{ + uint64_t gid = threadIdx.x + blockIdx.x * blockDim.x; + uint64_t ncodoh = gid / OW, ow = gid % OW; + uint64_t ncod = ncodoh / OH, oh = ncodoh % OH; + uint64_t nc = ncod / OD, od = ncod % OD; + uint64_t n = nc / C, c = nc % C; + + if(n >= N) + return; + uint64_t d = (od * D) / OD; + uint64_t kd = ((od + 1) * D + OD - 1) / OD - d; + + uint64_t h = (oh * H) / OH; + uint64_t kh = ((oh + 1) * H + OH - 1) / OH - h; + + uint64_t w = (ow * W) / OW; + uint64_t kw = ((ow + 1) * W + OW - 1) / OW - w; + + FLOAT_ACCUM sum = 0; + for(uint64_t id = d; id < (d + kd); ++id) + { + for(uint64_t ih = h; ih < (h + kh); ++ih) + { + for(uint64_t iw = w; iw < (w + kw); ++iw) + { + sum += CVT_FLOAT2ACCUM(input[input_tv.get_tensor_view_idx({n, c, id, ih, iw})]); + } + } + } + + output[output_tv.get_tensor_view_idx({n, c, od, oh, ow})] = + CVT_ACCUM2FLOAT(sum / (kd * kh * kw)); +} + +extern "C" __global__ void AdaptiveAvgPoolForward3d(const INPUT_TYPE* __restrict__ input, + OUTPUT_TYPE* __restrict__ output, + uint64_t N, + uint64_t C, + uint64_t D, + uint64_t H, + uint64_t W, + uint64_t OD, + uint64_t OH, + uint64_t OW, + tensor_view_t<5> input_tv, + tensor_view_t<5> output_tv) +{ + adaptiveAvgPoolForward3d<INPUT_TYPE, OUTPUT_TYPE>( + input, output, N, C, D, 
H, W, OD, OH, OW, input_tv, output_tv); +} + +template <typename TI, typename TO> +__device__ void adaptiveAvgPoolBackward3d(const TI* __restrict__ output_grad, + TO* __restrict__ input_grad, + uint64_t N, + uint64_t C, + uint64_t D, + uint64_t H, + uint64_t W, + uint64_t OD, + uint64_t OH, + uint64_t OW, + tensor_view_t<5> output_grad_tv, + tensor_view_t<5> input_grad_tv) +{ + uint64_t gid = threadIdx.x + blockIdx.x * blockDim.x; + uint64_t ncdh = gid / W, w = gid % W; + uint64_t ncd = ncdh / H, h = ncdh % H; + uint64_t nc = ncd / D, d = ncd % D; + uint64_t n = nc / C, c = nc % C; + + if(n >= N) + return; + + uint64_t od = (d * OD) / D; + uint64_t kod = ((d + 1) * OD + D - 1) / D - od; + + uint64_t oh = (h * OH) / H; + uint64_t koh = ((h + 1) * OH + H - 1) / H - oh; + + uint64_t ow = (w * OW) / W; + uint64_t kow = ((w + 1) * OW + W - 1) / W - ow; + + FLOAT_ACCUM grad = 0; + for(uint64_t id = od; id < (od + kod); ++id) + { + uint64_t kd = ((id + 1) * D + OD - 1) / OD - (id * D) / OD; + for(uint64_t ih = oh; ih < (oh + koh); ++ih) + { + uint64_t kh = ((ih + 1) * H + OH - 1) / OH - (ih * H) / OH; + for(uint64_t iw = ow; iw < (ow + kow); ++iw) + { + uint64_t kw = ((iw + 1) * W + OW - 1) / OW - (iw * W) / OW; + grad += CVT_FLOAT2ACCUM( + output_grad[output_grad_tv.get_tensor_view_idx({n, c, id, ih, iw})]) / + (kd * kh * kw); + } + } + } + + input_grad[input_grad_tv.get_tensor_view_idx({n, c, d, h, w})] = CVT_ACCUM2FLOAT(grad); +} + +extern "C" __global__ void AdaptiveAvgPoolBackward3d(const INPUT_TYPE* __restrict__ output_grad, + OUTPUT_TYPE* __restrict__ input_grad, + uint64_t N, + uint64_t C, + uint64_t D, + uint64_t H, + uint64_t W, + uint64_t OD, + uint64_t OH, + uint64_t OW, + tensor_view_t<5> output_grad_tv, + tensor_view_t<5> input_grad_tv) +{ + adaptiveAvgPoolBackward3d<INPUT_TYPE, OUTPUT_TYPE>( + output_grad, input_grad, N, C, D, H, W, OD, OH, OW, output_grad_tv, input_grad_tv); +} diff --git a/src/solver.cpp b/src/solver.cpp index d9e2fdb1be..a9b7ffdf29 100644 --- a/src/solver.cpp +++ b/src/solver.cpp @@ -26,6 +26,7 @@ #include <miopen/activ/solvers.hpp> #include <miopen/adam/solvers.hpp> +#include <miopen/adaptiveavgpool/solvers.hpp> #include <miopen/batchnorm/solvers.hpp> #include <miopen/cat/solvers.hpp> #include <miopen/conv/solvers.hpp> @@ -707,6 +708,30 @@ inline SolverRegistrar::SolverRegistrar(IdRegistryData& registry) multimarginloss::MultiMarginLossForward{}.SolverDbId()); Register(registry, ++id, Primitive::Mha, mha::MhaCKFlashAttentionV2Forward{}.SolverDbId()); + Register(registry, + ++id, + Primitive::AdaptiveAvgPool, + adaptiveavgpool::AdaptiveAvgPoolForward1d{}.SolverDbId()); + Register(registry, + ++id, + Primitive::AdaptiveAvgPool, + adaptiveavgpool::AdaptiveAvgPoolForward2d{}.SolverDbId()); + Register(registry, + ++id, + Primitive::AdaptiveAvgPool, + adaptiveavgpool::AdaptiveAvgPoolForward3d{}.SolverDbId()); + Register(registry, + ++id, + Primitive::AdaptiveAvgPool, + adaptiveavgpool::AdaptiveAvgPoolBackward1d{}.SolverDbId()); + Register(registry, + ++id, + Primitive::AdaptiveAvgPool, + adaptiveavgpool::AdaptiveAvgPoolBackward2d{}.SolverDbId()); + Register(registry, + ++id, + Primitive::AdaptiveAvgPool, + adaptiveavgpool::AdaptiveAvgPoolBackward3d{}.SolverDbId()); // IMPORTANT: New solvers should be added to the end of the function, and don't leave a white // space between this comment and the newly registered solver(s)! 
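One consistency note on the backward kernels registered above: they are the exact adjoint of the forward averaging. An input element at index h is covered by the output bins oh in [floor(h*OH/H), ceil((h+1)*OH/H)), and each such bin contributes output_grad[oh] / kh, with kh that bin's forward window size. A quick standalone check (assumption: with output_grad identically 1, the input grads must sum to OH, since each bin's averaging weights sum to 1):

#include <cstdint>
#include <cstdio>

int main()
{
    const uint64_t H = 10, OH = 3;
    double total = 0.0;
    for(uint64_t h = 0; h < H; ++h)
    {
        uint64_t oh_begin = h * OH / H;                 // first bin covering h
        uint64_t oh_end   = ((h + 1) * OH + H - 1) / H; // one past the last bin covering h
        for(uint64_t oh = oh_begin; oh < oh_end; ++oh)
        {
            // Forward window size of bin oh, recomputed as in the kernels above.
            uint64_t kh = ((oh + 1) * H + OH - 1) / OH - oh * H / OH;
            total += 1.0 / kh; // contribution when output_grad == 1 everywhere
        }
    }
    std::printf("sum of input grads = %.1f (expected %llu)\n",
                total,
                static_cast<unsigned long long>(OH));
    return 0;
}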
} diff --git a/src/solver/adaptiveavgpool/backward_adaptiveavgpool_1d.cpp b/src/solver/adaptiveavgpool/backward_adaptiveavgpool_1d.cpp new file mode 100644 index 0000000000..1552ac8385 --- /dev/null +++ b/src/solver/adaptiveavgpool/backward_adaptiveavgpool_1d.cpp @@ -0,0 +1,139 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#include <miopen/conv_solution.hpp> +#include <miopen/execution_context.hpp> +#include <miopen/invoke_params.hpp> +#include <miopen/tensor_view_utils.hpp> +#include <miopen/adaptiveavgpool/solvers.hpp> + +#include <miopen/adaptiveavgpool/invoke_params.hpp> +#include <miopen/datatype.hpp> +#include <miopen/adaptiveavgpool.hpp> +#include <miopen/target_properties.hpp> + +#define LOCAL_SIZE_BWD_1D 256 + +namespace miopen { + +namespace solver { + +namespace adaptiveavgpool { + +namespace { + +bool IsOverRocmBwd1d(const miopen::adaptiveavgpool::BwdProblemDescription& problem) +{ + if(!problem.IsAllContiguous()) + { + return true; + } + else + { + auto mul_nc = problem.GetOutputGradDesc().GetLengths()[0] * + problem.GetOutputGradDesc().GetLengths()[1]; + if(mul_nc < 141312) + { + return true; + } + } + return false; +} + +} // namespace + +bool AdaptiveAvgPoolBackward1d::IsApplicable( + const ExecutionContext&, const miopen::adaptiveavgpool::BwdProblemDescription& problem) const +{ + if(problem.GetInputGradDesc().GetNumDims() != 3 || + problem.GetOutputGradDesc().GetNumDims() != 3) + { + return false; + } + if(!IsOverRocmBwd1d(problem)) + { + return false; + } + if(!(problem.GetInputGradDesc().GetType() == miopenFloat || + problem.GetInputGradDesc().GetType() == miopenHalf || + problem.GetInputGradDesc().GetType() == miopenBFloat16)) + return false; + return true; +} + +ConvSolution AdaptiveAvgPoolBackward1d::GetSolution( + const ExecutionContext& context, + const miopen::adaptiveavgpool::BwdProblemDescription& problem) const +{ + std::ignore = context; + + auto result = ConvSolution{miopenStatusSuccess}; + auto input_dtype = miopen::GetDataType(problem.GetOutputGradDesc().GetType()); + auto output_dtype = miopen::GetDataType(problem.GetInputGradDesc().GetType()); + auto dtype = problem.GetInputGradDesc().GetType(); + uint64_t N_total = problem.GetNtotal(); + + auto build_params = KernelBuildParameters{ 
+ {"MIOPEN_USE_FP16", static_cast<int>(dtype == miopenHalf)}, + {"MIOPEN_USE_FP32", static_cast<int>(dtype == miopenFloat)}, + {"MIOPEN_USE_FP64", static_cast<int>(dtype == miopenDouble)}, + {"MIOPEN_USE_BFP16", static_cast<int>(dtype == miopenBFloat16)}, + {"INPUT_TYPE", input_dtype == "bfloat16" ? "ushort" : input_dtype}, + {"OUTPUT_TYPE", output_dtype == "bfloat16" ? "ushort" : output_dtype}}; + + result.construction_params.push_back(make_hip_kernel({LOCAL_SIZE_BWD_1D}, + {N_total}, + "MIOpenAdaptiveAvgPool.cpp", + "AdaptiveAvgPoolBackward1d", + build_params)); + + result.invoker_factory = [](const std::vector<Kernel>& kernels) { + return [=](const Handle& handle_, const AnyInvokeParams& raw_params) { + decltype(auto) params = raw_params.CastTo<miopen::adaptiveavgpool::BwdInvokeParams>(); + + decltype(auto) kernel = handle_.Run(kernels.front()); + + auto input_grad_tv = get_inner_expanded_tv<3>(deref(params.inputGradDesc)); + auto output_grad_tv = get_inner_expanded_tv<3>(deref(params.outputGradDesc)); + + uint64_t N = deref(params.inputGradDesc).GetLengths()[0]; + uint64_t C = deref(params.inputGradDesc).GetLengths()[1]; + uint64_t H = deref(params.inputGradDesc).GetLengths()[2]; + uint64_t OH = deref(params.outputGradDesc).GetLengths()[2]; + + kernel( + params.output_grad, params.input_grad, N, C, H, OH, output_grad_tv, input_grad_tv); + }; + }; + + return result; +} + +} // namespace adaptiveavgpool + +} // namespace solver + +} // namespace miopen diff --git a/src/solver/adaptiveavgpool/backward_adaptiveavgpool_2d.cpp b/src/solver/adaptiveavgpool/backward_adaptiveavgpool_2d.cpp new file mode 100644 index 0000000000..46dcef3e88 --- /dev/null +++ b/src/solver/adaptiveavgpool/backward_adaptiveavgpool_2d.cpp @@ -0,0 +1,165 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
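On the {"INPUT_TYPE", ...} / {"OUTPUT_TYPE", ...} entries above: bfloat16 tensors are handed to the HIP kernels as raw 16-bit words ("ushort"), and the CVT_FLOAT2ACCUM / CVT_ACCUM2FLOAT macros from float_types.h do the numeric conversion on device. A host-side sketch of the bit-level idea (truncating conversion only, for illustration; the real macros also handle rounding):

#include <cstdint>
#include <cstdio>
#include <cstring>

static uint16_t float_to_bf16_bits(float f) // keep the top 16 bits of the IEEE 754 word
{
    uint32_t u;
    std::memcpy(&u, &f, sizeof(u));
    return static_cast<uint16_t>(u >> 16);
}

static float bf16_bits_to_float(uint16_t h) // widen back by zero-filling the low mantissa bits
{
    uint32_t u = static_cast<uint32_t>(h) << 16;
    float f;
    std::memcpy(&f, &u, sizeof(f));
    return f;
}

int main()
{
    float x = 3.14159f;
    std::printf("%f -> %f\n", x, bf16_bits_to_float(float_to_bf16_bits(x)));
    return 0;
}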
+ * + *******************************************************************************/ + +#include <miopen/conv_solution.hpp> +#include <miopen/execution_context.hpp> +#include <miopen/invoke_params.hpp> +#include <miopen/tensor_view_utils.hpp> +#include <miopen/adaptiveavgpool/solvers.hpp> + +#include <miopen/adaptiveavgpool/invoke_params.hpp> +#include <miopen/datatype.hpp> +#include <miopen/adaptiveavgpool.hpp> +#include <miopen/target_properties.hpp> + +#define LOCAL_SIZE_BWD_2D 256 + +namespace miopen { + +namespace solver { + +namespace adaptiveavgpool { + +namespace { + +bool IsOverRocmBwd2d(const miopen::adaptiveavgpool::BwdProblemDescription& problem) +{ + if(problem.IsAllContiguous()) + { + return false; + } + else + { + auto dtype = problem.GetInputGradDesc().GetType(); + auto in_nelems = problem.GetInputGradDesc().GetElementSize(); + auto out_nelems = problem.GetOutputGradDesc().GetElementSize(); + auto in_over_out = static_cast<float>(in_nelems) / out_nelems; + + if(dtype == miopenFloat) + { + if(in_nelems > 3801600) + return true; + } + else if(dtype == miopenHalf) + { + if(in_over_out == 1 || (in_over_out >= 1024 && in_over_out <= 4096)) + return true; + } + else if(dtype == miopenBFloat16) + { + if(in_over_out < 13 || (in_over_out >= 1024 && in_over_out <= 4096)) + { + return true; + } + } + } + return false; +} + +} // namespace + +bool AdaptiveAvgPoolBackward2d::IsApplicable( + const ExecutionContext&, const miopen::adaptiveavgpool::BwdProblemDescription& problem) const +{ + if(problem.GetInputGradDesc().GetNumDims() != 4 || + problem.GetOutputGradDesc().GetNumDims() != 4) + { + return false; + } + if(!IsOverRocmBwd2d(problem)) + { + return false; + } + if(!(problem.GetInputGradDesc().GetType() == miopenFloat || + problem.GetInputGradDesc().GetType() == miopenHalf || + problem.GetInputGradDesc().GetType() == miopenBFloat16)) + return false; + return true; +} + +ConvSolution AdaptiveAvgPoolBackward2d::GetSolution( + const ExecutionContext& context, + const miopen::adaptiveavgpool::BwdProblemDescription& problem) const +{ + std::ignore = context; + + auto result = ConvSolution{miopenStatusSuccess}; + auto input_dtype = miopen::GetDataType(problem.GetOutputGradDesc().GetType()); + auto output_dtype = miopen::GetDataType(problem.GetInputGradDesc().GetType()); + auto dtype = problem.GetInputGradDesc().GetType(); + uint64_t N_total = problem.GetNtotal(); + + auto build_params = KernelBuildParameters{ + {"MIOPEN_USE_FP16", static_cast<int>(dtype == miopenHalf)}, + {"MIOPEN_USE_FP32", static_cast<int>(dtype == miopenFloat)}, + {"MIOPEN_USE_FP64", static_cast<int>(dtype == miopenDouble)}, + {"MIOPEN_USE_BFP16", static_cast<int>(dtype == miopenBFloat16)}, + {"INPUT_TYPE", input_dtype == "bfloat16" ? "ushort" : input_dtype}, + {"OUTPUT_TYPE", output_dtype == "bfloat16" ? 
"ushort" : output_dtype}}; + + result.construction_params.push_back(make_hip_kernel({LOCAL_SIZE_BWD_2D}, + {N_total}, + "MIOpenAdaptiveAvgPool.cpp", + "AdaptiveAvgPoolBackward2d", + build_params)); + + result.invoker_factory = [](const std::vector<Kernel>& kernels) { + return [=](const Handle& handle_, const AnyInvokeParams& raw_params) { + decltype(auto) params = raw_params.CastTo<miopen::adaptiveavgpool::BwdInvokeParams>(); + + decltype(auto) kernel = handle_.Run(kernels.front()); + + auto input_grad_tv = get_inner_expanded_tv<4>(deref(params.inputGradDesc)); + auto output_grad_tv = get_inner_expanded_tv<4>(deref(params.outputGradDesc)); + + uint64_t N = deref(params.inputGradDesc).GetLengths()[0]; + uint64_t C = deref(params.inputGradDesc).GetLengths()[1]; + uint64_t H = deref(params.inputGradDesc).GetLengths()[2]; + uint64_t W = deref(params.inputGradDesc).GetLengths()[3]; + uint64_t OH = deref(params.outputGradDesc).GetLengths()[2]; + uint64_t OW = deref(params.outputGradDesc).GetLengths()[3]; + + kernel(params.output_grad, + params.input_grad, + N, + C, + H, + W, + OH, + OW, + output_grad_tv, + input_grad_tv); + }; + }; + + return result; +} + +} // namespace adaptiveavgpool + +} // namespace solver + +} // namespace miopen diff --git a/src/solver/adaptiveavgpool/backward_adaptiveavgpool_3d.cpp b/src/solver/adaptiveavgpool/backward_adaptiveavgpool_3d.cpp new file mode 100644 index 0000000000..c16603a530 --- /dev/null +++ b/src/solver/adaptiveavgpool/backward_adaptiveavgpool_3d.cpp @@ -0,0 +1,150 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ + +#include <miopen/conv_solution.hpp> +#include <miopen/execution_context.hpp> +#include <miopen/invoke_params.hpp> +#include <miopen/tensor_view_utils.hpp> +#include <miopen/adaptiveavgpool/solvers.hpp> + +#include <miopen/adaptiveavgpool/invoke_params.hpp> +#include <miopen/datatype.hpp> +#include <miopen/adaptiveavgpool.hpp> +#include <miopen/target_properties.hpp> + +#define LOCAL_SIZE_BWD_3D 256 + +namespace miopen { + +namespace solver { + +namespace adaptiveavgpool { + +namespace { + +bool IsOverRocmBwd3d(const miopen::adaptiveavgpool::BwdProblemDescription& problem) +{ + if(!problem.IsAllContiguous()) + { + return true; + } + else + { + if((problem.GetInputGradDesc().GetElementSize() / + problem.GetOutputGradDesc().GetElementSize()) == 1) + return true; + } + return false; +} + +} // namespace + +bool AdaptiveAvgPoolBackward3d::IsApplicable( + const ExecutionContext&, const miopen::adaptiveavgpool::BwdProblemDescription& problem) const +{ + if(problem.GetInputGradDesc().GetNumDims() != 5 || + problem.GetOutputGradDesc().GetNumDims() != 5) + { + return false; + } + if(!IsOverRocmBwd3d(problem)) + { + return false; + } + if(!(problem.GetInputGradDesc().GetType() == miopenFloat || + problem.GetInputGradDesc().GetType() == miopenHalf || + problem.GetInputGradDesc().GetType() == miopenBFloat16)) + return false; + return true; +} + +ConvSolution AdaptiveAvgPoolBackward3d::GetSolution( + const ExecutionContext& context, + const miopen::adaptiveavgpool::BwdProblemDescription& problem) const +{ + std::ignore = context; + + auto result = ConvSolution{miopenStatusSuccess}; + auto input_dtype = miopen::GetDataType(problem.GetOutputGradDesc().GetType()); + auto output_dtype = miopen::GetDataType(problem.GetInputGradDesc().GetType()); + auto dtype = problem.GetInputGradDesc().GetType(); + uint64_t N_total = problem.GetNtotal(); + + auto build_params = KernelBuildParameters{ + {"MIOPEN_USE_FP16", static_cast<int>(dtype == miopenHalf)}, + {"MIOPEN_USE_FP32", static_cast<int>(dtype == miopenFloat)}, + {"MIOPEN_USE_FP64", static_cast<int>(dtype == miopenDouble)}, + {"MIOPEN_USE_BFP16", static_cast<int>(dtype == miopenBFloat16)}, + {"INPUT_TYPE", input_dtype == "bfloat16" ? "ushort" : input_dtype}, + {"OUTPUT_TYPE", output_dtype == "bfloat16" ? 
"ushort" : output_dtype}}; + + result.construction_params.push_back(make_hip_kernel({LOCAL_SIZE_BWD_3D}, + {N_total}, + "MIOpenAdaptiveAvgPool.cpp", + "AdaptiveAvgPoolBackward3d", + build_params)); + + result.invoker_factory = [](const std::vector<Kernel>& kernels) { + return [=](const Handle& handle_, const AnyInvokeParams& raw_params) { + decltype(auto) params = raw_params.CastTo<miopen::adaptiveavgpool::BwdInvokeParams>(); + + decltype(auto) kernel = handle_.Run(kernels.front()); + + auto input_grad_tv = get_inner_expanded_tv<5>(deref(params.inputGradDesc)); + auto output_grad_tv = get_inner_expanded_tv<5>(deref(params.outputGradDesc)); + + uint64_t N = deref(params.inputGradDesc).GetLengths()[0]; + uint64_t C = deref(params.inputGradDesc).GetLengths()[1]; + uint64_t D = deref(params.inputGradDesc).GetLengths()[2]; + uint64_t H = deref(params.inputGradDesc).GetLengths()[3]; + uint64_t W = deref(params.inputGradDesc).GetLengths()[4]; + uint64_t OD = deref(params.outputGradDesc).GetLengths()[2]; + uint64_t OH = deref(params.outputGradDesc).GetLengths()[3]; + uint64_t OW = deref(params.outputGradDesc).GetLengths()[4]; + + kernel(params.output_grad, + params.input_grad, + N, + C, + D, + H, + W, + OD, + OH, + OW, + output_grad_tv, + input_grad_tv); + }; + }; + + return result; +} + +} // namespace adaptiveavgpool + +} // namespace solver + +} // namespace miopen diff --git a/src/solver/adaptiveavgpool/forward_adaptiveavgpool_1d.cpp b/src/solver/adaptiveavgpool/forward_adaptiveavgpool_1d.cpp new file mode 100644 index 0000000000..f31d80c8be --- /dev/null +++ b/src/solver/adaptiveavgpool/forward_adaptiveavgpool_1d.cpp @@ -0,0 +1,132 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ *
+ *******************************************************************************/
+
+#include <miopen/conv_solution.hpp>
+#include <miopen/execution_context.hpp>
+#include <miopen/invoke_params.hpp>
+#include <miopen/tensor_view_utils.hpp>
+#include <miopen/adaptiveavgpool/solvers.hpp>
+
+#include <miopen/adaptiveavgpool/invoke_params.hpp>
+#include <miopen/datatype.hpp>
+#include <miopen/adaptiveavgpool.hpp>
+#include <miopen/target_properties.hpp>
+
+#define LOCAL_SIZE_FWD_1D 256
+
+namespace miopen {
+
+namespace solver {
+
+namespace adaptiveavgpool {
+
+namespace {
+
+bool IsOverRocmFwd1d(const miopen::adaptiveavgpool::FwdProblemDescription& problem)
+{
+    // std::vector has no negative indexing, so GetLengths()[-1] would be out of
+    // bounds; use .back() to read the last (and only) spatial extent.
+    auto in_len      = problem.GetInputDesc().GetLengths().back();
+    auto out_len     = problem.GetOutputDesc().GetLengths().back();
+    auto in_over_out = static_cast<float>(in_len) / out_len;
+
+    if(in_over_out < 56)
+    {
+        return true;
+    }
+    return false;
+}
+
+} // namespace
+
+bool AdaptiveAvgPoolForward1d::IsApplicable(
+    const ExecutionContext&, const miopen::adaptiveavgpool::FwdProblemDescription& problem) const
+{
+    if(problem.GetInputDesc().GetNumDims() != 3 || problem.GetOutputDesc().GetNumDims() != 3)
+    {
+        return false;
+    }
+    if(!IsOverRocmFwd1d(problem))
+    {
+        return false;
+    }
+    if(!(problem.GetInputDesc().GetType() == miopenFloat ||
+         problem.GetInputDesc().GetType() == miopenHalf ||
+         problem.GetInputDesc().GetType() == miopenBFloat16))
+        return false;
+    return true;
+}
+
+ConvSolution AdaptiveAvgPoolForward1d::GetSolution(
+    const ExecutionContext& context,
+    const miopen::adaptiveavgpool::FwdProblemDescription& problem) const
+{
+    std::ignore = context;
+
+    auto result       = ConvSolution{miopenStatusSuccess};
+    auto input_dtype  = miopen::GetDataType(problem.GetInputDesc().GetType());
+    auto output_dtype = miopen::GetDataType(problem.GetOutputDesc().GetType());
+    auto dtype        = problem.GetOutputDesc().GetType();
+    uint64_t N_total  = problem.GetNtotal();
+
+    auto build_params = KernelBuildParameters{
+        {"MIOPEN_USE_FP16", static_cast<int>(dtype == miopenHalf)},
+        {"MIOPEN_USE_FP32", static_cast<int>(dtype == miopenFloat)},
+        {"MIOPEN_USE_FP64", static_cast<int>(dtype == miopenDouble)},
+        {"MIOPEN_USE_BFP16", static_cast<int>(dtype == miopenBFloat16)},
+        {"INPUT_TYPE", input_dtype == "bfloat16" ? "ushort" : input_dtype},
+        {"OUTPUT_TYPE", output_dtype == "bfloat16" ?
"ushort" : output_dtype}}; + + result.construction_params.push_back(make_hip_kernel({LOCAL_SIZE_FWD_1D}, + {N_total}, + "MIOpenAdaptiveAvgPool.cpp", + "AdaptiveAvgPoolForward1d", + build_params)); + + result.invoker_factory = [](const std::vector<Kernel>& kernels) { + return [=](const Handle& handle_, const AnyInvokeParams& raw_params) { + decltype(auto) params = raw_params.CastTo<miopen::adaptiveavgpool::FwdInvokeParams>(); + + decltype(auto) kernel = handle_.Run(kernels.front()); + + auto input_tv = get_inner_expanded_tv<3>(deref(params.inputDesc)); + auto output_tv = get_inner_expanded_tv<3>(deref(params.outputDesc)); + + uint64_t N = deref(params.inputDesc).GetLengths()[0]; + uint64_t C = deref(params.inputDesc).GetLengths()[1]; + uint64_t H = deref(params.inputDesc).GetLengths()[2]; + uint64_t OH = deref(params.outputDesc).GetLengths()[2]; + + kernel(params.input, params.output, N, C, H, OH, input_tv, output_tv); + }; + }; + + return result; +} + +} // namespace adaptiveavgpool + +} // namespace solver + +} // namespace miopen diff --git a/src/solver/adaptiveavgpool/forward_adaptiveavgpool_2d.cpp b/src/solver/adaptiveavgpool/forward_adaptiveavgpool_2d.cpp new file mode 100644 index 0000000000..344071a3a4 --- /dev/null +++ b/src/solver/adaptiveavgpool/forward_adaptiveavgpool_2d.cpp @@ -0,0 +1,140 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ + +#include <miopen/conv_solution.hpp> +#include <miopen/execution_context.hpp> +#include <miopen/invoke_params.hpp> +#include <miopen/tensor_view_utils.hpp> +#include <miopen/adaptiveavgpool/solvers.hpp> + +#include <miopen/adaptiveavgpool/invoke_params.hpp> +#include <miopen/datatype.hpp> +#include <miopen/adaptiveavgpool.hpp> +#include <miopen/target_properties.hpp> + +#define LOCAL_SIZE_FWD_2D 256 + +namespace miopen { + +namespace solver { + +namespace adaptiveavgpool { + +namespace { + +bool IsOverRocmFwd2d(const miopen::adaptiveavgpool::FwdProblemDescription& problem) +{ + auto in_nelems = problem.GetInputDesc().GetElementSize(); + auto out_nelems = problem.GetOutputDesc().GetElementSize(); + auto in_over_out = static_cast<float>(in_nelems) / out_nelems; + + if(problem.IsAllContiguous()) + { + if((in_over_out < 13) || (in_over_out >= 100 && in_over_out <= 112)) + return true; + } + else + { + if(in_over_out < 248) + return true; + } + return false; +} + +} // namespace + +bool AdaptiveAvgPoolForward2d::IsApplicable( + const ExecutionContext&, const miopen::adaptiveavgpool::FwdProblemDescription& problem) const +{ + if(problem.GetInputDesc().GetNumDims() != 4 || problem.GetOutputDesc().GetNumDims() != 4) + { + return false; + } + if(!IsOverRocmFwd2d(problem)) + { + return false; + } + if(!(problem.GetInputDesc().GetType() == miopenFloat || + problem.GetInputDesc().GetType() == miopenHalf || + problem.GetInputDesc().GetType() == miopenBFloat16)) + return false; + return true; +} + +ConvSolution AdaptiveAvgPoolForward2d::GetSolution( + const ExecutionContext& context, + const miopen::adaptiveavgpool::FwdProblemDescription& problem) const +{ + std::ignore = context; + + auto result = ConvSolution{miopenStatusSuccess}; + auto input_dtype = miopen::GetDataType(problem.GetInputDesc().GetType()); + auto output_dtype = miopen::GetDataType(problem.GetOutputDesc().GetType()); + auto dtype = problem.GetOutputDesc().GetType(); + uint64_t N_total = problem.GetNtotal(); + + auto build_params = KernelBuildParameters{ + {"MIOPEN_USE_FP16", static_cast<int>(dtype == miopenHalf)}, + {"MIOPEN_USE_FP32", static_cast<int>(dtype == miopenFloat)}, + {"MIOPEN_USE_FP64", static_cast<int>(dtype == miopenDouble)}, + {"MIOPEN_USE_BFP16", static_cast<int>(dtype == miopenBFloat16)}, + {"INPUT_TYPE", input_dtype == "bfloat16" ? "ushort" : input_dtype}, + {"OUTPUT_TYPE", output_dtype == "bfloat16" ? 
"ushort" : output_dtype}}; + + result.construction_params.push_back(make_hip_kernel({LOCAL_SIZE_FWD_2D}, + {N_total}, + "MIOpenAdaptiveAvgPool.cpp", + "AdaptiveAvgPoolForward2d", + build_params)); + + result.invoker_factory = [](const std::vector<Kernel>& kernels) { + return [=](const Handle& handle_, const AnyInvokeParams& raw_params) { + decltype(auto) params = raw_params.CastTo<miopen::adaptiveavgpool::FwdInvokeParams>(); + + decltype(auto) kernel = handle_.Run(kernels.front()); + + auto input_tv = get_inner_expanded_tv<4>(deref(params.inputDesc)); + auto output_tv = get_inner_expanded_tv<4>(deref(params.outputDesc)); + + uint64_t N = deref(params.inputDesc).GetLengths()[0]; + uint64_t C = deref(params.inputDesc).GetLengths()[1]; + uint64_t H = deref(params.inputDesc).GetLengths()[2]; + uint64_t W = deref(params.inputDesc).GetLengths()[3]; + uint64_t OH = deref(params.outputDesc).GetLengths()[2]; + uint64_t OW = deref(params.outputDesc).GetLengths()[3]; + + kernel(params.input, params.output, N, C, H, W, OH, OW, input_tv, output_tv); + }; + }; + + return result; +} + +} // namespace adaptiveavgpool + +} // namespace solver + +} // namespace miopen diff --git a/src/solver/adaptiveavgpool/forward_adaptiveavgpool_3d.cpp b/src/solver/adaptiveavgpool/forward_adaptiveavgpool_3d.cpp new file mode 100644 index 0000000000..3c4fcf552f --- /dev/null +++ b/src/solver/adaptiveavgpool/forward_adaptiveavgpool_3d.cpp @@ -0,0 +1,142 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ + +#include <miopen/conv_solution.hpp> +#include <miopen/execution_context.hpp> +#include <miopen/invoke_params.hpp> +#include <miopen/tensor_view_utils.hpp> +#include <miopen/adaptiveavgpool/solvers.hpp> + +#include <miopen/adaptiveavgpool/invoke_params.hpp> +#include <miopen/datatype.hpp> +#include <miopen/adaptiveavgpool.hpp> +#include <miopen/target_properties.hpp> + +#define LOCAL_SIZE_FWD_3D 256 + +namespace miopen { + +namespace solver { + +namespace adaptiveavgpool { + +namespace { + +bool IsOverRocmFwd3d(const miopen::adaptiveavgpool::FwdProblemDescription& problem) +{ + auto in_nelems = problem.GetInputDesc().GetElementSize(); + auto out_nelems = problem.GetOutputDesc().GetElementSize(); + auto in_over_out = static_cast<float>(in_nelems) / out_nelems; + + if(problem.IsAllContiguous()) + { + if(in_over_out <= 98) + return true; + } + else + { + if(in_over_out < 8000) + return true; + } + return false; +} + +} // namespace + +bool AdaptiveAvgPoolForward3d::IsApplicable( + const ExecutionContext&, const miopen::adaptiveavgpool::FwdProblemDescription& problem) const +{ + if(problem.GetInputDesc().GetNumDims() != 5 || problem.GetOutputDesc().GetNumDims() != 5) + { + return false; + } + if(!IsOverRocmFwd3d(problem)) + { + return false; + } + if(!(problem.GetInputDesc().GetType() == miopenFloat || + problem.GetInputDesc().GetType() == miopenHalf || + problem.GetInputDesc().GetType() == miopenBFloat16)) + return false; + return true; +} + +ConvSolution AdaptiveAvgPoolForward3d::GetSolution( + const ExecutionContext& context, + const miopen::adaptiveavgpool::FwdProblemDescription& problem) const +{ + std::ignore = context; + + auto result = ConvSolution{miopenStatusSuccess}; + auto input_dtype = miopen::GetDataType(problem.GetInputDesc().GetType()); + auto output_dtype = miopen::GetDataType(problem.GetOutputDesc().GetType()); + auto dtype = problem.GetOutputDesc().GetType(); + uint64_t N_total = problem.GetNtotal(); + + auto build_params = KernelBuildParameters{ + {"MIOPEN_USE_FP16", static_cast<int>(dtype == miopenHalf)}, + {"MIOPEN_USE_FP32", static_cast<int>(dtype == miopenFloat)}, + {"MIOPEN_USE_FP64", static_cast<int>(dtype == miopenDouble)}, + {"MIOPEN_USE_BFP16", static_cast<int>(dtype == miopenBFloat16)}, + {"INPUT_TYPE", input_dtype == "bfloat16" ? "ushort" : input_dtype}, + {"OUTPUT_TYPE", output_dtype == "bfloat16" ? 
"ushort" : output_dtype}}; + + result.construction_params.push_back(make_hip_kernel({LOCAL_SIZE_FWD_3D}, + {N_total}, + "MIOpenAdaptiveAvgPool.cpp", + "AdaptiveAvgPoolForward3d", + build_params)); + + result.invoker_factory = [](const std::vector<Kernel>& kernels) { + return [=](const Handle& handle_, const AnyInvokeParams& raw_params) { + decltype(auto) params = raw_params.CastTo<miopen::adaptiveavgpool::FwdInvokeParams>(); + + decltype(auto) kernel = handle_.Run(kernels.front()); + + auto input_tv = get_inner_expanded_tv<5>(deref(params.inputDesc)); + auto output_tv = get_inner_expanded_tv<5>(deref(params.outputDesc)); + + uint64_t N = deref(params.inputDesc).GetLengths()[0]; + uint64_t C = deref(params.inputDesc).GetLengths()[1]; + uint64_t D = deref(params.inputDesc).GetLengths()[2]; + uint64_t H = deref(params.inputDesc).GetLengths()[3]; + uint64_t W = deref(params.inputDesc).GetLengths()[4]; + uint64_t OD = deref(params.outputDesc).GetLengths()[2]; + uint64_t OH = deref(params.outputDesc).GetLengths()[3]; + uint64_t OW = deref(params.outputDesc).GetLengths()[4]; + + kernel(params.input, params.output, N, C, D, H, W, OD, OH, OW, input_tv, output_tv); + }; + }; + + return result; +} + +} // namespace adaptiveavgpool + +} // namespace solver + +} // namespace miopen diff --git a/test/cpu_adaptiveavgpool.hpp b/test/cpu_adaptiveavgpool.hpp new file mode 100644 index 0000000000..462cbda67c --- /dev/null +++ b/test/cpu_adaptiveavgpool.hpp @@ -0,0 +1,263 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ +#pragma once + +#include "tensor_holder.hpp" +#include <miopen/tensor_view_utils.hpp> +#include "ford.hpp" + +template <class T> +void cpu_adaptiveavgpool_forward_1d( + tensor<T> input, tensor<T>& output, size_t C, size_t H, size_t OH) +{ + auto numel = output.desc.GetElementSize(); + + auto input_tv = miopen::get_inner_expanded_tv<3>(input.desc); + auto output_tv = miopen::get_inner_expanded_tv<3>(output.desc); + + par_ford(numel)([&](size_t gid) { + size_t nc = gid / OH, oh = gid % OH; + size_t n = nc / C, c = nc % C; + + size_t h = oh * H / OH; + size_t kh = (((oh + 1) * H + OH - 1) / OH) - h; + + float sum = 0; + for(size_t ih = h; ih < (h + kh); ++ih) + { + sum += static_cast<float>(input[input_tv.get_tensor_view_idx({n, c, ih})]); + } + + output[output_tv.get_tensor_view_idx({n, c, oh})] = static_cast<T>(sum / kh); + }); +} + +template <class T> +void cpu_adaptiveavgpool_forward_2d( + tensor<T> input, tensor<T>& output, size_t C, size_t H, size_t W, size_t OH, size_t OW) +{ + auto numel = output.desc.GetElementSize(); + + auto input_tv = miopen::get_inner_expanded_tv<4>(input.desc); + auto output_tv = miopen::get_inner_expanded_tv<4>(output.desc); + + par_ford(numel)([&](size_t gid) { + size_t ncoh = gid / OW, ow = gid % OW; + size_t nc = ncoh / OH, oh = ncoh % OH; + size_t n = nc / C, c = nc % C; + + size_t h = (oh * H) / OH; + size_t kh = (((oh + 1) * H + OH - 1) / OH) - h; + + size_t w = (ow * W) / OW; + size_t kw = (((ow + 1) * W + OW - 1) / OW) - w; + + float divider = static_cast<float>(kh * kw); + float sum = 0; + for(size_t ih = h; ih < (h + kh); ++ih) + { + for(size_t iw = w; iw < (w + kw); ++iw) + { + sum += static_cast<float>(input[input_tv.get_tensor_view_idx({n, c, ih, iw})]); + } + } + + output[output_tv.get_tensor_view_idx({n, c, oh, ow})] = static_cast<T>(sum / divider); + }); +} + +template <class T> +void cpu_adaptiveavgpool_forward_3d(tensor<T> input, + tensor<T>& output, + size_t C, + size_t D, + size_t H, + size_t W, + size_t OD, + size_t OH, + size_t OW) +{ + auto numel = output.desc.GetElementSize(); + + auto input_tv = miopen::get_inner_expanded_tv<5>(input.desc); + auto output_tv = miopen::get_inner_expanded_tv<5>(output.desc); + + par_ford(numel)([&](size_t gid) { + size_t ncodoh = gid / OW, ow = gid % OW; + size_t ncod = ncodoh / OH, oh = ncodoh % OH; + size_t nc = ncod / OD, od = ncod % OD; + size_t n = nc / C, c = nc % C; + + size_t d = (od * D) / OD; + size_t kd = ((od + 1) * D + OD - 1) / OD - d; + + size_t h = (oh * H) / OH; + size_t kh = ((oh + 1) * H + OH - 1) / OH - h; + + size_t w = (ow * W) / OW; + size_t kw = ((ow + 1) * W + OW - 1) / OW - w; + + float sum = 0; + for(size_t id = d; id < (d + kd); ++id) + { + for(size_t ih = h; ih < (h + kh); ++ih) + { + for(size_t iw = w; iw < (w + kw); ++iw) + { + sum += + static_cast<float>(input[input_tv.get_tensor_view_idx({n, c, id, ih, iw})]); + } + } + } + + output[output_tv.get_tensor_view_idx({n, c, od, oh, ow})] = + static_cast<T>(sum / (kd * kh * kw)); + }); +} + +template <class T> +void cpu_adaptiveavgpool_backward_1d( + tensor<T> output_grad, tensor<T>& input_grad, size_t C, size_t H, size_t OH) +{ + auto numel = input_grad.desc.GetElementSize(); + + auto output_grad_tv = miopen::get_inner_expanded_tv<3>(output_grad.desc); + auto input_grad_tv = miopen::get_inner_expanded_tv<3>(input_grad.desc); + + par_ford(numel)([&](size_t gid) { + size_t nc = gid / H, h = gid % H; + size_t n = nc / C, c = nc % C; + + size_t 
oh = (h * OH) / H; + size_t koh = (((h + 1) * OH + H - 1) / H) - oh; + + float grad = 0; + for(size_t ih = oh; ih < (oh + koh); ++ih) + { + size_t kh = ((ih + 1) * H + OH - 1) / OH - (ih * H) / OH; + grad += + static_cast<float>(output_grad[output_grad_tv.get_tensor_view_idx({n, c, ih})]) / + kh; + } + + input_grad[input_grad_tv.get_tensor_view_idx({n, c, h})] = static_cast<T>(grad); + }); +} + +template <class T> +void cpu_adaptiveavgpool_backward_2d(tensor<T> output_grad, + tensor<T>& input_grad, + size_t C, + size_t H, + size_t W, + size_t OH, + size_t OW) +{ + auto numel = input_grad.desc.GetElementSize(); + + auto output_grad_tv = miopen::get_inner_expanded_tv<4>(output_grad.desc); + auto input_grad_tv = miopen::get_inner_expanded_tv<4>(input_grad.desc); + + par_ford(numel)([&](size_t gid) { + size_t nch = gid / W, w = gid % W; + size_t nc = nch / H, h = nch % H; + size_t n = nc / C, c = nc % C; + + size_t oh = (h * OH) / H; + size_t koh = ((h + 1) * OH + H - 1) / H - oh; + + size_t ow = (w * OW) / W; + size_t kow = ((w + 1) * OW + W - 1) / W - ow; + + float grad = 0; + for(size_t ih = oh; ih < (oh + koh); ++ih) + { + size_t kh = ((ih + 1) * H + OH - 1) / OH - (ih * H) / OH; + for(size_t iw = ow; iw < (ow + kow); ++iw) + { + size_t kw = ((iw + 1) * W + OW - 1) / OW - (iw * W) / OW; + grad += static_cast<float>( + output_grad[output_grad_tv.get_tensor_view_idx({n, c, ih, iw})]) / + (kh * kw); + } + } + + input_grad[input_grad_tv.get_tensor_view_idx({n, c, h, w})] = static_cast<T>(grad); + }); +} + +template <class T> +void cpu_adaptiveavgpool_backward_3d(tensor<T> output_grad, + tensor<T>& input_grad, + size_t C, + size_t D, + size_t H, + size_t W, + size_t OD, + size_t OH, + size_t OW) +{ + auto numel = input_grad.desc.GetElementSize(); + + auto output_grad_tv = miopen::get_inner_expanded_tv<5>(output_grad.desc); + auto input_grad_tv = miopen::get_inner_expanded_tv<5>(input_grad.desc); + + par_ford(numel)([&](size_t gid) { + size_t ncdh = gid / W, w = gid % W; + size_t ncd = ncdh / H, h = ncdh % H; + size_t nc = ncd / D, d = ncd % D; + size_t n = nc / C, c = nc % C; + + size_t od = (d * OD) / D; + size_t kod = ((d + 1) * OD + D - 1) / D - od; + + size_t oh = (h * OH) / H; + size_t koh = ((h + 1) * OH + H - 1) / H - oh; + + size_t ow = (w * OW) / W; + size_t kow = ((w + 1) * OW + W - 1) / W - ow; + + float grad = 0; + for(size_t id = od; id < (od + kod); ++id) + { + size_t kd = ((id + 1) * D + OD - 1) / OD - (id * D) / OD; + for(size_t ih = oh; ih < (oh + koh); ++ih) + { + size_t kh = ((ih + 1) * H + OH - 1) / OH - (ih * H) / OH; + for(size_t iw = ow; iw < (ow + kow); ++iw) + { + size_t kw = ((iw + 1) * W + OW - 1) / OW - (iw * W) / OW; + grad += + static_cast<float>( + output_grad[output_grad_tv.get_tensor_view_idx({n, c, id, ih, iw})]) / + (kd * kh * kw); + } + } + } + + input_grad[input_grad_tv.get_tensor_view_idx({n, c, d, h, w})] = static_cast<T>(grad); + }); +} diff --git a/test/gtest/adaptiveavgpool.cpp b/test/gtest/adaptiveavgpool.cpp new file mode 100644 index 0000000000..b09c286b15 --- /dev/null +++ b/test/gtest/adaptiveavgpool.cpp @@ -0,0 +1,94 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#include "adaptiveavgpool.hpp" +#include "gtest/gtest.h" +using float16 = half_float::half; + +// FORWARD TEST +using GPU_AdaptiveAvgPool_fwd_FP32 = AdaptiveAvgPoolTestFwd<float>; +using GPU_AdaptiveAvgPool_fwd_FP16 = AdaptiveAvgPoolTestFwd<float16>; +using GPU_AdaptiveAvgPool_fwd_BFP16 = AdaptiveAvgPoolTestFwd<bfloat16>; + +TEST_P(GPU_AdaptiveAvgPool_fwd_FP32, AdaptiveAvgPoolTestFwd) +{ + RunTest(); + Verify(); +}; + +TEST_P(GPU_AdaptiveAvgPool_fwd_FP16, AdaptiveAvgPoolTestFwd) +{ + RunTest(); + Verify(); +}; + +TEST_P(GPU_AdaptiveAvgPool_fwd_BFP16, AdaptiveAvgPoolTestFwd) +{ + RunTest(); + Verify(); +}; + +INSTANTIATE_TEST_SUITE_P(Smoke, + GPU_AdaptiveAvgPool_fwd_FP32, + testing::ValuesIn(AdaptiveAvgPoolTestConfigsFwdFp32())); +INSTANTIATE_TEST_SUITE_P(Smoke, + GPU_AdaptiveAvgPool_fwd_FP16, + testing::ValuesIn(AdaptiveAvgPoolTestConfigsFwdFp16())); +INSTANTIATE_TEST_SUITE_P(Smoke, + GPU_AdaptiveAvgPool_fwd_BFP16, + testing::ValuesIn(AdaptiveAvgPoolTestConfigsFwdBfp16())); + +// BACKWARD TEST +using GPU_AdaptiveAvgPool_bwd_FP32 = AdaptiveAvgPoolTestBwd<float>; +using GPU_AdaptiveAvgPool_bwd_FP16 = AdaptiveAvgPoolTestBwd<float16>; +using GPU_AdaptiveAvgPool_bwd_BFP16 = AdaptiveAvgPoolTestBwd<bfloat16>; + +TEST_P(GPU_AdaptiveAvgPool_bwd_FP32, AdaptiveAvgPoolTestBwd) +{ + RunTest(); + Verify(); +}; + +TEST_P(GPU_AdaptiveAvgPool_bwd_FP16, AdaptiveAvgPoolTestBwd) +{ + RunTest(); + Verify(); +}; + +TEST_P(GPU_AdaptiveAvgPool_bwd_BFP16, AdaptiveAvgPoolTestBwd) +{ + RunTest(); + Verify(); +}; + +INSTANTIATE_TEST_SUITE_P(Smoke, + GPU_AdaptiveAvgPool_bwd_FP32, + testing::ValuesIn(AdaptiveAvgPoolTestConfigsBwdFp32())); +INSTANTIATE_TEST_SUITE_P(Smoke, + GPU_AdaptiveAvgPool_bwd_FP16, + testing::ValuesIn(AdaptiveAvgPoolTestConfigsBwdFp16())); +INSTANTIATE_TEST_SUITE_P(Smoke, + GPU_AdaptiveAvgPool_bwd_BFP16, + testing::ValuesIn(AdaptiveAvgPoolTestConfigsBwdBfp16())); diff --git a/test/gtest/adaptiveavgpool.hpp b/test/gtest/adaptiveavgpool.hpp new file mode 100644 index 0000000000..cf2e1fa5dd --- /dev/null +++ b/test/gtest/adaptiveavgpool.hpp @@ -0,0 +1,378 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. 
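For running the suites above: each TEST_P / INSTANTIATE_TEST_SUITE_P pair expands to parameterized case names, so a single suite can be selected with GoogleTest's standard filter flag. Sketch as a comment (the binary name is hypothetical; --gtest_filter itself is standard GoogleTest):

// Cases expand to names of the form:
//   Smoke/GPU_AdaptiveAvgPool_fwd_FP32.AdaptiveAvgPoolTestFwd/0
// so, e.g., only the FP32 forward smoke tests:
//   ./MIOpenUnitTests --gtest_filter='Smoke/GPU_AdaptiveAvgPool_fwd_FP32.*'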
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+#include "cpu_adaptiveavgpool.hpp"
+#include "get_handle.hpp"
+#include "tensor_holder.hpp"
+#include "verify.hpp"
+#include <gtest/gtest.h>
+#include <iostream>
+#include <miopen/adaptiveavgpool.hpp>
+#include <miopen/miopen.h>
+#include <vector>
+
+template <class T>
+inline std::ostream& operator<<(std::ostream& os, const std::vector<T>& v)
+{
+    os << '{';
+    for(size_t i = 0; i < v.size(); ++i)
+    {
+        if(i != 0)
+            os << ',';
+        os << v[i];
+    }
+    os << '}';
+    return os;
+}
+
+struct AdaptiveAvgPoolTestCase
+{
+    std::vector<size_t> input_dims;
+    std::vector<size_t> output_dims;
+    bool is_contiguous = true;
+
+    friend std::ostream& operator<<(std::ostream& os, const AdaptiveAvgPoolTestCase& tc)
+    {
+        return os << " input_dims:" << tc.input_dims << " output_dims:" << tc.output_dims
+                  << " is_contiguous:" << tc.is_contiguous;
+    }
+
+    std::vector<size_t> GetInput() const { return input_dims; }
+    std::vector<size_t> GetOutput() const { return output_dims; }
+
+    // Produces strides equivalent to tensor.transpose(0, -1).contiguous().transpose(0, -1)
+    // when is_contiguous == false (i.e. a valid but non-contiguous layout).
+    std::vector<size_t> ComputeStrides(std::vector<size_t> inputDim) const
+    {
+        if(!is_contiguous)
+            std::swap(inputDim.front(), inputDim.back());
+        std::vector<size_t> strides(inputDim.size());
+        strides.back() = 1;
+        for(int i = inputDim.size() - 2; i >= 0; --i)
+            strides[i] = strides[i + 1] * inputDim[i + 1];
+        if(!is_contiguous)
+            std::swap(strides.front(), strides.back());
+        return strides;
+    }
+};
+
+inline std::vector<AdaptiveAvgPoolTestCase> AdaptiveAvgPoolTestConfigsFwdFp32()
+{
+    return {
+        {{64, 768, 17}, {10}, false},
+        {{64, 768, 17}, {10}, true},
+        {{64, 78, 17, 17}, {10, 10}, false},
+        {{64, 78, 17, 17}, {10, 10}, true},
+        {{6, 18, 18, 18, 18}, {5, 5, 5}, false},
+        {{6, 18, 18, 18, 18}, {5, 5, 5}, true},
+    };
+}
+
+inline std::vector<AdaptiveAvgPoolTestCase> AdaptiveAvgPoolTestConfigsFwdFp16()
+{
+    return {
+        {{64, 768, 17}, {10}, false},
+        {{64, 768, 17}, {10}, true},
+        {{64, 78, 17, 17}, {10, 10}, false},
+        {{64, 78, 17, 17}, {10, 10}, true},
+        {{6, 18, 18, 18, 18}, {5, 5, 5}, false},
+        {{6, 18, 18, 18, 18}, {5, 5, 5}, true},
+    };
+}
+
+inline std::vector<AdaptiveAvgPoolTestCase> AdaptiveAvgPoolTestConfigsFwdBfp16()
+{
+    return {
+        {{64, 768, 17}, {10}, false},
+        {{64, 768, 17}, {10}, true},
+        {{64, 78, 17, 17}, {10, 10}, false},
+        {{64, 78, 17, 17}, {10, 10}, true},
+        {{6, 18, 18, 18, 18}, {5, 5, 5}, false},
+        {{6, 18, 18, 18, 18}, {5, 5, 5}, true},
+    };
+}
+
+inline
std::vector<AdaptiveAvgPoolTestCase> AdaptiveAvgPoolTestConfigsBwdFp32() +{ + return { + {{64, 768, 17}, {10}, false}, + {{64, 768, 17}, {10}, true}, + {{64, 206, 17, 17}, {10, 10}, false}, + {{6, 18, 18, 18, 18}, {5, 5, 5}, false}, + {{6, 18, 18, 18, 18}, {18, 18, 18}, true}, + }; +} + +inline std::vector<AdaptiveAvgPoolTestCase> AdaptiveAvgPoolTestConfigsBwdFp16() +{ + return { + {{64, 768, 17}, {10}, false}, + {{64, 768, 17}, {10}, true}, + {{64, 28, 35, 35}, {35, 35}, false}, + {{6, 28, 35, 35, 35}, {10, 10, 10}, false}, + {{6, 28, 35, 35, 35}, {35, 35, 35}, true}, + }; +} + +inline std::vector<AdaptiveAvgPoolTestCase> AdaptiveAvgPoolTestConfigsBwdBfp16() +{ + return { + {{64, 768, 17}, {10}, false}, + {{64, 768, 17}, {10}, true}, + {{64, 208, 9, 9}, {7, 7}, false}, + {{6, 18, 12, 12, 12}, {5, 5, 5}, false}, + {{6, 18, 12, 12, 12}, {12, 12, 12}, true}, + }; +} + +// FORWARD TEST +template <typename T = float> +struct AdaptiveAvgPoolTestFwd : public ::testing::TestWithParam<AdaptiveAvgPoolTestCase> +{ +protected: + void SetUp() override + { + auto&& handle = get_handle(); + adaptiveavgpool_config = GetParam(); + auto in_dim = adaptiveavgpool_config.GetInput(); + auto in_strides = adaptiveavgpool_config.ComputeStrides(in_dim); + auto out_dim = adaptiveavgpool_config.GetOutput(); + N = in_dim[0]; + C = in_dim[1]; + std::vector<size_t> out_dim_final = {N, C}; + if(in_dim.size() == 3) + { + D = 1; + H = in_dim[2]; + W = 1; + + OD = 1; + OH = out_dim[0]; + OW = 1; + out_dim_final.push_back(OH); + } + else if(in_dim.size() == 4) + { + D = 1; + H = in_dim[2]; + W = in_dim[3]; + + OD = 1; + OH = out_dim[0]; + OW = out_dim[1]; + out_dim_final.push_back(OH); + out_dim_final.push_back(OW); + } + else if(in_dim.size() == 5) + { + D = in_dim[2]; + H = in_dim[3]; + W = in_dim[4]; + + OD = out_dim[0]; + OH = out_dim[1]; + OW = out_dim[2]; + out_dim_final.push_back(OD); + out_dim_final.push_back(OH); + out_dim_final.push_back(OW); + } + + auto gen_input_value = [](auto...) 
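+        // Uniform random inputs in [-10, 10): wide enough to exercise the averaging,
+        // small enough to avoid accumulation overflow in fp16/bfloat16.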
{
+            return prng::gen_A_to_B<T>(static_cast<T>(-10.0f), static_cast<T>(10.0f));
+        };
+        input = tensor<T>{in_dim, in_strides}.generate(gen_input_value);
+
+        // Pre-fill the device and reference outputs with NaN so any element the
+        // kernels fail to write is caught during verification.
+        output = tensor<T>{out_dim_final};
+        std::fill(output.begin(), output.end(), std::numeric_limits<T>::quiet_NaN());
+
+        ref_output = tensor<T>{out_dim_final};
+        std::fill(ref_output.begin(), ref_output.end(), std::numeric_limits<T>::quiet_NaN());
+
+        input_dev  = handle.Write(input.data);
+        output_dev = handle.Write(output.data);
+    }
+
+    void RunTest()
+    {
+        auto&& handle = get_handle();
+        miopenStatus_t status;
+
+        // Compute the CPU reference for the matching dimensionality, then run the
+        // GPU kernel on the same input.
+        auto dims = input.desc.GetNumDims();
+        if(dims == 3)
+        {
+            cpu_adaptiveavgpool_forward_1d<T>(input, ref_output, C, H, OH);
+        }
+        else if(dims == 4)
+        {
+            cpu_adaptiveavgpool_forward_2d<T>(input, ref_output, C, H, W, OH, OW);
+        }
+        else if(dims == 5)
+        {
+            cpu_adaptiveavgpool_forward_3d<T>(input, ref_output, C, D, H, W, OD, OH, OW);
+        }
+        status = miopen::adaptiveavgpool::AdaptiveAvgPoolForward(
+            handle, input.desc, input_dev.get(), output.desc, output_dev.get());
+        ASSERT_EQ(status, miopenStatusSuccess);
+
+        output.data = handle.Read<T>(output_dev, output.data.size());
+    }
+
+    void Verify()
+    {
+        double threshold = std::numeric_limits<T>::epsilon();
+
+        auto error = miopen::rms_range(ref_output, output);
+
+        ASSERT_EQ(miopen::range_distance(ref_output), miopen::range_distance(output));
+        EXPECT_LT(error, threshold * 10)
+            << "Forward output error " << error << " exceeds tolerance " << threshold * 10
+            << " (10x machine epsilon)";
+    }
+    AdaptiveAvgPoolTestCase adaptiveavgpool_config;
+
+    tensor<T> input;
+    tensor<T> output;
+    tensor<T> ref_output;
+
+    size_t N, C, D, H, W, OD, OH, OW;
+
+    miopen::Allocator::ManageDataPtr input_dev;
+    miopen::Allocator::ManageDataPtr output_dev;
+};
+
+// BACKWARD TEST
+template <typename T = float>
+struct AdaptiveAvgPoolTestBwd : public ::testing::TestWithParam<AdaptiveAvgPoolTestCase>
+{
+protected:
+    void SetUp() override
+    {
+        auto&& handle = get_handle();
+        adaptiveavgpool_config = GetParam();
+        auto in_grad_dim  = adaptiveavgpool_config.GetInput();
+        auto out_grad_dim = adaptiveavgpool_config.GetOutput();
+        N = in_grad_dim[0];
+        C = in_grad_dim[1];
+        std::vector<size_t> out_grad_dim_final = {N, C};
+
+        if(in_grad_dim.size() == 3)
+        {
+            D = 1;
+            H = in_grad_dim[2];
+            W = 1;
+
+            OD = 1;
+            OH = out_grad_dim[0];
+            OW = 1;
+            out_grad_dim_final.push_back(OH);
+        }
+        else if(in_grad_dim.size() == 4)
+        {
+            D = 1;
+            H = in_grad_dim[2];
+            W = in_grad_dim[3];
+
+            OD = 1;
+            OH = out_grad_dim[0];
+            OW = out_grad_dim[1];
+            out_grad_dim_final.push_back(OH);
+            out_grad_dim_final.push_back(OW);
+        }
+        else if(in_grad_dim.size() == 5)
+        {
+            D = in_grad_dim[2];
+            H = in_grad_dim[3];
+            W = in_grad_dim[4];
+
+            OD = out_grad_dim[0];
+            OH = out_grad_dim[1];
+            OW = out_grad_dim[2];
+            out_grad_dim_final.push_back(OD);
+            out_grad_dim_final.push_back(OH);
+            out_grad_dim_final.push_back(OW);
+        }
+        auto out_grad_strides = adaptiveavgpool_config.ComputeStrides(out_grad_dim_final);
+
+        auto gen_output_grad_value = [](auto...) 
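+        // Uniform random upstream gradients in [-10, 10), laid out through
+        // out_grad_strides (non-contiguous when is_contiguous == false).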
{
+            return prng::gen_A_to_B<T>(static_cast<T>(-10.0f), static_cast<T>(10.0f));
+        };
+        output_grad =
+            tensor<T>{out_grad_dim_final, out_grad_strides}.generate(gen_output_grad_value);
+
+        // Pre-fill the device and reference input gradients with NaN so unwritten
+        // elements are caught during verification.
+        input_grad = tensor<T>{in_grad_dim};
+        std::fill(input_grad.begin(), input_grad.end(), std::numeric_limits<T>::quiet_NaN());
+
+        ref_input_grad = tensor<T>{in_grad_dim};
+        std::fill(
+            ref_input_grad.begin(), ref_input_grad.end(), std::numeric_limits<T>::quiet_NaN());
+
+        output_grad_dev = handle.Write(output_grad.data);
+        input_grad_dev  = handle.Write(input_grad.data);
+    }
+
+    void RunTest()
+    {
+        auto&& handle = get_handle();
+
+        miopenStatus_t status;
+
+        // Compute the CPU reference gradient, then run the GPU kernel on the same
+        // upstream gradient.
+        auto dims = input_grad.desc.GetNumDims();
+        if(dims == 3)
+        {
+            cpu_adaptiveavgpool_backward_1d<T>(output_grad, ref_input_grad, C, H, OH);
+        }
+        else if(dims == 4)
+        {
+            cpu_adaptiveavgpool_backward_2d<T>(output_grad, ref_input_grad, C, H, W, OH, OW);
+        }
+        else if(dims == 5)
+        {
+            cpu_adaptiveavgpool_backward_3d<T>(output_grad, ref_input_grad, C, D, H, W, OD, OH, OW);
+        }
+        status = miopen::adaptiveavgpool::AdaptiveAvgPoolBackward(
+            handle, output_grad.desc, output_grad_dev.get(), input_grad.desc, input_grad_dev.get());
+
+        ASSERT_EQ(status, miopenStatusSuccess);
+
+        input_grad.data = handle.Read<T>(input_grad_dev, input_grad.data.size());
+    }
+
+    void Verify()
+    {
+        double threshold = std::numeric_limits<T>::epsilon();
+        auto error = miopen::rms_range(ref_input_grad, input_grad);
+        ASSERT_EQ(miopen::range_distance(ref_input_grad), miopen::range_distance(input_grad));
+        EXPECT_LT(error, threshold * 10)
+            << "Backward input gradient error " << error << " exceeds tolerance "
+            << threshold * 10 << " (10x machine epsilon)";
+    }
+    AdaptiveAvgPoolTestCase adaptiveavgpool_config;
+
+    tensor<T> output_grad;
+    tensor<T> input_grad;
+    tensor<T> ref_input_grad;
+
+    size_t N, C, D, H, W, OD, OH, OW;
+
+    miopen::Allocator::ManageDataPtr output_grad_dev;
+    miopen::Allocator::ManageDataPtr input_grad_dev;
+};
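
For reference, fixtures like these are normally driven by `TEST_P`/`INSTANTIATE_TEST_SUITE_P` boilerplate further down the file, which this hunk does not show. A minimal sketch of that wiring for the float forward case follows; the `AdaptiveAvgPoolTestFwdFloat` alias and the test/suite names are illustrative assumptions, not part of the diff:

```cpp
// Hypothetical wiring sketch; only the fixture and config-list names are from the diff.
using AdaptiveAvgPoolTestFwdFloat = AdaptiveAvgPoolTestFwd<float>;

TEST_P(AdaptiveAvgPoolTestFwdFloat, AdaptiveAvgPoolFwdTest)
{
    RunTest(); // CPU reference + GPU kernel
    Verify();  // RMS error against 10x epsilon
}

INSTANTIATE_TEST_SUITE_P(AdaptiveAvgPoolTestSet,
                         AdaptiveAvgPoolTestFwdFloat,
                         testing::ValuesIn(AdaptiveAvgPoolTestConfigsFwdFp32()));
```

Each parameterized instance prints its `AdaptiveAvgPoolTestCase` via the `operator<<` defined above, which is why that formatting matters for diagnosing failures.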