diff --git a/driver/CMakeLists.txt b/driver/CMakeLists.txt index 60d6fe6ce6..02d231b9fb 100644 --- a/driver/CMakeLists.txt +++ b/driver/CMakeLists.txt @@ -31,6 +31,7 @@ add_executable(MIOpenDriver conv_common.cpp dm_activ.cpp dm_adam.cpp + dm_adaptiveavgpool.cpp dm_addlayernorm.cpp dm_bnorm.cpp dm_cat.cpp diff --git a/driver/adaptiveavgpool_driver.hpp b/driver/adaptiveavgpool_driver.hpp new file mode 100644 index 0000000000..062c56fdce --- /dev/null +++ b/driver/adaptiveavgpool_driver.hpp @@ -0,0 +1,486 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#pragma once + +#include "InputFlags.hpp" +#include "driver.hpp" +#include "mloAdaptiveAvgPoolHost.hpp" +#include "random.hpp" +#include "tensor_driver.hpp" +#include "timer.hpp" + +#include <../test/tensor_holder.hpp> +#include <../test/verify.hpp> + +#include <miopen/env.hpp> +#include <miopen/handle.hpp> +#include <miopen/miopen.h> +#include <miopen/tensor.hpp> +#include <vector> + +template <typename Tgpu, typename Tref> +class AdaptiveAvgPoolDriver : public Driver +{ +public: + AdaptiveAvgPoolDriver() : Driver() + { + miopenCreateTensorDescriptor(&inputDesc); + miopenCreateTensorDescriptor(&outputDesc); + miopenCreateTensorDescriptor(&inputGradDesc); + miopenCreateTensorDescriptor(&outputGradDesc); + + data_type = miopen_type<Tgpu>{}; + } + + std::vector<int> ComputeStrides(std::vector<int> input); + int AddCmdLineArgs() override; + int ParseCmdLineArgs(int argc, char* argv[]) override; + InputFlags& GetInputFlags() override { return inflags; } + + int GetandSetData() override; + + int AllocateBuffersAndCopy() override; + + int RunForwardGPU() override; + int RunForwardCPU(); + + int RunBackwardGPU() override; + int RunBackwardCPU(); + + Tref GetTolerance(); + int VerifyBackward() override; + int VerifyForward() override; + ~AdaptiveAvgPoolDriver() override + { + miopenDestroyTensorDescriptor(inputDesc); + miopenDestroyTensorDescriptor(outputDesc); + miopenDestroyTensorDescriptor(inputGradDesc); + miopenDestroyTensorDescriptor(outputGradDesc); + } + +private: + InputFlags inflags; + + int forw; + + miopenTensorDescriptor_t inputDesc; + miopenTensorDescriptor_t outputDesc; + miopenTensorDescriptor_t inputGradDesc; + miopenTensorDescriptor_t outputGradDesc; + + 
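// Device buffers for each tensor, with host-side mirrors (declared below) used for CPU verification. +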
std::unique_ptr<GPUMem> input_dev; + std::unique_ptr<GPUMem> output_dev; + std::unique_ptr<GPUMem> input_grad_dev; + std::unique_ptr<GPUMem> output_grad_dev; + + std::vector<Tgpu> input; + std::vector<Tgpu> output; + std::vector<Tref> output_host; + std::vector<Tgpu> input_grad; + std::vector<Tref> input_grad_host; + std::vector<Tgpu> output_grad; + + size_t N = 1, C = 1, D = 1, H = 1, W = 1, OD = 1, OH = 1, OW = 1; + + std::vector<int> in_dim; + std::vector<int> out_dim; + bool isContiguous; +}; + +template <typename Tgpu, typename Tref> +int AdaptiveAvgPoolDriver<Tgpu, Tref>::ParseCmdLineArgs(int argc, char* argv[]) +{ + inflags.Parse(argc, argv); + isContiguous = inflags.GetValueInt("is-contiguous") == 1; + + if(inflags.GetValueInt("time") == 1) + { + miopenEnableProfiling(GetHandle(), true); + } + return miopenStatusSuccess; +} + +template <typename Tgpu, typename Tref> +int AdaptiveAvgPoolDriver<Tgpu, Tref>::GetandSetData() +{ + in_dim = inflags.GetValueTensor("input_dims").lengths; + std::vector<int> in_stride = ComputeStrides(in_dim); + out_dim = inflags.GetValueTensor("output_dims").lengths; + if(in_dim.size() != out_dim.size() + 2) + { + MIOPEN_THROW(miopenStatusBadParm, + "AdaptiveAvgPool: Input tensor must have exactly two more dimensions (N and C) than the output spatial size."); + } + N = in_dim[0]; + C = in_dim[1]; + std::vector<int> out_dim_final = {static_cast<int>(N), static_cast<int>(C)}; + if(in_dim.size() == 3) + { + H = in_dim[2]; + + OH = out_dim[0]; + out_dim_final.push_back(OH); + } + else if(in_dim.size() == 4) + { + H = in_dim[2]; + W = in_dim[3]; + + OH = out_dim[0]; + OW = out_dim[1]; + out_dim_final.push_back(OH); + out_dim_final.push_back(OW); + } + else if(in_dim.size() == 5) + { + D = in_dim[2]; + H = in_dim[3]; + W = in_dim[4]; + + OD = out_dim[0]; + OH = out_dim[1]; + OW = out_dim[2]; + out_dim_final.push_back(OD); + out_dim_final.push_back(OH); + out_dim_final.push_back(OW); + } + std::vector<int> out_grad_stride = ComputeStrides(out_dim_final); + if(SetTensorNd(inputDesc, in_dim, in_stride, data_type) != miopenStatusSuccess) + MIOPEN_THROW("Error parsing input tensor: " + inflags.GetValueStr("input_dims") + "."); + if(SetTensorNd(outputDesc, out_dim_final, data_type) != miopenStatusSuccess) + MIOPEN_THROW("Error parsing output tensor: " + inflags.GetValueStr("output_dims") + "."); + if(SetTensorNd(outputGradDesc, out_dim_final, out_grad_stride, data_type) != + miopenStatusSuccess) + MIOPEN_THROW("Error parsing output grad tensor: " + inflags.GetValueStr("output_dims") + + "."); + if(SetTensorNd(inputGradDesc, in_dim, data_type) != miopenStatusSuccess) + MIOPEN_THROW("Error parsing input grad tensor: " + inflags.GetValueStr("input_dims") + "."); + + return miopenStatusSuccess; +} + +// Equivalent to: tensor.transpose(0, -1).contiguous().transpose(0, -1) in case contiguous == false +template <typename Tgpu, typename Tref> +std::vector<int> AdaptiveAvgPoolDriver<Tgpu, Tref>::ComputeStrides(std::vector<int> inputDim) +{ + if(!isContiguous) + std::swap(inputDim.front(), inputDim.back()); + std::vector<int> strides(inputDim.size()); + strides.back() = 1; + for(int i = inputDim.size() - 2; i >= 0; --i) + strides[i] = strides[i + 1] * inputDim[i + 1]; + if(!isContiguous) + std::swap(strides.front(), strides.back()); + return strides; +} + +template <typename Tgpu, typename Tref> +int AdaptiveAvgPoolDriver<Tgpu, Tref>::AddCmdLineArgs() +{ + inflags.AddInputFlag("forw", 'F', "1", "Run only Forward AdaptiveAvgPool (Default=1)", "int"); + inflags.AddTensorFlag( + "input_dims", + 'D', + "2x3x7x9x9", + "The dimensional lengths of the 
input tensor: N,C,D,H,W... Example: 2x3x7x9x9."); + inflags.AddTensorFlag( + "output_dims", + 'S', + "5x5x5", + "The dimensional lengths of the output tensor: OD,OH,OW,... Example: 5x5x5."); + inflags.AddInputFlag("is-contiguous", 'c', "1", "is-contiguous (Default=1)", "int"); + inflags.AddInputFlag("iter", 'i', "10", "Number of Iterations (Default=10)", "int"); + inflags.AddInputFlag("verify", 'V', "1", "Verify (Default=1)", "int"); + inflags.AddInputFlag("time", 't', "1", "Time (Default=1)", "int"); + inflags.AddInputFlag( + "wall", 'w', "0", "Wall-clock Time, Requires time == 1 (Default=0)", "int"); + + return miopenStatusSuccess; +} + +template <typename Tgpu, typename Tref> +int AdaptiveAvgPoolDriver<Tgpu, Tref>::AllocateBuffersAndCopy() +{ + size_t input_sz = GetTensorSize(inputDesc); + size_t output_sz = GetTensorSize(outputDesc); + + uint32_t ctx = 0; + + input_dev = std::unique_ptr<GPUMem>(new GPUMem(ctx, input_sz, sizeof(Tgpu))); + output_dev = std::unique_ptr<GPUMem>(new GPUMem(ctx, output_sz, sizeof(Tgpu))); + input_grad_dev = std::unique_ptr<GPUMem>(new GPUMem(ctx, input_sz, sizeof(Tgpu))); + output_grad_dev = std::unique_ptr<GPUMem>(new GPUMem(ctx, output_sz, sizeof(Tgpu))); + + input = std::vector<Tgpu>(input_sz, static_cast<Tgpu>(0)); + output = std::vector<Tgpu>(output_sz, static_cast<Tgpu>(0)); + output_host = std::vector<Tref>(output_sz, static_cast<Tref>(0)); + + input_grad = std::vector<Tgpu>(input_sz, static_cast<Tgpu>(0)); + input_grad_host = std::vector<Tref>(input_sz, static_cast<Tref>(0)); + output_grad = std::vector<Tgpu>(output_sz, static_cast<Tgpu>(0)); + + int status; + + for(int i = 0; i < input_sz; i++) + { + input[i] = prng::gen_A_to_B<Tgpu>(static_cast<Tgpu>(-10.0f), static_cast<Tgpu>(10.0f)); + } + status = input_dev->ToGPU(q, input.data()); + + status |= output_dev->ToGPU(q, output.data()); + + status |= input_grad_dev->ToGPU(q, input_grad.data()); + + for(int i = 0; i < output_sz; i++) + { + output_grad[i] = prng::gen_A_to_B<Tgpu>(static_cast<Tgpu>(-1.0), static_cast<Tgpu>(1.0)); + } + status |= output_grad_dev->ToGPU(q, output_grad.data()); + + if(status != 0) + { + std::cout << "Error copying data to GPU\n" << std::endl; + return miopenStatusAllocFailed; + } + + return miopenStatusSuccess; +} + +template <typename Tgpu, typename Tref> +int AdaptiveAvgPoolDriver<Tgpu, Tref>::RunForwardGPU() +{ + float kernel_total_time = 0.0; + float kernel_first_time = 0.0; + + Timer t; + START_TIME + + for(int i = 0; i < inflags.GetValueInt("iter"); i++) + { + auto status = miopenAdaptiveAvgPoolForward( + GetHandle(), inputDesc, input_dev->GetMem(), outputDesc, output_dev->GetMem()); + MIOPEN_THROW_IF(status != miopenStatusSuccess, "Error in miopenAdaptiveAvgPoolForward"); + + float time = 0.0; + miopenGetKernelTime(GetHandle(), &time); + kernel_total_time += time; + if(i == 0) + kernel_first_time = time; + } + + if(inflags.GetValueInt("time") == 1) + { + STOP_TIME + int iter = inflags.GetValueInt("iter"); + if(WALL_CLOCK) + std::cout << "Wall-clock Time Forward AdaptiveAvgPool Elapsed: " + << t.gettime_ms() / iter << " ms" << std::endl; + + float kernel_average_time = + iter > 1 ? 
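/* exclude the first launch, which typically carries kernel-compilation overhead, from the average */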
(kernel_total_time - kernel_first_time) / (iter - 1) : kernel_first_time; + std::cout << "GPU Kernel Time Forward AdaptiveAvgPool Elapsed: " << kernel_average_time + << " ms" << std::endl; + } + + if(output_dev->FromGPU(GetStream(), output.data()) != 0) + { + std::cerr << "Error copying (output_dev) from GPU, size: " << output_dev->GetSize() + << std::endl; + return miopenStatusInternalError; + } + + return miopenStatusSuccess; +} + +template <typename Tgpu, typename Tref> +int AdaptiveAvgPoolDriver<Tgpu, Tref>::RunForwardCPU() +{ + int status = miopenStatusSuccess; + + if(in_dim.size() == 3) + { + status = mloAdaptiveAvgPoolForward1dRunHost<Tgpu, Tref>( + inputDesc, outputDesc, input.data(), output_host.data(), C, H, OH); + MIOPEN_THROW_IF(status != miopenStatusSuccess, + "Error in mloAdaptiveAvgPoolForward1dRunHost"); + } + else if(in_dim.size() == 4) + { + status = mloAdaptiveAvgPoolForward2dRunHost<Tgpu, Tref>( + inputDesc, outputDesc, input.data(), output_host.data(), C, H, W, OH, OW); + MIOPEN_THROW_IF(status != miopenStatusSuccess, + "Error in mloAdaptiveAvgPoolForward2dRunHost"); + } + else if(in_dim.size() == 5) + { + status = mloAdaptiveAvgPoolForward3dRunHost<Tgpu, Tref>( + inputDesc, outputDesc, input.data(), output_host.data(), C, D, H, W, OD, OH, OW); + MIOPEN_THROW_IF(status != miopenStatusSuccess, + "Error in mloAdaptiveAvgPoolForward3dRunHost"); + } + return status; +} + +template <typename Tgpu, typename Tref> +int AdaptiveAvgPoolDriver<Tgpu, Tref>::RunBackwardGPU() +{ + float kernel_total_time = 0.0; + float kernel_first_time = 0.0; + + Timer t; + START_TIME + + for(int i = 0; i < inflags.GetValueInt("iter"); i++) + { + auto status = miopenAdaptiveAvgPoolBackward(GetHandle(), + outputGradDesc, + output_grad_dev->GetMem(), + inputGradDesc, + input_grad_dev->GetMem()); + MIOPEN_THROW_IF(status != miopenStatusSuccess, "Error in miopenAdaptiveAvgPoolBackward"); + + float time = 0.0; + miopenGetKernelTime(GetHandle(), &time); + kernel_total_time += time; + if(i == 0) + kernel_first_time = time; + } + + if(inflags.GetValueInt("time") == 1) + { + STOP_TIME + int iter = inflags.GetValueInt("iter"); + if(WALL_CLOCK) + std::cout << "Wall-clock Time Backward AdaptiveAvgPool Elapsed: " + << t.gettime_ms() / iter << " ms" << std::endl; + + float kernel_average_time = + iter > 1 ? 
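/* likewise exclude the warm-up launch from the backward average */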
(kernel_total_time - kernel_first_time) / (iter - 1) : kernel_first_time; + std::cout << "GPU Kernel Time Backward AdaptiveAvgPool Elapsed: " << kernel_average_time + << " ms" << std::endl; + } + + if(input_grad_dev->FromGPU(GetStream(), input_grad.data()) != 0) + { + std::cerr << "Error copying (input_grad_dev) from GPU, size: " << input_grad_dev->GetSize() + << std::endl; + return miopenStatusInternalError; + } + + return miopenStatusSuccess; +} + +template <typename Tgpu, typename Tref> +int AdaptiveAvgPoolDriver<Tgpu, Tref>::RunBackwardCPU() +{ + int status = miopenStatusSuccess; + + if(in_dim.size() == 3) + { + status = mloAdaptiveAvgPoolBackward1dRunHost<Tgpu, Tref>( + outputGradDesc, inputGradDesc, output_grad.data(), input_grad_host.data(), C, H, OH); + MIOPEN_THROW_IF(status != miopenStatusSuccess, + "Error in mloAdaptiveAvgPoolBackward1dRunHost"); + } + else if(in_dim.size() == 4) + { + status = mloAdaptiveAvgPoolBackward2dRunHost<Tgpu, Tref>(outputGradDesc, + inputGradDesc, + output_grad.data(), + input_grad_host.data(), + C, + H, + W, + OH, + OW); + MIOPEN_THROW_IF(status != miopenStatusSuccess, + "Error in mloAdaptiveAvgPoolBackward2dRunHost"); + } + else if(in_dim.size() == 5) + { + status = mloAdaptiveAvgPoolBackward3dRunHost<Tgpu, Tref>(outputGradDesc, + inputGradDesc, + output_grad.data(), + input_grad_host.data(), + C, + D, + H, + W, + OD, + OH, + OW); + MIOPEN_THROW_IF(status != miopenStatusSuccess, + "Error in mloAdaptiveAvgPoolBackward3dRunHost"); + } + return status; +} + +template <typename Tgpu, typename Tref> +Tref AdaptiveAvgPoolDriver<Tgpu, Tref>::GetTolerance() +{ + Tref tolerance = std::numeric_limits<Tgpu>::epsilon() * 10; + return tolerance; +} + +template <typename Tgpu, typename Tref> +int AdaptiveAvgPoolDriver<Tgpu, Tref>::VerifyForward() +{ + RunForwardCPU(); + const Tref tolerance = GetTolerance(); + auto error = miopen::rms_range(output_host, output); + + if(!std::isfinite(error) || error > tolerance) + { + std::cout << "Forward AdaptiveAvgPool FAILED: " << error << std::endl; + return EC_VerifyFwd; + } + else + { + std::cout << "Forward AdaptiveAvgPool Verifies on CPU and GPU (err=" << error << ")" + << std::endl; + } + return miopenStatusSuccess; +} + +template <typename Tgpu, typename Tref> +int AdaptiveAvgPoolDriver<Tgpu, Tref>::VerifyBackward() +{ + RunBackwardCPU(); + const Tref tolerance = GetTolerance(); + auto error = miopen::rms_range(input_grad_host, input_grad); + + if(!std::isfinite(error) || error > tolerance) + { + std::cout << "Backward AdaptiveAvgPool FAILED: " << error << std::endl; + return EC_VerifyBwd; + } + else + { + std::cout << "Backward AdaptiveAvgPool Verifies on CPU and GPU (err=" << error << ")" + << std::endl; + } + return miopenStatusSuccess; +} diff --git a/driver/dm_adaptiveavgpool.cpp b/driver/dm_adaptiveavgpool.cpp new file mode 100644 index 0000000000..b6e53ba17e --- /dev/null +++ b/driver/dm_adaptiveavgpool.cpp @@ -0,0 +1,40 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#include "registry_driver_maker.hpp" +#include "adaptiveavgpool_driver.hpp" + +static Driver* makeDriver(const std::string& base_arg) +{ + if(base_arg == "adaptiveavgpool") + return new AdaptiveAvgPoolDriver<float, float>(); + if(base_arg == "adaptiveavgpoolfp16") + return new AdaptiveAvgPoolDriver<float16, float>(); + if(base_arg == "adaptiveavgpoolbfp16") + return new AdaptiveAvgPoolDriver<bfloat16, float>(); + return nullptr; +} + +REGISTER_DRIVER_MAKER(makeDriver); diff --git a/driver/driver.hpp b/driver/driver.hpp index f3ae8fa039..6b21c3fe6e 100644 --- a/driver/driver.hpp +++ b/driver/driver.hpp @@ -314,7 +314,7 @@ inline void PadBufferSize(size_t& sz, int datatype_sz) "adamw[fp16], ampadamw, transformersadamw[fp16], transformersampadamw, " "getitem[bfp16|fp16], reducecalculation[bfp16|fp16], rope[bfp16|fp16], " "prelu[bfp16|fp16], kthvalue[bfp16|fp16], glu[bfp16|fp16], softmarginloss[bfp16|fp16], " - "multimarginloss[bfp16|fp16]\n"); + "multimarginloss[bfp16|fp16], adaptiveavgpool[bfp16|fp16]\n"); exit(0); // NOLINT (concurrency-mt-unsafe) } @@ -352,6 +352,7 @@ inline std::string ParseBaseArg(int argc, char* argv[]) arg != "kthvaluebfp16" && arg != "glu" && arg != "glufp16" && arg != "glubfp16" && arg != "softmarginloss" && arg != "softmarginlossfp16" && arg != "softmarginlossbfp16" && arg != "multimarginloss" && arg != "multimarginlossfp16" && arg != "multimarginlossbfp16" && + arg != "adaptiveavgpool" && arg != "adaptiveavgpoolfp16" && arg != "adaptiveavgpoolbfp16" && arg != "--version") { printf("FAILED: Invalid Base Input Argument\n"); diff --git a/driver/mloAdaptiveAvgPoolHost.hpp b/driver/mloAdaptiveAvgPoolHost.hpp new file mode 100644 index 0000000000..73848ca38f --- /dev/null +++ b/driver/mloAdaptiveAvgPoolHost.hpp @@ -0,0 +1,292 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#pragma once + +#include <cmath> +#include <miopen/tensor.hpp> +#include <miopen/tensor_view_utils.hpp> +#include <../test/ford.hpp> + +template <typename Tgpu, typename Tcheck> +int32_t mloAdaptiveAvgPoolForward1dRunHost(const miopenTensorDescriptor_t inputDesc, + const miopenTensorDescriptor_t outputDesc, + const Tgpu* input, + Tcheck* output, + const size_t C, + const size_t H, + const size_t OH) +{ + auto numel = miopen::deref(outputDesc).GetElementSize(); + + auto input_tv = miopen::get_inner_expanded_tv<3>(miopen::deref(inputDesc)); + auto output_tv = miopen::get_inner_expanded_tv<3>(miopen::deref(outputDesc)); + + par_ford(numel)([&](size_t gid) { + size_t nc = gid / OH, oh = gid % OH; + size_t n = nc / C, c = nc % C; + + size_t h = oh * H / OH; + size_t kh = (((oh + 1) * H + OH - 1) / OH) - h; + + float sum = 0; + for(size_t ih = h; ih < (h + kh); ++ih) + { + sum += static_cast<float>(input[input_tv.get_tensor_view_idx({n, c, ih})]); + } + + output[output_tv.get_tensor_view_idx({n, c, oh})] = static_cast<Tcheck>(sum / kh); + }); + return miopenStatusSuccess; +} + +template <typename Tgpu, typename Tcheck> +int32_t mloAdaptiveAvgPoolForward2dRunHost(const miopenTensorDescriptor_t inputDesc, + const miopenTensorDescriptor_t outputDesc, + const Tgpu* input, + Tcheck* output, + const size_t C, + const size_t H, + const size_t W, + const size_t OH, + const size_t OW) +{ + auto numel = miopen::deref(outputDesc).GetElementSize(); + + auto input_tv = miopen::get_inner_expanded_tv<4>(miopen::deref(inputDesc)); + auto output_tv = miopen::get_inner_expanded_tv<4>(miopen::deref(outputDesc)); + + par_ford(numel)([&](size_t gid) { + size_t ncoh = gid / OW, ow = gid % OW; + size_t nc = ncoh / OH, oh = ncoh % OH; + size_t n = nc / C, c = nc % C; + + size_t h = (oh * H) / OH; + size_t kh = (((oh + 1) * H + OH - 1) / OH) - h; + + size_t w = (ow * W) / OW; + size_t kw = (((ow + 1) * W + OW - 1) / OW) - w; + + float divider = static_cast<float>(kh * kw); + float sum = 0; + for(size_t ih = h; ih < (h + kh); ++ih) + { + for(size_t iw = w; iw < (w + kw); ++iw) + { + sum += static_cast<float>(input[input_tv.get_tensor_view_idx({n, c, ih, iw})]); + } + } + + output[output_tv.get_tensor_view_idx({n, c, oh, ow})] = static_cast<Tcheck>(sum / divider); + }); + return miopenStatusSuccess; +} + +template <typename Tgpu, typename Tcheck> +int32_t 
mloAdaptiveAvgPoolForward3dRunHost(const miopenTensorDescriptor_t inputDesc, + const miopenTensorDescriptor_t outputDesc, + const Tgpu* input, + Tcheck* output, + const size_t C, + const size_t D, + const size_t H, + const size_t W, + const size_t OD, + const size_t OH, + const size_t OW) +{ + auto numel = miopen::deref(outputDesc).GetElementSize(); + + auto input_tv = miopen::get_inner_expanded_tv<5>(miopen::deref(inputDesc)); + auto output_tv = miopen::get_inner_expanded_tv<5>(miopen::deref(outputDesc)); + + par_ford(numel)([&](size_t gid) { + size_t ncodoh = gid / OW, ow = gid % OW; + size_t ncod = ncodoh / OH, oh = ncodoh % OH; + size_t nc = ncod / OD, od = ncod % OD; + size_t n = nc / C, c = nc % C; + + size_t d = (od * D) / OD; + size_t kd = ((od + 1) * D + OD - 1) / OD - d; + + size_t h = (oh * H) / OH; + size_t kh = ((oh + 1) * H + OH - 1) / OH - h; + + size_t w = (ow * W) / OW; + size_t kw = ((ow + 1) * W + OW - 1) / OW - w; + + float sum = 0; + for(size_t id = d; id < (d + kd); ++id) + { + for(size_t ih = h; ih < (h + kh); ++ih) + { + for(size_t iw = w; iw < (w + kw); ++iw) + { + sum += + static_cast<float>(input[input_tv.get_tensor_view_idx({n, c, id, ih, iw})]); + } + } + } + + output[output_tv.get_tensor_view_idx({n, c, od, oh, ow})] = + static_cast<Tcheck>(sum / (kd * kh * kw)); + }); + return miopenStatusSuccess; +} + +template <typename Tgpu, typename Tcheck> +int32_t mloAdaptiveAvgPoolBackward1dRunHost(const miopenTensorDescriptor_t outputGradDesc, + const miopenTensorDescriptor_t inputGradDesc, + const Tgpu* output_grad, + Tcheck* input_grad, + const size_t C, + const size_t H, + const size_t OH) +{ + auto numel = miopen::deref(inputGradDesc).GetElementSize(); + + auto output_grad_tv = miopen::get_inner_expanded_tv<3>(miopen::deref(outputGradDesc)); + auto input_grad_tv = miopen::get_inner_expanded_tv<3>(miopen::deref(inputGradDesc)); + + par_ford(numel)([&](size_t gid) { + size_t nc = gid / H, h = gid % H; + size_t n = nc / C, c = nc % C; + + size_t oh = (h * OH) / H; + size_t koh = (((h + 1) * OH + H - 1) / H) - oh; + + float grad = 0; + for(size_t ih = oh; ih < (oh + koh); ++ih) + { + size_t kh = ((ih + 1) * H + OH - 1) / OH - (ih * H) / OH; + grad += + static_cast<float>(output_grad[output_grad_tv.get_tensor_view_idx({n, c, ih})]) / + kh; + } + input_grad[input_grad_tv.get_tensor_view_idx({n, c, h})] = static_cast<Tcheck>(grad); + }); + return miopenStatusSuccess; +} + +template <typename Tgpu, typename Tcheck> +int32_t mloAdaptiveAvgPoolBackward2dRunHost(const miopenTensorDescriptor_t outputGradDesc, + const miopenTensorDescriptor_t inputGradDesc, + const Tgpu* output_grad, + Tcheck* input_grad, + const size_t C, + const size_t H, + const size_t W, + const size_t OH, + const size_t OW) +{ + auto numel = miopen::deref(inputGradDesc).GetElementSize(); + + auto output_grad_tv = miopen::get_inner_expanded_tv<4>(miopen::deref(outputGradDesc)); + auto input_grad_tv = miopen::get_inner_expanded_tv<4>(miopen::deref(inputGradDesc)); + + par_ford(numel)([&](size_t gid) { + size_t nch = gid / W, w = gid % W; + size_t nc = nch / H, h = nch % H; + size_t n = nc / C, c = nc % C; + + size_t oh = (h * OH) / H; + size_t koh = ((h + 1) * OH + H - 1) / H - oh; + + size_t ow = (w * OW) / W; + size_t kow = ((w + 1) * OW + W - 1) / W - ow; + + float grad = 0; + for(size_t ih = oh; ih < (oh + koh); ++ih) + { + size_t kh = ((ih + 1) * H + OH - 1) / OH - (ih * H) / OH; + for(size_t iw = ow; iw < (ow + kow); ++iw) + { + size_t kw = ((iw + 1) * W + OW - 1) / OW - (iw * W) / OW; + grad += 
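/* each overlapping output cell contributes its gradient divided by its own window area (kh * kw) */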
static_cast<float>( + output_grad[output_grad_tv.get_tensor_view_idx({n, c, ih, iw})]) / + (kh * kw); + } + } + + input_grad[input_grad_tv.get_tensor_view_idx({n, c, h, w})] = static_cast<Tcheck>(grad); + }); + return miopenStatusSuccess; +} + +template <typename Tgpu, typename Tcheck> +int32_t mloAdaptiveAvgPoolBackward3dRunHost(const miopenTensorDescriptor_t outputGradDesc, + const miopenTensorDescriptor_t inputGradDesc, + const Tgpu* output_grad, + Tcheck* input_grad, + const size_t C, + const size_t D, + const size_t H, + const size_t W, + const size_t OD, + const size_t OH, + const size_t OW) +{ + auto numel = miopen::deref(inputGradDesc).GetElementSize(); + + auto output_grad_tv = miopen::get_inner_expanded_tv<5>(miopen::deref(outputGradDesc)); + auto input_grad_tv = miopen::get_inner_expanded_tv<5>(miopen::deref(inputGradDesc)); + + par_ford(numel)([&](size_t gid) { + size_t ncdh = gid / W, w = gid % W; + size_t ncd = ncdh / H, h = ncdh % H; + size_t nc = ncd / D, d = ncd % D; + size_t n = nc / C, c = nc % C; + + size_t od = (d * OD) / D; + size_t kod = ((d + 1) * OD + D - 1) / D - od; + + size_t oh = (h * OH) / H; + size_t koh = ((h + 1) * OH + H - 1) / H - oh; + + size_t ow = (w * OW) / W; + size_t kow = ((w + 1) * OW + W - 1) / W - ow; + + float grad = 0; + for(size_t id = od; id < (od + kod); ++id) + { + size_t kd = ((id + 1) * D + OD - 1) / OD - (id * D) / OD; + for(size_t ih = oh; ih < (oh + koh); ++ih) + { + size_t kh = ((ih + 1) * H + OH - 1) / OH - (ih * H) / OH; + for(size_t iw = ow; iw < (ow + kow); ++iw) + { + size_t kw = ((iw + 1) * W + OW - 1) / OW - (iw * W) / OW; + grad += + static_cast<float>( + output_grad[output_grad_tv.get_tensor_view_idx({n, c, id, ih, iw})]) / + (kd * kh * kw); + } + } + } + + input_grad[input_grad_tv.get_tensor_view_idx({n, c, d, h, w})] = static_cast<Tcheck>(grad); + }); + return miopenStatusSuccess; +} diff --git a/include/miopen/miopen.h b/include/miopen/miopen.h index 56a5fc4035..f67e61d87c 100644 --- a/include/miopen/miopen.h +++ b/include/miopen/miopen.h @@ -8226,6 +8226,47 @@ MIOPEN_EXPORT miopenStatus_t miopenMultiMarginLossForward(miopenHandle_t handle, // CLOSEOUT LossFunction DOXYGEN GROUP #endif // MIOPEN_BETA_API +#ifdef MIOPEN_BETA_API +// adaptiveavgpool APIs +/** @addtogroup pooling + * + * @{ + */ + +/*! @brief Execute an adaptiveavgpool forward layer + * + * @param handle MIOpen handle (input) + * @param inputDesc Tensor descriptor for input tensor (input) + * @param input Data tensor input (input) + * @param outputDesc Tensor descriptor for output tensor (input) + * @param output Data tensor output (output) + * @return miopenStatus_t + */ +MIOPEN_EXPORT miopenStatus_t miopenAdaptiveAvgPoolForward(miopenHandle_t handle, + const miopenTensorDescriptor_t inputDesc, + const void* input, + const miopenTensorDescriptor_t outputDesc, + void* output); + +/*! 
@brief Execute an adaptiveavgpool backward layer + * + * @param handle MIOpen handle (input) + * @param outputGradDesc Tensor descriptor for output grad tensor (input) + * @param output_grad Data tensor output grad (input) + * @param inputGradDesc Tensor descriptor for input grad tensor (input) + * @param input_grad Data tensor input grad (output) + * @return miopenStatus_t + */ +MIOPEN_EXPORT miopenStatus_t +miopenAdaptiveAvgPoolBackward(miopenHandle_t handle, + const miopenTensorDescriptor_t outputGradDesc, + const void* output_grad, + const miopenTensorDescriptor_t inputGradDesc, + void* input_grad); +/** @} */ +// CLOSEOUT pooling DOXYGEN GROUP +#endif // MIOPEN_BETA_API + #ifdef __cplusplus } #endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 36befe40ab..3c5d2acbd4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -84,6 +84,8 @@ set( MIOpen_Source activ_api.cpp adam/problem_description.cpp adam_api.cpp + adaptiveavgpool_api.cpp + adaptiveavgpool/problem_description.cpp addlayernorm_api.cpp api/find2_0_commons.cpp base64.cpp @@ -209,6 +211,12 @@ set( MIOpen_Source solver/activ/fwd_1.cpp solver/adam/adam.cpp solver/adam/transformers_adam_w.cpp + solver/adaptiveavgpool/backward_adaptiveavgpool_1d.cpp + solver/adaptiveavgpool/backward_adaptiveavgpool_2d.cpp + solver/adaptiveavgpool/backward_adaptiveavgpool_3d.cpp + solver/adaptiveavgpool/forward_adaptiveavgpool_1d.cpp + solver/adaptiveavgpool/forward_adaptiveavgpool_2d.cpp + solver/adaptiveavgpool/forward_adaptiveavgpool_3d.cpp solver/batchnorm/backward_ck.cpp solver/batchnorm/backward_per_activation.cpp solver/batchnorm/backward_per_activation_fused.cpp @@ -523,6 +531,7 @@ if( MIOPEN_BACKEND MATCHES "OpenCL" OR MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN ${GPU_BATCHED_TRANSPOSE_KERNEL_HIP} ${GPU_GENERAL_TENSOR_REORDER_KERNEL_HIP_SOURCE} kernels/MIOpenAdam.cpp + kernels/MIOpenAdaptiveAvgPool.cpp kernels/MIOpenCat.cpp kernels/MIOpenCheckNumerics.cpp kernels/MIOpenBatchNormActivBwdPerAct.cl @@ -674,6 +683,7 @@ if( MIOPEN_BACKEND MATCHES "OpenCL" OR MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN list(APPEND MIOpen_Source activ.cpp adam.cpp + adaptiveavgpool.cpp addlayernorm.cpp cat.cpp exec_utils.cpp diff --git a/src/adaptiveavgpool.cpp b/src/adaptiveavgpool.cpp new file mode 100644 index 0000000000..f5ff954740 --- /dev/null +++ b/src/adaptiveavgpool.cpp @@ -0,0 +1,98 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#include <miopen/adaptiveavgpool.hpp> +#include <miopen/kernel_cache.hpp> +#include <miopen/float_equal.hpp> +#include <miopen/tensor.hpp> +#include <miopen/adaptiveavgpool/invoke_params.hpp> +#include <miopen/adaptiveavgpool/solvers.hpp> +#include <miopen/find_solution.hpp> + +namespace miopen { + +namespace adaptiveavgpool { + +miopenStatus_t AdaptiveAvgPoolForward(Handle& handle, + const TensorDescriptor& inputDesc, + ConstData_t input, + const TensorDescriptor& outputDesc, + Data_t output) +{ + const auto problem = adaptiveavgpool::FwdProblemDescription{inputDesc, outputDesc}; + + const auto invoke_params = [&]() { + auto tmp = adaptiveavgpool::FwdInvokeParams{}; + tmp.inputDesc = &inputDesc; + tmp.outputDesc = &outputDesc; + + tmp.input = input; + tmp.output = output; + + return tmp; + }(); + const auto algo = AlgorithmName{"AdaptiveAvgPoolForward"}; + const auto solvers = + solver::SolverContainer<solver::adaptiveavgpool::AdaptiveAvgPoolForward1d, + solver::adaptiveavgpool::AdaptiveAvgPoolForward2d, + solver::adaptiveavgpool::AdaptiveAvgPoolForward3d>{}; + + solvers.ExecutePrimitive(handle, problem, algo, invoke_params); + + return miopenStatusSuccess; +} + +miopenStatus_t AdaptiveAvgPoolBackward(Handle& handle, + const TensorDescriptor& outputGradDesc, + ConstData_t output_grad, + const TensorDescriptor& inputGradDesc, + Data_t input_grad) +{ + const auto problem = adaptiveavgpool::BwdProblemDescription{outputGradDesc, inputGradDesc}; + + const auto invoke_params = [&]() { + auto tmp = adaptiveavgpool::BwdInvokeParams{}; + tmp.outputGradDesc = &outputGradDesc; + tmp.inputGradDesc = &inputGradDesc; + + tmp.output_grad = output_grad; + tmp.input_grad = input_grad; + + return tmp; + }(); + const auto algo = AlgorithmName{"AdaptiveAvgPoolBackward"}; + const auto solvers = + solver::SolverContainer<solver::adaptiveavgpool::AdaptiveAvgPoolBackward1d, + solver::adaptiveavgpool::AdaptiveAvgPoolBackward2d, + solver::adaptiveavgpool::AdaptiveAvgPoolBackward3d>{}; + + solvers.ExecutePrimitive(handle, problem, algo, invoke_params); + + return miopenStatusSuccess; +} + +} // namespace adaptiveavgpool + +} // namespace miopen diff --git a/src/adaptiveavgpool/problem_description.cpp b/src/adaptiveavgpool/problem_description.cpp new file mode 100644 index 0000000000..21800d4cd0 --- /dev/null +++ b/src/adaptiveavgpool/problem_description.cpp @@ -0,0 +1,86 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#include <miopen/adaptiveavgpool/problem_description.hpp> +#include <miopen/names.hpp> +#include <sstream> + +namespace miopen { + +namespace adaptiveavgpool { + +inline std::ostream& operator<<(std::ostream& os, const std::vector<size_t>& v) +{ + os << '{'; + for(size_t i = 0; i < v.size(); ++i) + { + if(i != 0) + os << ','; + os << v[i]; + } + os << '}'; + return os; +} + +NetworkConfig FwdProblemDescription::MakeNetworkConfig() const +{ + auto input_size = inputDesc.GetLengths(); + auto output_size = outputDesc.GetLengths(); + auto input_dtype = inputDesc.GetType(); + + std::ostringstream ss; + + ss << "adaptiveavgpool_fwd"; + ss << "-input_dtype" << input_dtype; + ss << "-Is" << input_size; + ss << "-Os" << output_size; + ss << "-Con" << IsAllContiguous(); + + return NetworkConfig{ss.str()}; +} + +NetworkConfig BwdProblemDescription::MakeNetworkConfig() const +{ + auto input_grad_size = inputGradDesc.GetLengths(); + auto output_grad_size = outputGradDesc.GetLengths(); + auto input_grad_stride = inputGradDesc.GetStrides(); + auto output_grad_stride = outputGradDesc.GetStrides(); + auto input_dtype = inputGradDesc.GetType(); + + std::ostringstream ss; + + ss << "adaptiveavgpool_bwd"; + ss << "-input_dtype" << input_dtype; + ss << "-dIs" << input_grad_size; + ss << "-dOs" << output_grad_size; + ss << "-Con" << IsAllContiguous(); + + return NetworkConfig{ss.str()}; +} + +} // namespace adaptiveavgpool + +} // namespace miopen diff --git a/src/adaptiveavgpool_api.cpp b/src/adaptiveavgpool_api.cpp new file mode 100644 index 0000000000..0f27507f46 --- /dev/null +++ b/src/adaptiveavgpool_api.cpp @@ -0,0 +1,113 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ + +#include <miopen/adaptiveavgpool.hpp> +#include <miopen/errors.hpp> +#include <miopen/handle.hpp> +#include <miopen/logger.hpp> +#include <miopen/tensor_ops.hpp> + +inline std::ostream& operator<<(std::ostream& os, const std::vector<size_t>& v) +{ + os << '{'; + for(size_t i = 0; i < v.size(); ++i) + { + if(i != 0) + os << ','; + os << v[i]; + } + os << '}'; + return os; +} + +static void LogCmdAdaptiveAvgPool(const miopenTensorDescriptor_t xDesc, + const miopenTensorDescriptor_t oDesc, + const bool is_fwd) +{ + if(miopen::IsLoggingCmd()) + { + std::stringstream ss; + auto dtype = miopen::deref(xDesc).GetType(); + if(dtype == miopenHalf) + { + ss << "adaptiveavgpoolfp16"; + } + else if(dtype == miopenFloat) + { + ss << "adaptiveavgpool"; + } + else if(dtype == miopenBFloat16) + { + ss << "adaptiveavgpoolbfp16"; + } + + MIOPEN_LOG_FUNCTION(xDesc, oDesc, is_fwd); + ss << " -Is " << miopen::deref(xDesc).GetLengths(); + ss << " -Os " << miopen::deref(oDesc).GetLengths(); + ss << " -Si " << miopen::deref(xDesc).GetStrides(); + ss << " -So " << miopen::deref(oDesc).GetStrides(); + ss << " -F " << ((is_fwd) ? "1" : "2"); + + MIOPEN_LOG_DRIVER_CMD(ss.str()); + } +} + +extern "C" miopenStatus_t miopenAdaptiveAvgPoolForward(miopenHandle_t handle, + const miopenTensorDescriptor_t inputDesc, + const void* input, + const miopenTensorDescriptor_t outputDesc, + void* output) +{ + MIOPEN_LOG_FUNCTION(handle, inputDesc, input, outputDesc, output); + + LogCmdAdaptiveAvgPool(inputDesc, outputDesc, true); + return miopen::try_([&] { + miopen::adaptiveavgpool::AdaptiveAvgPoolForward(miopen::deref(handle), + miopen::deref(inputDesc), + DataCast(input), + miopen::deref(outputDesc), + DataCast(output)); + }); +} + +extern "C" miopenStatus_t +miopenAdaptiveAvgPoolBackward(miopenHandle_t handle, + const miopenTensorDescriptor_t outputGradDesc, + const void* output_grad, + const miopenTensorDescriptor_t inputGradDesc, + void* input_grad) +{ + MIOPEN_LOG_FUNCTION(handle, outputGradDesc, output_grad, inputGradDesc, input_grad); + + LogCmdAdaptiveAvgPool(inputGradDesc, outputGradDesc, false); + return miopen::try_([&] { + miopen::adaptiveavgpool::AdaptiveAvgPoolBackward(miopen::deref(handle), + miopen::deref(outputGradDesc), + DataCast(output_grad), + miopen::deref(inputGradDesc), + DataCast(input_grad)); + }); +} diff --git a/src/include/miopen/adaptiveavgpool.hpp b/src/include/miopen/adaptiveavgpool.hpp new file mode 100644 index 0000000000..7f04af7b8d --- /dev/null +++ b/src/include/miopen/adaptiveavgpool.hpp @@ -0,0 +1,51 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#pragma once +#include <miopen/common.hpp> + +namespace miopen { + +struct Handle; +struct TensorDescriptor; + +namespace adaptiveavgpool { + +MIOPEN_INTERNALS_EXPORT miopenStatus_t AdaptiveAvgPoolForward(Handle& handle, + const TensorDescriptor& inputDesc, + ConstData_t input, + const TensorDescriptor& outputDesc, + Data_t output); + +MIOPEN_INTERNALS_EXPORT miopenStatus_t +AdaptiveAvgPoolBackward(Handle& handle, + const TensorDescriptor& outputGradDesc, + ConstData_t output_grad, + const TensorDescriptor& inputGradDesc, + Data_t input_grad); + +} // namespace adaptiveavgpool + +} // namespace miopen diff --git a/src/include/miopen/adaptiveavgpool/invoke_params.hpp b/src/include/miopen/adaptiveavgpool/invoke_params.hpp new file mode 100644 index 0000000000..b9a30f7236 --- /dev/null +++ b/src/include/miopen/adaptiveavgpool/invoke_params.hpp @@ -0,0 +1,67 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ + +#pragma once + +#include <miopen/common.hpp> +#include <miopen/invoke_params.hpp> +#include <miopen/tensor.hpp> + +namespace miopen { +namespace adaptiveavgpool { + +struct FwdInvokeParams : public miopen::InvokeParams +{ + + FwdInvokeParams() = default; + + const TensorDescriptor* inputDesc = nullptr; + const TensorDescriptor* outputDesc = nullptr; + + ConstData_t input = nullptr; + Data_t output = nullptr; + + std::size_t GetWorkspaceSize() const { return 0; } + Data_t GetWorkspace() const { return nullptr; } +}; + +struct BwdInvokeParams : public miopen::InvokeParams +{ + + BwdInvokeParams() = default; + + const TensorDescriptor* outputGradDesc = nullptr; + const TensorDescriptor* inputGradDesc = nullptr; + + ConstData_t output_grad = nullptr; + Data_t input_grad = nullptr; + + std::size_t GetWorkspaceSize() const { return 0; } + Data_t GetWorkspace() const { return nullptr; } +}; + +} // namespace adaptiveavgpool +} // namespace miopen diff --git a/src/include/miopen/adaptiveavgpool/problem_description.hpp b/src/include/miopen/adaptiveavgpool/problem_description.hpp new file mode 100644 index 0000000000..d8b112e46e --- /dev/null +++ b/src/include/miopen/adaptiveavgpool/problem_description.hpp @@ -0,0 +1,225 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ + +#pragma once + +#include <miopen/problem_description_base.hpp> +#include <miopen/activ.hpp> +#include <miopen/tensor.hpp> + +namespace miopen { + +struct NetworkConfig; + +namespace adaptiveavgpool { + +struct FwdProblemDescription : ProblemDescriptionBase +{ + FwdProblemDescription(const TensorDescriptor& inputDesc_, const TensorDescriptor& outputDesc_) + : inputDesc(inputDesc_), outputDesc(outputDesc_) + { + IsValidLength(); + IsValidDims(); + IsSameType(); + } + + auto GetInputDesc() const { return inputDesc; } + auto GetOutputDesc() const { return outputDesc; } + auto GetNtotal() const { return outputDesc.GetElementSize(); } + + bool IsValidLength() const + { + auto input_dims = inputDesc.GetLengths().size(); + if(outputDesc.GetLengths()[0] != inputDesc.GetLengths()[0] || + outputDesc.GetLengths()[1] != inputDesc.GetLengths()[1] || + outputDesc.GetLengths().size() != input_dims) + { + MIOPEN_THROW(miopenStatusBadParm, + "AdaptiveAvgPool: Input and output tensor sizes do not match."); + } + + if(input_dims == 3) + { + if(outputDesc.GetLengths()[2] > inputDesc.GetLengths()[2]) + { + MIOPEN_THROW(miopenStatusBadParm, + "AdaptiveAvgPool: Input tensor sizes are too small compared to output " + "tensor sizes."); + } + } + else if(input_dims == 4) + { + if(outputDesc.GetLengths()[2] > inputDesc.GetLengths()[2] || + outputDesc.GetLengths()[3] > inputDesc.GetLengths()[3]) + { + MIOPEN_THROW(miopenStatusBadParm, + "AdaptiveAvgPool: Input tensor sizes are too small compared to output " + "tensor sizes."); + } + } + else if(input_dims == 5) + { + if(outputDesc.GetLengths()[2] > inputDesc.GetLengths()[2] || + outputDesc.GetLengths()[3] > inputDesc.GetLengths()[3] || + outputDesc.GetLengths()[4] > inputDesc.GetLengths()[4]) + { + MIOPEN_THROW(miopenStatusBadParm, + "AdaptiveAvgPool: Input tensor sizes are too small compared to output " + "tensor sizes."); + } + } + + return true; + } + + bool IsValidDims() const + { + if(inputDesc.GetLengths().size() > 5 || inputDesc.GetLengths().size() < 3) + { + MIOPEN_THROW(miopenStatusBadParm, + "AdaptiveAvgPool: Only 3D, 4D and 5D tensors are supported."); + } + + return true; + } + + bool IsAllContiguous() const { return inputDesc.IsContiguous() && outputDesc.IsContiguous(); } + + bool IsSameType() const + { + if(inputDesc.GetType() != outputDesc.GetType()) + { + MIOPEN_THROW(miopenStatusBadParm, + "AdaptiveAvgPool: Input and output tensor types do not match."); + } + + return true; + } + + NetworkConfig MakeNetworkConfig() const override; + +protected: + TensorDescriptor inputDesc; + TensorDescriptor outputDesc; +}; + +struct BwdProblemDescription : ProblemDescriptionBase +{ + BwdProblemDescription(const TensorDescriptor& outputGradDesc_, + const TensorDescriptor& inputGradDesc_) + : outputGradDesc(outputGradDesc_), inputGradDesc(inputGradDesc_) + { + IsValidLength(); + IsValidDims(); + IsSameType(); + } + + auto GetOutputGradDesc() const { return outputGradDesc; } + auto GetInputGradDesc() const { return inputGradDesc; } + auto GetNtotal() const { return inputGradDesc.GetElementSize(); } + + bool IsValidLength() const + { + auto input_dims = inputGradDesc.GetLengths().size(); + if(outputGradDesc.GetLengths()[0] != inputGradDesc.GetLengths()[0] || + outputGradDesc.GetLengths()[1] != inputGradDesc.GetLengths()[1] || + outputGradDesc.GetLengths().size() != input_dims) + { + MIOPEN_THROW(miopenStatusBadParm, + "AdaptiveAvgPool: Input grad and output grad tensor sizes do not 
match."); + } + + if(input_dims == 3) + { + if(outputGradDesc.GetLengths()[2] > inputGradDesc.GetLengths()[2]) + { + MIOPEN_THROW(miopenStatusBadParm, + "AdaptiveAvgPool: Input grad tensor sizes are too small compare to " + "output grad tensor sizes."); + } + } + else if(input_dims == 4) + { + if(outputGradDesc.GetLengths()[2] > inputGradDesc.GetLengths()[2] || + outputGradDesc.GetLengths()[3] > inputGradDesc.GetLengths()[3]) + { + MIOPEN_THROW(miopenStatusBadParm, + "AdaptiveAvgPool: Input grad tensor sizes are too small compare to " + "output grad tensor sizes."); + } + } + else if(input_dims == 5) + { + if(outputGradDesc.GetLengths()[2] > inputGradDesc.GetLengths()[2] || + outputGradDesc.GetLengths()[3] > inputGradDesc.GetLengths()[3] || + outputGradDesc.GetLengths()[4] > inputGradDesc.GetLengths()[4]) + { + MIOPEN_THROW(miopenStatusBadParm, + "AdaptiveAvgPool: Input grad tensor sizes are too small compare to " + "output grad tensor sizes."); + } + } + + return true; + } + + bool IsValidDims() const + { + if(inputGradDesc.GetLengths().size() > 5 || inputGradDesc.GetLengths().size() < 3) + { + MIOPEN_THROW(miopenStatusBadParm, + "AdaptiveAvgPool: Only 3D, 4D and 5D tensors are supported."); + } + + return true; + } + + bool IsAllContiguous() const + { + return inputGradDesc.IsContiguous() && outputGradDesc.IsContiguous(); + } + + bool IsSameType() const + { + if(inputGradDesc.GetType() != outputGradDesc.GetType()) + { + MIOPEN_THROW(miopenStatusBadParm, + "AdaptiveAvgPool: Input grad and output grad tensor types do not match."); + } + + return true; + } + + NetworkConfig MakeNetworkConfig() const override; + +protected: + TensorDescriptor outputGradDesc; + TensorDescriptor inputGradDesc; +}; + +} // namespace adaptiveavgpool + +} // namespace miopen diff --git a/src/include/miopen/adaptiveavgpool/solvers.hpp b/src/include/miopen/adaptiveavgpool/solvers.hpp new file mode 100644 index 0000000000..980bb1a330 --- /dev/null +++ b/src/include/miopen/adaptiveavgpool/solvers.hpp @@ -0,0 +1,160 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ + +#pragma once + +#include <miopen/conv_solution.hpp> +#include <miopen/execution_context.hpp> +#include <miopen/solver.hpp> +#include <miopen/adaptiveavgpool/problem_description.hpp> +#include <miopen/kernel_build_params.hpp> +#include <miopen/kernel_info.hpp> +#include <miopen/mlo_internal.hpp> + +namespace miopen { + +namespace solver { + +namespace adaptiveavgpool { + +const auto make_hip_kernel = [](std::vector<size_t> localsize, + std::vector<size_t> gridsize, + std::string kernel_file, + std::string kernel_name, + KernelBuildParameters build_params) { + while(localsize.size() < 3) + localsize.push_back(1); + while(gridsize.size() < 3) + gridsize.push_back(1); + for(int i = 0; i < localsize.size(); ++i) + gridsize[i] = AlignUp(gridsize[i], localsize[i]); + return KernelInfo{ + build_params.GenerateFor(kbp::HIP{}), localsize, gridsize, kernel_file, kernel_name}; +}; + +using AdaptiveAvgPoolForward = + NonTunableSolverBase<ExecutionContext, miopen::adaptiveavgpool::FwdProblemDescription>; + +using AdaptiveAvgPoolBackward = + NonTunableSolverBase<ExecutionContext, miopen::adaptiveavgpool::BwdProblemDescription>; + +// FORWARD +struct AdaptiveAvgPoolForward1d final : AdaptiveAvgPoolForward +{ + const std::string& SolverDbId() const override + { + return GetSolverDbId<AdaptiveAvgPoolForward1d>(); + } + + bool IsApplicable(const ExecutionContext& context, + const miopen::adaptiveavgpool::FwdProblemDescription& problem) const override; + + ConvSolution + GetSolution(const ExecutionContext& context, + const miopen::adaptiveavgpool::FwdProblemDescription& problem) const override; +}; + +struct AdaptiveAvgPoolForward2d final : AdaptiveAvgPoolForward +{ + const std::string& SolverDbId() const override + { + return GetSolverDbId<AdaptiveAvgPoolForward2d>(); + } + + bool IsApplicable(const ExecutionContext& context, + const miopen::adaptiveavgpool::FwdProblemDescription& problem) const override; + + ConvSolution + GetSolution(const ExecutionContext& context, + const miopen::adaptiveavgpool::FwdProblemDescription& problem) const override; +}; + +struct AdaptiveAvgPoolForward3d final : AdaptiveAvgPoolForward +{ + const std::string& SolverDbId() const override + { + return GetSolverDbId<AdaptiveAvgPoolForward3d>(); + } + + bool IsApplicable(const ExecutionContext& context, + const miopen::adaptiveavgpool::FwdProblemDescription& problem) const override; + + ConvSolution + GetSolution(const ExecutionContext& context, + const miopen::adaptiveavgpool::FwdProblemDescription& problem) const override; +}; + +// BACKWARD +struct AdaptiveAvgPoolBackward1d final : AdaptiveAvgPoolBackward +{ + const std::string& SolverDbId() const override + { + return GetSolverDbId<AdaptiveAvgPoolBackward1d>(); + } + + bool IsApplicable(const ExecutionContext& context, + const miopen::adaptiveavgpool::BwdProblemDescription& problem) const override; + + ConvSolution + GetSolution(const ExecutionContext& context, + const miopen::adaptiveavgpool::BwdProblemDescription& problem) const override; +}; + +struct AdaptiveAvgPoolBackward2d final : AdaptiveAvgPoolBackward +{ + const std::string& SolverDbId() const override + { + return GetSolverDbId<AdaptiveAvgPoolBackward2d>(); + } + + bool IsApplicable(const ExecutionContext& context, + const miopen::adaptiveavgpool::BwdProblemDescription& problem) const override; + + ConvSolution + GetSolution(const ExecutionContext& context, + const miopen::adaptiveavgpool::BwdProblemDescription& problem) const 
override; +}; + +struct AdaptiveAvgPoolBackward3d final : AdaptiveAvgPoolBackward +{ + const std::string& SolverDbId() const override + { + return GetSolverDbId<AdaptiveAvgPoolBackward3d>(); + } + + bool IsApplicable(const ExecutionContext& context, + const miopen::adaptiveavgpool::BwdProblemDescription& problem) const override; + + ConvSolution + GetSolution(const ExecutionContext& context, + const miopen::adaptiveavgpool::BwdProblemDescription& problem) const override; +}; + +} // namespace adaptiveavgpool + +} // namespace solver + +} // namespace miopen diff --git a/src/include/miopen/solver_id.hpp b/src/include/miopen/solver_id.hpp index 76a13b051c..efc60720eb 100644 --- a/src/include/miopen/solver_id.hpp +++ b/src/include/miopen/solver_id.hpp @@ -64,7 +64,8 @@ enum class Primitive ReLU, Kthvalue, SoftMarginLoss, - MultiMarginLoss + MultiMarginLoss, + AdaptiveAvgPool, }; struct MIOPEN_INTERNALS_EXPORT Id diff --git a/src/kernels/MIOpenAdaptiveAvgPool.cpp b/src/kernels/MIOpenAdaptiveAvgPool.cpp new file mode 100644 index 0000000000..273ec99087 --- /dev/null +++ b/src/kernels/MIOpenAdaptiveAvgPool.cpp @@ -0,0 +1,355 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ +#ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS +#include <hip/hip_fp16.h> +#include <hip/hip_runtime.h> +#endif + +#include "float_types.h" +#include "tensor_view.hpp" + +template <typename TI, typename TO> +__device__ void adaptiveAvgPoolForward1d(const TI* __restrict__ input, + TO* __restrict__ output, + uint64_t N, + uint64_t C, + uint64_t H, + uint64_t OH, + tensor_view_t<3> input_tv, + tensor_view_t<3> output_tv) +{ + uint64_t gid = threadIdx.x + blockIdx.x * blockDim.x; + uint64_t nc = gid / OH, oh = gid % OH; + uint64_t n = nc / C, c = nc % C; + if(n >= N) + return; + + uint64_t h = oh * H / OH; + uint64_t kh = (((oh + 1) * H + OH - 1) / OH) - h; + + FLOAT_ACCUM sum = 0; + for(uint64_t ih = h; ih < (h + kh); ++ih) + { + sum += CVT_FLOAT2ACCUM(input[input_tv.get_tensor_view_idx({n, c, ih})]); + } + output[output_tv.get_tensor_view_idx({n, c, oh})] = CVT_ACCUM2FLOAT(sum / kh); +} +extern "C" __global__ void AdaptiveAvgPoolForward1d(const INPUT_TYPE* __restrict__ input, + OUTPUT_TYPE* __restrict__ output, + uint64_t N, + uint64_t C, + uint64_t H, + uint64_t OH, + tensor_view_t<3> input_tv, + tensor_view_t<3> output_tv) +{ + adaptiveAvgPoolForward1d<INPUT_TYPE, OUTPUT_TYPE>( + input, output, N, C, H, OH, input_tv, output_tv); +} + +template <typename TI, typename TO> +__device__ void adaptiveAvgPoolBackward1d(const TI* __restrict__ output_grad, + TO* __restrict__ input_grad, + uint64_t N, + uint64_t C, + uint64_t H, + uint64_t OH, + tensor_view_t<3> output_grad_tv, + tensor_view_t<3> input_grad_tv) +{ + uint64_t gid = threadIdx.x + blockIdx.x * blockDim.x; + uint64_t nc = gid / H, h = gid % H; + uint64_t n = nc / C, c = nc % C; + if(n >= N) + return; + + uint64_t oh = (h * OH) / H; + uint64_t koh = (((h + 1) * OH + H - 1) / H) - oh; + + FLOAT_ACCUM grad = 0; + for(uint64_t ih = oh; ih < (oh + koh); ++ih) + { + uint64_t kh = ((ih + 1) * H + OH - 1) / OH - (ih * H) / OH; + grad += CVT_FLOAT2ACCUM(output_grad[output_grad_tv.get_tensor_view_idx({n, c, ih})]) / kh; + } + input_grad[input_grad_tv.get_tensor_view_idx({n, c, h})] = CVT_ACCUM2FLOAT(grad); +} + +extern "C" __global__ void AdaptiveAvgPoolBackward1d(const INPUT_TYPE* __restrict__ output_grad, + OUTPUT_TYPE* __restrict__ input_grad, + uint64_t N, + uint64_t C, + uint64_t H, + uint64_t OH, + tensor_view_t<3> output_grad_tv, + tensor_view_t<3> input_grad_tv) +{ + adaptiveAvgPoolBackward1d<INPUT_TYPE, OUTPUT_TYPE>( + output_grad, input_grad, N, C, H, OH, output_grad_tv, input_grad_tv); +} + +template <typename TI, typename TO> +__device__ void adaptiveAvgPoolForward2d(const TI* __restrict__ input, + TO* __restrict__ output, + uint64_t N, + uint64_t C, + uint64_t H, + uint64_t W, + uint64_t OH, + uint64_t OW, + tensor_view_t<4> input_tv, + tensor_view_t<4> output_tv) +{ + uint64_t gid = threadIdx.x + blockIdx.x * blockDim.x; + uint64_t ncoh = gid / OW, ow = gid % OW; + uint64_t nc = ncoh / OH, oh = ncoh % OH; + uint64_t n = nc / C, c = nc % C; + + if(n >= N) + return; + + uint64_t h = (oh * H) / OH; + uint64_t kh = (((oh + 1) * H + OH - 1) / OH) - h; + + uint64_t w = (ow * W) / OW; + uint64_t kw = (((ow + 1) * W + OW - 1) / OW) - w; + + FLOAT_ACCUM divider = static_cast<FLOAT_ACCUM>(kh * kw); + FLOAT_ACCUM sum = 0; + for(uint64_t ih = h; ih < (h + kh); ++ih) + { + for(uint64_t iw = w; iw < (w + kw); ++iw) + { + sum += CVT_FLOAT2ACCUM(input[input_tv.get_tensor_view_idx({n, c, ih, iw})]); + } + } + output[output_tv.get_tensor_view_idx({n, c, oh, ow})] = 
CVT_ACCUM2FLOAT(sum / divider); +} + +extern "C" __global__ void AdaptiveAvgPoolForward2d(const INPUT_TYPE* __restrict__ input, + OUTPUT_TYPE* __restrict__ output, + uint64_t N, + uint64_t C, + uint64_t H, + uint64_t W, + uint64_t OH, + uint64_t OW, + tensor_view_t<4> input_tv, + tensor_view_t<4> output_tv) +{ + adaptiveAvgPoolForward2d<INPUT_TYPE, OUTPUT_TYPE>( + input, output, N, C, H, W, OH, OW, input_tv, output_tv); +} + +template <typename TI, typename TO> +__device__ void adaptiveAvgPoolBackward2d(const TI* __restrict__ output_grad, + TO* __restrict__ input_grad, + uint64_t N, + uint64_t C, + uint64_t H, + uint64_t W, + uint64_t OH, + uint64_t OW, + tensor_view_t<4> output_grad_tv, + tensor_view_t<4> input_grad_tv) +{ + uint64_t gid = threadIdx.x + blockIdx.x * blockDim.x; + uint64_t nch = gid / W, w = gid % W; + uint64_t nc = nch / H, h = nch % H; + uint64_t n = nc / C, c = nc % C; + + if(n >= N) + return; + + uint64_t oh = (h * OH) / H; + uint64_t koh = ((h + 1) * OH + H - 1) / H - oh; + + uint64_t ow = (w * OW) / W; + uint64_t kow = ((w + 1) * OW + W - 1) / W - ow; + + FLOAT_ACCUM grad = 0; + for(uint64_t ih = oh; ih < (oh + koh); ++ih) + { + uint64_t kh = ((ih + 1) * H + OH - 1) / OH - (ih * H) / OH; + for(uint64_t iw = ow; iw < (ow + kow); ++iw) + { + uint64_t kw = ((iw + 1) * W + OW - 1) / OW - (iw * W) / OW; + grad += + CVT_FLOAT2ACCUM(output_grad[output_grad_tv.get_tensor_view_idx({n, c, ih, iw})]) / + (kh * kw); + } + } + + input_grad[input_grad_tv.get_tensor_view_idx({n, c, h, w})] = CVT_ACCUM2FLOAT(grad); +} + +extern "C" __global__ void AdaptiveAvgPoolBackward2d(const INPUT_TYPE* __restrict__ output_grad, + OUTPUT_TYPE* __restrict__ input_grad, + uint64_t N, + uint64_t C, + uint64_t H, + uint64_t W, + uint64_t OH, + uint64_t OW, + tensor_view_t<4> output_grad_tv, + tensor_view_t<4> input_grad_tv) +{ + adaptiveAvgPoolBackward2d<INPUT_TYPE, OUTPUT_TYPE>( + output_grad, input_grad, N, C, H, W, OH, OW, output_grad_tv, input_grad_tv); +} + +template <typename TI, typename TO> +__device__ void adaptiveAvgPoolForward3d(const TI* __restrict__ input, + TO* __restrict__ output, + uint64_t N, + uint64_t C, + uint64_t D, + uint64_t H, + uint64_t W, + uint64_t OD, + uint64_t OH, + uint64_t OW, + tensor_view_t<5> input_tv, + tensor_view_t<5> output_tv) +{ + uint64_t gid = threadIdx.x + blockIdx.x * blockDim.x; + uint64_t ncodoh = gid / OW, ow = gid % OW; + uint64_t ncod = ncodoh / OH, oh = ncodoh % OH; + uint64_t nc = ncod / OD, od = ncod % OD; + uint64_t n = nc / C, c = nc % C; + + if(n >= N) + return; + uint64_t d = (od * D) / OD; + uint64_t kd = ((od + 1) * D + OD - 1) / OD - d; + + uint64_t h = (oh * H) / OH; + uint64_t kh = ((oh + 1) * H + OH - 1) / OH - h; + + uint64_t w = (ow * W) / OW; + uint64_t kw = ((ow + 1) * W + OW - 1) / OW - w; + + FLOAT_ACCUM sum = 0; + for(uint64_t id = d; id < (d + kd); ++id) + { + for(uint64_t ih = h; ih < (h + kh); ++ih) + { + for(uint64_t iw = w; iw < (w + kw); ++iw) + { + sum += CVT_FLOAT2ACCUM(input[input_tv.get_tensor_view_idx({n, c, id, ih, iw})]); + } + } + } + + output[output_tv.get_tensor_view_idx({n, c, od, oh, ow})] = + CVT_ACCUM2FLOAT(sum / (kd * kh * kw)); +} + +extern "C" __global__ void AdaptiveAvgPoolForward3d(const INPUT_TYPE* __restrict__ input, + OUTPUT_TYPE* __restrict__ output, + uint64_t N, + uint64_t C, + uint64_t D, + uint64_t H, + uint64_t W, + uint64_t OD, + uint64_t OH, + uint64_t OW, + tensor_view_t<5> input_tv, + tensor_view_t<5> output_tv) +{ + adaptiveAvgPoolForward3d<INPUT_TYPE, OUTPUT_TYPE>( + input, output, N, C, D, 
H, W, OD, OH, OW, input_tv, output_tv); +} + +template <typename TI, typename TO> +__device__ void adaptiveAvgPoolBackward3d(const TI* __restrict__ output_grad, + TO* __restrict__ input_grad, + uint64_t N, + uint64_t C, + uint64_t D, + uint64_t H, + uint64_t W, + uint64_t OD, + uint64_t OH, + uint64_t OW, + tensor_view_t<5> output_grad_tv, + tensor_view_t<5> input_grad_tv) +{ + uint64_t gid = threadIdx.x + blockIdx.x * blockDim.x; + uint64_t ncdh = gid / W, w = gid % W; + uint64_t ncd = ncdh / H, h = ncdh % H; + uint64_t nc = ncd / D, d = ncd % D; + uint64_t n = nc / C, c = nc % C; + + if(n >= N) + return; + + uint64_t od = (d * OD) / D; + uint64_t kod = ((d + 1) * OD + D - 1) / D - od; + + uint64_t oh = (h * OH) / H; + uint64_t koh = ((h + 1) * OH + H - 1) / H - oh; + + uint64_t ow = (w * OW) / W; + uint64_t kow = ((w + 1) * OW + W - 1) / W - ow; + + FLOAT_ACCUM grad = 0; + for(uint64_t id = od; id < (od + kod); ++id) + { + uint64_t kd = ((id + 1) * D + OD - 1) / OD - (id * D) / OD; + for(uint64_t ih = oh; ih < (oh + koh); ++ih) + { + uint64_t kh = ((ih + 1) * H + OH - 1) / OH - (ih * H) / OH; + for(uint64_t iw = ow; iw < (ow + kow); ++iw) + { + uint64_t kw = ((iw + 1) * W + OW - 1) / OW - (iw * W) / OW; + grad += CVT_FLOAT2ACCUM( + output_grad[output_grad_tv.get_tensor_view_idx({n, c, id, ih, iw})]) / + (kd * kh * kw); + } + } + } + + input_grad[input_grad_tv.get_tensor_view_idx({n, c, d, h, w})] = CVT_ACCUM2FLOAT(grad); +} + +extern "C" __global__ void AdaptiveAvgPoolBackward3d(const INPUT_TYPE* __restrict__ output_grad, + OUTPUT_TYPE* __restrict__ input_grad, + uint64_t N, + uint64_t C, + uint64_t D, + uint64_t H, + uint64_t W, + uint64_t OD, + uint64_t OH, + uint64_t OW, + tensor_view_t<5> output_grad_tv, + tensor_view_t<5> input_grad_tv) +{ + adaptiveAvgPoolBackward3d<INPUT_TYPE, OUTPUT_TYPE>( + output_grad, input_grad, N, C, D, H, W, OD, OH, OW, output_grad_tv, input_grad_tv); +} diff --git a/src/solver.cpp b/src/solver.cpp index d9e2fdb1be..a9b7ffdf29 100644 --- a/src/solver.cpp +++ b/src/solver.cpp @@ -26,6 +26,7 @@ #include <miopen/activ/solvers.hpp> #include <miopen/adam/solvers.hpp> +#include <miopen/adaptiveavgpool/solvers.hpp> #include <miopen/batchnorm/solvers.hpp> #include <miopen/cat/solvers.hpp> #include <miopen/conv/solvers.hpp> @@ -707,6 +708,30 @@ inline SolverRegistrar::SolverRegistrar(IdRegistryData& registry) multimarginloss::MultiMarginLossForward{}.SolverDbId()); Register(registry, ++id, Primitive::Mha, mha::MhaCKFlashAttentionV2Forward{}.SolverDbId()); + Register(registry, + ++id, + Primitive::AdaptiveAvgPool, + adaptiveavgpool::AdaptiveAvgPoolForward1d{}.SolverDbId()); + Register(registry, + ++id, + Primitive::AdaptiveAvgPool, + adaptiveavgpool::AdaptiveAvgPoolForward2d{}.SolverDbId()); + Register(registry, + ++id, + Primitive::AdaptiveAvgPool, + adaptiveavgpool::AdaptiveAvgPoolForward3d{}.SolverDbId()); + Register(registry, + ++id, + Primitive::AdaptiveAvgPool, + adaptiveavgpool::AdaptiveAvgPoolBackward1d{}.SolverDbId()); + Register(registry, + ++id, + Primitive::AdaptiveAvgPool, + adaptiveavgpool::AdaptiveAvgPoolBackward2d{}.SolverDbId()); + Register(registry, + ++id, + Primitive::AdaptiveAvgPool, + adaptiveavgpool::AdaptiveAvgPoolBackward3d{}.SolverDbId()); // IMPORTANT: New solvers should be added to the end of the function, and don't leave a white // space between this comment and the newly registered solver(s)! 
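One consistency note on the backward kernels registered above: they are the exact adjoint of the forward averaging. An input element at index h is covered by the output bins oh in [floor(h*OH/H), ceil((h+1)*OH/H)), and each such bin contributes output_grad[oh] / kh, with kh that bin's forward window size. A quick standalone check (assumption: with output_grad identically 1, the input grads must sum to OH, since each bin's averaging weights sum to 1):

#include <cstdint>
#include <cstdio>

int main()
{
    const uint64_t H = 10, OH = 3;
    double total = 0.0;
    for(uint64_t h = 0; h < H; ++h)
    {
        uint64_t oh_begin = h * OH / H;                 // first bin covering h
        uint64_t oh_end   = ((h + 1) * OH + H - 1) / H; // one past the last bin covering h
        for(uint64_t oh = oh_begin; oh < oh_end; ++oh)
        {
            // Forward window size of bin oh, recomputed as in the kernels above.
            uint64_t kh = ((oh + 1) * H + OH - 1) / OH - oh * H / OH;
            total += 1.0 / kh; // contribution when output_grad == 1 everywhere
        }
    }
    std::printf("sum of input grads = %.1f (expected %llu)\n",
                total,
                static_cast<unsigned long long>(OH));
    return 0;
}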
} diff --git a/src/solver/adaptiveavgpool/backward_adaptiveavgpool_1d.cpp b/src/solver/adaptiveavgpool/backward_adaptiveavgpool_1d.cpp new file mode 100644 index 0000000000..1552ac8385 --- /dev/null +++ b/src/solver/adaptiveavgpool/backward_adaptiveavgpool_1d.cpp @@ -0,0 +1,139 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#include <miopen/conv_solution.hpp> +#include <miopen/execution_context.hpp> +#include <miopen/invoke_params.hpp> +#include <miopen/tensor_view_utils.hpp> +#include <miopen/adaptiveavgpool/solvers.hpp> + +#include <miopen/adaptiveavgpool/invoke_params.hpp> +#include <miopen/datatype.hpp> +#include <miopen/adaptiveavgpool.hpp> +#include <miopen/target_properties.hpp> + +#define LOCAL_SIZE_BWD_1D 256 + +namespace miopen { + +namespace solver { + +namespace adaptiveavgpool { + +namespace { + +bool IsOverRocmBwd1d(const miopen::adaptiveavgpool::BwdProblemDescription& problem) +{ + if(!problem.IsAllContiguous()) + { + return true; + } + else + { + auto mul_nc = problem.GetOutputGradDesc().GetLengths()[0] * + problem.GetOutputGradDesc().GetLengths()[1]; + if(mul_nc < 141312) + { + return true; + } + } + return false; +} + +} // namespace + +bool AdaptiveAvgPoolBackward1d::IsApplicable( + const ExecutionContext&, const miopen::adaptiveavgpool::BwdProblemDescription& problem) const +{ + if(problem.GetInputGradDesc().GetNumDims() != 3 || + problem.GetOutputGradDesc().GetNumDims() != 3) + { + return false; + } + if(!IsOverRocmBwd1d(problem)) + { + return false; + } + if(!(problem.GetInputGradDesc().GetType() == miopenFloat || + problem.GetInputGradDesc().GetType() == miopenHalf || + problem.GetInputGradDesc().GetType() == miopenBFloat16)) + return false; + return true; +} + +ConvSolution AdaptiveAvgPoolBackward1d::GetSolution( + const ExecutionContext& context, + const miopen::adaptiveavgpool::BwdProblemDescription& problem) const +{ + std::ignore = context; + + auto result = ConvSolution{miopenStatusSuccess}; + auto input_dtype = miopen::GetDataType(problem.GetOutputGradDesc().GetType()); + auto output_dtype = miopen::GetDataType(problem.GetInputGradDesc().GetType()); + auto dtype = problem.GetInputGradDesc().GetType(); + uint64_t N_total = problem.GetNtotal(); + + auto build_params = KernelBuildParameters{ 
+ {"MIOPEN_USE_FP16", static_cast<int>(dtype == miopenHalf)}, + {"MIOPEN_USE_FP32", static_cast<int>(dtype == miopenFloat)}, + {"MIOPEN_USE_FP64", static_cast<int>(dtype == miopenDouble)}, + {"MIOPEN_USE_BFP16", static_cast<int>(dtype == miopenBFloat16)}, + {"INPUT_TYPE", input_dtype == "bfloat16" ? "ushort" : input_dtype}, + {"OUTPUT_TYPE", output_dtype == "bfloat16" ? "ushort" : output_dtype}}; + + result.construction_params.push_back(make_hip_kernel({LOCAL_SIZE_BWD_1D}, + {N_total}, + "MIOpenAdaptiveAvgPool.cpp", + "AdaptiveAvgPoolBackward1d", + build_params)); + + result.invoker_factory = [](const std::vector<Kernel>& kernels) { + return [=](const Handle& handle_, const AnyInvokeParams& raw_params) { + decltype(auto) params = raw_params.CastTo<miopen::adaptiveavgpool::BwdInvokeParams>(); + + decltype(auto) kernel = handle_.Run(kernels.front()); + + auto input_grad_tv = get_inner_expanded_tv<3>(deref(params.inputGradDesc)); + auto output_grad_tv = get_inner_expanded_tv<3>(deref(params.outputGradDesc)); + + uint64_t N = deref(params.inputGradDesc).GetLengths()[0]; + uint64_t C = deref(params.inputGradDesc).GetLengths()[1]; + uint64_t H = deref(params.inputGradDesc).GetLengths()[2]; + uint64_t OH = deref(params.outputGradDesc).GetLengths()[2]; + + kernel( + params.output_grad, params.input_grad, N, C, H, OH, output_grad_tv, input_grad_tv); + }; + }; + + return result; +} + +} // namespace adaptiveavgpool + +} // namespace solver + +} // namespace miopen diff --git a/src/solver/adaptiveavgpool/backward_adaptiveavgpool_2d.cpp b/src/solver/adaptiveavgpool/backward_adaptiveavgpool_2d.cpp new file mode 100644 index 0000000000..46dcef3e88 --- /dev/null +++ b/src/solver/adaptiveavgpool/backward_adaptiveavgpool_2d.cpp @@ -0,0 +1,165 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
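On the {"INPUT_TYPE", ...} / {"OUTPUT_TYPE", ...} entries above: bfloat16 tensors are handed to the HIP kernels as raw 16-bit words ("ushort"), and the CVT_FLOAT2ACCUM / CVT_ACCUM2FLOAT macros from float_types.h do the numeric conversion on device. A host-side sketch of the bit-level idea (truncating conversion only, for illustration; the real macros also handle rounding):

#include <cstdint>
#include <cstdio>
#include <cstring>

static uint16_t float_to_bf16_bits(float f) // keep the top 16 bits of the IEEE 754 word
{
    uint32_t u;
    std::memcpy(&u, &f, sizeof(u));
    return static_cast<uint16_t>(u >> 16);
}

static float bf16_bits_to_float(uint16_t h) // widen back by zero-filling the low mantissa bits
{
    uint32_t u = static_cast<uint32_t>(h) << 16;
    float f;
    std::memcpy(&f, &u, sizeof(f));
    return f;
}

int main()
{
    float x = 3.14159f;
    std::printf("%f -> %f\n", x, bf16_bits_to_float(float_to_bf16_bits(x)));
    return 0;
}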
+ * + *******************************************************************************/ + +#include <miopen/conv_solution.hpp> +#include <miopen/execution_context.hpp> +#include <miopen/invoke_params.hpp> +#include <miopen/tensor_view_utils.hpp> +#include <miopen/adaptiveavgpool/solvers.hpp> + +#include <miopen/adaptiveavgpool/invoke_params.hpp> +#include <miopen/datatype.hpp> +#include <miopen/adaptiveavgpool.hpp> +#include <miopen/target_properties.hpp> + +#define LOCAL_SIZE_BWD_2D 256 + +namespace miopen { + +namespace solver { + +namespace adaptiveavgpool { + +namespace { + +bool IsOverRocmBwd2d(const miopen::adaptiveavgpool::BwdProblemDescription& problem) +{ + if(problem.IsAllContiguous()) + { + return false; + } + else + { + auto dtype = problem.GetInputGradDesc().GetType(); + auto in_nelems = problem.GetInputGradDesc().GetElementSize(); + auto out_nelems = problem.GetOutputGradDesc().GetElementSize(); + auto in_over_out = static_cast<float>(in_nelems) / out_nelems; + + if(dtype == miopenFloat) + { + if(in_nelems > 3801600) + return true; + } + else if(dtype == miopenHalf) + { + if(in_over_out == 1 || (in_over_out >= 1024 && in_over_out <= 4096)) + return true; + } + else if(dtype == miopenBFloat16) + { + if(in_over_out < 13 || (in_over_out >= 1024 && in_over_out <= 4096)) + { + return true; + } + } + } + return false; +} + +} // namespace + +bool AdaptiveAvgPoolBackward2d::IsApplicable( + const ExecutionContext&, const miopen::adaptiveavgpool::BwdProblemDescription& problem) const +{ + if(problem.GetInputGradDesc().GetNumDims() != 4 || + problem.GetOutputGradDesc().GetNumDims() != 4) + { + return false; + } + if(!IsOverRocmBwd2d(problem)) + { + return false; + } + if(!(problem.GetInputGradDesc().GetType() == miopenFloat || + problem.GetInputGradDesc().GetType() == miopenHalf || + problem.GetInputGradDesc().GetType() == miopenBFloat16)) + return false; + return true; +} + +ConvSolution AdaptiveAvgPoolBackward2d::GetSolution( + const ExecutionContext& context, + const miopen::adaptiveavgpool::BwdProblemDescription& problem) const +{ + std::ignore = context; + + auto result = ConvSolution{miopenStatusSuccess}; + auto input_dtype = miopen::GetDataType(problem.GetOutputGradDesc().GetType()); + auto output_dtype = miopen::GetDataType(problem.GetInputGradDesc().GetType()); + auto dtype = problem.GetInputGradDesc().GetType(); + uint64_t N_total = problem.GetNtotal(); + + auto build_params = KernelBuildParameters{ + {"MIOPEN_USE_FP16", static_cast<int>(dtype == miopenHalf)}, + {"MIOPEN_USE_FP32", static_cast<int>(dtype == miopenFloat)}, + {"MIOPEN_USE_FP64", static_cast<int>(dtype == miopenDouble)}, + {"MIOPEN_USE_BFP16", static_cast<int>(dtype == miopenBFloat16)}, + {"INPUT_TYPE", input_dtype == "bfloat16" ? "ushort" : input_dtype}, + {"OUTPUT_TYPE", output_dtype == "bfloat16" ? 
"ushort" : output_dtype}}; + + result.construction_params.push_back(make_hip_kernel({LOCAL_SIZE_BWD_2D}, + {N_total}, + "MIOpenAdaptiveAvgPool.cpp", + "AdaptiveAvgPoolBackward2d", + build_params)); + + result.invoker_factory = [](const std::vector<Kernel>& kernels) { + return [=](const Handle& handle_, const AnyInvokeParams& raw_params) { + decltype(auto) params = raw_params.CastTo<miopen::adaptiveavgpool::BwdInvokeParams>(); + + decltype(auto) kernel = handle_.Run(kernels.front()); + + auto input_grad_tv = get_inner_expanded_tv<4>(deref(params.inputGradDesc)); + auto output_grad_tv = get_inner_expanded_tv<4>(deref(params.outputGradDesc)); + + uint64_t N = deref(params.inputGradDesc).GetLengths()[0]; + uint64_t C = deref(params.inputGradDesc).GetLengths()[1]; + uint64_t H = deref(params.inputGradDesc).GetLengths()[2]; + uint64_t W = deref(params.inputGradDesc).GetLengths()[3]; + uint64_t OH = deref(params.outputGradDesc).GetLengths()[2]; + uint64_t OW = deref(params.outputGradDesc).GetLengths()[3]; + + kernel(params.output_grad, + params.input_grad, + N, + C, + H, + W, + OH, + OW, + output_grad_tv, + input_grad_tv); + }; + }; + + return result; +} + +} // namespace adaptiveavgpool + +} // namespace solver + +} // namespace miopen diff --git a/src/solver/adaptiveavgpool/backward_adaptiveavgpool_3d.cpp b/src/solver/adaptiveavgpool/backward_adaptiveavgpool_3d.cpp new file mode 100644 index 0000000000..c16603a530 --- /dev/null +++ b/src/solver/adaptiveavgpool/backward_adaptiveavgpool_3d.cpp @@ -0,0 +1,150 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ + +#include <miopen/conv_solution.hpp> +#include <miopen/execution_context.hpp> +#include <miopen/invoke_params.hpp> +#include <miopen/tensor_view_utils.hpp> +#include <miopen/adaptiveavgpool/solvers.hpp> + +#include <miopen/adaptiveavgpool/invoke_params.hpp> +#include <miopen/datatype.hpp> +#include <miopen/adaptiveavgpool.hpp> +#include <miopen/target_properties.hpp> + +#define LOCAL_SIZE_BWD_3D 256 + +namespace miopen { + +namespace solver { + +namespace adaptiveavgpool { + +namespace { + +bool IsOverRocmBwd3d(const miopen::adaptiveavgpool::BwdProblemDescription& problem) +{ + if(!problem.IsAllContiguous()) + { + return true; + } + else + { + if((problem.GetInputGradDesc().GetElementSize() / + problem.GetOutputGradDesc().GetElementSize()) == 1) + return true; + } + return false; +} + +} // namespace + +bool AdaptiveAvgPoolBackward3d::IsApplicable( + const ExecutionContext&, const miopen::adaptiveavgpool::BwdProblemDescription& problem) const +{ + if(problem.GetInputGradDesc().GetNumDims() != 5 || + problem.GetOutputGradDesc().GetNumDims() != 5) + { + return false; + } + if(!IsOverRocmBwd3d(problem)) + { + return false; + } + if(!(problem.GetInputGradDesc().GetType() == miopenFloat || + problem.GetInputGradDesc().GetType() == miopenHalf || + problem.GetInputGradDesc().GetType() == miopenBFloat16)) + return false; + return true; +} + +ConvSolution AdaptiveAvgPoolBackward3d::GetSolution( + const ExecutionContext& context, + const miopen::adaptiveavgpool::BwdProblemDescription& problem) const +{ + std::ignore = context; + + auto result = ConvSolution{miopenStatusSuccess}; + auto input_dtype = miopen::GetDataType(problem.GetOutputGradDesc().GetType()); + auto output_dtype = miopen::GetDataType(problem.GetInputGradDesc().GetType()); + auto dtype = problem.GetInputGradDesc().GetType(); + uint64_t N_total = problem.GetNtotal(); + + auto build_params = KernelBuildParameters{ + {"MIOPEN_USE_FP16", static_cast<int>(dtype == miopenHalf)}, + {"MIOPEN_USE_FP32", static_cast<int>(dtype == miopenFloat)}, + {"MIOPEN_USE_FP64", static_cast<int>(dtype == miopenDouble)}, + {"MIOPEN_USE_BFP16", static_cast<int>(dtype == miopenBFloat16)}, + {"INPUT_TYPE", input_dtype == "bfloat16" ? "ushort" : input_dtype}, + {"OUTPUT_TYPE", output_dtype == "bfloat16" ? 
"ushort" : output_dtype}}; + + result.construction_params.push_back(make_hip_kernel({LOCAL_SIZE_BWD_3D}, + {N_total}, + "MIOpenAdaptiveAvgPool.cpp", + "AdaptiveAvgPoolBackward3d", + build_params)); + + result.invoker_factory = [](const std::vector<Kernel>& kernels) { + return [=](const Handle& handle_, const AnyInvokeParams& raw_params) { + decltype(auto) params = raw_params.CastTo<miopen::adaptiveavgpool::BwdInvokeParams>(); + + decltype(auto) kernel = handle_.Run(kernels.front()); + + auto input_grad_tv = get_inner_expanded_tv<5>(deref(params.inputGradDesc)); + auto output_grad_tv = get_inner_expanded_tv<5>(deref(params.outputGradDesc)); + + uint64_t N = deref(params.inputGradDesc).GetLengths()[0]; + uint64_t C = deref(params.inputGradDesc).GetLengths()[1]; + uint64_t D = deref(params.inputGradDesc).GetLengths()[2]; + uint64_t H = deref(params.inputGradDesc).GetLengths()[3]; + uint64_t W = deref(params.inputGradDesc).GetLengths()[4]; + uint64_t OD = deref(params.outputGradDesc).GetLengths()[2]; + uint64_t OH = deref(params.outputGradDesc).GetLengths()[3]; + uint64_t OW = deref(params.outputGradDesc).GetLengths()[4]; + + kernel(params.output_grad, + params.input_grad, + N, + C, + D, + H, + W, + OD, + OH, + OW, + output_grad_tv, + input_grad_tv); + }; + }; + + return result; +} + +} // namespace adaptiveavgpool + +} // namespace solver + +} // namespace miopen diff --git a/src/solver/adaptiveavgpool/forward_adaptiveavgpool_1d.cpp b/src/solver/adaptiveavgpool/forward_adaptiveavgpool_1d.cpp new file mode 100644 index 0000000000..f31d80c8be --- /dev/null +++ b/src/solver/adaptiveavgpool/forward_adaptiveavgpool_1d.cpp @@ -0,0 +1,132 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ *
+ *******************************************************************************/
+
+#include <miopen/conv_solution.hpp>
+#include <miopen/execution_context.hpp>
+#include <miopen/invoke_params.hpp>
+#include <miopen/tensor_view_utils.hpp>
+#include <miopen/adaptiveavgpool/solvers.hpp>
+
+#include <miopen/adaptiveavgpool/invoke_params.hpp>
+#include <miopen/datatype.hpp>
+#include <miopen/adaptiveavgpool.hpp>
+#include <miopen/target_properties.hpp>
+
+#define LOCAL_SIZE_FWD_1D 256
+
+namespace miopen {
+
+namespace solver {
+
+namespace adaptiveavgpool {
+
+namespace {
+
+bool IsOverRocmFwd1d(const miopen::adaptiveavgpool::FwdProblemDescription& problem)
+{
+    // std::vector has no negative indexing, so GetLengths()[-1] would be out of
+    // bounds; use .back() to read the last (and only) spatial extent.
+    auto in_len      = problem.GetInputDesc().GetLengths().back();
+    auto out_len     = problem.GetOutputDesc().GetLengths().back();
+    auto in_over_out = static_cast<float>(in_len) / out_len;
+
+    if(in_over_out < 56)
+    {
+        return true;
+    }
+    return false;
+}
+
+} // namespace
+
+bool AdaptiveAvgPoolForward1d::IsApplicable(
+    const ExecutionContext&, const miopen::adaptiveavgpool::FwdProblemDescription& problem) const
+{
+    if(problem.GetInputDesc().GetNumDims() != 3 || problem.GetOutputDesc().GetNumDims() != 3)
+    {
+        return false;
+    }
+    if(!IsOverRocmFwd1d(problem))
+    {
+        return false;
+    }
+    if(!(problem.GetInputDesc().GetType() == miopenFloat ||
+         problem.GetInputDesc().GetType() == miopenHalf ||
+         problem.GetInputDesc().GetType() == miopenBFloat16))
+        return false;
+    return true;
+}
+
+ConvSolution AdaptiveAvgPoolForward1d::GetSolution(
+    const ExecutionContext& context,
+    const miopen::adaptiveavgpool::FwdProblemDescription& problem) const
+{
+    std::ignore = context;
+
+    auto result       = ConvSolution{miopenStatusSuccess};
+    auto input_dtype  = miopen::GetDataType(problem.GetInputDesc().GetType());
+    auto output_dtype = miopen::GetDataType(problem.GetOutputDesc().GetType());
+    auto dtype        = problem.GetOutputDesc().GetType();
+    uint64_t N_total  = problem.GetNtotal();
+
+    auto build_params = KernelBuildParameters{
+        {"MIOPEN_USE_FP16", static_cast<int>(dtype == miopenHalf)},
+        {"MIOPEN_USE_FP32", static_cast<int>(dtype == miopenFloat)},
+        {"MIOPEN_USE_FP64", static_cast<int>(dtype == miopenDouble)},
+        {"MIOPEN_USE_BFP16", static_cast<int>(dtype == miopenBFloat16)},
+        {"INPUT_TYPE", input_dtype == "bfloat16" ? "ushort" : input_dtype},
+        {"OUTPUT_TYPE", output_dtype == "bfloat16" ?
"ushort" : output_dtype}}; + + result.construction_params.push_back(make_hip_kernel({LOCAL_SIZE_FWD_1D}, + {N_total}, + "MIOpenAdaptiveAvgPool.cpp", + "AdaptiveAvgPoolForward1d", + build_params)); + + result.invoker_factory = [](const std::vector<Kernel>& kernels) { + return [=](const Handle& handle_, const AnyInvokeParams& raw_params) { + decltype(auto) params = raw_params.CastTo<miopen::adaptiveavgpool::FwdInvokeParams>(); + + decltype(auto) kernel = handle_.Run(kernels.front()); + + auto input_tv = get_inner_expanded_tv<3>(deref(params.inputDesc)); + auto output_tv = get_inner_expanded_tv<3>(deref(params.outputDesc)); + + uint64_t N = deref(params.inputDesc).GetLengths()[0]; + uint64_t C = deref(params.inputDesc).GetLengths()[1]; + uint64_t H = deref(params.inputDesc).GetLengths()[2]; + uint64_t OH = deref(params.outputDesc).GetLengths()[2]; + + kernel(params.input, params.output, N, C, H, OH, input_tv, output_tv); + }; + }; + + return result; +} + +} // namespace adaptiveavgpool + +} // namespace solver + +} // namespace miopen diff --git a/src/solver/adaptiveavgpool/forward_adaptiveavgpool_2d.cpp b/src/solver/adaptiveavgpool/forward_adaptiveavgpool_2d.cpp new file mode 100644 index 0000000000..344071a3a4 --- /dev/null +++ b/src/solver/adaptiveavgpool/forward_adaptiveavgpool_2d.cpp @@ -0,0 +1,140 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ + +#include <miopen/conv_solution.hpp> +#include <miopen/execution_context.hpp> +#include <miopen/invoke_params.hpp> +#include <miopen/tensor_view_utils.hpp> +#include <miopen/adaptiveavgpool/solvers.hpp> + +#include <miopen/adaptiveavgpool/invoke_params.hpp> +#include <miopen/datatype.hpp> +#include <miopen/adaptiveavgpool.hpp> +#include <miopen/target_properties.hpp> + +#define LOCAL_SIZE_FWD_2D 256 + +namespace miopen { + +namespace solver { + +namespace adaptiveavgpool { + +namespace { + +bool IsOverRocmFwd2d(const miopen::adaptiveavgpool::FwdProblemDescription& problem) +{ + auto in_nelems = problem.GetInputDesc().GetElementSize(); + auto out_nelems = problem.GetOutputDesc().GetElementSize(); + auto in_over_out = static_cast<float>(in_nelems) / out_nelems; + + if(problem.IsAllContiguous()) + { + if((in_over_out < 13) || (in_over_out >= 100 && in_over_out <= 112)) + return true; + } + else + { + if(in_over_out < 248) + return true; + } + return false; +} + +} // namespace + +bool AdaptiveAvgPoolForward2d::IsApplicable( + const ExecutionContext&, const miopen::adaptiveavgpool::FwdProblemDescription& problem) const +{ + if(problem.GetInputDesc().GetNumDims() != 4 || problem.GetOutputDesc().GetNumDims() != 4) + { + return false; + } + if(!IsOverRocmFwd2d(problem)) + { + return false; + } + if(!(problem.GetInputDesc().GetType() == miopenFloat || + problem.GetInputDesc().GetType() == miopenHalf || + problem.GetInputDesc().GetType() == miopenBFloat16)) + return false; + return true; +} + +ConvSolution AdaptiveAvgPoolForward2d::GetSolution( + const ExecutionContext& context, + const miopen::adaptiveavgpool::FwdProblemDescription& problem) const +{ + std::ignore = context; + + auto result = ConvSolution{miopenStatusSuccess}; + auto input_dtype = miopen::GetDataType(problem.GetInputDesc().GetType()); + auto output_dtype = miopen::GetDataType(problem.GetOutputDesc().GetType()); + auto dtype = problem.GetOutputDesc().GetType(); + uint64_t N_total = problem.GetNtotal(); + + auto build_params = KernelBuildParameters{ + {"MIOPEN_USE_FP16", static_cast<int>(dtype == miopenHalf)}, + {"MIOPEN_USE_FP32", static_cast<int>(dtype == miopenFloat)}, + {"MIOPEN_USE_FP64", static_cast<int>(dtype == miopenDouble)}, + {"MIOPEN_USE_BFP16", static_cast<int>(dtype == miopenBFloat16)}, + {"INPUT_TYPE", input_dtype == "bfloat16" ? "ushort" : input_dtype}, + {"OUTPUT_TYPE", output_dtype == "bfloat16" ? 
"ushort" : output_dtype}}; + + result.construction_params.push_back(make_hip_kernel({LOCAL_SIZE_FWD_2D}, + {N_total}, + "MIOpenAdaptiveAvgPool.cpp", + "AdaptiveAvgPoolForward2d", + build_params)); + + result.invoker_factory = [](const std::vector<Kernel>& kernels) { + return [=](const Handle& handle_, const AnyInvokeParams& raw_params) { + decltype(auto) params = raw_params.CastTo<miopen::adaptiveavgpool::FwdInvokeParams>(); + + decltype(auto) kernel = handle_.Run(kernels.front()); + + auto input_tv = get_inner_expanded_tv<4>(deref(params.inputDesc)); + auto output_tv = get_inner_expanded_tv<4>(deref(params.outputDesc)); + + uint64_t N = deref(params.inputDesc).GetLengths()[0]; + uint64_t C = deref(params.inputDesc).GetLengths()[1]; + uint64_t H = deref(params.inputDesc).GetLengths()[2]; + uint64_t W = deref(params.inputDesc).GetLengths()[3]; + uint64_t OH = deref(params.outputDesc).GetLengths()[2]; + uint64_t OW = deref(params.outputDesc).GetLengths()[3]; + + kernel(params.input, params.output, N, C, H, W, OH, OW, input_tv, output_tv); + }; + }; + + return result; +} + +} // namespace adaptiveavgpool + +} // namespace solver + +} // namespace miopen diff --git a/src/solver/adaptiveavgpool/forward_adaptiveavgpool_3d.cpp b/src/solver/adaptiveavgpool/forward_adaptiveavgpool_3d.cpp new file mode 100644 index 0000000000..3c4fcf552f --- /dev/null +++ b/src/solver/adaptiveavgpool/forward_adaptiveavgpool_3d.cpp @@ -0,0 +1,142 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ + +#include <miopen/conv_solution.hpp> +#include <miopen/execution_context.hpp> +#include <miopen/invoke_params.hpp> +#include <miopen/tensor_view_utils.hpp> +#include <miopen/adaptiveavgpool/solvers.hpp> + +#include <miopen/adaptiveavgpool/invoke_params.hpp> +#include <miopen/datatype.hpp> +#include <miopen/adaptiveavgpool.hpp> +#include <miopen/target_properties.hpp> + +#define LOCAL_SIZE_FWD_3D 256 + +namespace miopen { + +namespace solver { + +namespace adaptiveavgpool { + +namespace { + +bool IsOverRocmFwd3d(const miopen::adaptiveavgpool::FwdProblemDescription& problem) +{ + auto in_nelems = problem.GetInputDesc().GetElementSize(); + auto out_nelems = problem.GetOutputDesc().GetElementSize(); + auto in_over_out = static_cast<float>(in_nelems) / out_nelems; + + if(problem.IsAllContiguous()) + { + if(in_over_out <= 98) + return true; + } + else + { + if(in_over_out < 8000) + return true; + } + return false; +} + +} // namespace + +bool AdaptiveAvgPoolForward3d::IsApplicable( + const ExecutionContext&, const miopen::adaptiveavgpool::FwdProblemDescription& problem) const +{ + if(problem.GetInputDesc().GetNumDims() != 5 || problem.GetOutputDesc().GetNumDims() != 5) + { + return false; + } + if(!IsOverRocmFwd3d(problem)) + { + return false; + } + if(!(problem.GetInputDesc().GetType() == miopenFloat || + problem.GetInputDesc().GetType() == miopenHalf || + problem.GetInputDesc().GetType() == miopenBFloat16)) + return false; + return true; +} + +ConvSolution AdaptiveAvgPoolForward3d::GetSolution( + const ExecutionContext& context, + const miopen::adaptiveavgpool::FwdProblemDescription& problem) const +{ + std::ignore = context; + + auto result = ConvSolution{miopenStatusSuccess}; + auto input_dtype = miopen::GetDataType(problem.GetInputDesc().GetType()); + auto output_dtype = miopen::GetDataType(problem.GetOutputDesc().GetType()); + auto dtype = problem.GetOutputDesc().GetType(); + uint64_t N_total = problem.GetNtotal(); + + auto build_params = KernelBuildParameters{ + {"MIOPEN_USE_FP16", static_cast<int>(dtype == miopenHalf)}, + {"MIOPEN_USE_FP32", static_cast<int>(dtype == miopenFloat)}, + {"MIOPEN_USE_FP64", static_cast<int>(dtype == miopenDouble)}, + {"MIOPEN_USE_BFP16", static_cast<int>(dtype == miopenBFloat16)}, + {"INPUT_TYPE", input_dtype == "bfloat16" ? "ushort" : input_dtype}, + {"OUTPUT_TYPE", output_dtype == "bfloat16" ? 
"ushort" : output_dtype}}; + + result.construction_params.push_back(make_hip_kernel({LOCAL_SIZE_FWD_3D}, + {N_total}, + "MIOpenAdaptiveAvgPool.cpp", + "AdaptiveAvgPoolForward3d", + build_params)); + + result.invoker_factory = [](const std::vector<Kernel>& kernels) { + return [=](const Handle& handle_, const AnyInvokeParams& raw_params) { + decltype(auto) params = raw_params.CastTo<miopen::adaptiveavgpool::FwdInvokeParams>(); + + decltype(auto) kernel = handle_.Run(kernels.front()); + + auto input_tv = get_inner_expanded_tv<5>(deref(params.inputDesc)); + auto output_tv = get_inner_expanded_tv<5>(deref(params.outputDesc)); + + uint64_t N = deref(params.inputDesc).GetLengths()[0]; + uint64_t C = deref(params.inputDesc).GetLengths()[1]; + uint64_t D = deref(params.inputDesc).GetLengths()[2]; + uint64_t H = deref(params.inputDesc).GetLengths()[3]; + uint64_t W = deref(params.inputDesc).GetLengths()[4]; + uint64_t OD = deref(params.outputDesc).GetLengths()[2]; + uint64_t OH = deref(params.outputDesc).GetLengths()[3]; + uint64_t OW = deref(params.outputDesc).GetLengths()[4]; + + kernel(params.input, params.output, N, C, D, H, W, OD, OH, OW, input_tv, output_tv); + }; + }; + + return result; +} + +} // namespace adaptiveavgpool + +} // namespace solver + +} // namespace miopen diff --git a/test/cpu_adaptiveavgpool.hpp b/test/cpu_adaptiveavgpool.hpp new file mode 100644 index 0000000000..462cbda67c --- /dev/null +++ b/test/cpu_adaptiveavgpool.hpp @@ -0,0 +1,263 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ +#pragma once + +#include "tensor_holder.hpp" +#include <miopen/tensor_view_utils.hpp> +#include "ford.hpp" + +template <class T> +void cpu_adaptiveavgpool_forward_1d( + tensor<T> input, tensor<T>& output, size_t C, size_t H, size_t OH) +{ + auto numel = output.desc.GetElementSize(); + + auto input_tv = miopen::get_inner_expanded_tv<3>(input.desc); + auto output_tv = miopen::get_inner_expanded_tv<3>(output.desc); + + par_ford(numel)([&](size_t gid) { + size_t nc = gid / OH, oh = gid % OH; + size_t n = nc / C, c = nc % C; + + size_t h = oh * H / OH; + size_t kh = (((oh + 1) * H + OH - 1) / OH) - h; + + float sum = 0; + for(size_t ih = h; ih < (h + kh); ++ih) + { + sum += static_cast<float>(input[input_tv.get_tensor_view_idx({n, c, ih})]); + } + + output[output_tv.get_tensor_view_idx({n, c, oh})] = static_cast<T>(sum / kh); + }); +} + +template <class T> +void cpu_adaptiveavgpool_forward_2d( + tensor<T> input, tensor<T>& output, size_t C, size_t H, size_t W, size_t OH, size_t OW) +{ + auto numel = output.desc.GetElementSize(); + + auto input_tv = miopen::get_inner_expanded_tv<4>(input.desc); + auto output_tv = miopen::get_inner_expanded_tv<4>(output.desc); + + par_ford(numel)([&](size_t gid) { + size_t ncoh = gid / OW, ow = gid % OW; + size_t nc = ncoh / OH, oh = ncoh % OH; + size_t n = nc / C, c = nc % C; + + size_t h = (oh * H) / OH; + size_t kh = (((oh + 1) * H + OH - 1) / OH) - h; + + size_t w = (ow * W) / OW; + size_t kw = (((ow + 1) * W + OW - 1) / OW) - w; + + float divider = static_cast<float>(kh * kw); + float sum = 0; + for(size_t ih = h; ih < (h + kh); ++ih) + { + for(size_t iw = w; iw < (w + kw); ++iw) + { + sum += static_cast<float>(input[input_tv.get_tensor_view_idx({n, c, ih, iw})]); + } + } + + output[output_tv.get_tensor_view_idx({n, c, oh, ow})] = static_cast<T>(sum / divider); + }); +} + +template <class T> +void cpu_adaptiveavgpool_forward_3d(tensor<T> input, + tensor<T>& output, + size_t C, + size_t D, + size_t H, + size_t W, + size_t OD, + size_t OH, + size_t OW) +{ + auto numel = output.desc.GetElementSize(); + + auto input_tv = miopen::get_inner_expanded_tv<5>(input.desc); + auto output_tv = miopen::get_inner_expanded_tv<5>(output.desc); + + par_ford(numel)([&](size_t gid) { + size_t ncodoh = gid / OW, ow = gid % OW; + size_t ncod = ncodoh / OH, oh = ncodoh % OH; + size_t nc = ncod / OD, od = ncod % OD; + size_t n = nc / C, c = nc % C; + + size_t d = (od * D) / OD; + size_t kd = ((od + 1) * D + OD - 1) / OD - d; + + size_t h = (oh * H) / OH; + size_t kh = ((oh + 1) * H + OH - 1) / OH - h; + + size_t w = (ow * W) / OW; + size_t kw = ((ow + 1) * W + OW - 1) / OW - w; + + float sum = 0; + for(size_t id = d; id < (d + kd); ++id) + { + for(size_t ih = h; ih < (h + kh); ++ih) + { + for(size_t iw = w; iw < (w + kw); ++iw) + { + sum += + static_cast<float>(input[input_tv.get_tensor_view_idx({n, c, id, ih, iw})]); + } + } + } + + output[output_tv.get_tensor_view_idx({n, c, od, oh, ow})] = + static_cast<T>(sum / (kd * kh * kw)); + }); +} + +template <class T> +void cpu_adaptiveavgpool_backward_1d( + tensor<T> output_grad, tensor<T>& input_grad, size_t C, size_t H, size_t OH) +{ + auto numel = input_grad.desc.GetElementSize(); + + auto output_grad_tv = miopen::get_inner_expanded_tv<3>(output_grad.desc); + auto input_grad_tv = miopen::get_inner_expanded_tv<3>(input_grad.desc); + + par_ford(numel)([&](size_t gid) { + size_t nc = gid / H, h = gid % H; + size_t n = nc / C, c = nc % C; + + size_t 
oh = (h * OH) / H; + size_t koh = (((h + 1) * OH + H - 1) / H) - oh; + + float grad = 0; + for(size_t ih = oh; ih < (oh + koh); ++ih) + { + size_t kh = ((ih + 1) * H + OH - 1) / OH - (ih * H) / OH; + grad += + static_cast<float>(output_grad[output_grad_tv.get_tensor_view_idx({n, c, ih})]) / + kh; + } + + input_grad[input_grad_tv.get_tensor_view_idx({n, c, h})] = static_cast<T>(grad); + }); +} + +template <class T> +void cpu_adaptiveavgpool_backward_2d(tensor<T> output_grad, + tensor<T>& input_grad, + size_t C, + size_t H, + size_t W, + size_t OH, + size_t OW) +{ + auto numel = input_grad.desc.GetElementSize(); + + auto output_grad_tv = miopen::get_inner_expanded_tv<4>(output_grad.desc); + auto input_grad_tv = miopen::get_inner_expanded_tv<4>(input_grad.desc); + + par_ford(numel)([&](size_t gid) { + size_t nch = gid / W, w = gid % W; + size_t nc = nch / H, h = nch % H; + size_t n = nc / C, c = nc % C; + + size_t oh = (h * OH) / H; + size_t koh = ((h + 1) * OH + H - 1) / H - oh; + + size_t ow = (w * OW) / W; + size_t kow = ((w + 1) * OW + W - 1) / W - ow; + + float grad = 0; + for(size_t ih = oh; ih < (oh + koh); ++ih) + { + size_t kh = ((ih + 1) * H + OH - 1) / OH - (ih * H) / OH; + for(size_t iw = ow; iw < (ow + kow); ++iw) + { + size_t kw = ((iw + 1) * W + OW - 1) / OW - (iw * W) / OW; + grad += static_cast<float>( + output_grad[output_grad_tv.get_tensor_view_idx({n, c, ih, iw})]) / + (kh * kw); + } + } + + input_grad[input_grad_tv.get_tensor_view_idx({n, c, h, w})] = static_cast<T>(grad); + }); +} + +template <class T> +void cpu_adaptiveavgpool_backward_3d(tensor<T> output_grad, + tensor<T>& input_grad, + size_t C, + size_t D, + size_t H, + size_t W, + size_t OD, + size_t OH, + size_t OW) +{ + auto numel = input_grad.desc.GetElementSize(); + + auto output_grad_tv = miopen::get_inner_expanded_tv<5>(output_grad.desc); + auto input_grad_tv = miopen::get_inner_expanded_tv<5>(input_grad.desc); + + par_ford(numel)([&](size_t gid) { + size_t ncdh = gid / W, w = gid % W; + size_t ncd = ncdh / H, h = ncdh % H; + size_t nc = ncd / D, d = ncd % D; + size_t n = nc / C, c = nc % C; + + size_t od = (d * OD) / D; + size_t kod = ((d + 1) * OD + D - 1) / D - od; + + size_t oh = (h * OH) / H; + size_t koh = ((h + 1) * OH + H - 1) / H - oh; + + size_t ow = (w * OW) / W; + size_t kow = ((w + 1) * OW + W - 1) / W - ow; + + float grad = 0; + for(size_t id = od; id < (od + kod); ++id) + { + size_t kd = ((id + 1) * D + OD - 1) / OD - (id * D) / OD; + for(size_t ih = oh; ih < (oh + koh); ++ih) + { + size_t kh = ((ih + 1) * H + OH - 1) / OH - (ih * H) / OH; + for(size_t iw = ow; iw < (ow + kow); ++iw) + { + size_t kw = ((iw + 1) * W + OW - 1) / OW - (iw * W) / OW; + grad += + static_cast<float>( + output_grad[output_grad_tv.get_tensor_view_idx({n, c, id, ih, iw})]) / + (kd * kh * kw); + } + } + } + + input_grad[input_grad_tv.get_tensor_view_idx({n, c, d, h, w})] = static_cast<T>(grad); + }); +} diff --git a/test/gtest/adaptiveavgpool.cpp b/test/gtest/adaptiveavgpool.cpp new file mode 100644 index 0000000000..b09c286b15 --- /dev/null +++ b/test/gtest/adaptiveavgpool.cpp @@ -0,0 +1,94 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#include "adaptiveavgpool.hpp" +#include "gtest/gtest.h" +using float16 = half_float::half; + +// FORWARD TEST +using GPU_AdaptiveAvgPool_fwd_FP32 = AdaptiveAvgPoolTestFwd<float>; +using GPU_AdaptiveAvgPool_fwd_FP16 = AdaptiveAvgPoolTestFwd<float16>; +using GPU_AdaptiveAvgPool_fwd_BFP16 = AdaptiveAvgPoolTestFwd<bfloat16>; + +TEST_P(GPU_AdaptiveAvgPool_fwd_FP32, AdaptiveAvgPoolTestFwd) +{ + RunTest(); + Verify(); +}; + +TEST_P(GPU_AdaptiveAvgPool_fwd_FP16, AdaptiveAvgPoolTestFwd) +{ + RunTest(); + Verify(); +}; + +TEST_P(GPU_AdaptiveAvgPool_fwd_BFP16, AdaptiveAvgPoolTestFwd) +{ + RunTest(); + Verify(); +}; + +INSTANTIATE_TEST_SUITE_P(Smoke, + GPU_AdaptiveAvgPool_fwd_FP32, + testing::ValuesIn(AdaptiveAvgPoolTestConfigsFwdFp32())); +INSTANTIATE_TEST_SUITE_P(Smoke, + GPU_AdaptiveAvgPool_fwd_FP16, + testing::ValuesIn(AdaptiveAvgPoolTestConfigsFwdFp16())); +INSTANTIATE_TEST_SUITE_P(Smoke, + GPU_AdaptiveAvgPool_fwd_BFP16, + testing::ValuesIn(AdaptiveAvgPoolTestConfigsFwdBfp16())); + +// BACKWARD TEST +using GPU_AdaptiveAvgPool_bwd_FP32 = AdaptiveAvgPoolTestBwd<float>; +using GPU_AdaptiveAvgPool_bwd_FP16 = AdaptiveAvgPoolTestBwd<float16>; +using GPU_AdaptiveAvgPool_bwd_BFP16 = AdaptiveAvgPoolTestBwd<bfloat16>; + +TEST_P(GPU_AdaptiveAvgPool_bwd_FP32, AdaptiveAvgPoolTestBwd) +{ + RunTest(); + Verify(); +}; + +TEST_P(GPU_AdaptiveAvgPool_bwd_FP16, AdaptiveAvgPoolTestBwd) +{ + RunTest(); + Verify(); +}; + +TEST_P(GPU_AdaptiveAvgPool_bwd_BFP16, AdaptiveAvgPoolTestBwd) +{ + RunTest(); + Verify(); +}; + +INSTANTIATE_TEST_SUITE_P(Smoke, + GPU_AdaptiveAvgPool_bwd_FP32, + testing::ValuesIn(AdaptiveAvgPoolTestConfigsBwdFp32())); +INSTANTIATE_TEST_SUITE_P(Smoke, + GPU_AdaptiveAvgPool_bwd_FP16, + testing::ValuesIn(AdaptiveAvgPoolTestConfigsBwdFp16())); +INSTANTIATE_TEST_SUITE_P(Smoke, + GPU_AdaptiveAvgPool_bwd_BFP16, + testing::ValuesIn(AdaptiveAvgPoolTestConfigsBwdBfp16())); diff --git a/test/gtest/adaptiveavgpool.hpp b/test/gtest/adaptiveavgpool.hpp new file mode 100644 index 0000000000..cf2e1fa5dd --- /dev/null +++ b/test/gtest/adaptiveavgpool.hpp @@ -0,0 +1,378 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. 
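For running the suites above: each TEST_P / INSTANTIATE_TEST_SUITE_P pair expands to parameterized case names, so a single suite can be selected with GoogleTest's standard filter flag. Sketch as a comment (the binary name is hypothetical; --gtest_filter itself is standard GoogleTest):

// Cases expand to names of the form:
//   Smoke/GPU_AdaptiveAvgPool_fwd_FP32.AdaptiveAvgPoolTestFwd/0
// so, e.g., only the FP32 forward smoke tests:
//   ./MIOpenUnitTests --gtest_filter='Smoke/GPU_AdaptiveAvgPool_fwd_FP32.*'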
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+#include "cpu_adaptiveavgpool.hpp"
+#include "get_handle.hpp"
+#include "tensor_holder.hpp"
+#include "verify.hpp"
+#include <gtest/gtest.h>
+#include <iostream>
+#include <miopen/adaptiveavgpool.hpp>
+#include <miopen/miopen.h>
+#include <vector>
+
+template <class T>
+inline std::ostream& operator<<(std::ostream& os, const std::vector<T>& v)
+{
+    os << '{';
+    for(size_t i = 0; i < v.size(); ++i)
+    {
+        if(i != 0)
+            os << ',';
+        os << v[i];
+    }
+    os << '}';
+    return os;
+}
+
+struct AdaptiveAvgPoolTestCase
+{
+    std::vector<size_t> input_dims;
+    std::vector<size_t> output_dims;
+    bool is_contiguous = true;
+
+    friend std::ostream& operator<<(std::ostream& os, const AdaptiveAvgPoolTestCase& tc)
+    {
+        return os << " input_dims:" << tc.input_dims << " output_dims:" << tc.output_dims
+                  << " is_contiguous:" << tc.is_contiguous;
+    }
+
+    std::vector<size_t> GetInput() const { return input_dims; }
+    std::vector<size_t> GetOutput() const { return output_dims; }
+
+    // Produces strides equivalent to tensor.transpose(0, -1).contiguous().transpose(0, -1)
+    // when is_contiguous == false (i.e. a valid but non-contiguous layout).
+    std::vector<size_t> ComputeStrides(std::vector<size_t> inputDim) const
+    {
+        if(!is_contiguous)
+            std::swap(inputDim.front(), inputDim.back());
+        std::vector<size_t> strides(inputDim.size());
+        strides.back() = 1;
+        for(int i = inputDim.size() - 2; i >= 0; --i)
+            strides[i] = strides[i + 1] * inputDim[i + 1];
+        if(!is_contiguous)
+            std::swap(strides.front(), strides.back());
+        return strides;
+    }
+};
+
+inline std::vector<AdaptiveAvgPoolTestCase> AdaptiveAvgPoolTestConfigsFwdFp32()
+{
+    return {
+        {{64, 768, 17}, {10}, false},
+        {{64, 768, 17}, {10}, true},
+        {{64, 78, 17, 17}, {10, 10}, false},
+        {{64, 78, 17, 17}, {10, 10}, true},
+        {{6, 18, 18, 18, 18}, {5, 5, 5}, false},
+        {{6, 18, 18, 18, 18}, {5, 5, 5}, true},
+    };
+}
+
+inline std::vector<AdaptiveAvgPoolTestCase> AdaptiveAvgPoolTestConfigsFwdFp16()
+{
+    return {
+        {{64, 768, 17}, {10}, false},
+        {{64, 768, 17}, {10}, true},
+        {{64, 78, 17, 17}, {10, 10}, false},
+        {{64, 78, 17, 17}, {10, 10}, true},
+        {{6, 18, 18, 18, 18}, {5, 5, 5}, false},
+        {{6, 18, 18, 18, 18}, {5, 5, 5}, true},
+    };
+}
+
+inline std::vector<AdaptiveAvgPoolTestCase> AdaptiveAvgPoolTestConfigsFwdBfp16()
+{
+    return {
+        {{64, 768, 17}, {10}, false},
+        {{64, 768, 17}, {10}, true},
+        {{64, 78, 17, 17}, {10, 10}, false},
+        {{64, 78, 17, 17}, {10, 10}, true},
+        {{6, 18, 18, 18, 18}, {5, 5, 5}, false},
+        {{6, 18, 18, 18, 18}, {5, 5, 5}, true},
+    };
+}
+
+inline
std::vector<AdaptiveAvgPoolTestCase> AdaptiveAvgPoolTestConfigsBwdFp32() +{ + return { + {{64, 768, 17}, {10}, false}, + {{64, 768, 17}, {10}, true}, + {{64, 206, 17, 17}, {10, 10}, false}, + {{6, 18, 18, 18, 18}, {5, 5, 5}, false}, + {{6, 18, 18, 18, 18}, {18, 18, 18}, true}, + }; +} + +inline std::vector<AdaptiveAvgPoolTestCase> AdaptiveAvgPoolTestConfigsBwdFp16() +{ + return { + {{64, 768, 17}, {10}, false}, + {{64, 768, 17}, {10}, true}, + {{64, 28, 35, 35}, {35, 35}, false}, + {{6, 28, 35, 35, 35}, {10, 10, 10}, false}, + {{6, 28, 35, 35, 35}, {35, 35, 35}, true}, + }; +} + +inline std::vector<AdaptiveAvgPoolTestCase> AdaptiveAvgPoolTestConfigsBwdBfp16() +{ + return { + {{64, 768, 17}, {10}, false}, + {{64, 768, 17}, {10}, true}, + {{64, 208, 9, 9}, {7, 7}, false}, + {{6, 18, 12, 12, 12}, {5, 5, 5}, false}, + {{6, 18, 12, 12, 12}, {12, 12, 12}, true}, + }; +} + +// FORWARD TEST +template <typename T = float> +struct AdaptiveAvgPoolTestFwd : public ::testing::TestWithParam<AdaptiveAvgPoolTestCase> +{ +protected: + void SetUp() override + { + auto&& handle = get_handle(); + adaptiveavgpool_config = GetParam(); + auto in_dim = adaptiveavgpool_config.GetInput(); + auto in_strides = adaptiveavgpool_config.ComputeStrides(in_dim); + auto out_dim = adaptiveavgpool_config.GetOutput(); + N = in_dim[0]; + C = in_dim[1]; + std::vector<size_t> out_dim_final = {N, C}; + if(in_dim.size() == 3) + { + D = 1; + H = in_dim[2]; + W = 1; + + OD = 1; + OH = out_dim[0]; + OW = 1; + out_dim_final.push_back(OH); + } + else if(in_dim.size() == 4) + { + D = 1; + H = in_dim[2]; + W = in_dim[3]; + + OD = 1; + OH = out_dim[0]; + OW = out_dim[1]; + out_dim_final.push_back(OH); + out_dim_final.push_back(OW); + } + else if(in_dim.size() == 5) + { + D = in_dim[2]; + H = in_dim[3]; + W = in_dim[4]; + + OD = out_dim[0]; + OH = out_dim[1]; + OW = out_dim[2]; + out_dim_final.push_back(OD); + out_dim_final.push_back(OH); + out_dim_final.push_back(OW); + } + + auto gen_input_value = [](auto...) 
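+        // Uniform random inputs in [-10, 10): wide enough to exercise the averaging,
+        // small enough to avoid accumulation overflow in fp16/bfloat16.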
{
+            return prng::gen_A_to_B<T>(static_cast<T>(-10.0f), static_cast<T>(10.0f));
+        };
+        input = tensor<T>{in_dim, in_strides}.generate(gen_input_value);
+
+        // Pre-fill the device and reference outputs with NaN so any element the
+        // kernels fail to write is caught during verification.
+        output = tensor<T>{out_dim_final};
+        std::fill(output.begin(), output.end(), std::numeric_limits<T>::quiet_NaN());
+
+        ref_output = tensor<T>{out_dim_final};
+        std::fill(ref_output.begin(), ref_output.end(), std::numeric_limits<T>::quiet_NaN());
+
+        input_dev  = handle.Write(input.data);
+        output_dev = handle.Write(output.data);
+    }
+
+    void RunTest()
+    {
+        auto&& handle = get_handle();
+        miopenStatus_t status;
+
+        // Compute the CPU reference for the matching dimensionality, then run the
+        // GPU kernel on the same input.
+        auto dims = input.desc.GetNumDims();
+        if(dims == 3)
+        {
+            cpu_adaptiveavgpool_forward_1d<T>(input, ref_output, C, H, OH);
+        }
+        else if(dims == 4)
+        {
+            cpu_adaptiveavgpool_forward_2d<T>(input, ref_output, C, H, W, OH, OW);
+        }
+        else if(dims == 5)
+        {
+            cpu_adaptiveavgpool_forward_3d<T>(input, ref_output, C, D, H, W, OD, OH, OW);
+        }
+        status = miopen::adaptiveavgpool::AdaptiveAvgPoolForward(
+            handle, input.desc, input_dev.get(), output.desc, output_dev.get());
+        ASSERT_EQ(status, miopenStatusSuccess);
+
+        output.data = handle.Read<T>(output_dev, output.data.size());
+    }
+
+    void Verify()
+    {
+        double threshold = std::numeric_limits<T>::epsilon();
+
+        auto error = miopen::rms_range(ref_output, output);
+
+        ASSERT_EQ(miopen::range_distance(ref_output), miopen::range_distance(output));
+        EXPECT_LT(error, threshold * 10)
+            << "Forward output error " << error << " exceeds tolerance " << threshold * 10
+            << " (10x machine epsilon)";
+    }
+    AdaptiveAvgPoolTestCase adaptiveavgpool_config;
+
+    tensor<T> input;
+    tensor<T> output;
+    tensor<T> ref_output;
+
+    size_t N, C, D, H, W, OD, OH, OW;
+
+    miopen::Allocator::ManageDataPtr input_dev;
+    miopen::Allocator::ManageDataPtr output_dev;
+};
+
+// BACKWARD TEST
+template <typename T = float>
+struct AdaptiveAvgPoolTestBwd : public ::testing::TestWithParam<AdaptiveAvgPoolTestCase>
+{
+protected:
+    void SetUp() override
+    {
+        auto&& handle = get_handle();
+        adaptiveavgpool_config = GetParam();
+        auto in_grad_dim  = adaptiveavgpool_config.GetInput();
+        auto out_grad_dim = adaptiveavgpool_config.GetOutput();
+        N = in_grad_dim[0];
+        C = in_grad_dim[1];
+        std::vector<size_t> out_grad_dim_final = {N, C};
+
+        if(in_grad_dim.size() == 3)
+        {
+            D = 1;
+            H = in_grad_dim[2];
+            W = 1;
+
+            OD = 1;
+            OH = out_grad_dim[0];
+            OW = 1;
+            out_grad_dim_final.push_back(OH);
+        }
+        else if(in_grad_dim.size() == 4)
+        {
+            D = 1;
+            H = in_grad_dim[2];
+            W = in_grad_dim[3];
+
+            OD = 1;
+            OH = out_grad_dim[0];
+            OW = out_grad_dim[1];
+            out_grad_dim_final.push_back(OH);
+            out_grad_dim_final.push_back(OW);
+        }
+        else if(in_grad_dim.size() == 5)
+        {
+            D = in_grad_dim[2];
+            H = in_grad_dim[3];
+            W = in_grad_dim[4];
+
+            OD = out_grad_dim[0];
+            OH = out_grad_dim[1];
+            OW = out_grad_dim[2];
+            out_grad_dim_final.push_back(OD);
+            out_grad_dim_final.push_back(OH);
+            out_grad_dim_final.push_back(OW);
+        }
+        auto out_grad_strides = adaptiveavgpool_config.ComputeStrides(out_grad_dim_final);
+
+        auto gen_output_grad_value = [](auto...) 
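+        // Uniform random upstream gradients in [-10, 10), laid out through
+        // out_grad_strides (non-contiguous when is_contiguous == false).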
{
+            return prng::gen_A_to_B<T>(static_cast<T>(-10.0f), static_cast<T>(10.0f));
+        };
+        output_grad =
+            tensor<T>{out_grad_dim_final, out_grad_strides}.generate(gen_output_grad_value);
+
+        // Pre-fill the device and reference input gradients with NaN so unwritten
+        // elements are caught during verification.
+        input_grad = tensor<T>{in_grad_dim};
+        std::fill(input_grad.begin(), input_grad.end(), std::numeric_limits<T>::quiet_NaN());
+
+        ref_input_grad = tensor<T>{in_grad_dim};
+        std::fill(
+            ref_input_grad.begin(), ref_input_grad.end(), std::numeric_limits<T>::quiet_NaN());
+
+        output_grad_dev = handle.Write(output_grad.data);
+        input_grad_dev  = handle.Write(input_grad.data);
+    }
+
+    void RunTest()
+    {
+        auto&& handle = get_handle();
+
+        miopenStatus_t status;
+
+        // Compute the CPU reference gradient, then run the GPU kernel on the same
+        // upstream gradient.
+        auto dims = input_grad.desc.GetNumDims();
+        if(dims == 3)
+        {
+            cpu_adaptiveavgpool_backward_1d<T>(output_grad, ref_input_grad, C, H, OH);
+        }
+        else if(dims == 4)
+        {
+            cpu_adaptiveavgpool_backward_2d<T>(output_grad, ref_input_grad, C, H, W, OH, OW);
+        }
+        else if(dims == 5)
+        {
+            cpu_adaptiveavgpool_backward_3d<T>(output_grad, ref_input_grad, C, D, H, W, OD, OH, OW);
+        }
+        status = miopen::adaptiveavgpool::AdaptiveAvgPoolBackward(
+            handle, output_grad.desc, output_grad_dev.get(), input_grad.desc, input_grad_dev.get());
+
+        ASSERT_EQ(status, miopenStatusSuccess);
+
+        input_grad.data = handle.Read<T>(input_grad_dev, input_grad.data.size());
+    }
+
+    void Verify()
+    {
+        double threshold = std::numeric_limits<T>::epsilon();
+        auto error = miopen::rms_range(ref_input_grad, input_grad);
+        ASSERT_EQ(miopen::range_distance(ref_input_grad), miopen::range_distance(input_grad));
+        EXPECT_LT(error, threshold * 10)
+            << "Backward input gradient error " << error << " exceeds tolerance "
+            << threshold * 10 << " (10x machine epsilon)";
+    }
+    AdaptiveAvgPoolTestCase adaptiveavgpool_config;
+
+    tensor<T> output_grad;
+    tensor<T> input_grad;
+    tensor<T> ref_input_grad;
+
+    size_t N, C, D, H, W, OD, OH, OW;
+
+    miopen::Allocator::ManageDataPtr output_grad_dev;
+    miopen::Allocator::ManageDataPtr input_grad_dev;
+};
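
For reference, fixtures like these are normally driven by `TEST_P`/`INSTANTIATE_TEST_SUITE_P` boilerplate further down the file, which this hunk does not show. A minimal sketch of that wiring for the float forward case follows; the `AdaptiveAvgPoolTestFwdFloat` alias and the test/suite names are illustrative assumptions, not part of the diff:

```cpp
// Hypothetical wiring sketch; only the fixture and config-list names are from the diff.
using AdaptiveAvgPoolTestFwdFloat = AdaptiveAvgPoolTestFwd<float>;

TEST_P(AdaptiveAvgPoolTestFwdFloat, AdaptiveAvgPoolFwdTest)
{
    RunTest(); // CPU reference + GPU kernel
    Verify();  // RMS error against 10x epsilon
}

INSTANTIATE_TEST_SUITE_P(AdaptiveAvgPoolTestSet,
                         AdaptiveAvgPoolTestFwdFloat,
                         testing::ValuesIn(AdaptiveAvgPoolTestConfigsFwdFp32()));
```

Each parameterized instance prints its `AdaptiveAvgPoolTestCase` via the `operator<<` defined above, which is why that formatting matters for diagnosing failures.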