From 927aff695f507dd0588e0315be64f784fe13ab48 Mon Sep 17 00:00:00 2001 From: huafengchun Date: Thu, 21 Dec 2023 15:26:29 +0800 Subject: [PATCH 01/19] Enable AscendC kernel operator AscendC is an extended syntax for the C/C++ language that can be used to write operators that run on Ascend NPU. This commit introduce an operator(threshold) written in AscendC. Others can refer to this to implement other operators. AscendC can implement efficient fusion operators according to needs, in this case, threshold execution speed increased by nearly 4 times. Co-authored-by: CaoMengqing --- modules/cannops/CMakeLists.txt | 6 + .../cannops/ascendc_kernels/CMakeLists.txt | 17 + .../ascendc_kernels/kernel_tiling_types.h | 22 + .../threshold_opencv_kernel.cpp | 387 ++++++++++++++++++ .../cannops/ascendc_kernels/vector_tiling.h | 77 ++++ .../include/opencv2/ascendc_kernels.hpp | 7 + modules/cannops/include/opencv2/cann_call.hpp | 28 +- .../cannops/perf/perf_element_operations.cpp | 20 + modules/cannops/src/ascend_mat.cpp | 6 +- modules/cannops/src/cann_call.cpp | 12 +- modules/cannops/src/element_operations.cpp | 128 +++--- modules/cannops/src/precomp.hpp | 1 + .../cannops/test/test_element_operations.cpp | 33 +- modules/cannops/test/test_kernel.cpp | 51 +++ modules/cannops/test/test_precomp.hpp | 1 + 15 files changed, 704 insertions(+), 92 deletions(-) create mode 100644 modules/cannops/ascendc_kernels/CMakeLists.txt create mode 100644 modules/cannops/ascendc_kernels/kernel_tiling_types.h create mode 100644 modules/cannops/ascendc_kernels/threshold_opencv_kernel.cpp create mode 100644 modules/cannops/ascendc_kernels/vector_tiling.h create mode 100644 modules/cannops/include/opencv2/ascendc_kernels.hpp create mode 100644 modules/cannops/test/test_kernel.cpp diff --git a/modules/cannops/CMakeLists.txt b/modules/cannops/CMakeLists.txt index 0c16c5eb143..557fbe7f492 100644 --- a/modules/cannops/CMakeLists.txt +++ b/modules/cannops/CMakeLists.txt @@ -15,3 +15,9 @@ 
ocv_include_directories(${CMAKE_SOURCE_DIR}/modules/ts/include) ocv_add_accuracy_tests(DEPENDS_ON opencv_cannops) ocv_add_perf_tests(DEPENDS_ON opencv_cannops) ocv_add_samples(opencv_cannops) + +# compile ascnedc kernels. +add_subdirectory(ascendc_kernels) +ocv_include_directories(${CMAKE_BINARY_DIR}/include/ascendc_kernels) +ocv_target_link_libraries(opencv_cannops PRIVATE ascendc_kernels) +ocv_target_link_libraries(opencv_test_cannops PRIVATE ascendc_kernels) diff --git a/modules/cannops/ascendc_kernels/CMakeLists.txt b/modules/cannops/ascendc_kernels/CMakeLists.txt new file mode 100644 index 00000000000..c4198e8b8e6 --- /dev/null +++ b/modules/cannops/ascendc_kernels/CMakeLists.txt @@ -0,0 +1,17 @@ +set(SOC_VERSION "ascend310p3" CACHE STRING "system on chip type") +set(ASCEND_CANN_PACKAGE_PATH "/usr/local/Ascend/ascend-toolkit/latest" CACHE PATH "ASCEND CANN package installation directory") +set(RUN_MODE "npu" CACHE STRING "run mode: npu/sim/cpu") + +if(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) +elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake) +else() + message(FATAL_ERROR "ascendc_kernel_cmake does not exist, please check whether the compiler package is installed.") +endif() + +include(${ASCENDC_CMAKE_DIR}/ascendc.cmake) + +ascendc_library(ascendc_kernels STATIC + threshold_opencv_kernel.cpp +) diff --git a/modules/cannops/ascendc_kernels/kernel_tiling_types.h b/modules/cannops/ascendc_kernels/kernel_tiling_types.h new file mode 100644 index 00000000000..3fbbdd06a63 --- /dev/null +++ b/modules/cannops/ascendc_kernels/kernel_tiling_types.h @@ -0,0 +1,22 @@ +#ifndef KERNEL_TILING_H +#define KERNEL_TILING_H + +/* + * threshType: + * THRESH_BINARY = 0, + * THRESH_BINARY_INV = 1, + * THRESH_TRUNC = 2, + * THRESH_TOZERO = 3, + * 
THRESH_TOZERO_INV = 4, +*/ +#pragma pack(push, 8) +struct ThresholdOpencvTilingData +{ + float maxVal; + float thresh; + uint32_t totalLength; + uint8_t threshType; + uint8_t dtype; +}; +#pragma pack(pop) +#endif // KERNEL_TILING_H diff --git a/modules/cannops/ascendc_kernels/threshold_opencv_kernel.cpp b/modules/cannops/ascendc_kernels/threshold_opencv_kernel.cpp new file mode 100644 index 00000000000..ffab30ebd54 --- /dev/null +++ b/modules/cannops/ascendc_kernels/threshold_opencv_kernel.cpp @@ -0,0 +1,387 @@ +#include "kernel_operator.h" +#include "vector_tiling.h" +#include "kernel_tiling_types.h" + +using namespace AscendC; + +// Make compiler happy. These two function will never be called. +__aicore__ static inline void Cast(const LocalTensor& dstLocal, + const LocalTensor& srcLocal, const RoundMode& round_mode, + const uint32_t calCount){}; +__aicore__ static inline void Cast(const LocalTensor& dstLocal, + const LocalTensor& srcLocal, const RoundMode& round_mode, + const uint32_t calCount){}; + +/** + * T: input data type. + * C: data type for calculate. + * if T != C, data should cast from T to C. + */ +template +class KernelThreshold +{ +public: + __aicore__ inline KernelThreshold() {} + __aicore__ inline void Init(ThresholdOpencvTilingData* tiling, GM_ADDR x, GM_ADDR y) + { + tilingData = tiling; + + /** + * Calculate memory use per element. + * 1. InputQueue: sizeof(T) * BUFFER_NUM + * 2. OutputQueue: sizeof(T) * BUFFER_NUM + * 3. maskBuffer: 1 byte at most. + */ + uint64_t bytesPerElem = sizeof(T) * BUFFER_NUM * 2 + sizeof(uint8_t) * 1; + + /** + * If need cast, should init two more cast buffers. + * Memory use per element: + * 1. InputCastBuffer: sizeof(C) + * 2. OutputCastBuffer: sizeof(C) + */ + if (!std::is_same::value) + { + bytesPerElem += sizeof(C) * 2; + } + + // Most of AscendC APIs need align to 32 Bytes, but Compare and Select need + // align to 256 Bytes, 256/sizeof(C) means how many element can be process + // in one loop. 
+ vecTiling.calculate(tilingData->totalLength, GetBlockNum(), GetBlockIdx(), bytesPerElem, + 256 / sizeof(C)); + + xGM.SetGlobalBuffer((__gm__ T*)x + vecTiling.blockOffset, vecTiling.blockLength); + yGM.SetGlobalBuffer((__gm__ T*)y + vecTiling.blockOffset, vecTiling.blockLength); + + // Cast buffer. + if (!std::is_same::value) + { + pipe.InitBuffer(InputCastBuffer, vecTiling.loopLength * sizeof(C)); + pipe.InitBuffer(outputCastBuffer, vecTiling.loopLength * sizeof(C)); + } + + pipe.InitBuffer(inputQueue, BUFFER_NUM, vecTiling.loopLength * sizeof(T)); + pipe.InitBuffer(outputQueue, BUFFER_NUM, vecTiling.loopLength * sizeof(T)); + pipe.InitBuffer(maskBuffer, vecTiling.loopLength * sizeof(uint8_t)); + } + + __aicore__ inline void Run() + { + for (uint32_t loop = 0; loop < vecTiling.loopCount; loop++) + { + uint32_t offset = loop * vecTiling.loopLength; + Compute(offset, vecTiling.loopLength); + } + + if (vecTiling.loopTailLength != 0) + { + uint32_t offset = vecTiling.loopCount * vecTiling.loopLength; + Compute(offset, vecTiling.loopTailLength); + } + } + +private: + __aicore__ inline void Compute(uint32_t offset, uint32_t len) + { + CopyIn(offset, len); + + // Get local Tensor, if case is need, local tensors come from + // cast buffer. otherwise, local tensors come from input/output queue. + LocalTensor xLocal = CastInput(inputQueue, InputCastBuffer, len); + LocalTensor yLocal = GetOutput(outputQueue, outputCastBuffer); + + Threshold(xLocal, yLocal, len); + + // Free local input tensor if tensor is not from cast buffer. + FreeInput(inputQueue, xLocal); + // Cast output tensor to output queue if output tensor is from cast buffer. + CastOutput(outputQueue, yLocal, len); + + CopyOut(offset, len); + } + + /** + * If need cast: + * 1. Get data from input queue, this data can't be calculate directly. + * 2. Get buffer with type C, which satisfied AscendC APIs. + * 3. Cast data from T to C. + * + * If not need cast: + * 1. Only need get data from queue. 
+ */ + __aicore__ inline LocalTensor CastInput(TQue& queue, + TBuf& buffer, uint32_t len) + { + LocalTensor xLocal; + if (std::is_same::value) + { + xLocal = queue.DeQue(); + } + else + { + xLocal = buffer.Get(); + LocalTensor xCast = queue.DeQue(); + Cast(xLocal, xCast, RoundMode::CAST_NONE, len); + queue.FreeTensor(xCast); + } + return xLocal; + } + + /** + * If need cast: + * 1. Get local tensor from cast buffer. + * + * If not need cast: + * 1. Alloc local tensor from output queue. + */ + __aicore__ inline LocalTensor GetOutput(TQue& queue, + TBuf& buffer) + { + if (std::is_same::value) + { + return queue.AllocTensor(); + } + else + { + return buffer.Get(); + } + } + + /** + * If need cast: + * 1. Input local tensor are get from cast buffer, which do not need free. + * + * If not need cast: + * 1. Input local tensor are alloced from input queue, which need free. + */ + __aicore__ inline void FreeInput(TQue& queue, + LocalTensor& xLocal) + { + if (std::is_same::value) + { + queue.FreeTensor(xLocal); + } + } + + /** + * If need cast: + * 1. Alloc local tensor from output queue. + * 2. Cast from C to T. + * 3. Put casted local tensor in queue. + * + * If not need cast: + * 1. Only put local tensor in queue. + * + */ + __aicore__ inline void CastOutput(TQue& queue, + LocalTensor& yLocal, uint32_t len) + { + if (std::is_same::value) + { + queue.EnQue(yLocal); + } + else + { + LocalTensor yCast = queue.AllocTensor(); + RoundMode roundMode = RoundMode::CAST_NONE; + // Ref to AscendC cast API. 
+ if (std::is_same::value) + { + roundMode = RoundMode::CAST_RINT; + } + else if (std::is_same::value) + { + roundMode = RoundMode::CAST_ROUND; + } + Cast(yCast, yLocal, roundMode, len); + queue.EnQue(yCast); + } + } + + __aicore__ inline void CopyIn(uint32_t offset, uint32_t len) + { + LocalTensor xLocal = inputQueue.AllocTensor(); + DataCopy(xLocal, xGM[offset], len); + inputQueue.EnQue(xLocal); + } + + __aicore__ inline void CopyOut(uint32_t offset, uint32_t len) + { + LocalTensor yLocal = outputQueue.DeQue(); + DataCopy(yGM[offset], yLocal, len); + outputQueue.FreeTensor(yLocal); + } + + /** + * AscendC API Compare Warpper. + * AscendC Compare level2 API need input length align to 256, process + * tail data by level0 API. + */ + __aicore__ inline void CompareWrap(const LocalTensor& dstLocal, + const LocalTensor& src0Local, + const LocalTensor& src1Local, CMPMODE cmpMode, + uint32_t calCount) + { + // Elements total count for on loop inside Compare. + uint32_t batchCount = 256 / sizeof(C); + + // Tail elements count. + uint32_t tailCount = calCount % batchCount; + + // Level2 API, calCount should align to 256. + Compare(dstLocal, src0Local, src1Local, cmpMode, calCount - tailCount); + + // Data blocks are already cut align to 256, tail count will be 0 for + // all process loops except last one. + if (tailCount != 0) + { + BinaryRepeatParams repeatParams = {1, 1, 1, 8, 8, 8}; + uint32_t tailIdx = calCount - tailCount; + uint32_t maskIdx = tailIdx / sizeof(uint8_t); + Compare(dstLocal[maskIdx], src0Local[tailIdx], src1Local[tailIdx], cmpMode, tailCount, + 1, repeatParams); + } + } + + /** + * AscendC API Select Warpper. + * AscendC Select level2 API need input length align to 256, process + * tail data by level0 API. 
+ */ + __aicore__ inline void SelectWrap(const LocalTensor& dstLocal, + const LocalTensor& selMask, + const LocalTensor& src0Local, C src1Local, SELMODE selMode, + uint32_t calCount) + { + uint32_t batchCount = 256 / sizeof(C); + uint32_t tailCount = calCount % batchCount; + + Select(dstLocal, selMask, src0Local, src1Local, selMode, calCount - tailCount); + if (tailCount != 0) + { + BinaryRepeatParams repeatParams = {1, 1, 1, 8, 8, 8}; + uint32_t tailIdx = calCount - tailCount; + uint32_t maskIdx = tailIdx / sizeof(uint8_t); + Select(dstLocal[tailIdx], selMask[maskIdx], src0Local[tailIdx], src1Local, selMode, + tailCount, 1, repeatParams); + } + } + + __aicore__ inline void Threshold(LocalTensor& xLocal, LocalTensor& yLocal, uint32_t len) + { + LocalTensor mask = maskBuffer.Get(); + Duplicate(yLocal, static_cast(tilingData->thresh), len); + switch (tilingData->threshType) + { + case 0: + CompareWrap(mask, xLocal, yLocal, CMPMODE::LE, len); + Duplicate(yLocal, static_cast(0), len); + SelectWrap(yLocal, mask, yLocal, static_cast(tilingData->maxVal), + SELMODE::VSEL_TENSOR_SCALAR_MODE, len); + break; + case 1: + CompareWrap(mask, xLocal, yLocal, CMPMODE::GT, len); + Duplicate(yLocal, static_cast(0), len); + SelectWrap(yLocal, mask, yLocal, static_cast(tilingData->maxVal), + SELMODE::VSEL_TENSOR_SCALAR_MODE, len); + break; + case 2: + CompareWrap(mask, xLocal, yLocal, CMPMODE::LE, len); + SelectWrap(yLocal, mask, xLocal, static_cast(tilingData->thresh), + SELMODE::VSEL_TENSOR_SCALAR_MODE, len); + break; + case 3: + CompareWrap(mask, xLocal, yLocal, CMPMODE::GT, len); + SelectWrap(yLocal, mask, xLocal, static_cast(0), + SELMODE::VSEL_TENSOR_SCALAR_MODE, len); + break; + case 4: + CompareWrap(mask, xLocal, yLocal, CMPMODE::LE, len); + SelectWrap(yLocal, mask, xLocal, static_cast(0), + SELMODE::VSEL_TENSOR_SCALAR_MODE, len); + break; + default: + break; + } + } + + TPipe pipe; + TQue inputQueue; + TQue outputQueue; + TBuf InputCastBuffer, outputCastBuffer, maskBuffer; + + 
GlobalTensor xGM, yGM; + VectorTiling vecTiling; + ThresholdOpencvTilingData* tilingData; +}; + +#define LAUNCH_THRESHOLD_KERNEL(NAME, T, C) \ + __aicore__ inline void launch_threshold_kernel_##NAME(ThresholdOpencvTilingData* tilingData, \ + GM_ADDR x, GM_ADDR y) \ + { \ + KernelThreshold op; \ + op.Init(tilingData, x, y); \ + op.Run(); \ + } + +LAUNCH_THRESHOLD_KERNEL(CV_8U, uint8_t, half) // CV_8U +LAUNCH_THRESHOLD_KERNEL(CV_8S, int8_t, half) // CV_8S + // CV_16U +LAUNCH_THRESHOLD_KERNEL(CV_16S, int16_t, half) // CV_16S +LAUNCH_THRESHOLD_KERNEL(CV_32S, int32_t, float) // CV_32S +LAUNCH_THRESHOLD_KERNEL(CV_32F, float, float) // CV_32F + // CV_64F +LAUNCH_THRESHOLD_KERNEL(CV_16F, half, half) // CV_16F + +#undef LAUNCH_THRESHOLD_KERNEL + +#define CALL_THRESHOLD_KERNEL(NAME) launch_threshold_kernel_##NAME + +extern "C" __global__ __aicore__ void threshold_opencv(GM_ADDR tilingGM, GM_ADDR x, GM_ADDR y) +{ + ThresholdOpencvTilingData tilingData; + auto tempTilingGM = (__gm__ uint8_t*)tilingGM; + auto tempTiling = (uint8_t*)&tilingData; + for (int32_t i = 0; i < sizeof(ThresholdOpencvTilingData) / sizeof(uint8_t); + ++i, ++tempTilingGM, ++tempTiling) + { + *tempTiling = *tempTilingGM; + } + + // AscendC can only call inline functions, function pointer can't be used here. + // Use Macro and switch case instead. + switch (tilingData.dtype) + { + case 0: + CALL_THRESHOLD_KERNEL(CV_8U)(&tilingData, x, y); + break; + case 1: + CALL_THRESHOLD_KERNEL(CV_8S)(&tilingData, x, y); + break; + case 3: + CALL_THRESHOLD_KERNEL(CV_16S)(&tilingData, x, y); + break; + case 4: + CALL_THRESHOLD_KERNEL(CV_32S)(&tilingData, x, y); + break; + case 5: + CALL_THRESHOLD_KERNEL(CV_32F)(&tilingData, x, y); + break; + case 7: + CALL_THRESHOLD_KERNEL(CV_16F)(&tilingData, x, y); + break; + case 2: case 6: default: // CV_16U, CV_64F + break; + } + // Clear tiling GM cache manually. 
(cce compiler bug) + dcci(tilingGM, 1); +} + +#ifndef __CCE_KT_TEST__ +void threshold_opencv_kernel(uint32_t blockDim, void* l2ctrl, void* stream, uint8_t* tiling, + uint8_t* x, uint8_t* y) +{ + threshold_opencv<<>>(tiling, x, y); +} +#endif diff --git a/modules/cannops/ascendc_kernels/vector_tiling.h b/modules/cannops/ascendc_kernels/vector_tiling.h new file mode 100644 index 00000000000..e00dd423c38 --- /dev/null +++ b/modules/cannops/ascendc_kernels/vector_tiling.h @@ -0,0 +1,77 @@ +#ifndef TILING_KERNEL_H +#define TILING_KERNEL_H + +#ifdef __CCE_KT_TEST__ +#define __aicore__ +#else +#define __aicore__ [aicore] +#endif + +inline __aicore__ int32_t AlignNCeil(int32_t n, int32_t align) { return ((n + align) & ~(align-1)); } + +inline __aicore__ int32_t AlignNFloor(int32_t n, int32_t align) { return (n & ~(align-1)); } + +constexpr int32_t BUFFER_NUM = 2; +constexpr int32_t UB_BUF_LEN = 248 * 1024; + +struct VectorTiling { + __aicore__ inline void calculate(uint64_t _totalLength, uint64_t _blockNum, + uint64_t _blockIdx, uint64_t _variableBytesPerElem, uint32_t _align) { + totalLength = _totalLength; + blockNum = _blockNum; + blockIdx = _blockIdx; + variableBytesPerElem = _variableBytesPerElem; + blockLength = 0; + blockOffset = 0; + align = _align; + GetBlockLengthAndOffset(); + GetLoopLengthAndCount(); +#ifdef __CCE_KT_TEST__ + std::cout << "Block(" << blockIdx << "): BlockLength = " << blockLength + << ", BlockOffset = " << blockOffset + << ", LoopLength = " << loopLength + << ", LoopCount = " << loopCount + << ", LoopTailLength = " << loopTailLength << std::endl; +#endif + } + + __aicore__ inline void GetBlockLengthAndOffset() { + // Data should Align by 32B. + uint32_t fullBlockLength = AlignNCeil(totalLength / blockNum, 32); + // Some core may get no data after Align32 Ceil. 
+ uint32_t fullBlockNum = totalLength / fullBlockLength; + uint32_t blockTailLength = totalLength % fullBlockLength; + + if (blockIdx < fullBlockNum) { + blockLength = fullBlockLength; + blockOffset = blockIdx * blockLength; + // Last block must less than full block num. + } else if (blockTailLength != 0 && blockIdx == fullBlockNum) { + blockLength = blockTailLength; + blockOffset = blockIdx * fullBlockLength; + } + } + + /** + * @brief Get length for one loop and loop count. + * Use as much UB buf as possible. + */ + __aicore__ inline void GetLoopLengthAndCount() { + loopLength = AlignNFloor(UB_BUF_LEN / variableBytesPerElem, align); + loopCount = blockLength / loopLength; + loopTailLength = blockLength - (loopLength * loopCount); + } + + uint64_t totalLength; + uint64_t blockNum; + uint64_t blockIdx; + uint64_t variableBytesPerElem; + uint32_t blockLength; + uint32_t blockOffset; + uint32_t loopLength; + uint32_t loopCount; + uint32_t loopTailLength; + uint32_t align; +}; + +#endif // TILING_KERNEL_H diff --git a/modules/cannops/include/opencv2/ascendc_kernels.hpp b/modules/cannops/include/opencv2/ascendc_kernels.hpp new file mode 100644 index 00000000000..714b6460fd7 --- /dev/null +++ b/modules/cannops/include/opencv2/ascendc_kernels.hpp @@ -0,0 +1,7 @@ +#ifndef ASCENDC_KERNELS_H +#define KERNEL_TILINASCENDC_KERNELS_HG_H + +#include "../../ascendc_kernels/kernel_tiling_types.h" +#include "aclrtlaunch_threshold_opencv.h" + +#endif //ASCENDC_KERNELS_H diff --git a/modules/cannops/include/opencv2/cann_call.hpp b/modules/cannops/include/opencv2/cann_call.hpp index 651bff8bba0..e21f339db96 100644 --- a/modules/cannops/include/opencv2/cann_call.hpp +++ b/modules/cannops/include/opencv2/cann_call.hpp @@ -9,7 +9,9 @@ #include #include #include -#include "opencv2/cann.hpp" +#include "cann.hpp" +#include "stream_accessor.hpp" +#include "ascendc_kernels.hpp" class aclopAttr; @@ -17,6 +19,15 @@ namespace cv { namespace cann { +CV_EXPORTS void checkAclError(aclError err, 
const char* file, const int line, const char* func); +void checkAclPtr(void* ptr, const char* file, const int line, const char* func); +#define CV_ACL_SAFE_CALL(expr) checkAclError((expr), __FILE__, __LINE__, CV_Func) +#define CV_ACL_SAFE_CALL_PTR(expr) \ + ({ \ + auto ptr = (expr); \ + checkAclPtr(ptr, __FILE__, __LINE__, CV_Func); \ + ptr; \ + }) // Warpper for functions in CANN, callers should not call CANN's api directly, but should call the // function provided in cann_call. void aclrtMallocWarpper(void** data, size_t size); @@ -39,7 +50,7 @@ void aclrtMemsetWarpper(std::shared_ptr& ptr, int32_t value, size_t count //! Type mapping between opencv and cann. aclDataType getACLType(int opencvdepth); //! Malloc and upload raw data to devices. -std::shared_ptr mallocAndUpload(const void* data, size_t size, AscendStream& stream, +CV_EXPORTS std::shared_ptr mallocAndUpload(const void* data, size_t size, AscendStream& stream, AscendMat::Allocator* allocator); /** * @brief Warpper of CANN streams. @@ -151,6 +162,19 @@ class OperatorRunner OperatorRunner& run(AscendStream& stream); }; +template +void kernel_launch(KERNEL_TYPE kernel, AscendStream& stream, TILING_TYPE& tiling, ARGS... 
args) +{ + std::shared_ptr tilingDevice = + mallocAndUpload(&tiling, sizeof(TILING_TYPE), stream, AscendMat::defaultAllocator()); + aclrtStream rawStream = AscendStreamAccessor::getStream(stream); + CV_ACL_SAFE_CALL(kernel(1, rawStream, tilingDevice.get(), args...)); + if (rawStream == nullptr) + { + stream.waitForCompletion(); + } +} + } // namespace cann } // namespace cv diff --git a/modules/cannops/perf/perf_element_operations.cpp b/modules/cannops/perf/perf_element_operations.cpp index 0612abe6085..4527346e190 100644 --- a/modules/cannops/perf/perf_element_operations.cpp +++ b/modules/cannops/perf/perf_element_operations.cpp @@ -207,5 +207,25 @@ PERF_TEST_P(CPU, MAT_BITWISE_NOT_MAT, testing::Combine(TYPICAL_ASCEND_MAT_SIZES, SANITY_CHECK_NOTHING(); } +PERF_TEST_P(NPU, THRESHOLD_ASCENDC, testing::Combine(TYPICAL_ASCEND_MAT_SIZES, Values(CV_8U, CV_16S, CV_32F))) +{ + Mat mat(GET_PARAM(0), GET_PARAM(1)); + AscendMat dst; + AscendMat src; + src.upload(mat); + declare.in(mat, WARMUP_RNG); + TEST_CYCLE_N(10) { cv::cann::threshold(src, dst, 100.0, 255.0, cv::THRESH_BINARY); } + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(CPU, THRESHOLD, testing::Combine(TYPICAL_ASCEND_MAT_SIZES, Values(CV_8U, CV_16S, CV_32F))) +{ + Mat mat(GET_PARAM(0), GET_PARAM(1)); + Mat dst; + declare.in(mat, WARMUP_RNG); + TEST_CYCLE_N(10) { cv::threshold(mat, dst, 100.0, 255.0, cv::THRESH_BINARY); } + SANITY_CHECK_NOTHING(); +} + } // namespace } // namespace opencv_test diff --git a/modules/cannops/src/ascend_mat.cpp b/modules/cannops/src/ascend_mat.cpp index ba17a545bb7..dde838c8d37 100644 --- a/modules/cannops/src/ascend_mat.cpp +++ b/modules/cannops/src/ascend_mat.cpp @@ -23,7 +23,11 @@ std::shared_ptr DefaultAllocator::allocate(size_t size) bool DefaultAllocator::allocate(cv::cann::AscendMat* mat, int rows, int cols, size_t elemSize) { - mat->data = allocate(elemSize * cols * rows); + size_t totalBytes = elemSize * cols * rows; + + // align by 32B. 
+ totalBytes = ((totalBytes + 32) & ~31); + mat->data = allocate(totalBytes); mat->step = cols * elemSize; return true; diff --git a/modules/cannops/src/cann_call.cpp b/modules/cannops/src/cann_call.cpp index 3b83052ccbe..97d49d66fd1 100644 --- a/modules/cannops/src/cann_call.cpp +++ b/modules/cannops/src/cann_call.cpp @@ -11,7 +11,7 @@ namespace cv namespace cann { /*******************************Acl Error Checker*****************************/ -static inline void checkAclError(aclError err, const char* file, const int line, const char* func) +void checkAclError(aclError err, const char* file, const int line, const char* func) { if (ACL_SUCCESS != err) { @@ -20,7 +20,7 @@ static inline void checkAclError(aclError err, const char* file, const int line, } } -static inline void checkAclPtr(void* ptr, const char* file, const int line, const char* func) +void checkAclPtr(void* ptr, const char* file, const int line, const char* func) { if (nullptr == ptr) { @@ -29,14 +29,6 @@ static inline void checkAclPtr(void* ptr, const char* file, const int line, cons } } -#define CV_ACL_SAFE_CALL(expr) checkAclError((expr), __FILE__, __LINE__, CV_Func) -#define CV_ACL_SAFE_CALL_PTR(expr) \ - ({ \ - auto ptr = (expr); \ - checkAclPtr(ptr, __FILE__, __LINE__, CV_Func); \ - ptr; \ - }) - /******************************Acl Runtime Warpper****************************/ void aclrtMallocWarpper(void** data, size_t size) { diff --git a/modules/cannops/src/element_operations.cpp b/modules/cannops/src/element_operations.cpp index 402658369b5..48d9edb596b 100644 --- a/modules/cannops/src/element_operations.cpp +++ b/modules/cannops/src/element_operations.cpp @@ -3,6 +3,7 @@ // of this distribution and at http://opencv.org/license.html. 
#include "precomp.hpp" + namespace cv { namespace cann @@ -110,8 +111,8 @@ static void convert(const Scalar& src, Scalar& dst, AscendStream& stream) } template -static void arithm_op(const T1& src1, const T2& src2, AscendMat& dst, const AscendMat& mask, float scale, - int dtype, const char* op, AscendStream& stream) +static void arithm_op(const T1& src1, const T2& src2, AscendMat& dst, const AscendMat& mask, + float scale, int dtype, const char* op, AscendStream& stream) { T1 castedSrc1; T2 castedSrc2; @@ -170,8 +171,9 @@ static void arithm_op(const T1& src1, const T2& src2, AscendMat& dst, const Asce } } -static void arithm_op(const InputArray _src1, const InputArray _src2, OutputArray _dst, const InputArray _mask, - float scale, int dtype, const char* op, AscendStream& stream) +static void arithm_op(const InputArray _src1, const InputArray _src2, OutputArray _dst, + const InputArray _mask, float scale, int dtype, const char* op, + AscendStream& stream) { const bool isScalar1 = (_src1.kind() == _InputArray::MATX); const bool isScalar2 = (_src2.kind() == _InputArray::MATX); @@ -213,56 +215,54 @@ static void arithm_op(const InputArray _src1, const InputArray _src2, OutputArra } // In order to supply more interfaces, differnet function declaration shoule be done. 
-void add(const InputArray src1, const InputArray src2, OutputArray dst, const InputArray mask, int dtype, - AscendStream& stream) +void add(const InputArray src1, const InputArray src2, OutputArray dst, const InputArray mask, + int dtype, AscendStream& stream) { arithm_op(src1, src2, dst, mask, 1, dtype, "Add", stream); } -void add(const AscendMat& src1, const AscendMat& src2, AscendMat& dst, const AscendMat& mask, int dtype, - AscendStream& stream) +void add(const AscendMat& src1, const AscendMat& src2, AscendMat& dst, const AscendMat& mask, + int dtype, AscendStream& stream) { arithm_op(src1, src2, dst, mask, 1, dtype, "Add", stream); } -void add(const AscendMat& src1, const Scalar& src2, AscendMat& dst, const AscendMat& mask, int dtype, - AscendStream& stream) +void add(const AscendMat& src1, const Scalar& src2, AscendMat& dst, const AscendMat& mask, + int dtype, AscendStream& stream) { arithm_op(src1, src2, dst, mask, 1, dtype, "Add", stream); } -void add(const Scalar& src1, const AscendMat& src2, AscendMat& dst, const AscendMat& mask, int dtype, - AscendStream& stream) +void add(const Scalar& src1, const AscendMat& src2, AscendMat& dst, const AscendMat& mask, + int dtype, AscendStream& stream) { arithm_op(src1, src2, dst, mask, 1, dtype, "Add", stream); } - -void subtract(const InputArray src1, const InputArray src2, OutputArray dst, const InputArray mask, int dtype, - AscendStream& stream) +void subtract(const InputArray src1, const InputArray src2, OutputArray dst, const InputArray mask, + int dtype, AscendStream& stream) { arithm_op(src1, src2, dst, mask, 1, dtype, "Sub", stream); } -void subtract(const AscendMat& src1, const AscendMat& src2, AscendMat& dst, const AscendMat& mask, int dtype, - AscendStream& stream) +void subtract(const AscendMat& src1, const AscendMat& src2, AscendMat& dst, const AscendMat& mask, + int dtype, AscendStream& stream) { arithm_op(src1, src2, dst, mask, 1, dtype, "Sub", stream); } -void subtract(const AscendMat& src1, const 
Scalar& src2, AscendMat& dst, const AscendMat& mask, int dtype, - AscendStream& stream) +void subtract(const AscendMat& src1, const Scalar& src2, AscendMat& dst, const AscendMat& mask, + int dtype, AscendStream& stream) { arithm_op(src1, src2, dst, mask, 1, dtype, "Sub", stream); } -void subtract(const Scalar& src1, const AscendMat& src2, AscendMat& dst, const AscendMat& mask, int dtype, - AscendStream& stream) +void subtract(const Scalar& src1, const AscendMat& src2, AscendMat& dst, const AscendMat& mask, + int dtype, AscendStream& stream) { arithm_op(src1, src2, dst, mask, 1, dtype, "Sub", stream); } - void multiply(const InputArray src1, const InputArray src2, OutputArray dst, float scale, int dtype, AscendStream& stream) { @@ -287,7 +287,6 @@ void multiply(const Scalar& src1, const AscendMat& src2, AscendMat& dst, float s arithm_op(src1, src2, dst, AscendMat(), scale, dtype, "Mul", stream); } - void divide(const InputArray src1, const InputArray src2, OutputArray dst, float scale, int dtype, AscendStream& stream) { @@ -312,15 +311,14 @@ void divide(const Scalar& src1, const AscendMat& src2, AscendMat& dst, float sca arithm_op(src1, src2, dst, AscendMat(), scale, dtype, "RealDiv", stream); } - -void bitwise_and(const InputArray src1, const InputArray src2, OutputArray dst, const InputArray mask, - AscendStream& stream) +void bitwise_and(const InputArray src1, const InputArray src2, OutputArray dst, + const InputArray mask, AscendStream& stream) { arithm_op(src1, src2, dst, mask, 1, -1, "BitwiseAnd", stream); } -void bitwise_and(const AscendMat& src1, const AscendMat& src2, AscendMat& dst, const AscendMat& mask, - AscendStream& stream) +void bitwise_and(const AscendMat& src1, const AscendMat& src2, AscendMat& dst, + const AscendMat& mask, AscendStream& stream) { arithm_op(src1, src2, dst, mask, 1, -1, "BitwiseAnd", stream); } @@ -337,9 +335,8 @@ void bitwise_and(const Scalar& src1, const AscendMat& src2, AscendMat& dst, cons arithm_op(src1, src2, dst, mask, 1, 
-1, "BitwiseAnd", stream); } - -void bitwise_or(const InputArray src1, const InputArray src2, OutputArray dst, const InputArray mask, - AscendStream& stream) +void bitwise_or(const InputArray src1, const InputArray src2, OutputArray dst, + const InputArray mask, AscendStream& stream) { arithm_op(src1, src2, dst, mask, 1, -1, "BitwiseOr", stream); } @@ -362,15 +359,14 @@ void bitwise_or(const Scalar& src1, const AscendMat& src2, AscendMat& dst, const arithm_op(src1, src2, dst, mask, 1, -1, "BitwiseOr", stream); } - -void bitwise_xor(const InputArray src1, const InputArray src2, OutputArray dst, const InputArray mask, - AscendStream& stream) +void bitwise_xor(const InputArray src1, const InputArray src2, OutputArray dst, + const InputArray mask, AscendStream& stream) { arithm_op(src1, src2, dst, mask, 1, -1, "BitwiseXor", stream); } -void bitwise_xor(const AscendMat& src1, const AscendMat& src2, AscendMat& dst, const AscendMat& mask, - AscendStream& stream) +void bitwise_xor(const AscendMat& src1, const AscendMat& src2, AscendMat& dst, + const AscendMat& mask, AscendStream& stream) { arithm_op(src1, src2, dst, mask, 1, -1, "BitwiseXor", stream); } @@ -387,7 +383,6 @@ void bitwise_xor(const Scalar& src1, const AscendMat& src2, AscendMat& dst, cons arithm_op(src1, src2, dst, mask, 1, -1, "BitwiseXor", stream); } - void bitwise_not(const InputArray src, OutputArray dst, const InputArray mask, AscendStream& stream) { arithm_op(src, noArray(), dst, mask, 1, -1, "Invert", stream); @@ -398,9 +393,8 @@ void bitwise_not(const AscendMat& src, AscendMat& dst, const AscendMat& mask, As arithm_op(src, AscendMat(), dst, mask, 1, -1, "Invert", stream); } - -void addWeighted(const AscendMat& src1, double alpha, const AscendMat& src2, double beta, double gamma, - AscendMat& dst, int dtype, AscendStream& stream) +void addWeighted(const AscendMat& src1, double alpha, const AscendMat& src2, double beta, + double gamma, AscendMat& dst, int dtype, AscendStream& stream) { if (dtype < 0) 
dtype = src1.depth(); @@ -421,8 +415,8 @@ void addWeighted(const AscendMat& src1, double alpha, const AscendMat& src2, dou arithm_op(srcWeightedSumRet, (float)gamma, dst, "Adds", stream); } -void addWeighted(const InputArray _src1, double alpha, const InputArray _src2, double beta, double gamma, - OutputArray _dst, int dtype, AscendStream& stream) +void addWeighted(const InputArray _src1, double alpha, const InputArray _src2, double beta, + double gamma, OutputArray _dst, int dtype, AscendStream& stream) { AscendMat src1, src2, dst; src1.upload(_src1, stream); @@ -442,45 +436,23 @@ double threshold(const AscendMat& src, AscendMat& dst, double thresh, double max dst.create(src.rows, src.cols, src.type()); - OperatorRunner runner; - runner.setOp("Threshold") - .addInput(src, "x") - .addOutput(threshMat, "y") - .addAttr((float)thresh, "threshold") - .run(stream); - - // THRESH_*_INV, THRESH_TRUNC need a inverse threshMat. - // THRESH_BINARY_INV = 1, THRESH_TRUNC = 2, THRESH_TOZERO_INV = 4, - if (type == 1 || type == 2 || type == 4) + if (src.depth() == CV_8U || src.depth() == CV_8S || src.depth() == CV_16S || + src.depth() == CV_32S || src.depth() == CV_32F || src.depth() == CV_16F) { - AscendMat threshInvMat(src.size(), src.type()); - AscendMat ones(src.size(), src.type()); - Scalar s(1, 1, 1, 1); - ones.setTo(s, stream); - arithm_op(ones, threshMat, threshInvMat, "Sub", stream); - - if (type == 1) - arithm_op(threshInvMat, (float)maxval, dst, "Muls", stream); - else if (type == 2) - { - AscendMat ToZeroInvMat(src.size(), src.type()); - AscendMat TruncMat(src.size(), src.type()); - arithm_op(threshInvMat, src, ToZeroInvMat, "Mul", stream); - arithm_op(threshMat, (float)thresh, TruncMat, "Muls", stream); - arithm_op(ToZeroInvMat, TruncMat, dst, "Add", stream); - } - else - arithm_op(threshInvMat, src, dst, "Mul", stream); + ThresholdOpencvTilingData tiling; + tiling.maxVal = maxval; + tiling.thresh = thresh; + // AscendMat memory will be align to 32B, it's safe to set 
totalLengh a little bigger. + size_t totalBytes = src.rows * src.cols * src.channels(); + tiling.totalLength = ((totalBytes + 32) & ~31); + tiling.threshType = type; + tiling.dtype = src.depth(); + + kernel_launch(aclrtlaunch_threshold_opencv, stream, tiling, src.data.get(), dst.data.get()); } else - { - if (type == 0) /* THRESH_BINARY = 0 */ - arithm_op(threshMat, (float)maxval, dst, "Muls", stream); - else if (type == 3) /* THRESH_TOZERO = 3 */ - arithm_op(threshMat, src, dst, "Mul", stream); - else - CV_Error(Error::StsError, "Unknown/unsupported threshold type"); - } + CV_Error(Error::StsUnsupportedFormat, ""); + return thresh; } diff --git a/modules/cannops/src/precomp.hpp b/modules/cannops/src/precomp.hpp index 8411cc40407..53ed398fde3 100644 --- a/modules/cannops/src/precomp.hpp +++ b/modules/cannops/src/precomp.hpp @@ -10,5 +10,6 @@ #include "opencv2/cann_call.hpp" #include "opencv2/cann_interface.hpp" #include "opencv2/cann_private.hpp" +#include "opencv2/ascendc_kernels.hpp" #endif /* __OPENCV_PRECOMP_H__ */ diff --git a/modules/cannops/test/test_element_operations.cpp b/modules/cannops/test/test_element_operations.cpp index 76c103a65f4..730d2912d95 100644 --- a/modules/cannops/test/test_element_operations.cpp +++ b/modules/cannops/test/test_element_operations.cpp @@ -678,7 +678,6 @@ TEST(ELEMENTWISE_OP, MAT_THRESHOLD) for (int i = 0; i <= 4; i++) { cv::threshold(cpuMat, cpuOpRet, 128, 250, i); - // TODO find the reason empty AscendMat is not continuous. 
cv::cann::threshold(ascendMat16F, aclOpRet, 128, 250, i); aclOpRet.convertTo(aclOpRet16S, CV_16S); aclOpRet16S.download(checker); @@ -693,5 +692,37 @@ TEST(ELEMENTWISE_OP, MAT_THRESHOLD) cv::cann::resetDevice(); } +TEST(ELEMENTWISE_OP, MAT_THRESHOLD_ASCENDC) +{ + cv::cann::setDevice(DEVICE_ID); + Mat cpuRet, npuRet; + AscendMat npuImg, npuTmpMat; + + // opencv do not support CV_8S, CV_32S, CV_16F + // ascend do not support CV_16U, CV_64F + uint8_t dtypes[] = {CV_8U, CV_16S, CV_32F}; + + for (uint i = 0; i <= 4; i++) + { + for (uint j = 0; j < sizeof(dtypes) / sizeof(dtypes[0]); j++) + { + double thresh = 90.5; + double maxVal = 85.2; + + Mat img = randomMat(10, 10, CV_MAKETYPE(dtypes[j], 3), 0.0f, 128.0f); + npuImg.upload(img); + npuTmpMat.create(npuImg.rows, npuImg.cols, npuImg.type()); + + cv::threshold(img, cpuRet, thresh, maxVal, i); + cv::cann::threshold(npuImg, npuTmpMat, thresh, maxVal, i); + + npuTmpMat.download(npuRet); + EXPECT_MAT_NEAR(cpuRet, npuRet, 10.0f); + } + } + + cv::cann::resetDevice(); +} + } // namespace } // namespace opencv_test diff --git a/modules/cannops/test/test_kernel.cpp b/modules/cannops/test/test_kernel.cpp new file mode 100644 index 00000000000..ac0996a27b6 --- /dev/null +++ b/modules/cannops/test/test_kernel.cpp @@ -0,0 +1,51 @@ +#include "test_precomp.hpp" +#include "opencv2/cann_call.hpp" + +namespace opencv_test +{ +namespace +{ + +TEST(ASCENDC_KERNEL, THRESHOLD) +{ + cv::cann::setDevice(DEVICE_ID); + Mat cpuRet, npuRet; + AscendMat npuImg, npuTmpMat; + + // opencv do not support CV_8S, CV_32S, CV_16F + // ascend do not support CV_16U, CV_64F + uint8_t dtypes[] = {CV_8U, CV_16S, CV_32F}; + + for (uint i = 0; i <= 4; i++) + { + for (uint j = 0; j < sizeof(dtypes) / sizeof(dtypes[0]); j++) + { + double thresh = 90.5; + double maxVal = 85.2; + + Mat img = randomMat(10, 10, CV_MAKETYPE(dtypes[j], 3), 0.0f, 128.0f); + npuImg.upload(img); + npuTmpMat.create(npuImg.rows, npuImg.cols, npuImg.type()); + + cv::threshold(img, cpuRet, 
thresh, maxVal, i); + ThresholdOpencvTilingData tiling; + tiling.maxVal = maxVal; + tiling.thresh = thresh; + size_t totalBytes = img.rows * img.cols * img.channels(); + // AscendMat memory will be align to 32B, it's safe to set totalLengh a little bigger. + tiling.totalLength = ((totalBytes + 32) & ~31); + tiling.threshType = i; + tiling.dtype = dtypes[j]; + kernel_launch(aclrtlaunch_threshold_opencv, AscendStream::Null(), tiling, + npuImg.data.get(), npuTmpMat.data.get()); + + npuTmpMat.download(npuRet); + EXPECT_MAT_NEAR(cpuRet, npuRet, 10.0f); + } + } + + cv::cann::resetDevice(); +} + +} // namespace +} // namespace opencv_test diff --git a/modules/cannops/test/test_precomp.hpp b/modules/cannops/test/test_precomp.hpp index f7bdbea0b08..74cfcb11ee9 100644 --- a/modules/cannops/test/test_precomp.hpp +++ b/modules/cannops/test/test_precomp.hpp @@ -9,6 +9,7 @@ #include "opencv2/cann.hpp" #include "opencv2/ts/cuda_test.hpp" #include "opencv2/cann_interface.hpp" +#include "opencv2/ascendc_kernels.hpp" using namespace cv; using namespace cv::cann; From 09ed18d2802d892755ff168d9b6677c462ae4ec2 Mon Sep 17 00:00:00 2001 From: MengqingCao Date: Wed, 21 Feb 2024 10:33:09 +0800 Subject: [PATCH 02/19] Remove redundant code 1. remove threshold_opencv_kernel 2. typo ASCENDC_KERNELS_H 3. 
add ALIGN_UP macro --- .../cannops/ascendc_kernels/threshold_opencv_kernel.cpp | 8 -------- modules/cannops/include/opencv2/ascendc_kernels.hpp | 2 +- modules/cannops/src/element_operations.cpp | 2 +- modules/cannops/src/precomp.hpp | 1 + 4 files changed, 3 insertions(+), 10 deletions(-) diff --git a/modules/cannops/ascendc_kernels/threshold_opencv_kernel.cpp b/modules/cannops/ascendc_kernels/threshold_opencv_kernel.cpp index ffab30ebd54..7fa1867c8b1 100644 --- a/modules/cannops/ascendc_kernels/threshold_opencv_kernel.cpp +++ b/modules/cannops/ascendc_kernels/threshold_opencv_kernel.cpp @@ -377,11 +377,3 @@ extern "C" __global__ __aicore__ void threshold_opencv(GM_ADDR tilingGM, GM_ADDR // Clear tiling GM cache manually. (cce compiler bug) dcci(tilingGM, 1); } - -#ifndef __CCE_KT_TEST__ -void threshold_opencv_kernel(uint32_t blockDim, void* l2ctrl, void* stream, uint8_t* tiling, - uint8_t* x, uint8_t* y) -{ - threshold_opencv<<>>(tiling, x, y); -} -#endif diff --git a/modules/cannops/include/opencv2/ascendc_kernels.hpp b/modules/cannops/include/opencv2/ascendc_kernels.hpp index 714b6460fd7..b030920a62f 100644 --- a/modules/cannops/include/opencv2/ascendc_kernels.hpp +++ b/modules/cannops/include/opencv2/ascendc_kernels.hpp @@ -1,5 +1,5 @@ #ifndef ASCENDC_KERNELS_H -#define KERNEL_TILINASCENDC_KERNELS_HG_H +#define ASCENDC_KERNELS_H #include "../../ascendc_kernels/kernel_tiling_types.h" #include "aclrtlaunch_threshold_opencv.h" diff --git a/modules/cannops/src/element_operations.cpp b/modules/cannops/src/element_operations.cpp index 48d9edb596b..cacf6e6cff1 100644 --- a/modules/cannops/src/element_operations.cpp +++ b/modules/cannops/src/element_operations.cpp @@ -444,7 +444,7 @@ double threshold(const AscendMat& src, AscendMat& dst, double thresh, double max tiling.thresh = thresh; // AscendMat memory will be align to 32B, it's safe to set totalLengh a little bigger. 
size_t totalBytes = src.rows * src.cols * src.channels(); - tiling.totalLength = ((totalBytes + 32) & ~31); + tiling.totalLength = ALIGN_UP(totalBytes, 32); tiling.threshType = type; tiling.dtype = src.depth(); diff --git a/modules/cannops/src/precomp.hpp b/modules/cannops/src/precomp.hpp index 53ed398fde3..8aadaf4d8de 100644 --- a/modules/cannops/src/precomp.hpp +++ b/modules/cannops/src/precomp.hpp @@ -11,5 +11,6 @@ #include "opencv2/cann_interface.hpp" #include "opencv2/cann_private.hpp" #include "opencv2/ascendc_kernels.hpp" +#define ALIGN_UP(num, align) (((num) + (align) - 1) & ~((align) - 1)) #endif /* __OPENCV_PRECOMP_H__ */ From 1ad3ad5f856aac752403bf7725e75b491aa4d975 Mon Sep 17 00:00:00 2001 From: Dhanwanth1803 <147172285+Dhanwanth1803@users.noreply.github.com> Date: Tue, 5 Mar 2024 15:17:19 +0530 Subject: [PATCH 03/19] Merge pull request #3646 from Dhanwanth1803:latch Fixes #25081:Latch input corruption fix #3646 Fixes https://github.com/opencv/opencv/issues/25081 As mentioned by WennPaper making it `grayImage = image.clone();` will make a deep copy. It prevents the `InputArray` from being modified. - [X] I agree to contribute to the project under Apache 2 License. - [X] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [X] The PR is proposed to the proper branch - [X] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. 
- [ ] The feature is well documented and sample code can be built with the project CMake --- Co-authored-by: Dhanwanth1803 Co-authored-by: Dmitry Kurtaev --- modules/xfeatures2d/src/latch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/xfeatures2d/src/latch.cpp b/modules/xfeatures2d/src/latch.cpp index da5041fa0ae..49cd8f58947 100644 --- a/modules/xfeatures2d/src/latch.cpp +++ b/modules/xfeatures2d/src/latch.cpp @@ -519,7 +519,7 @@ namespace cv switch (image.type()) { case CV_8UC1: - grayImage = image; + grayImage = sigma_ ? image.clone() : image; break; case CV_8UC3: cvtColor(image, grayImage, COLOR_BGR2GRAY); From 1aaf6e1c8ba7472741adb0f90b6d11d2aeeeae1d Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Tue, 5 Mar 2024 16:20:40 +0300 Subject: [PATCH 04/19] Merge pull request #3638 from mshabunin:doc-upgrade Documentation transition to fresh Doxygen #3638 Merge with https://github.com/opencv/opencv/pull/25042 --- .../include/opencv2/bioinspired/retina.hpp | 51 +---------- .../samples/default_retina_config.xml | 24 ++++++ .../samples/realistic_retina_config.xml | 24 ++++++ .../retina_model/retina_model.markdown | 12 +-- modules/cannops/include/opencv2/cann.hpp | 4 +- .../include/opencv2/cann_interface.hpp | 4 +- .../include/opencv2/cudaimgproc.hpp | 1 - .../tutorials/benchmark/sr_benchmark.markdown | 29 ++----- .../face/include/opencv2/face/facemark.hpp | 9 +- .../include/opencv2/face/facemark_train.hpp | 6 -- .../face_landmark_trainer.markdown | 6 +- modules/fuzzy/include/opencv2/fuzzy.hpp | 10 +-- modules/hdf/include/opencv2/hdf.hpp | 14 ++- .../mcc/include/opencv2/mcc/checker_model.hpp | 1 - modules/rgbd/include/opencv2/rgbd/dynafu.hpp | 7 +- modules/sfm/include/opencv2/sfm.hpp | 17 ++-- .../opencv2/stereo/quasi_dense_stereo.hpp | 6 +- modules/text/include/opencv2/text.hpp | 86 +++++++++---------- modules/text/include/opencv2/text/ocr.hpp | 1 - .../videostab/include/opencv2/videostab.hpp | 32 ++++--- 
modules/viz/include/opencv2/viz.hpp | 29 +++---- .../include/opencv2/xfeatures2d.hpp | 13 ++- .../include/opencv2/xfeatures2d/nonfree.hpp | 3 + modules/ximgproc/include/opencv2/ximgproc.hpp | 36 ++++---- .../include/opencv2/ximgproc/color_match.hpp | 2 + .../opencv2/ximgproc/deriche_filter.hpp | 2 + .../ximgproc/edgepreserving_filter.hpp | 4 +- .../opencv2/ximgproc/fast_hough_transform.hpp | 3 +- .../opencv2/ximgproc/paillou_filter.hpp | 2 + .../include/opencv2/ximgproc/peilin.hpp | 2 + .../ximgproc/run_length_morphology.hpp | 2 + 31 files changed, 214 insertions(+), 228 deletions(-) create mode 100644 modules/bioinspired/samples/default_retina_config.xml create mode 100644 modules/bioinspired/samples/realistic_retina_config.xml diff --git a/modules/bioinspired/include/opencv2/bioinspired/retina.hpp b/modules/bioinspired/include/opencv2/bioinspired/retina.hpp index 8e6eda93cae..478b6a0f75c 100644 --- a/modules/bioinspired/include/opencv2/bioinspired/retina.hpp +++ b/modules/bioinspired/include/opencv2/bioinspired/retina.hpp @@ -94,57 +94,12 @@ enum { Here is the default configuration file of the retina module. It gives results such as the first retina output shown on the top of this page. - @code{xml} - - - - 1 - 1 - 7.5e-01 - 9.0e-01 - 5.3e-01 - 0.01 - 0.5 - 7. - 7.5e-01 - - 1 - 0. - 0. - 7. - 2.0e+00 - 9.5e-01 - 0. - 7. - - @endcode + @include default_retina_config.xml Here is the 'realistic" setup used to obtain the second retina output shown on the top of this page. - @code{xml} - - - - 1 - 1 - 8.9e-01 - 9.0e-01 - 5.3e-01 - 0.3 - 0.5 - 7. - 8.9e-01 - - 1 - 0. - 0. - 7. - 2.0e+00 - 9.5e-01 - 0. - 7. - - @endcode + @include realistic_retina_config.xml + */ struct RetinaParameters{ //! 
Outer Plexiform Layer (OPL) and Inner Plexiform Layer Parvocellular (IplParvo) parameters diff --git a/modules/bioinspired/samples/default_retina_config.xml b/modules/bioinspired/samples/default_retina_config.xml new file mode 100644 index 00000000000..469b5d58f10 --- /dev/null +++ b/modules/bioinspired/samples/default_retina_config.xml @@ -0,0 +1,24 @@ + + + + 1 + 1 + 7.5e-01 + 9.0e-01 + 5.3e-01 + 0.01 + 0.5 + 7. + 7.5e-01 + + + 1 + 0. + 0. + 7. + 2.0e+00 + 9.5e-01 + 0. + 7. + + diff --git a/modules/bioinspired/samples/realistic_retina_config.xml b/modules/bioinspired/samples/realistic_retina_config.xml new file mode 100644 index 00000000000..c02e79b3c6d --- /dev/null +++ b/modules/bioinspired/samples/realistic_retina_config.xml @@ -0,0 +1,24 @@ + + + + 1 + 1 + 8.9e-01 + 9.0e-01 + 5.3e-01 + 0.3 + 0.5 + 7. + 8.9e-01 + + + 1 + 0. + 0. + 7. + 2.0e+00 + 9.5e-01 + 0. + 7. + + diff --git a/modules/bioinspired/tutorials/retina_model/retina_model.markdown b/modules/bioinspired/tutorials/retina_model/retina_model.markdown index 37285bfa1c7..d71fe797bec 100644 --- a/modules/bioinspired/tutorials/retina_model/retina_model.markdown +++ b/modules/bioinspired/tutorials/retina_model/retina_model.markdown @@ -1,6 +1,8 @@ Retina and real-world vision {#tutorial_bioinspired_retina_model} ============================================================= +@tableofcontents + Goal ---- @@ -382,7 +384,7 @@ need to know if mean luminance information is required or not. If not, the the r significantly reduce its energy thus giving more visibility to higher spatial frequency details. -#### Basic parameters +## Basic parameters The simplest parameters are as follows : @@ -397,7 +399,7 @@ processing. You can expect much faster processing using gray levels : it would r product per pixel for all of the retina processes and it has recently been parallelized for multicore architectures. 
-#### Photo-receptors parameters +## Photo-receptors parameters The following parameters act on the entry point of the retina - photo-receptors - and has impact on all of the following processes. These sensors are low pass spatio-temporal filters that smooth temporal and @@ -421,7 +423,7 @@ and high frequency noise canceling. A good compromise for color images is a 0.53 value since such choice won't affect too much the color spectrum. Higher values would lead to gray and blurred output images. -#### Horizontal cells parameters +## Horizontal cells parameters This parameter set tunes the neural network connected to the photo-receptors, the horizontal cells. It modulates photo-receptors sensitivity and completes the processing for final spectral whitening @@ -446,7 +448,7 @@ It modulates photo-receptors sensitivity and completes the processing for final and luminance is already partly enhanced. The following parameters act on the last processing stages of the two outing retina signals. -#### Parvo (details channel) dedicated parameter +## Parvo (details channel) dedicated parameter - **ganglionCellsSensitivity** specifies the strength of the final local adaptation occurring at the output of this details' dedicated channel. Parameter values remain between 0 and 1. Low value @@ -455,7 +457,7 @@ of the two outing retina signals. **Note :** this parameter can correct eventual burned images by favoring low energetic details of the visual scene, even in bright areas. -#### IPL Magno (motion/transient channel) parameters +## IPL Magno (motion/transient channel) parameters Once image's information are cleaned, this channel acts as a high pass temporal filter that selects only the signals related to transient signals (events, motion, etc.). 
A low pass spatial filter diff --git a/modules/cannops/include/opencv2/cann.hpp b/modules/cannops/include/opencv2/cann.hpp index 30555dd8257..bd351481624 100644 --- a/modules/cannops/include/opencv2/cann.hpp +++ b/modules/cannops/include/opencv2/cann.hpp @@ -8,12 +8,12 @@ #include "opencv2/core.hpp" /** - @defgroup cann Ascend-accelerated Computer Vision + @defgroup cannops Ascend-accelerated Computer Vision @{ @defgroup canncore Core part @{ @defgroup cann_struct Data Structures - @defgroup cann_init Initializeation and Information + @defgroup cann_init Initialization and Information @} @} */ diff --git a/modules/cannops/include/opencv2/cann_interface.hpp b/modules/cannops/include/opencv2/cann_interface.hpp index 6667eb58519..6b13090f4f1 100644 --- a/modules/cannops/include/opencv2/cann_interface.hpp +++ b/modules/cannops/include/opencv2/cann_interface.hpp @@ -13,9 +13,9 @@ namespace cann { /** - @addtogroup cann + @addtogroup cannops @{ - @defgroup cannops Operations for Ascend Backend. + @defgroup cannops_ops Operations for Ascend Backend. @{ @defgroup cannops_elem Per-element Operations @defgroup cannops_core Core Operations on Matrices diff --git a/modules/cudaimgproc/include/opencv2/cudaimgproc.hpp b/modules/cudaimgproc/include/opencv2/cudaimgproc.hpp index d72700168cd..01e7c41ca9a 100644 --- a/modules/cudaimgproc/include/opencv2/cudaimgproc.hpp +++ b/modules/cudaimgproc/include/opencv2/cudaimgproc.hpp @@ -844,7 +844,6 @@ cv::Moments cvMoments = convertSpatialMoments(spatialMoments, order); ``` see the \a CUDA_TEST_P(Moments, Async) test inside opencv_contrib_source_code/modules/cudaimgproc/test/test_moments.cpp for an example. -@returns cv::Moments. 
@sa cuda::moments, cuda::convertSpatialMoments, cuda::numMoments, cuda::MomentsOrder */ CV_EXPORTS_W void spatialMoments(InputArray src, OutputArray moments, const bool binaryImage = false, const MomentsOrder order = MomentsOrder::THIRD_ORDER_MOMENTS, const int momentsType = CV_64F, Stream& stream = Stream::Null()); diff --git a/modules/dnn_superres/tutorials/benchmark/sr_benchmark.markdown b/modules/dnn_superres/tutorials/benchmark/sr_benchmark.markdown index 26244c9f8ae..3a4b88ef81b 100644 --- a/modules/dnn_superres/tutorials/benchmark/sr_benchmark.markdown +++ b/modules/dnn_superres/tutorials/benchmark/sr_benchmark.markdown @@ -50,14 +50,9 @@ Explanation Benchmarking results ----------- -Dataset benchmarking ----- - -###General100 dataset - -
+## General100 dataset -#####2x scaling factor +### 2x scaling factor | | Avg inference time in sec (CPU)| Avg PSNR | Avg SSIM | @@ -70,7 +65,7 @@ Dataset benchmarking | Nearest neighbor | 0.000114 | 29.1665 | 0.9049 | | Lanczos | 0.001094 | 32.4687 | 0.9327 | -#####3x scaling factor +### 3x scaling factor | | Avg inference time in sec (CPU)| Avg PSNR | Avg SSIM | | ------------- |:-------------------:| ---------:|--------:| @@ -83,7 +78,7 @@ Dataset benchmarking | Lanczos | 0.001012 |25.9115 |0.8706 | -#####4x scaling factor +### 4x scaling factor | | Avg inference time in sec (CPU)| Avg PSNR | Avg SSIM | | ------------- |:-------------------:| ---------:|--------:| @@ -96,14 +91,10 @@ Dataset benchmarking | Lanczos | 0.001012 |25.9115 |0.8706 | -
-Images ----- - -
+## Images -####2x scaling factor +### 2x scaling factor |Set5: butterfly.png | size: 256x256 | || |:-------------:|:-------------------:|:-------------:|:----:| @@ -112,7 +103,7 @@ Images ![ESPCN](images/espcn_butterfly.jpg)| ![FSRCNN](images/fsrcnn_butterfly.jpg) | ![LapSRN](images/lapsrn_butterfly.jpg) | ![EDSR](images/edsr_butterfly.jpg) |29.0341 / 0.9354 / **0.004157**| 29.0077 / 0.9345 / 0.006325 | 27.8212 / 0.9230 / 0.037937 | **30.0347** / **0.9453** / 2.077280 | -####3x scaling factor +### 3x scaling factor |Urban100: img_001.png | size: 1024x644 | || |:-------------:|:-------------------:|:-------------:|:----:| @@ -122,7 +113,7 @@ Images |28.0118 / 0.8588 / **0.030748**| 28.0184 / 0.8597 / 0.094173 | | **30.5671** / **0.9019** / 9.517580 | -####4x scaling factor +### 4x scaling factor |Set14: comic.png | size: 250x361 | || |:-------------:|:-------------------:|:-------------:|:----:| @@ -131,7 +122,7 @@ Images |![ESPCN](images/espcn_comic.jpg)| ![FSRCNN](images/fsrcnn_comic.jpg) | ![LapSRN](images/lapsrn_comic.jpg) | ![EDSR](images/edsr_comic.jpg) |20.0417 / 0.6302 / **0.001894**| 20.0885 / 0.6384 / 0.002103 | 20.0676 / 0.6339 / 0.061640 | **20.5233** / **0.6901** / 0.665876 | -####8x scaling factor +### 8x scaling factor |Div2K: 0006.png | size: 1356x2040 | | |:-------------:|:-------------------:|:-------------:| @@ -139,5 +130,3 @@ Images |PSRN / SSIM / Speed (CPU)| 26.3139 / **0.8033** / 0.001107| 23.8291 / 0.7340 / **0.000611** | |![Lanczos interpolation](images/lanczos_div2k.jpg)| ![LapSRN](images/lapsrn_div2k.jpg) | | |26.1565 / 0.7962 / 0.004782| **26.7046** / 0.7987 / 2.274290 | | - -
\ No newline at end of file diff --git a/modules/face/include/opencv2/face/facemark.hpp b/modules/face/include/opencv2/face/facemark.hpp index 86e9384342e..4e66727fe46 100644 --- a/modules/face/include/opencv2/face/facemark.hpp +++ b/modules/face/include/opencv2/face/facemark.hpp @@ -12,12 +12,6 @@ Mentor: Delia Passalacqua #ifndef __OPENCV_FACELANDMARK_HPP__ #define __OPENCV_FACELANDMARK_HPP__ -/** -@defgroup face Face Analysis -- @ref tutorial_table_of_content_facemark -- The Facemark API -*/ - #include "opencv2/core.hpp" #include @@ -25,6 +19,8 @@ Mentor: Delia Passalacqua namespace cv { namespace face { +//! @addtogroup face +//! @{ /** @brief Abstract base class for all facemark models @@ -88,6 +84,7 @@ CV_EXPORTS_W Ptr createFacemarkLBF(); //! construct a Kazemi facemark detector CV_EXPORTS_W Ptr createFacemarkKazemi(); +//! @} } // face } // cv diff --git a/modules/face/include/opencv2/face/facemark_train.hpp b/modules/face/include/opencv2/face/facemark_train.hpp index d6e27e9face..591c079a0d6 100644 --- a/modules/face/include/opencv2/face/facemark_train.hpp +++ b/modules/face/include/opencv2/face/facemark_train.hpp @@ -12,12 +12,6 @@ Mentor: Delia Passalacqua #ifndef __OPENCV_FACELANDMARKTRAIN_HPP__ #define __OPENCV_FACELANDMARKTRAIN_HPP__ -/** -@defgroup face Face Analysis -- @ref tutorial_table_of_content_facemark -- The Facemark API -*/ - #include "opencv2/face/facemark.hpp" #include "opencv2/objdetect.hpp" #include diff --git a/modules/face/tutorials/face_landmark/face_landmark_trainer.markdown b/modules/face/tutorials/face_landmark/face_landmark_trainer.markdown index 601a6b4c428..8fdeaa611d5 100644 --- a/modules/face/tutorials/face_landmark/face_landmark_trainer.markdown +++ b/modules/face/tutorials/face_landmark/face_landmark_trainer.markdown @@ -21,7 +21,7 @@ The above format is similar to HELEN dataset which is used for training the mode ./sample_train_landmark_detector -annotations=/home/sukhad/Downloads/code/trainset/ -config=config.xml 
-face_cascade=lbpcascadefrontalface.xml -model=trained_model.dat -width=460 -height=460 ``` -### Description of command parameters +## Description of command parameters > * **annotations** a : (REQUIRED) Path to annotations txt file [example - /data/annotations.txt] > * **config** c : (REQUIRED) Path to configuration xml file containing parameters for training.[ example - /data/config.xml] @@ -30,7 +30,7 @@ The above format is similar to HELEN dataset which is used for training the mode > * **height** h : (OPTIONAL) The height which you want all images to get to scale the annotations. Large images are slow to process [default = 460] > * **face_cascade** f (REQUIRED) Path to the face cascade xml file which you want to use as a detector. -### Description of training parameters +## Description of training parameters The configuration file described above which is used while training contains the training parameters which are required for training. @@ -49,7 +49,7 @@ The configuration file described above which is used while training contains the To get more detailed description about the training parameters you can refer to the [Research paper](https://pdfs.semanticscholar.org/d78b/6a5b0dcaa81b1faea5fb0000045a62513567.pdf). -### Understanding code +## Understanding code ![](images/3.jpg) diff --git a/modules/fuzzy/include/opencv2/fuzzy.hpp b/modules/fuzzy/include/opencv2/fuzzy.hpp index d660cc3615c..59f2a3f2a1f 100644 --- a/modules/fuzzy/include/opencv2/fuzzy.hpp +++ b/modules/fuzzy/include/opencv2/fuzzy.hpp @@ -52,19 +52,19 @@ Namespace for all functions is `ft`. The module brings implementation of the last image processing algorithms based on fuzzy mathematics. Method are named based on the pattern `FT`_degree_dimension`_`method. - @{ +@{ @defgroup f0_math Math with F0-transform support -Fuzzy transform (\f$F^0\f$-transform) of the 0th degree transforms whole image to a matrix of its components. 
These components are used in latter computation where each of them represents average color of certain subarea. + Fuzzy transform (\f$F^0\f$-transform) of the 0th degree transforms whole image to a matrix of its components. These components are used in latter computation where each of them represents average color of certain subarea. @defgroup f1_math Math with F1-transform support -Fuzzy transform (\f$F^1\f$-transform) of the 1th degree transforms whole image to a matrix of its components. Each component is polynomial of the 1th degree carrying information about average color and average gradient of certain subarea. + Fuzzy transform (\f$F^1\f$-transform) of the 1th degree transforms whole image to a matrix of its components. Each component is polynomial of the 1th degree carrying information about average color and average gradient of certain subarea. @defgroup f_image Fuzzy image processing -Image proceesing based on fuzzy mathematics namely F-transform. - @} + Image proceesing based on fuzzy mathematics namely F-transform. +@} */ diff --git a/modules/hdf/include/opencv2/hdf.hpp b/modules/hdf/include/opencv2/hdf.hpp index ff40426ff65..ac48e4b9ac8 100644 --- a/modules/hdf/include/opencv2/hdf.hpp +++ b/modules/hdf/include/opencv2/hdf.hpp @@ -41,17 +41,15 @@ This module provides storage routines for Hierarchical Data Format objects. - @{ +@{ @defgroup hdf5 Hierarchical Data Format version 5 -Hierarchical Data Format version 5 --------------------------------------------------------- + Hierarchical Data Format version 5 + -------------------------------------------------------- -In order to use it, the hdf5 library has to be installed, which -means cmake should find it using `find_package(HDF5)` . - - - @} + In order to use it, the hdf5 library has to be installed, which + means cmake should find it using `find_package(HDF5)`. 
+@} */ #endif diff --git a/modules/mcc/include/opencv2/mcc/checker_model.hpp b/modules/mcc/include/opencv2/mcc/checker_model.hpp index c13d5afc585..0768c691e05 100644 --- a/modules/mcc/include/opencv2/mcc/checker_model.hpp +++ b/modules/mcc/include/opencv2/mcc/checker_model.hpp @@ -116,7 +116,6 @@ class CV_EXPORTS_W CCheckerDraw virtual ~CCheckerDraw() {} /** \brief Draws the checker to the given image. * \param img image in color space BGR - * \return void */ CV_WRAP virtual void draw(InputOutputArray img) = 0; /** \brief Create a new CCheckerDraw object. diff --git a/modules/rgbd/include/opencv2/rgbd/dynafu.hpp b/modules/rgbd/include/opencv2/rgbd/dynafu.hpp index 32875ad5ac7..e5ad3447778 100644 --- a/modules/rgbd/include/opencv2/rgbd/dynafu.hpp +++ b/modules/rgbd/include/opencv2/rgbd/dynafu.hpp @@ -114,7 +114,6 @@ class CV_EXPORTS_W DynaFu virtual void renderSurface(OutputArray depthImage, OutputArray vertImage, OutputArray normImage, bool warp=true) = 0; }; -//! @} -} -} -#endif +} // dynafu:: +} // cv:: +#endif // __OPENCV_RGBD_DYNAFU_HPP__ diff --git a/modules/sfm/include/opencv2/sfm.hpp b/modules/sfm/include/opencv2/sfm.hpp index 25a3b10da5d..52c1af07e8e 100644 --- a/modules/sfm/include/opencv2/sfm.hpp +++ b/modules/sfm/include/opencv2/sfm.hpp @@ -75,7 +75,7 @@ This module has been originally developed as a project for Google Summer of Code - Notice that it is compiled only when Eigen, GLog and GFlags are correctly installed.\n Check installation instructions in the following tutorial: @ref tutorial_sfm_installation - @{ +@{ @defgroup conditioning Conditioning @defgroup fundamental Fundamental @defgroup io Input/Output @@ -85,18 +85,17 @@ This module has been originally developed as a project for Google Summer of Code @defgroup triangulation Triangulation @defgroup reconstruction Reconstruction - @note - - Notice that it is compiled only when Ceres Solver is correctly installed.\n - Check installation instructions in the following tutorial: @ref 
tutorial_sfm_installation + @note + - Notice that it is compiled only when Ceres Solver is correctly installed.\n + Check installation instructions in the following tutorial: @ref tutorial_sfm_installation @defgroup simple_pipeline Simple Pipeline - @note - - Notice that it is compiled only when Ceres Solver is correctly installed.\n - Check installation instructions in the following tutorial: @ref tutorial_sfm_installation - - @} + @note + - Notice that it is compiled only when Ceres Solver is correctly installed.\n + Check installation instructions in the following tutorial: @ref tutorial_sfm_installation +@} */ #endif diff --git a/modules/stereo/include/opencv2/stereo/quasi_dense_stereo.hpp b/modules/stereo/include/opencv2/stereo/quasi_dense_stereo.hpp index b2290e3768c..469c46f72ea 100644 --- a/modules/stereo/include/opencv2/stereo/quasi_dense_stereo.hpp +++ b/modules/stereo/include/opencv2/stereo/quasi_dense_stereo.hpp @@ -18,6 +18,7 @@ namespace cv { namespace stereo { + /** \addtogroup stereo * @{ */ @@ -190,9 +191,8 @@ class CV_EXPORTS_W QuasiDenseStereo CV_PROP_RW PropagationParameters Param; }; -} //namespace cv -} //namespace stereo - /** @}*/ +} //namespace cv +} //namespace stereo #endif // __OPENCV_QUASI_DENSE_STEREO_H__ diff --git a/modules/text/include/opencv2/text.hpp b/modules/text/include/opencv2/text.hpp index 86ce3ec6e80..2b84451c23f 100644 --- a/modules/text/include/opencv2/text.hpp +++ b/modules/text/include/opencv2/text.hpp @@ -52,49 +52,49 @@ scene images. @{ @defgroup text_detect Scene Text Detection -Class-specific Extremal Regions for Scene Text Detection --------------------------------------------------------- - -The scene text detection algorithm described below has been initially proposed by Lukás Neumann & -Jiri Matas @cite Neumann11. The main idea behind Class-specific Extremal Regions is similar to the MSER -in that suitable Extremal Regions (ERs) are selected from the whole component tree of the image. 
-However, this technique differs from MSER in that selection of suitable ERs is done by a sequential -classifier trained for character detection, i.e. dropping the stability requirement of MSERs and -selecting class-specific (not necessarily stable) regions. - -The component tree of an image is constructed by thresholding by an increasing value step-by-step -from 0 to 255 and then linking the obtained connected components from successive levels in a -hierarchy by their inclusion relation: - -![image](pics/component_tree.png) - -The component tree may contain a huge number of regions even for a very simple image as shown in -the previous image. This number can easily reach the order of 1 x 10\^6 regions for an average 1 -Megapixel image. In order to efficiently select suitable regions among all the ERs the algorithm -make use of a sequential classifier with two differentiated stages. - -In the first stage incrementally computable descriptors (area, perimeter, bounding box, and Euler's -number) are computed (in O(1)) for each region r and used as features for a classifier which -estimates the class-conditional probability p(r|character). Only the ERs which correspond to local -maximum of the probability p(r|character) are selected (if their probability is above a global limit -p_min and the difference between local maximum and local minimum is greater than a delta_min -value). - -In the second stage, the ERs that passed the first stage are classified into character and -non-character classes using more informative but also more computationally expensive features. (Hole -area ratio, convex hull ratio, and the number of outer boundary inflexion points). - -This ER filtering process is done in different single-channel projections of the input image in -order to increase the character localization recall. - -After the ER filtering is done on each input channel, character candidates must be grouped in -high-level text blocks (i.e. words, text lines, paragraphs, ...). 
The opencv_text module implements -two different grouping algorithms: the Exhaustive Search algorithm proposed in @cite Neumann12 for -grouping horizontally aligned text, and the method proposed by Lluis Gomez and Dimosthenis Karatzas -in @cite Gomez13 @cite Gomez14 for grouping arbitrary oriented text (see erGrouping). - -To see the text detector at work, have a look at the textdetection demo: - + Class-specific Extremal Regions for Scene Text Detection + -------------------------------------------------------- + + The scene text detection algorithm described below has been initially proposed by Lukás Neumann & + Jiri Matas @cite Neumann11. The main idea behind Class-specific Extremal Regions is similar to the MSER + in that suitable Extremal Regions (ERs) are selected from the whole component tree of the image. + However, this technique differs from MSER in that selection of suitable ERs is done by a sequential + classifier trained for character detection, i.e. dropping the stability requirement of MSERs and + selecting class-specific (not necessarily stable) regions. + + The component tree of an image is constructed by thresholding by an increasing value step-by-step + from 0 to 255 and then linking the obtained connected components from successive levels in a + hierarchy by their inclusion relation: + + ![image](pics/component_tree.png) + + The component tree may contain a huge number of regions even for a very simple image as shown in + the previous image. This number can easily reach the order of 1 x 10\^6 regions for an average 1 + Megapixel image. In order to efficiently select suitable regions among all the ERs the algorithm + make use of a sequential classifier with two differentiated stages. + + In the first stage incrementally computable descriptors (area, perimeter, bounding box, and Euler's + number) are computed (in O(1)) for each region r and used as features for a classifier which + estimates the class-conditional probability p(r|character). 
Only the ERs which correspond to local + maximum of the probability p(r|character) are selected (if their probability is above a global limit + p_min and the difference between local maximum and local minimum is greater than a delta_min + value). + + In the second stage, the ERs that passed the first stage are classified into character and + non-character classes using more informative but also more computationally expensive features. (Hole + area ratio, convex hull ratio, and the number of outer boundary inflexion points). + + This ER filtering process is done in different single-channel projections of the input image in + order to increase the character localization recall. + + After the ER filtering is done on each input channel, character candidates must be grouped in + high-level text blocks (i.e. words, text lines, paragraphs, ...). The opencv_text module implements + two different grouping algorithms: the Exhaustive Search algorithm proposed in @cite Neumann12 for + grouping horizontally aligned text, and the method proposed by Lluis Gomez and Dimosthenis Karatzas + in @cite Gomez13 @cite Gomez14 for grouping arbitrary oriented text (see erGrouping). + + To see the text detector at work, have a look at the textdetection demo: + @defgroup text_recognize Scene Text Recognition @} diff --git a/modules/text/include/opencv2/text/ocr.hpp b/modules/text/include/opencv2/text/ocr.hpp index a0c967e87bd..083fc7a5aba 100644 --- a/modules/text/include/opencv2/text/ocr.hpp +++ b/modules/text/include/opencv2/text/ocr.hpp @@ -363,7 +363,6 @@ CV_EXPORTS_W Ptr loadOCRHMMClassifierCNN(cons */ CV_EXPORTS_W Ptr loadOCRHMMClassifier(const String& filename, int classifier); -//! @} /** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon). 
* diff --git a/modules/videostab/include/opencv2/videostab.hpp b/modules/videostab/include/opencv2/videostab.hpp index ca3f5adef2b..14c52ebaf1b 100644 --- a/modules/videostab/include/opencv2/videostab.hpp +++ b/modules/videostab/include/opencv2/videostab.hpp @@ -44,7 +44,7 @@ #define OPENCV_VIDEOSTAB_HPP /** - @defgroup videostab Video Stabilization +@defgroup videostab Video Stabilization The video stabilization module contains a set of functions and classes that can be used to solve the problem of video stabilization. There are a few methods implemented, most of them are described in @@ -53,26 +53,24 @@ paper methods. ### References - 1. "Full-Frame Video Stabilization with Motion Inpainting" - Yasuyuki Matsushita, Eyal Ofek, Weina Ge, Xiaoou Tang, Senior Member, and Heung-Yeung Shum - 2. "Auto-Directed Video Stabilization with Robust L1 Optimal Camera Paths" - Matthias Grundmann, Vivek Kwatra, Irfan Essa +1. "Full-Frame Video Stabilization with Motion Inpainting" + Yasuyuki Matsushita, Eyal Ofek, Weina Ge, Xiaoou Tang, Senior Member, and Heung-Yeung Shum +2. "Auto-Directed Video Stabilization with Robust L1 Optimal Camera Paths" + Matthias Grundmann, Vivek Kwatra, Irfan Essa - @{ - @defgroup videostab_motion Global Motion Estimation +@{ + @defgroup videostab_motion Global Motion Estimation -The video stabilization module contains a set of functions and classes for global motion estimation -between point clouds or between images. In the last case features are extracted and matched -internally. For the sake of convenience the motion estimation functions are wrapped into classes. -Both the functions and the classes are available. + The video stabilization module contains a set of functions and classes for global motion estimation + between point clouds or between images. In the last case features are extracted and matched + internally. For the sake of convenience the motion estimation functions are wrapped into classes. 
+ Both the functions and the classes are available. - @defgroup videostab_marching Fast Marching Method - -The Fast Marching Method @cite Telea04 is used in of the video stabilization routines to do motion and -color inpainting. The method is implemented is a flexible way and it's made public for other users. - - @} + @defgroup videostab_marching Fast Marching Method + The Fast Marching Method @cite Telea04 is used in of the video stabilization routines to do motion and + color inpainting. The method is implemented is a flexible way and it's made public for other users. +@} */ #include "opencv2/videostab/stabilizer.hpp" diff --git a/modules/viz/include/opencv2/viz.hpp b/modules/viz/include/opencv2/viz.hpp index fc79b8b60e7..c31ed342ab1 100644 --- a/modules/viz/include/opencv2/viz.hpp +++ b/modules/viz/include/opencv2/viz.hpp @@ -60,25 +60,24 @@ interact with it. 3D visualization window (see Viz3d) is used to display widgets (see Widget), and it provides several methods to interact with scene and widgets. - @{ +@{ @defgroup viz_widget Widget -In this section, the widget framework is explained. Widgets represent 2D or 3D objects, varying from -simple ones such as lines to complex ones such as point clouds and meshes. + In this section, the widget framework is explained. Widgets represent 2D or 3D objects, varying from + simple ones such as lines to complex ones such as point clouds and meshes. -Widgets are **implicitly shared**. Therefore, one can add a widget to the scene, and modify the -widget without re-adding the widget. + Widgets are **implicitly shared**. Therefore, one can add a widget to the scene, and modify the + widget without re-adding the widget. -@code -// Create a cloud widget -viz::WCloud cw(cloud, viz::Color::red()); -// Display it in a window -myWindow.showWidget("CloudWidget1", cw); -// Modify it, and it will be modified in the window. 
-cw.setColor(viz::Color::yellow()); -@endcode - - @} + @code + // Create a cloud widget + viz::WCloud cw(cloud, viz::Color::red()); + // Display it in a window + myWindow.showWidget("CloudWidget1", cw); + // Modify it, and it will be modified in the window. + cw.setColor(viz::Color::yellow()); + @endcode +@} */ #endif /* OPENCV_VIZ_HPP */ diff --git a/modules/xfeatures2d/include/opencv2/xfeatures2d.hpp b/modules/xfeatures2d/include/opencv2/xfeatures2d.hpp index 3313a38348a..3793541c238 100644 --- a/modules/xfeatures2d/include/opencv2/xfeatures2d.hpp +++ b/modules/xfeatures2d/include/opencv2/xfeatures2d.hpp @@ -46,19 +46,18 @@ the use of this software, even if advised of the possibility of such damage. @{ @defgroup xfeatures2d_experiment Experimental 2D Features Algorithms -This section describes experimental algorithms for 2d feature detection. + This section describes experimental algorithms for 2d feature detection. @defgroup xfeatures2d_nonfree Non-free 2D Features Algorithms -This section describes two popular algorithms for 2d feature detection, SIFT and SURF, that are -known to be patented. You need to set the OPENCV_ENABLE_NONFREE option in cmake to use those. Use them at your own risk. + This section describes two popular algorithms for 2d feature detection, SIFT and SURF, that are + known to be patented. You need to set the OPENCV_ENABLE_NONFREE option in cmake to use those. Use them at your own risk. 
@defgroup xfeatures2d_match Experimental 2D Features Matching Algorithm -This section describes the following matching strategies: - - GMS: Grid-based Motion Statistics, @cite Bian2017gms - - LOGOS: Local geometric support for high-outlier spatial verification, @cite Lowry2018LOGOSLG - + This section describes the following matching strategies: + - GMS: Grid-based Motion Statistics, @cite Bian2017gms + - LOGOS: Local geometric support for high-outlier spatial verification, @cite Lowry2018LOGOSLG @} */ diff --git a/modules/xfeatures2d/include/opencv2/xfeatures2d/nonfree.hpp b/modules/xfeatures2d/include/opencv2/xfeatures2d/nonfree.hpp index 8eb11aa6653..5fb299f20f4 100644 --- a/modules/xfeatures2d/include/opencv2/xfeatures2d/nonfree.hpp +++ b/modules/xfeatures2d/include/opencv2/xfeatures2d/nonfree.hpp @@ -50,6 +50,9 @@ namespace cv namespace xfeatures2d { +//! @addtogroup xfeatures2d_nonfree +//! @{ + /** @brief Class for extracting Speeded Up Robust Features from an image @cite Bay06 . The algorithm parameters: diff --git a/modules/ximgproc/include/opencv2/ximgproc.hpp b/modules/ximgproc/include/opencv2/ximgproc.hpp index dca0443c0ad..099205126cb 100644 --- a/modules/ximgproc/include/opencv2/ximgproc.hpp +++ b/modules/ximgproc/include/opencv2/ximgproc.hpp @@ -65,12 +65,13 @@ #include "ximgproc/find_ellipses.hpp" -/** @defgroup ximgproc Extended Image Processing - @{ +/** +@defgroup ximgproc Extended Image Processing +@{ @defgroup ximgproc_edge Structured forests for fast edge detection -This module contains implementations of modern structured edge detection algorithms, -i.e. algorithms which somehow takes into account pixel affinities in natural images. + This module contains implementations of modern structured edge detection algorithms, + i.e. algorithms which somehow takes into account pixel affinities in natural images. @defgroup ximgproc_edgeboxes EdgeBoxes @@ -84,16 +85,16 @@ i.e. 
algorithms which somehow takes into account pixel affinities in natural ima @defgroup ximgproc_edge_drawing EdgeDrawing -EDGE DRAWING LIBRARY FOR GEOMETRIC FEATURE EXTRACTION AND VALIDATION + EDGE DRAWING LIBRARY FOR GEOMETRIC FEATURE EXTRACTION AND VALIDATION -Edge Drawing (ED) algorithm is an proactive approach on edge detection problem. In contrast to many other existing edge detection algorithms which follow a subtractive -approach (i.e. after applying gradient filters onto an image eliminating pixels w.r.t. several rules, e.g. non-maximal suppression and hysteresis in Canny), ED algorithm -works via an additive strategy, i.e. it picks edge pixels one by one, hence the name Edge Drawing. Then we process those random shaped edge segments to extract higher level -edge features, i.e. lines, circles, ellipses, etc. The popular method of extraction edge pixels from the thresholded gradient magnitudes is non-maximal supression that tests -every pixel whether it has the maximum gradient response along its gradient direction and eliminates if it does not. However, this method does not check status of the -neighboring pixels, and therefore might result low quality (in terms of edge continuity, smoothness, thinness, localization) edge segments. Instead of non-maximal supression, -ED points a set of edge pixels and join them by maximizing the total gradient response of edge segments. Therefore it can extract high quality edge segments without need for -an additional hysteresis step. + Edge Drawing (ED) algorithm is an proactive approach on edge detection problem. In contrast to many other existing edge detection algorithms which follow a subtractive + approach (i.e. after applying gradient filters onto an image eliminating pixels w.r.t. several rules, e.g. non-maximal suppression and hysteresis in Canny), ED algorithm + works via an additive strategy, i.e. it picks edge pixels one by one, hence the name Edge Drawing. 
Then we process those random shaped edge segments to extract higher level + edge features, i.e. lines, circles, ellipses, etc. The popular method of extraction edge pixels from the thresholded gradient magnitudes is non-maximal supression that tests + every pixel whether it has the maximum gradient response along its gradient direction and eliminates if it does not. However, this method does not check status of the + neighboring pixels, and therefore might result low quality (in terms of edge continuity, smoothness, thinness, localization) edge segments. Instead of non-maximal supression, + ED points a set of edge pixels and join them by maximizing the total gradient response of edge segments. Therefore it can extract high quality edge segments without need for + an additional hysteresis step. @defgroup ximgproc_fourier Fourier descriptors @@ -115,8 +116,7 @@ an additional hysteresis step. The size of the original image is required for compatibility with the imgproc functions when the boundary handling requires that pixel outside the image boundary are "on". - - @} +@} */ namespace cv @@ -124,6 +124,9 @@ namespace cv namespace ximgproc { +//! @addtogroup ximgproc +//! @{ + enum ThinningTypes{ THINNING_ZHANGSUEN = 0, // Thinning technique of Zhang-Suen THINNING_GUOHALL = 1 // Thinning technique of Guo-Hall @@ -139,9 +142,6 @@ enum LocalBinarizationMethods{ BINARIZATION_NICK = 3 //!< NICK technique. See @cite Khurshid2009 . }; -//! @addtogroup ximgproc -//! @{ - /** @brief Performs thresholding on input images using Niblack's technique or some of the popular variations it inspired. 
diff --git a/modules/ximgproc/include/opencv2/ximgproc/color_match.hpp b/modules/ximgproc/include/opencv2/ximgproc/color_match.hpp index c18390d4ac6..8408b5b2331 100644 --- a/modules/ximgproc/include/opencv2/ximgproc/color_match.hpp +++ b/modules/ximgproc/include/opencv2/ximgproc/color_match.hpp @@ -61,6 +61,8 @@ CV_EXPORTS_W void qdft(InputArray img, OutputArray qimg, int flags, bool sideL */ CV_EXPORTS_W void colorMatchTemplate(InputArray img, InputArray templ, OutputArray result); +//! @} + } } #endif diff --git a/modules/ximgproc/include/opencv2/ximgproc/deriche_filter.hpp b/modules/ximgproc/include/opencv2/ximgproc/deriche_filter.hpp index 26d3b6759da..18adade6f90 100644 --- a/modules/ximgproc/include/opencv2/ximgproc/deriche_filter.hpp +++ b/modules/ximgproc/include/opencv2/ximgproc/deriche_filter.hpp @@ -71,6 +71,8 @@ CV_EXPORTS_W void GradientDericheY(InputArray op, OutputArray dst, double alpha, */ CV_EXPORTS_W void GradientDericheX(InputArray op, OutputArray dst, double alpha,double omega); +//! @} + } } #endif diff --git a/modules/ximgproc/include/opencv2/ximgproc/edgepreserving_filter.hpp b/modules/ximgproc/include/opencv2/ximgproc/edgepreserving_filter.hpp index f5685ce39bb..758b61b4349 100644 --- a/modules/ximgproc/include/opencv2/ximgproc/edgepreserving_filter.hpp +++ b/modules/ximgproc/include/opencv2/ximgproc/edgepreserving_filter.hpp @@ -26,8 +26,8 @@ namespace cv { namespace ximgproc { */ CV_EXPORTS_W void edgePreservingFilter( InputArray src, OutputArray dst, int d, double threshold ); -}} // namespace - //! @} +}} // namespace + #endif diff --git a/modules/ximgproc/include/opencv2/ximgproc/fast_hough_transform.hpp b/modules/ximgproc/include/opencv2/ximgproc/fast_hough_transform.hpp index adfbf543b57..94668b06520 100644 --- a/modules/ximgproc/include/opencv2/ximgproc/fast_hough_transform.hpp +++ b/modules/ximgproc/include/opencv2/ximgproc/fast_hough_transform.hpp @@ -82,8 +82,7 @@ enum AngleRangeOption * two operands. 
Formally, a binary operation @f$ f @f$ on a set @f$ S @f$ * is a binary relation that maps elements of the Cartesian product * @f$ S \times S @f$ to @f$ S @f$: -* @f[ f: S \times S \to S @f] - * @ingroup MinUtils_MathOper + * @f[ f: S \times S \to S @f] */ enum HoughOp { diff --git a/modules/ximgproc/include/opencv2/ximgproc/paillou_filter.hpp b/modules/ximgproc/include/opencv2/ximgproc/paillou_filter.hpp index 03754a1119d..56fcd3c9618 100644 --- a/modules/ximgproc/include/opencv2/ximgproc/paillou_filter.hpp +++ b/modules/ximgproc/include/opencv2/ximgproc/paillou_filter.hpp @@ -61,6 +61,8 @@ namespace ximgproc { CV_EXPORTS void GradientPaillouY(InputArray op, OutputArray _dst, double alpha, double omega); CV_EXPORTS void GradientPaillouX(InputArray op, OutputArray _dst, double alpha, double omega); +//! @} + } } #endif diff --git a/modules/ximgproc/include/opencv2/ximgproc/peilin.hpp b/modules/ximgproc/include/opencv2/ximgproc/peilin.hpp index 1b224aaf88b..194f12e1196 100644 --- a/modules/ximgproc/include/opencv2/ximgproc/peilin.hpp +++ b/modules/ximgproc/include/opencv2/ximgproc/peilin.hpp @@ -27,6 +27,8 @@ namespace cv { namespace ximgproc { /** @overload */ CV_EXPORTS_W void PeiLinNormalization ( InputArray I, OutputArray T ); + //! @} + }} // namespace #endif diff --git a/modules/ximgproc/include/opencv2/ximgproc/run_length_morphology.hpp b/modules/ximgproc/include/opencv2/ximgproc/run_length_morphology.hpp index c19e2d858db..6cf2eb663c1 100644 --- a/modules/ximgproc/include/opencv2/ximgproc/run_length_morphology.hpp +++ b/modules/ximgproc/include/opencv2/ximgproc/run_length_morphology.hpp @@ -113,6 +113,8 @@ CV_EXPORTS void createRLEImage(const std::vector& runs, OutputArray CV_EXPORTS void morphologyEx(InputArray rlSrc, OutputArray rlDest, int op, InputArray rlKernel, bool bBoundaryOnForErosion = true, Point anchor = Point(0,0)); +//! 
@} + } } } From 6b5142ff657ca676ab35233556b49a532e75e2b7 Mon Sep 17 00:00:00 2001 From: Alexander Panov Date: Thu, 7 Mar 2024 09:14:37 +0300 Subject: [PATCH 05/19] Merge pull request #3647 from AleksandrPanov:add_to_mcc_detect_and_infer_test Add to mcc detect and infer test #3647 merge with https://github.com/opencv/opencv_extra/pull/1153 Added a full pipeline tests: 1. detector->process(img, (TYPECHART)0, 1, true); 2. ColorCorrectionModel model(src, COLORCHECKER_Macbeth); model.run(); 3. calibratedImage = model.infer(calibratedImage)*255.; ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. 
- [ ] The feature is well documented and sample code can be built with the project CMake --- modules/mcc/src/checker_detector.cpp | 2 +- modules/mcc/test/test_mcc.cpp | 96 ++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 1 deletion(-) diff --git a/modules/mcc/src/checker_detector.cpp b/modules/mcc/src/checker_detector.cpp index dcff0a25f3b..7972bd0a7de 100644 --- a/modules/mcc/src/checker_detector.cpp +++ b/modules/mcc/src/checker_detector.cpp @@ -511,7 +511,7 @@ void CCheckerDetectorImpl:: if (params->minImageSize > min_size) { aspOut = (float)params->minImageSize / min_size; - cv::resize(bgr, bgrOut, cv::Size(int(size.width * aspOut), int(size.height * aspOut))); + cv::resize(bgr, bgrOut, cv::Size(int(size.width * aspOut), int(size.height * aspOut)), INTER_LINEAR_EXACT); } // Convert to grayscale diff --git a/modules/mcc/test/test_mcc.cpp b/modules/mcc/test/test_mcc.cpp index 4aa8ae31302..374b829b4b2 100644 --- a/modules/mcc/test/test_mcc.cpp +++ b/modules/mcc/test/test_mcc.cpp @@ -81,5 +81,101 @@ TEST(CV_mccRunCCheckerDetectorBasic, accuracy_VINYL18) runCCheckerDetectorBasic("VINYL18.png", VINYL18); } +TEST(CV_mcc_ccm_test, detect_Macbeth) +{ + string path = cvtest::findDataFile("mcc/mcc_ccm_test.jpg"); + Mat img = imread(path, IMREAD_COLOR); + Ptr detector = CCheckerDetector::create(); + + // detect MCC24 board + ASSERT_TRUE(detector->process(img, MCC24, 1, false)); + + // read gold Macbeth corners + path = cvtest::findDataFile("mcc/mcc_ccm_test.yml"); + FileStorage fs(path, FileStorage::READ); + ASSERT_TRUE(fs.isOpened()); + FileNode node = fs["Macbeth_corners"]; + ASSERT_FALSE(node.empty()); + vector gold_corners; + node >> gold_corners; + Ptr checker = detector->getBestColorChecker(); + + // check Macbeth corners + vector corners = checker->getBox(); + EXPECT_MAT_NEAR(gold_corners, corners, 3.6); // diff 3.57385 in ARM only + + // read gold chartsRGB + node = fs["chartsRGB"]; + Mat goldChartsRGB; + node >> goldChartsRGB; + fs.release(); + + // 
check chartsRGB + Mat chartsRGB = checker->getChartsRGB(); + EXPECT_MAT_NEAR(goldChartsRGB.col(1), chartsRGB.col(1), 0.25); // diff 0.240634 in ARM only +} + +TEST(CV_mcc_ccm_test, compute_ccm) +{ + // read gold chartsRGB + string path = cvtest::findDataFile("mcc/mcc_ccm_test.yml"); + FileStorage fs(path, FileStorage::READ); + Mat chartsRGB; + FileNode node = fs["chartsRGB"]; + node >> chartsRGB; + + // compute CCM + ColorCorrectionModel model(chartsRGB.col(1).clone().reshape(3, chartsRGB.rows/3) / 255., COLORCHECKER_Macbeth); + model.run(); + + // read gold CCM + node = fs["ccm"]; + ASSERT_FALSE(node.empty()); + Mat gold_ccm; + node >> gold_ccm; + fs.release(); + + // check CCM + Mat ccm = model.getCCM(); + EXPECT_MAT_NEAR(gold_ccm, ccm, 1e-8); + + const double gold_loss = 4.6386569120323129; + // check loss + const double loss = model.getLoss(); + EXPECT_NEAR(gold_loss, loss, 1e-8); +} + +TEST(CV_mcc_ccm_test, infer) +{ + string path = cvtest::findDataFile("mcc/mcc_ccm_test.jpg"); + Mat img = imread(path, IMREAD_COLOR); + // read gold calibrate img + path = cvtest::findDataFile("mcc/mcc_ccm_test_res.png"); + Mat gold_img = imread(path); + + // read gold chartsRGB + path = cvtest::findDataFile("mcc/mcc_ccm_test.yml"); + FileStorage fs(path, FileStorage::READ); + Mat chartsRGB; + FileNode node = fs["chartsRGB"]; + node >> chartsRGB; + fs.release(); + + // compute CCM + ColorCorrectionModel model(chartsRGB.col(1).clone().reshape(3, chartsRGB.rows/3) / 255., COLORCHECKER_Macbeth); + model.run(); + + // compute calibrate image + Mat calibratedImage; + cvtColor(img, calibratedImage, COLOR_BGR2RGB); + calibratedImage.convertTo(calibratedImage, CV_64F, 1. 
/ 255.); + calibratedImage = model.infer(calibratedImage); + calibratedImage.convertTo(calibratedImage, CV_8UC3, 255.); + cvtColor(calibratedImage, calibratedImage, COLOR_RGB2BGR); + // check calibrated image + EXPECT_MAT_NEAR(gold_img, calibratedImage, 0.1); +} + + } // namespace } // namespace opencv_test From ca90d3ec382dc29c0b40bdc826aa9e4dfed2d875 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E4=BD=A9=E5=85=B6?= Date: Fri, 8 Mar 2024 14:00:42 +0800 Subject: [PATCH 06/19] faster thinning implement --- modules/ximgproc/src/thinning.cpp | 210 +++++++++++++++++++++++------- 1 file changed, 160 insertions(+), 50 deletions(-) diff --git a/modules/ximgproc/src/thinning.cpp b/modules/ximgproc/src/thinning.cpp index b28784d2894..00017fe0acb 100644 --- a/modules/ximgproc/src/thinning.cpp +++ b/modules/ximgproc/src/thinning.cpp @@ -5,65 +5,175 @@ using namespace std; namespace cv { namespace ximgproc { +// look up table - there is one entry for each of the 2^8=256 possible +// combinations of 8 binary neighbors. 
+static uint8_t lut_zhang_iter0[] = { + 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, + 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, + 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, + 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, + 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, + 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, + 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, + 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1}; + +static uint8_t lut_zhang_iter1[] = { + 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, + 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, + 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, + 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, + 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, + 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, + 0, 1, 1, 1}; + +static uint8_t lut_guo_iter0[] = { + 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, + 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, + 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, + 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, + 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1}; + +static uint8_t lut_guo_iter1[] = { + 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, + 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, + 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, + 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, + 1, 1, 1, 1}; + // Applies a thinning iteration to a binary image static void thinningIteration(Mat img, int iter, int thinningType){ Mat marker = Mat::zeros(img.size(), CV_8UC1); + int rows = img.rows; + int cols = img.cols; if(thinningType == THINNING_ZHANGSUEN){ - for (int i = 1; i < img.rows-1; i++) - { - for (int j = 1; j < img.cols-1; j++) - { - uchar p2 = img.at(i-1, j); - uchar p3 = img.at(i-1, j+1); - uchar p4 = img.at(i, j+1); - uchar p5 = img.at(i+1, j+1); - uchar p6 = img.at(i+1, j); - uchar p7 = img.at(i+1, j-1); - uchar p8 = img.at(i, j-1); - uchar p9 = img.at(i-1, j-1); - - int A = (p2 == 0 && p3 == 1) + (p3 
== 0 && p4 == 1) + - (p4 == 0 && p5 == 1) + (p5 == 0 && p6 == 1) + - (p6 == 0 && p7 == 1) + (p7 == 0 && p8 == 1) + - (p8 == 0 && p9 == 1) + (p9 == 0 && p2 == 1); - int B = p2 + p3 + p4 + p5 + p6 + p7 + p8 + p9; - int m1 = iter == 0 ? (p2 * p4 * p6) : (p2 * p4 * p8); - int m2 = iter == 0 ? (p4 * p6 * p8) : (p2 * p6 * p8); - - if (A == 1 && (B >= 2 && B <= 6) && m1 == 0 && m2 == 0) - marker.at(i,j) = 1; - } - } + marker.forEach([=](uchar& value, const int postion[]) { + int i = postion[0]; + int j = postion[1]; + if (i == 0 || j == 0 || i == rows - 1 || j == cols - 1) + return; + + auto ptr = img.ptr(i, j); // p1 + + // p9 p2 p3 + // p8 p1 p4 + // p7 p6 p5 + uchar p2 = ptr[-cols]; + uchar p3 = ptr[-cols + 1]; + uchar p4 = ptr[1]; + uchar p5 = ptr[cols + 1]; + uchar p6 = ptr[cols]; + uchar p7 = ptr[cols - 1]; + uchar p8 = ptr[-1]; + uchar p9 = ptr[-cols - 1]; + + int neighbors = p9 | (p2 << 1) | (p3 << 2) | (p4 << 3) | (p5 << 4) | (p6 << 5) | (p7 << 6) | (p8 << 7); + + if (iter == 0) + value = lut_zhang_iter0[neighbors]; + else + value = lut_zhang_iter1[neighbors]; + + //int A = (p2 == 0 && p3 == 1) + (p3 == 0 && p4 == 1) + + // (p4 == 0 && p5 == 1) + (p5 == 0 && p6 == 1) + + // (p6 == 0 && p7 == 1) + (p7 == 0 && p8 == 1) + + // (p8 == 0 && p9 == 1) + (p9 == 0 && p2 == 1); + //int B = p2 + p3 + p4 + p5 + p6 + p7 + p8 + p9; + //int m1 = iter == 0 ? (p2 * p4 * p6) : (p2 * p4 * p8); + //int m2 = iter == 0 ? 
(p4 * p6 * p8) : (p2 * p6 * p8); + //if (A == 1 && (B >= 2 && B <= 6) && m1 == 0 && m2 == 0) value = 0; + }); } if(thinningType == THINNING_GUOHALL){ - for (int i = 1; i < img.rows-1; i++) - { - for (int j = 1; j < img.cols-1; j++) - { - uchar p2 = img.at(i-1, j); - uchar p3 = img.at(i-1, j+1); - uchar p4 = img.at(i, j+1); - uchar p5 = img.at(i+1, j+1); - uchar p6 = img.at(i+1, j); - uchar p7 = img.at(i+1, j-1); - uchar p8 = img.at(i, j-1); - uchar p9 = img.at(i-1, j-1); - - int C = ((!p2) & (p3 | p4)) + ((!p4) & (p5 | p6)) + - ((!p6) & (p7 | p8)) + ((!p8) & (p9 | p2)); - int N1 = (p9 | p2) + (p3 | p4) + (p5 | p6) + (p7 | p8); - int N2 = (p2 | p3) + (p4 | p5) + (p6 | p7) + (p8 | p9); - int N = N1 < N2 ? N1 : N2; - int m = iter == 0 ? ((p6 | p7 | (!p9)) & p8) : ((p2 | p3 | (!p5)) & p4); - - if ((C == 1) && ((N >= 2) && ((N <= 3)) & (m == 0))) - marker.at(i,j) = 1; - } - } + marker.forEach([=](uchar& value, const int postion[]) { + int i = postion[0]; + int j = postion[1]; + if (i == 0 || j == 0 || i == rows - 1 || j == cols - 1) + return; + + auto ptr = img.ptr(i, j); // p1 + + // p9 p2 p3 + // p8 p1 p4 + // p7 p6 p5 + uchar p2 = ptr[-cols]; + uchar p3 = ptr[-cols + 1]; + uchar p4 = ptr[1]; + uchar p5 = ptr[cols + 1]; + uchar p6 = ptr[cols]; + uchar p7 = ptr[cols - 1]; + uchar p8 = ptr[-1]; + uchar p9 = ptr[-cols - 1]; + + int neighbors = p9 | (p2 << 1) | (p3 << 2) | (p4 << 3) | (p5 << 4) | (p6 << 5) | (p7 << 6) | (p8 << 7); + + if (iter == 0) + value = lut_guo_iter0[neighbors]; + else + value = lut_guo_iter1[neighbors]; + + //int C = ((!p2) & (p3 | p4)) + ((!p4) & (p5 | p6)) + + // ((!p6) & (p7 | p8)) + ((!p8) & (p9 | p2)); + //int N1 = (p9 | p2) + (p3 | p4) + (p5 | p6) + (p7 | p8); + //int N2 = (p2 | p3) + (p4 | p5) + (p6 | p7) + (p8 | p9); + //int N = N1 < N2 ? N1 : N2; + //int m = iter == 0 ? 
((p6 | p7 | (!p9)) & p8) : ((p2 | p3 | (!p5)) & p4); + //if ((C == 1) && ((N >= 2) && ((N <= 3)) & (m == 0))) value = 0; + }); } - img &= ~marker; + img &= marker; } // Apply the thinning procedure to a given image From b9a99311d2af19a28e2f88ccc5cefbf1e1a67670 Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 15 Mar 2024 10:29:23 +0300 Subject: [PATCH 07/19] remove false from findDataFile --- modules/aruco/test/test_aruco_tutorial.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/modules/aruco/test/test_aruco_tutorial.cpp b/modules/aruco/test/test_aruco_tutorial.cpp index 831a0099698..e59dd02a1d2 100644 --- a/modules/aruco/test/test_aruco_tutorial.cpp +++ b/modules/aruco/test/test_aruco_tutorial.cpp @@ -10,7 +10,7 @@ namespace opencv_test { namespace { TEST(CV_ArucoTutorial, can_find_singlemarkersoriginal) { - string img_path = cvtest::findDataFile("aruco/singlemarkersoriginal.jpg", false); + string img_path = cvtest::findDataFile("aruco/singlemarkersoriginal.jpg"); Mat image = imread(img_path); aruco::ArucoDetector detector(aruco::getPredefinedDictionary(aruco::DICT_6X6_250)); @@ -44,9 +44,9 @@ TEST(CV_ArucoTutorial, can_find_singlemarkersoriginal) TEST(CV_ArucoTutorial, can_find_gboriginal) { - string imgPath = cvtest::findDataFile("aruco/gboriginal.jpg", false); + string imgPath = cvtest::findDataFile("aruco/gboriginal.jpg"); Mat image = imread(imgPath); - string dictPath = cvtest::findDataFile("aruco/tutorial_dict.yml", false); + string dictPath = cvtest::findDataFile("aruco/tutorial_dict.yml"); aruco::Dictionary dictionary; FileStorage fs(dictPath, FileStorage::READ); @@ -99,7 +99,7 @@ TEST(CV_ArucoTutorial, can_find_gboriginal) TEST(CV_ArucoTutorial, can_find_choriginal) { - string imgPath = cvtest::findDataFile("aruco/choriginal.jpg", false); + string imgPath = cvtest::findDataFile("aruco/choriginal.jpg"); Mat image = imread(imgPath); aruco::ArucoDetector detector(aruco::getPredefinedDictionary(aruco::DICT_6X6_250)); @@ 
-138,7 +138,7 @@ TEST(CV_ArucoTutorial, can_find_choriginal) TEST(CV_ArucoTutorial, can_find_chocclusion) { - string imgPath = cvtest::findDataFile("aruco/chocclusion_original.jpg", false); + string imgPath = cvtest::findDataFile("aruco/chocclusion_original.jpg"); Mat image = imread(imgPath); aruco::ArucoDetector detector(aruco::getPredefinedDictionary(aruco::DICT_6X6_250)); @@ -176,15 +176,15 @@ TEST(CV_ArucoTutorial, can_find_chocclusion) TEST(CV_ArucoTutorial, can_find_diamondmarkers) { - string imgPath = cvtest::findDataFile("aruco/diamondmarkers.jpg", false); + string imgPath = cvtest::findDataFile("aruco/diamondmarkers.jpg"); Mat image = imread(imgPath); - string dictPath = cvtest::findDataFile("aruco/tutorial_dict.yml", false); + string dictPath = cvtest::findDataFile("aruco/tutorial_dict.yml"); aruco::Dictionary dictionary; FileStorage fs(dictPath, FileStorage::READ); dictionary.aruco::Dictionary::readDictionary(fs.root()); // set marker from tutorial_dict.yml - string detectorPath = cvtest::findDataFile("aruco/detector_params.yml", false); + string detectorPath = cvtest::findDataFile("aruco/detector_params.yml"); fs = FileStorage(detectorPath, FileStorage::READ); aruco::DetectorParameters detectorParams; detectorParams.readDetectorParameters(fs.root()); From d75a1c39efdbe529a7bab1327d3e0aca8f1017d4 Mon Sep 17 00:00:00 2001 From: Evgeny Latkin Date: Fri, 15 Mar 2024 20:10:39 +0700 Subject: [PATCH 08/19] Two simple fixes to WeChat QR Code module: - decodemgr.cpp: fix swapped width <--> height - unicomblock.cpp: fix wrong type at sizeof(...)
--- modules/wechat_qrcode/src/decodermgr.cpp | 4 ++-- modules/wechat_qrcode/src/zxing/common/unicomblock.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/wechat_qrcode/src/decodermgr.cpp b/modules/wechat_qrcode/src/decodermgr.cpp index 1e93aa15c0c..5083cc930bb 100644 --- a/modules/wechat_qrcode/src/decodermgr.cpp +++ b/modules/wechat_qrcode/src/decodermgr.cpp @@ -33,7 +33,7 @@ int DecoderMgr::decodeImage(cv::Mat src, bool use_nn_detector, vector& r decode_hints_.setUseNNDetector(use_nn_detector); Ref source; - qbarUicomBlock_ = new UnicomBlock(width, height); + qbarUicomBlock_ = new UnicomBlock(height, width); // Four Binarizers int tryBinarizeTime = 4; @@ -89,4 +89,4 @@ vector> DecoderMgr::Decode(Ref image, DecodeHints hint return reader_->decode(image, hints); } } // namespace wechat_qrcode -} // namespace cv \ No newline at end of file +} // namespace cv diff --git a/modules/wechat_qrcode/src/zxing/common/unicomblock.cpp b/modules/wechat_qrcode/src/zxing/common/unicomblock.cpp index 652458e91c8..a7180d41116 100644 --- a/modules/wechat_qrcode/src/zxing/common/unicomblock.cpp +++ b/modules/wechat_qrcode/src/zxing/common/unicomblock.cpp @@ -26,7 +26,7 @@ void UnicomBlock::Init() { void UnicomBlock::Reset(Ref poImage) { m_poImage = poImage; - memset(&m_vcIndex[0], 0, m_vcIndex.size() * sizeof(short)); + memset(&m_vcIndex[0], 0, m_vcIndex.size() * sizeof(m_vcIndex[0])); m_iNowIdx = 0; } From 3157cc16d26e6fe4f76af221edc6ee6fac2c6d28 Mon Sep 17 00:00:00 2001 From: Evgeny Latkin Date: Tue, 19 Mar 2024 10:08:29 +0700 Subject: [PATCH 09/19] Exclude "2_qrcodes.png" for `opencv_perf_wechat_qrcode` test, as this image appears too difficult, so that test fails on it --- modules/wechat_qrcode/perf/perf_wechat_qrcode_pipeline.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/wechat_qrcode/perf/perf_wechat_qrcode_pipeline.cpp b/modules/wechat_qrcode/perf/perf_wechat_qrcode_pipeline.cpp index fe00e0168b1..e074a9cb8f8 
100644 --- a/modules/wechat_qrcode/perf/perf_wechat_qrcode_pipeline.cpp +++ b/modules/wechat_qrcode/perf/perf_wechat_qrcode_pipeline.cpp @@ -17,7 +17,8 @@ std::string qrcode_images_name[] = { "version_5_down.jpg", "version_5_left.jpg", "version_5_up.jpg", "version_5_top.jpg", "russian.jpg", "kanji.jpg", "link_wiki_cv.jpg"}; -std::string qrcode_images_multiple[] = {"2_qrcodes.png", "3_qrcodes.png", "3_close_qrcodes.png", +// NB: exclude "2_qrcodes.png" as this image appears too difficult, so that this test fails on it +std::string qrcode_images_multiple[] = {/*"2_qrcodes.png",*/ "3_qrcodes.png", "3_close_qrcodes.png", "4_qrcodes.png", "5_qrcodes.png", "7_qrcodes.png"}; WeChatQRCode createQRDetectorWithDNN(std::string& model_path) From 35f0b6ecc39ed985033076b87e93c9a1cc8b8a1e Mon Sep 17 00:00:00 2001 From: Evgeny Latkin Date: Wed, 20 Mar 2024 11:01:25 +0700 Subject: [PATCH 10/19] Fix data corruption in WeChatQRCode::impl::decode (it may lead to incorrect results if multiple QR found at image) --- modules/wechat_qrcode/src/wechat_qrcode.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/modules/wechat_qrcode/src/wechat_qrcode.cpp b/modules/wechat_qrcode/src/wechat_qrcode.cpp index f4bec7c2b36..637827a7f5d 100644 --- a/modules/wechat_qrcode/src/wechat_qrcode.cpp +++ b/modules/wechat_qrcode/src/wechat_qrcode.cpp @@ -155,9 +155,11 @@ vector WeChatQRCode::Impl::decode(const Mat& img, vector& candidate if (use_nn_detector_) points_qr = aligner.warpBack(points_qr); + + auto point_to_save = Mat(4, 2, CV_32FC1); for (int j = 0; j < 4; ++j) { - point.at(j, 0) = points_qr[j].x; - point.at(j, 1) = points_qr[j].y; + point_to_save.at(j, 0) = points_qr[j].x; + point_to_save.at(j, 1) = points_qr[j].y; } // try to find duplicate qr corners bool isDuplicate = false; @@ -175,7 +177,7 @@ vector WeChatQRCode::Impl::decode(const Mat& img, vector& candidate } } if (isDuplicate == false) { - points.push_back(point); + points.push_back(point_to_save); 
check_points.push_back(points_qr); } else { @@ -244,4 +246,4 @@ vector WeChatQRCode::Impl::getScaleList(const int width, const int height return {0.5, 1.0}; } } // namespace wechat_qrcode -} // namespace cv \ No newline at end of file +} // namespace cv From 270d2aa5fdaea528bde63a8c8df61ce837bcbfe1 Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Wed, 20 Mar 2024 22:47:06 +0300 Subject: [PATCH 11/19] wechat: fix uninitialized values in tests --- .../src/zxing/qrcode/detector/finder_pattern_finder.cpp | 8 ++++---- modules/wechat_qrcode/test/test_qrcode.cpp | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/wechat_qrcode/src/zxing/qrcode/detector/finder_pattern_finder.cpp b/modules/wechat_qrcode/src/zxing/qrcode/detector/finder_pattern_finder.cpp index 438928c093d..93017a7f515 100644 --- a/modules/wechat_qrcode/src/zxing/qrcode/detector/finder_pattern_finder.cpp +++ b/modules/wechat_qrcode/src/zxing/qrcode/detector/finder_pattern_finder.cpp @@ -1066,8 +1066,6 @@ bool FinderPatternFinder::handlePossibleCenter(int* stateCount, size_t i, size_t } float estimatedHorizontalModuleSize = (float)stateCountTotal / 7.0f; - float estimatedVerticalModuleSize; - // try different size according to the estimatedHorizontalModuleSize float tolerateModuleSize = estimatedHorizontalModuleSize > 4.0 ? 
estimatedHorizontalModuleSize / 2.0f : 1.0f; @@ -1082,7 +1080,9 @@ bool FinderPatternFinder::handlePossibleCenter(int* stateCount, size_t i, size_t int image_width = image_->getWidth(); for (int k = 0; k < CENTER_CHECK_TIME; k++) { float possibleCenterJ = possbileCenterJs[k]; - if (possibleCenterJ < 0 || possibleCenterJ >= image_width) continue; + if (possibleCenterJ < 0 || possibleCenterJ >= image_width) + continue; + float estimatedVerticalModuleSize = 0; float centerI = crossCheckVertical(i, (size_t)possibleCenterJ, stateCount[2], stateCountTotal, estimatedVerticalModuleSize); @@ -1505,4 +1505,4 @@ Ref FinderPatternFinder::getImage() { return image_; } vector>& FinderPatternFinder::getPossibleCenters() { return possibleCenters_; } } // namespace qrcode -} // namespace zxing \ No newline at end of file +} // namespace zxing diff --git a/modules/wechat_qrcode/test/test_qrcode.cpp b/modules/wechat_qrcode/test/test_qrcode.cpp index ec2559b0e71..7820329de92 100644 --- a/modules/wechat_qrcode/test/test_qrcode.cpp +++ b/modules/wechat_qrcode/test/test_qrcode.cpp @@ -303,7 +303,7 @@ TEST(Objdetect_QRCode_points_position, rotate45) { Ptr qrcode_enc = cv::QRCodeEncoder::create(params); Mat qrImage; qrcode_enc->encode(expect_msg, qrImage); - Mat image(800, 800, CV_8UC1); + Mat image(800, 800, CV_8UC1, Scalar(0)); const int pixInBlob = 4; Size qrSize = Size((21+(params.version-1)*4)*pixInBlob,(21+(params.version-1)*4)*pixInBlob); Rect2f rec(static_cast((image.cols - qrSize.width)/2), @@ -364,7 +364,7 @@ TEST(Objdetect_QRCode_Big, regression) { Ptr qrcode_enc = cv::QRCodeEncoder::create(params); Mat qrImage; qrcode_enc->encode(expect_msg, qrImage); - Mat largeImage(4032, 3024, CV_8UC1); + Mat largeImage(4032, 3024, CV_8UC1, Scalar(0)); const int pixInBlob = 4; Size qrSize = Size((21+(params.version-1)*4)*pixInBlob,(21+(params.version-1)*4)*pixInBlob); Mat roiImage = largeImage(Rect((largeImage.cols - qrSize.width)/2, (largeImage.rows - qrSize.height)/2, @@ -395,7 +395,7 @@ 
TEST(Objdetect_QRCode_Tiny, regression) { Ptr qrcode_enc = cv::QRCodeEncoder::create(params); Mat qrImage; qrcode_enc->encode(expect_msg, qrImage); - Mat tinyImage(80, 80, CV_8UC1); + Mat tinyImage(80, 80, CV_8UC1, Scalar(0)); const int pixInBlob = 2; Size qrSize = Size((21+(params.version-1)*4)*pixInBlob,(21+(params.version-1)*4)*pixInBlob); Mat roiImage = tinyImage(Rect((tinyImage.cols - qrSize.width)/2, (tinyImage.rows - qrSize.height)/2, From db093afc1fbc7e0c159bf9915c69c9284b273ac1 Mon Sep 17 00:00:00 2001 From: Evgeny Latkin Date: Thu, 21 Mar 2024 20:29:05 +0700 Subject: [PATCH 12/19] Add `const` qualifier to `candidate_points` at WeChatQRCode::impl::decode --- modules/wechat_qrcode/src/wechat_qrcode.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/modules/wechat_qrcode/src/wechat_qrcode.cpp b/modules/wechat_qrcode/src/wechat_qrcode.cpp index 637827a7f5d..64aad73610b 100644 --- a/modules/wechat_qrcode/src/wechat_qrcode.cpp +++ b/modules/wechat_qrcode/src/wechat_qrcode.cpp @@ -35,7 +35,8 @@ class WeChatQRCode::Impl { * @param points succussfully decoded qrcode with bounding box points. 
* @return vector */ - std::vector decode(const Mat& img, std::vector& candidate_points, + std::vector decode(const Mat& img, + const std::vector& candidate_points, std::vector& points); int applyDetector(const Mat& img, std::vector& points); Mat cropObj(const Mat& img, const Mat& point, Align& aligner); @@ -123,13 +124,14 @@ float WeChatQRCode::getScaleFactor() { return p->scaleFactor; }; -vector WeChatQRCode::Impl::decode(const Mat& img, vector& candidate_points, +vector WeChatQRCode::Impl::decode(const Mat& img, + const vector& candidate_points, vector& points) { if (candidate_points.size() == 0) { return vector(); } vector decode_results; - for (auto& point : candidate_points) { + for (const auto& point : candidate_points) { Mat cropped_img; Align aligner; if (use_nn_detector_) { From 56869b76a385c5293bf093f9b8faa50969b0fd7f Mon Sep 17 00:00:00 2001 From: Yuantao Feng Date: Fri, 22 Mar 2024 04:45:00 +0800 Subject: [PATCH 13/19] rename cv::float16_t to cv::hfloat (#3697) * rename cv::float16_t to cv::fp16_t * fp16_t -> hfloat --- modules/cudev/test/test_nd.cu | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/cudev/test/test_nd.cu b/modules/cudev/test/test_nd.cu index 2fc3f396b95..b3d8bc0b86f 100644 --- a/modules/cudev/test/test_nd.cu +++ b/modules/cudev/test/test_nd.cu @@ -101,7 +101,7 @@ public: static void doTest3(const SizeArray& size) { - if (std::is_same::value) // GpuMat::convertTo is not implemented for CV_16F + if (std::is_same::value) // GpuMat::convertTo is not implemented for CV_16F return; const MatType gold = RandomMat(size); @@ -140,7 +140,7 @@ public: static void doTest4(const SizeArray& size) { - if (std::is_same::value) // GpuMat::convertTo is not implemented for CV_16F + if (std::is_same::value) // GpuMat::convertTo is not implemented for CV_16F return; const MatType gold = RandomMat(size); @@ -169,7 +169,7 @@ public: static void doTest5(const SizeArray& size) { - if (std::is_same::value) // GpuMat::convertTo is 
not implemented for CV_16F + if (std::is_same::value) // GpuMat::convertTo is not implemented for CV_16F return; const MatType gold = RandomMat(size); @@ -204,7 +204,7 @@ using ElemTypes = ::testing::Types< Vec, Vec, Vec, Vec, // CV_32S Vec, Vec, Vec, Vec, // CV_32F Vec, Vec, Vec, Vec, //CV_64F - Vec, Vec, Vec, Vec // CV_16F + Vec, Vec, Vec, Vec // CV_16F >; using SizeArray = GpuMatND::SizeArray; From 45f560b027403fa5e6b253076b5e01e330390bc4 Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 22 Mar 2024 10:59:29 +0300 Subject: [PATCH 14/19] added getColorCharts() --- .../mcc/include/opencv2/mcc/checker_model.hpp | 9 ++ modules/mcc/src/checker_detector.cpp | 25 ------ modules/mcc/src/checker_detector.hpp | 5 -- modules/mcc/src/checker_model.cpp | 87 ++++++++++--------- modules/mcc/src/checker_model.hpp | 10 +-- 5 files changed, 57 insertions(+), 79 deletions(-) diff --git a/modules/mcc/include/opencv2/mcc/checker_model.hpp b/modules/mcc/include/opencv2/mcc/checker_model.hpp index 0768c691e05..5552ea4030b 100644 --- a/modules/mcc/include/opencv2/mcc/checker_model.hpp +++ b/modules/mcc/include/opencv2/mcc/checker_model.hpp @@ -89,6 +89,15 @@ class CV_EXPORTS_W CChecker CV_WRAP virtual TYPECHART getTarget() = 0; CV_WRAP virtual std::vector getBox() = 0; + + /** @brief Computes and returns the coordinates of the central parts of the charts modules. + * + * This method computes transformation matrix from the checkers's coordinates (`cv::mcc::CChecker::getBox()`) + * and find by this the coordinates of the central parts of the charts modules. + * It is used in `cv::mcc::CCheckerDraw::draw()` and in `ChartsRGB` calculation. 
+ */ + CV_WRAP virtual std::vector getColorCharts() = 0; + CV_WRAP virtual Mat getChartsRGB() = 0; CV_WRAP virtual Mat getChartsYCbCr() = 0; CV_WRAP virtual float getCost() = 0; diff --git a/modules/mcc/src/checker_detector.cpp b/modules/mcc/src/checker_detector.cpp index 7972bd0a7de..31ea62f3358 100644 --- a/modules/mcc/src/checker_detector.cpp +++ b/modules/mcc/src/checker_detector.cpp @@ -1176,31 +1176,6 @@ void CCheckerDetectorImpl:: x_new.insert(x_new.begin() + idx + 1, (x_new[idx] + x_new[idx + 1]) / 2); } -void CCheckerDetectorImpl:: - transform_points_forward(InputArray T, const std::vector &X, std::vector &Xt) -{ - size_t N = X.size(); - if (N == 0) - return; - - Xt.clear(); - Xt.resize(N); - cv::Matx31f p, xt; - cv::Point2f pt; - - cv::Matx33f _T = T.getMat(); - for (int i = 0; i < (int)N; i++) - { - p(0, 0) = X[i].x; - p(1, 0) = X[i].y; - p(2, 0) = 1; - xt = _T * p; - pt.x = xt(0, 0) / xt(2, 0); - pt.y = xt(1, 0) / xt(2, 0); - Xt[i] = pt; - } -} - void CCheckerDetectorImpl:: transform_points_inverse(InputArray T, const std::vector &X, std::vector &Xt) { diff --git a/modules/mcc/src/checker_detector.hpp b/modules/mcc/src/checker_detector.hpp index 75b1644a51b..4c922b5d1d4 100644 --- a/modules/mcc/src/checker_detector.hpp +++ b/modules/mcc/src/checker_detector.hpp @@ -171,11 +171,6 @@ class CCheckerDetectorImpl : public CCheckerDetector std::vector &x_new, float tol); - void transform_points_forward( - InputArray T, - const std::vector &X, - std::vector &Xt); - void transform_points_inverse( InputArray T, const std::vector &X, diff --git a/modules/mcc/src/checker_model.cpp b/modules/mcc/src/checker_model.cpp index 2062e7705e5..310ed8baf96 100644 --- a/modules/mcc/src/checker_model.cpp +++ b/modules/mcc/src/checker_model.cpp @@ -411,6 +411,39 @@ std::vector CCheckerImpl::getBox() { return box; } +std::vector CCheckerImpl::getColorCharts() +{ + // color chart classic model + CChartModel cccm(getTarget()); + Mat lab; + size_t N; + std::vector fbox = cccm.box; 
+ std::vector cellchart = cccm.cellchart; + std::vector charts(cellchart.size()); + + // tranformation + Matx33f ccT = getPerspectiveTransform(fbox, getBox()); + + std::vector bch(4), bcht(4); + N = cellchart.size() / 4; + for (size_t i = 0, k; i < N; i++) + { + k = 4 * i; + for (size_t j = 0ull; j < 4ull; j++) + bch[j] = cellchart[k + j]; + + polyanticlockwise(bch); + transform_points_forward(ccT, bch, bcht); + + Point2f c(0, 0); + for (size_t j = 0; j < 4; j++) + c += bcht[j]; + c /= 4; + for (size_t j = 0ull; j < 4ull; j++) + charts[k+j] = ((bcht[j] - c) * 0.50) + c; + } + return charts; +} Mat CCheckerImpl::getChartsRGB() { return chartsRGB; @@ -435,70 +468,40 @@ Ptr CCheckerDraw::create(Ptr pChecker, cv::Scalar color return makePtr(pChecker, color, thickness); } -void CCheckerDrawImpl:: - draw(InputOutputArray img) +void CCheckerDrawImpl::draw(InputOutputArray img) { - - // color chart classic model - CChartModel cccm(m_pChecker->getTarget()); - cv::Mat lab; - size_t N; - std::vector fbox = cccm.box; - std::vector cellchart = cccm.cellchart; - - // tranformation - cv::Matx33f ccT = cv::getPerspectiveTransform(fbox, m_pChecker->getBox()); - - std::vector bch(4), bcht(4); - N = cellchart.size() / 4; + std::vector charts = m_pChecker->getColorCharts(); + size_t N = charts.size() / 4; for (size_t i = 0, k; i < N; i++) { k = 4 * i; - bch[0] = cellchart[k + 0]; - bch[1] = cellchart[k + 1]; - bch[2] = cellchart[k + 2]; - bch[3] = cellchart[k + 3]; - - polyanticlockwise(bch); - transform_points_forward(ccT, bch, bcht); - - cv::Point2f c(0, 0); for (size_t j = 0; j < 4; j++) - c += bcht[j]; - c /= 4; - for (size_t j = 0; j < 4; j++) - bcht[j] = ((bcht[j] - c) * 0.50) + c; - - cv::line(img, bcht[0], bcht[1], m_color, m_thickness, LINE_AA); - cv::line(img, bcht[1], bcht[2], m_color, m_thickness, LINE_AA); - cv::line(img, bcht[2], bcht[3], m_color, m_thickness, LINE_AA); - cv::line(img, bcht[3], bcht[0], m_color, m_thickness, LINE_AA); + cv::line(img, charts[k+j], 
charts[k+((j + 1) % 4)], m_color, m_thickness, LINE_AA); } } -void CCheckerDrawImpl:: - transform_points_forward(InputArray T, const std::vector &X, std::vector &Xt) +void transform_points_forward(const Matx33f& T, const std::vector &X, std::vector &Xt) { - - cv::Matx33f _T = T.getMat(); size_t N = X.size(); - Xt.clear(); - Xt.resize(N); + if (Xt.size() != N) + Xt.resize(N); + std::fill(Xt.begin(), Xt.end(), Point2f(0.f, 0.f)); if (N == 0) return; - cv::Matx31f p, xt; - cv::Point2f pt; + Matx31f p, xt; + Point2f pt; for (size_t i = 0; i < N; i++) { p(0, 0) = X[i].x; p(1, 0) = X[i].y; p(2, 0) = 1; - xt = _T * p; + xt = T * p; pt.x = xt(0, 0) / xt(2, 0); pt.y = xt(1, 0) / xt(2, 0); Xt[i] = pt; } } + } // namespace mcc } // namespace cv diff --git a/modules/mcc/src/checker_model.hpp b/modules/mcc/src/checker_model.hpp index 31b85a5a144..4f116a8bdf5 100644 --- a/modules/mcc/src/checker_model.hpp +++ b/modules/mcc/src/checker_model.hpp @@ -137,6 +137,7 @@ class CCheckerImpl : public CChecker TYPECHART getTarget() CV_OVERRIDE; std::vector getBox() CV_OVERRIDE; + std::vector getColorCharts() CV_OVERRIDE; Mat getChartsRGB() CV_OVERRIDE; Mat getChartsYCbCr() CV_OVERRIDE; float getCost() CV_OVERRIDE; @@ -173,16 +174,11 @@ class CCheckerDrawImpl : public CCheckerDraw Ptr m_pChecker; cv::Scalar m_color; int m_thickness; - -private: - /** \brief transformation perspetive*/ - void transform_points_forward( - InputArray T, - const std::vector &X, - std::vector &Xt); }; // @} +void transform_points_forward(const Matx33f& T, const std::vector &X, std::vector &Xt); + } // namespace mcc } // namespace cv From 5300337197b7f580c96101953606e5f78890103c Mon Sep 17 00:00:00 2001 From: Vincent Rabaud Date: Tue, 26 Mar 2024 10:06:57 +0100 Subject: [PATCH 15/19] Merge pull request #3703 from vrabaud:cpp Use proper C++ types. #3703 This is necessary to get https://github.com/opencv/opencv/pull/25248 working. 
### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake --- modules/videostab/src/precomp.hpp | 2 +- .../adaptive_threshold_mean_binarizer.cpp | 4 ++-- modules/xphoto/src/annf.hpp | 2 +- modules/xphoto/src/gcgraph.hpp | 2 +- modules/xphoto/src/inpainting.cpp | 18 +++++++++--------- modules/xphoto/src/norm2.hpp | 4 ++-- modules/xphoto/src/oilpainting.cpp | 14 +++++++------- modules/xphoto/src/photomontage.hpp | 8 ++++---- 8 files changed, 27 insertions(+), 27 deletions(-) diff --git a/modules/videostab/src/precomp.hpp b/modules/videostab/src/precomp.hpp index aa6026deee5..a353d35b439 100644 --- a/modules/videostab/src/precomp.hpp +++ b/modules/videostab/src/precomp.hpp @@ -59,7 +59,7 @@ inline float sqr(float x) { return x * x; } -inline float intensity(const cv::Point3_ &bgr) +inline float intensity(const cv::Point3_ &bgr) { return 0.3f*bgr.x + 0.59f*bgr.y + 0.11f*bgr.z; } diff --git a/modules/wechat_qrcode/src/zxing/common/binarizer/adaptive_threshold_mean_binarizer.cpp b/modules/wechat_qrcode/src/zxing/common/binarizer/adaptive_threshold_mean_binarizer.cpp index 38a79b378e1..3f879f47fc2 100644 --- a/modules/wechat_qrcode/src/zxing/common/binarizer/adaptive_threshold_mean_binarizer.cpp +++ b/modules/wechat_qrcode/src/zxing/common/binarizer/adaptive_threshold_mean_binarizer.cpp @@ -88,7 +88,7 @@ int 
AdaptiveThresholdMeanBinarizer::TransMatToBuffer(cv::Mat mSrc, unsigned char unsigned char* pdi = ppBuffer + j * nWidth; for (int z = 0; z < nWidth; ++z) { int nj = nHeight - j - 1; - int value = *(uchar*)(mSrc.ptr(nj) + z); + int value = *(uint8_t*)(mSrc.ptr(nj) + z); if (value > 120) pdi[z] = 0; else @@ -96,4 +96,4 @@ int AdaptiveThresholdMeanBinarizer::TransMatToBuffer(cv::Mat mSrc, unsigned char } } return 0; -} \ No newline at end of file +} diff --git a/modules/xphoto/src/annf.hpp b/modules/xphoto/src/annf.hpp index 111469fe1d0..53a9793baae 100644 --- a/modules/xphoto/src/annf.hpp +++ b/modules/xphoto/src/annf.hpp @@ -263,7 +263,7 @@ static void dominantTransforms(const cv::Mat &img, std::vector &tr cv::GaussianBlur( annfHist, annfHist, cv::Size(0, 0), std::sqrt(2.0), 0.0, cv::BORDER_CONSTANT); cv::dilate( annfHist, _annfHist, - cv::Matx::ones() ); + cv::Matx::ones() ); std::vector < std::pair > amount; std::vector shiftM; diff --git a/modules/xphoto/src/gcgraph.hpp b/modules/xphoto/src/gcgraph.hpp index 513e9c2d117..f30073a7735 100644 --- a/modules/xphoto/src/gcgraph.hpp +++ b/modules/xphoto/src/gcgraph.hpp @@ -187,7 +187,7 @@ TWeight GCGraph::maxFlow() Vtx* v, *u; int e0 = -1, ei = 0, ej = 0; TWeight minWeight, weight; - uchar vt; + uint8_t vt; // grow S & T search trees, find an edge connecting them while( first != nilNode ) diff --git a/modules/xphoto/src/inpainting.cpp b/modules/xphoto/src/inpainting.cpp index d6a406ca623..8a9694c9539 100644 --- a/modules/xphoto/src/inpainting.cpp +++ b/modules/xphoto/src/inpainting.cpp @@ -99,7 +99,7 @@ namespace xphoto for (int i = 0; i < ddmask.rows; ++i) { - uchar *dmask_data = (uchar *) ddmask.template ptr(i); + uint8_t *dmask_data = (uint8_t *) ddmask.template ptr(i); int *backref_data = (int *) backref.template ptr< int >(i); for (int j = 0; j < ddmask.cols; ++j) @@ -123,7 +123,7 @@ namespace xphoto for (size_t i = 0; i < pPath.size(); ++i) { - uchar xmask = dmask.template at(pPath[i]); + uint8_t xmask = 
dmask.template at(pPath[i]); for (int j = 0; j < nTransform + 1; ++j) { @@ -136,7 +136,7 @@ namespace xphoto && u.x < src.cols && u.x >= 0 ) { if ( xmask == 0 || j == nTransform ) - vmask = mask.template at(u); + vmask = mask.template at(u); vimg = img.template at >(u); } @@ -221,14 +221,14 @@ namespace xphoto }; std::vector > pointVec; - std::vector maskVec; + std::vector maskVec; for (uint q = 0; q < sizeof(dv)/sizeof(cv::Point2i); ++q) if (u.x + dv[q].x >= 0 && u.x + dv[q].x < img.cols && u.y + dv[q].y >= 0 && u.y + dv[q].y < img.rows) { pointVec.push_back(img.template at >(u + dv[q])); - maskVec.push_back(_mask.template at(u + dv[q])); + maskVec.push_back(_mask.template at(u + dv[q])); } else { @@ -325,16 +325,16 @@ namespace xphoto inpaint ( src, mask, dst, algorithmType ); break; case CV_8UC1: - inpaint ( src, mask, dst, algorithmType ); + inpaint ( src, mask, dst, algorithmType ); break; case CV_8UC2: - inpaint ( src, mask, dst, algorithmType ); + inpaint ( src, mask, dst, algorithmType ); break; case CV_8UC3: - inpaint ( src, mask, dst, algorithmType ); + inpaint ( src, mask, dst, algorithmType ); break; case CV_8UC4: - inpaint ( src, mask, dst, algorithmType ); + inpaint ( src, mask, dst, algorithmType ); break; case CV_16SC1: inpaint ( src, mask, dst, algorithmType ); diff --git a/modules/xphoto/src/norm2.hpp b/modules/xphoto/src/norm2.hpp index 027e522da3a..205827d4650 100644 --- a/modules/xphoto/src/norm2.hpp +++ b/modules/xphoto/src/norm2.hpp @@ -60,7 +60,7 @@ template struct same_as : ttype {}; // is_same template struct is_norm2_type : int_const::value - && !same_as<_Tp, uchar>::value + && !same_as<_Tp, uint8_t>::value && !same_as<_Tp, ushort>::value && !same_as<_Tp, uint>::value>{}; @@ -70,4 +70,4 @@ template static inline typename iftype< is_norm2_type<_Tp template static inline typename iftype< is_norm2_type<_Tp>::value, _Tp >:: type norm2(const _Tp &a, const _Tp &b) { return (a - b)*(a - b); } -#endif /* __NORM2_HPP__ */ \ No newline at end of 
file +#endif /* __NORM2_HPP__ */ diff --git a/modules/xphoto/src/oilpainting.cpp b/modules/xphoto/src/oilpainting.cpp index daeffd386a7..61e42177e81 100644 --- a/modules/xphoto/src/oilpainting.cpp +++ b/modules/xphoto/src/oilpainting.cpp @@ -18,9 +18,9 @@ public : }; template<> -uchar Vec3fTo::extract() +uint8_t Vec3fTo::extract() { - return static_cast(a[0]); + return static_cast(a[0]); } template<> @@ -30,7 +30,7 @@ cv::Vec3b Vec3fTo::extract() } template<> -cv::Vec3f Vec3fTo::make(int x) +cv::Vec3f Vec3fTo::make(int x) { return cv::Vec3f((a*x)/x); } @@ -84,7 +84,7 @@ class ParallelOilPainting : public ParallelLoopBody if (y + yy >= 0 && y + yy < imgSrc.rows) { Type *vPtr = imgSrc.ptr(y + yy) + x - 0; - uchar *uc = imgLuminance.ptr(y + yy) + x - 0; + uint8_t *uc = imgLuminance.ptr(y + yy) + x - 0; for (int xx = 0; xx <= halfsize; xx++, vPtr++, uc++) { if (x + xx >= 0 && x + xx < imgSrc.cols) @@ -104,7 +104,7 @@ class ParallelOilPainting : public ParallelLoopBody if (y + yy >= 0 && y + yy < imgSrc.rows) { Type *vPtr = imgSrc.ptr(y + yy) + x - halfsize - 1; - uchar *uc = imgLuminance.ptr(y + yy) + x - halfsize - 1; + uint8_t *uc = imgLuminance.ptr(y + yy) + x - halfsize - 1; int xx = -halfsize - 1; if (x + xx >= 0 && x + xx < imgSrc.cols) { @@ -154,10 +154,10 @@ void oilPainting(InputArray _src, OutputArray _dst, int size, int dynValue,int c else lum = src.clone(); double dratio = 1 / double(dynValue); - lum.forEach([=](uchar &pixel, const int * /*position*/) { pixel = saturate_cast(cvRound(pixel * dratio)); }); + lum.forEach([=](uint8_t &pixel, const int * /*position*/) { pixel = saturate_cast(cvRound(pixel * dratio)); }); if (_src.type() == CV_8UC1) { - ParallelOilPainting oilAlgo(src, dst, lum, size, dynValue); + ParallelOilPainting oilAlgo(src, dst, lum, size, dynValue); parallel_for_(Range(0, src.rows), oilAlgo); } else diff --git a/modules/xphoto/src/photomontage.hpp b/modules/xphoto/src/photomontage.hpp index bdafb360dc7..940ea8d2064 100644 --- 
a/modules/xphoto/src/photomontage.hpp +++ b/modules/xphoto/src/photomontage.hpp @@ -79,7 +79,7 @@ template class Photomontage { private: const std::vector > &pointSeq; // points for stitching - const std::vector > &maskSeq; // corresponding masks + const std::vector > &maskSeq; // corresponding masks const std::vector > &linkIdx; // vector of neighbors for pointSeq @@ -116,7 +116,7 @@ template class Photomontage void gradientDescent(); // gradient descent in alpha-expansion topology Photomontage(const std::vector > &pointSeq, - const std::vector > &maskSeq, + const std::vector > &maskSeq, const std::vector > &linkIdx, std::vector &labelSeq); virtual ~Photomontage(){}; @@ -219,7 +219,7 @@ gradientDescent() template Photomontage :: Photomontage( const std::vector > &_pointSeq, - const std::vector > &_maskSeq, + const std::vector > &_maskSeq, const std::vector > &_linkIdx, std::vector &_labelSeq ) : @@ -235,7 +235,7 @@ Photomontage( const std::vector > &_pointSeq, template static inline void photomontage( const std::vector > &pointSeq, - const std::vector > &maskSeq, + const std::vector > &maskSeq, const std::vector > &linkIdx, std::vector &labelSeq ) { From 5e592c2d96cfacdca971e5d1bac5ec9eab3b01ed Mon Sep 17 00:00:00 2001 From: Alexander Panov Date: Tue, 26 Mar 2024 12:27:15 +0300 Subject: [PATCH 16/19] Merge pull request #3699 from AleksandrPanov:mcc_add_perf_tests_improve_performance Mcc add perf tests improve performance #3699 Added perf tests to mcc module. Also these optimizations have been added: - added `parallel_for_` to `performThreshold()` - removed `toL`/`fromL` and added `dst` to avoid copy data - added `parallel_for_` to `elementWise()` ("batch" optimization improves performance of Windows version, Linux without changes). 
Configuration: Ryzen 5950X, 2x16 GB 3000 MHz DDR4 OS: Windows 10, Ubuntu 20.04.5 LTS Performance results in milliseconds: | OS and alg version | process, ms | infer, ms | | -------------------- | ----- | ------ | | win_default | 63.09 | 457.57 | | win_optimized_without_batch | 48.69 | 111.78 | | win_optimized_batch | 48.42 | 47.28 | | linux_default | 50.88 | 300.7 | | linux_optimized_batch| 36.06 | 41.62 | ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. 
- [ ] The feature is well documented and sample code can be built with the project CMake --- modules/mcc/perf/perf_main.cpp | 3 ++ modules/mcc/perf/perf_mcc.cpp | 51 ++++++++++++++++++++++++++++ modules/mcc/perf/perf_precomp.hpp | 14 ++++++++ modules/mcc/src/ccm.cpp | 7 ++-- modules/mcc/src/checker_detector.cpp | 23 +++++++------ modules/mcc/src/colorspace.cpp | 37 +++++++++----------- modules/mcc/src/colorspace.hpp | 21 +++++------- modules/mcc/src/utils.cpp | 6 ++-- modules/mcc/src/utils.hpp | 23 +++++++++++-- 9 files changed, 131 insertions(+), 54 deletions(-) create mode 100644 modules/mcc/perf/perf_main.cpp create mode 100644 modules/mcc/perf/perf_mcc.cpp create mode 100644 modules/mcc/perf/perf_precomp.hpp diff --git a/modules/mcc/perf/perf_main.cpp b/modules/mcc/perf/perf_main.cpp new file mode 100644 index 00000000000..c6d28db59f8 --- /dev/null +++ b/modules/mcc/perf/perf_main.cpp @@ -0,0 +1,3 @@ +#include "perf_precomp.hpp" + +CV_PERF_TEST_MAIN(mcc) diff --git a/modules/mcc/perf/perf_mcc.cpp b/modules/mcc/perf/perf_mcc.cpp new file mode 100644 index 00000000000..f5e721074b7 --- /dev/null +++ b/modules/mcc/perf/perf_mcc.cpp @@ -0,0 +1,51 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "perf_precomp.hpp" + +namespace opencv_test +{ +namespace +{ + +using namespace std; + +PERF_TEST(CV_mcc_perf, detect) { + string path = cvtest::findDataFile("cv/mcc/mcc_ccm_test.jpg"); + Mat img = imread(path, IMREAD_COLOR); + Ptr detector = CCheckerDetector::create(); + + // detect MCC24 board + TEST_CYCLE() { + ASSERT_TRUE(detector->process(img, MCC24, 1, false)); + } + SANITY_CHECK_NOTHING(); +} + +PERF_TEST(CV_mcc_perf, infer) { + // read gold chartsRGB + string path = cvtest::findDataFile("cv/mcc/mcc_ccm_test.yml"); + FileStorage fs(path, FileStorage::READ); + Mat chartsRGB; + FileNode node = fs["chartsRGB"]; + node >> chartsRGB; + fs.release(); + + // compute CCM + ColorCorrectionModel model(chartsRGB.col(1).clone().reshape(3, chartsRGB.rows/3) / 255., COLORCHECKER_Macbeth); + model.run(); + + Mat img(1000, 4000, CV_8UC3); + randu(img, 0, 255); + img.convertTo(img, CV_64F, 1. / 255.); + + TEST_CYCLE() { + model.infer(img); + } + + SANITY_CHECK_NOTHING(); +} + +} // namespace +} // namespace opencv_test diff --git a/modules/mcc/perf/perf_precomp.hpp b/modules/mcc/perf/perf_precomp.hpp new file mode 100644 index 00000000000..5ef54694959 --- /dev/null +++ b/modules/mcc/perf/perf_precomp.hpp @@ -0,0 +1,14 @@ +#ifndef __OPENCV_PERF_PRECOMP_HPP__ +#define __OPENCV_PERF_PRECOMP_HPP__ + +#include "opencv2/ts.hpp" +#include "opencv2/mcc.hpp" + +namespace opencv_test +{ +using namespace cv::mcc; +using namespace cv::ccm; +using namespace perf; +} + +#endif diff --git a/modules/mcc/src/ccm.cpp b/modules/mcc/src/ccm.cpp index 7e26d164124..aec535e05b4 100644 --- a/modules/mcc/src/ccm.cpp +++ b/modules/mcc/src/ccm.cpp @@ -289,14 +289,13 @@ Mat ColorCorrectionModel::infer(const Mat& img, bool islinear) CV_Error(Error::StsBadArg, "No CCM values!" 
); } Mat img_lin = (p->linear)->linearize(img); - Mat img_ccm(img_lin.size(), img_lin.type()); - Mat ccm_ = p->ccm.reshape(0, p->shape / 3); - img_ccm = multiple(p->prepare(img_lin), ccm_); + Mat ccm = p->ccm.reshape(0, p->shape / 3); + Mat img_ccm = multiple(p->prepare(img_lin), ccm); if (islinear == true) { return img_ccm; } - return p->cs.fromL(img_ccm); + return p->cs.fromLFunc(img_ccm, img_lin); } void ColorCorrectionModel::Impl::getColor(CONST_COLOR constcolor) diff --git a/modules/mcc/src/checker_detector.cpp b/modules/mcc/src/checker_detector.cpp index 7972bd0a7de..58c703a6a81 100644 --- a/modules/mcc/src/checker_detector.cpp +++ b/modules/mcc/src/checker_detector.cpp @@ -539,17 +539,18 @@ void CCheckerDetectorImpl:: // number of window sizes (scales) to apply adaptive thresholding int nScales = (params->adaptiveThreshWinSizeMax - params->adaptiveThreshWinSizeMin) / params->adaptiveThreshWinSizeStep + 1; thresholdImgs.create(nScales, 1, CV_8U); - std::vector _thresholdImgs; - for (int i = 0; i < nScales; i++) - { - int currScale = params->adaptiveThreshWinSizeMin + i * params->adaptiveThreshWinSizeStep; - - cv::Mat tempThresholdImg; - cv::adaptiveThreshold(grayscaleImg, tempThresholdImg, 255, cv::ADAPTIVE_THRESH_MEAN_C, - cv::THRESH_BINARY_INV, currScale, params->adaptiveThreshConstant); - - _thresholdImgs.push_back(tempThresholdImg); - } + std::vector _thresholdImgs(nScales); + parallel_for_(Range(0, nScales),[&](const Range& range) { + const int start = range.start; + const int end = range.end; + for (int i = start; i < end; i++) { + int currScale = params->adaptiveThreshWinSizeMin + i * params->adaptiveThreshWinSizeStep; + cv::Mat tempThresholdImg; + cv::adaptiveThreshold(grayscaleImg, tempThresholdImg, 255, ADAPTIVE_THRESH_MEAN_C, + THRESH_BINARY_INV, currScale, params->adaptiveThreshConstant); + _thresholdImgs[i] = tempThresholdImg; + } + }); thresholdImgs.assign(_thresholdImgs); } diff --git a/modules/mcc/src/colorspace.cpp 
b/modules/mcc/src/colorspace.cpp index 9dfe3f6e125..f7ee5cf2208 100644 --- a/modules/mcc/src/colorspace.cpp +++ b/modules/mcc/src/colorspace.cpp @@ -83,9 +83,9 @@ Operations RGBBase_::relation(const ColorSpace& other) const } if (linear) { - return Operations({ Operation(fromL) }); + return Operations({ Operation([this](Mat rgbl) -> Mat { return fromLFunc(rgbl); }) }); } - return Operations({ Operation(toL) }); + return Operations({ Operation([this](Mat rgb) -> Mat { return toLFunc(rgb); })}); } /* @brief Initial operations. @@ -134,12 +134,6 @@ void RGBBase_::calM() */ void RGBBase_::calOperations() { - // rgb -> rgbl - toL = [this](Mat rgb) -> Mat { return toLFunc(rgb); }; - - // rgbl -> rgb - fromL = [this](Mat rgbl) -> Mat { return fromLFunc(rgbl); }; - if (linear) { to = Operations({ Operation(M_to.t()) }); @@ -147,23 +141,25 @@ void RGBBase_::calOperations() } else { - to = Operations({ Operation(toL), Operation(M_to.t()) }); - from = Operations({ Operation(M_from.t()), Operation(fromL) }); + // rgb -> rgbl + to = Operations({ Operation([this](Mat rgb) -> Mat { return toLFunc(rgb); }), Operation(M_to.t()) }); + // rgbl -> rgb + from = Operations({ Operation(M_from.t()), Operation([this](Mat rgbl) -> Mat { return fromLFunc(rgbl); }) }); } } -Mat RGBBase_::toLFunc(Mat& /*rgb*/) { return Mat(); } +Mat RGBBase_::toLFunc(Mat& /*rgb*/) const { return Mat(); } -Mat RGBBase_::fromLFunc(Mat& /*rgbl*/) { return Mat(); } +Mat RGBBase_::fromLFunc(Mat& /*rgbl*/, Mat dst) const { return dst; } /* @brief Base of Adobe RGB color space; */ -Mat AdobeRGBBase_::toLFunc(Mat& rgb) { return gammaCorrection(rgb, gamma); } +Mat AdobeRGBBase_::toLFunc(Mat& rgb) const { return gammaCorrection(rgb, gamma); } -Mat AdobeRGBBase_::fromLFunc(Mat& rgbl) +Mat AdobeRGBBase_::fromLFunc(Mat& rgbl, Mat dst) const { - return gammaCorrection(rgbl, 1. / gamma); + return gammaCorrection(rgbl, 1. 
/ gamma, dst); } /* @brief Base of sRGB color space; @@ -179,7 +175,7 @@ void sRGBBase_::calLinear() /* @brief Used by toLFunc. */ -double sRGBBase_::toLFuncEW(double& x) +double sRGBBase_::toLFuncEW(double& x) const { if (x > K0) { @@ -199,7 +195,7 @@ double sRGBBase_::toLFuncEW(double& x) * @param rgb the input array, type of cv::Mat. * @return the output array, type of cv::Mat. */ -Mat sRGBBase_::toLFunc(Mat& rgb) +Mat sRGBBase_::toLFunc(Mat& rgb) const { return elementWise(rgb, [this](double a_) -> double { return toLFuncEW(a_); }); @@ -207,7 +203,7 @@ Mat sRGBBase_::toLFunc(Mat& rgb) /* @brief Used by fromLFunc. */ -double sRGBBase_::fromLFuncEW(double& x) +double sRGBBase_::fromLFuncEW(const double& x) const { if (x > beta) { @@ -227,10 +223,9 @@ double sRGBBase_::fromLFuncEW(double& x) * @param rgbl the input array, type of cv::Mat. * @return the output array, type of cv::Mat. */ -Mat sRGBBase_::fromLFunc(Mat& rgbl) +Mat sRGBBase_::fromLFunc(Mat& rgbl, Mat dst) const { - return elementWise(rgbl, - [this](double a_) -> double { return fromLFuncEW(a_); }); + return elementWise(rgbl, [this](double a_) -> double { return fromLFuncEW(a_); }, dst); } /* @brief sRGB color space. 
diff --git a/modules/mcc/src/colorspace.hpp b/modules/mcc/src/colorspace.hpp index 57b5bc2ff40..572fea38781 100644 --- a/modules/mcc/src/colorspace.hpp +++ b/modules/mcc/src/colorspace.hpp @@ -83,8 +83,6 @@ class RGBBase_ : public ColorSpace double yg; double xb; double yb; - MatFunc toL; - MatFunc fromL; Mat M_to; Mat M_from; @@ -108,6 +106,9 @@ class RGBBase_ : public ColorSpace */ void bind(RGBBase_& rgbl); + virtual Mat toLFunc(Mat& /*rgb*/) const; + + virtual Mat fromLFunc(Mat& /*rgbl*/, Mat dst=Mat()) const; private: virtual void setParameter() {}; @@ -120,10 +121,6 @@ class RGBBase_ : public ColorSpace virtual void calOperations(); virtual void calLinear() {}; - - virtual Mat toLFunc(Mat& /*rgb*/); - - virtual Mat fromLFunc(Mat& /*rgbl*/); }; /** @brief Base of Adobe RGB color space; @@ -136,8 +133,8 @@ class AdobeRGBBase_ : public RGBBase_ double gamma; private: - Mat toLFunc(Mat& rgb) CV_OVERRIDE; - Mat fromLFunc(Mat& rgbl) CV_OVERRIDE; + Mat toLFunc(Mat& rgb) const CV_OVERRIDE; + Mat fromLFunc(Mat& rgbl, Mat dst=Mat()) const CV_OVERRIDE; }; /** @brief Base of sRGB color space; @@ -160,23 +157,23 @@ class sRGBBase_ : public RGBBase_ virtual void calLinear() CV_OVERRIDE; /** @brief Used by toLFunc. */ - double toLFuncEW(double& x); + double toLFuncEW(double& x) const; /** @brief Linearization. @param rgb the input array, type of cv::Mat. @return the output array, type of cv::Mat. */ - Mat toLFunc(Mat& rgb) CV_OVERRIDE; + Mat toLFunc(Mat& rgb) const CV_OVERRIDE; /** @brief Used by fromLFunc. */ - double fromLFuncEW(double& x); + double fromLFuncEW(const double& x) const; /** @brief Delinearization. @param rgbl the input array, type of cv::Mat. @return the output array, type of cv::Mat. */ - Mat fromLFunc(Mat& rgbl) CV_OVERRIDE; + Mat fromLFunc(Mat& rgbl, Mat dst=Mat()) const CV_OVERRIDE; }; /** @brief sRGB color space. 
diff --git a/modules/mcc/src/utils.cpp b/modules/mcc/src/utils.cpp index 3a0128b6ef6..ceac095a5ab 100644 --- a/modules/mcc/src/utils.cpp +++ b/modules/mcc/src/utils.cpp @@ -30,14 +30,14 @@ namespace cv { namespace ccm { -double gammaCorrection_(const double& element, const double& gamma) +inline double gammaCorrection_(const double& element, const double& gamma) { return (element >= 0 ? pow(element, gamma) : -pow((-element), gamma)); } -Mat gammaCorrection(const Mat& src, const double& gamma) +Mat gammaCorrection(const Mat& src, const double& gamma, Mat dst) { - return elementWise(src, [gamma](double element) -> double { return gammaCorrection_(element, gamma); }); + return elementWise(src, [gamma](double element) -> double { return gammaCorrection_(element, gamma); }, dst); } Mat maskCopyTo(const Mat& src, const Mat& mask) diff --git a/modules/mcc/src/utils.hpp b/modules/mcc/src/utils.hpp index 02570ca0184..07ca65cb968 100644 --- a/modules/mcc/src/utils.hpp +++ b/modules/mcc/src/utils.hpp @@ -42,8 +42,9 @@ double gammaCorrection_(const double& element, const double& gamma); \f] @param src the input array,type of Mat. @param gamma a constant for gamma correction. + @param dst the output array, type of Mat. */ -Mat gammaCorrection(const Mat& src, const double& gamma); +Mat gammaCorrection(const Mat& src, const double& gamma, Mat dst=Mat()); /** @brief maskCopyTo a function to delete unsatisfied elementwise. @param src the input array, type of Mat. 
@@ -77,10 +78,26 @@ Mat rgb2gray(const Mat& rgb); @param lambda a for operation */ template -Mat elementWise(const Mat& src, F&& lambda) +Mat elementWise(const Mat& src, F&& lambda, Mat dst=Mat()) { - Mat dst = src.clone(); + if (dst.empty() || !dst.isContinuous() || dst.total() != src.total() || dst.type() != src.type()) + dst = Mat(src.rows, src.cols, src.type()); const int channel = src.channels(); + if (src.isContinuous()) { + const int num_elements = (int)src.total()*channel; + const double *psrc = (double*)src.data; + double *pdst = (double*)dst.data; + const int batch = getNumThreads() > 1 ? 128 : num_elements; + const int N = (num_elements / batch) + ((num_elements % batch) > 0); + parallel_for_(Range(0, N),[&](const Range& range) { + const int start = range.start * batch; + const int end = std::min(range.end*batch, num_elements); + for (int i = start; i < end; i++) { + pdst[i] = lambda(psrc[i]); + } + }); + return dst; + } switch (channel) { case 1: From 9edb0cee37877792684356dd50f92d2db5c7a74e Mon Sep 17 00:00:00 2001 From: Vincent Rabaud Date: Tue, 26 Mar 2024 15:01:41 +0100 Subject: [PATCH 17/19] Use proper C++ types again. This is necessary to get https://github.com/opencv/opencv/pull/25248 working. 
This was missed in 5300337197b7f580c96101953606e5f78890103c --- modules/xphoto/src/norm2.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/xphoto/src/norm2.hpp b/modules/xphoto/src/norm2.hpp index 205827d4650..1f26ef9c4be 100644 --- a/modules/xphoto/src/norm2.hpp +++ b/modules/xphoto/src/norm2.hpp @@ -59,10 +59,10 @@ template struct same_as : ttype {}; // is_same template struct is_norm2_type : - int_const::value + int_const::value && !same_as<_Tp, uint8_t>::value - && !same_as<_Tp, ushort>::value - && !same_as<_Tp, uint>::value>{}; + && !same_as<_Tp, uint16_t>::value + && !same_as<_Tp, uint32_t>::value>{}; template static inline typename iftype< is_norm2_type<_Tp>::value, _Tp >:: type norm2(cv::Vec<_Tp, cn> a, cv::Vec<_Tp, cn> b) { return (a - b).dot(a - b); } From 85589dd12e95d9a3ce9222d4bafb574857de9ede Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Wed, 27 Mar 2024 12:31:04 +0300 Subject: [PATCH 18/19] Added performance test for cv::thining. --- modules/ximgproc/perf/perf_thining.cpp | 36 ++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 modules/ximgproc/perf/perf_thining.cpp diff --git a/modules/ximgproc/perf/perf_thining.cpp b/modules/ximgproc/perf/perf_thining.cpp new file mode 100644 index 00000000000..6703a01da80 --- /dev/null +++ b/modules/ximgproc/perf/perf_thining.cpp @@ -0,0 +1,36 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "perf_precomp.hpp" + +namespace opencv_test { namespace { + +typedef tuple ThinningPerfParam; +typedef TestBaseWithParam ThinningPerfTest; + +PERF_TEST_P(ThinningPerfTest, perf, + Combine( + Values(sz1080p, sz720p, szVGA), + Values(THINNING_ZHANGSUEN, THINNING_GUOHALL) + ) +) +{ + ThinningPerfParam params = GetParam(); + Size size = get<0>(params); + int type = get<1>(params); + + Mat src = Mat::zeros(size, CV_8UC1); + for (int x = 50; x < src.cols - 50; x += 50) + cv::circle(src, Point(x, x/2), 30 + x/2, Scalar(255), 5); + + Mat dst; + TEST_CYCLE() + { + thinning(src, dst, type); + } + + SANITY_CHECK_NOTHING(); +} + +}} // namespace From ab8210686e338dd8e5cd902fa01116dc22324315 Mon Sep 17 00:00:00 2001 From: Mengqing Cao <52243582+MengqingCao@users.noreply.github.com> Date: Thu, 28 Mar 2024 15:58:56 +0800 Subject: [PATCH 19/19] Merge pull request #3608 from MengqingCao:dvpp_support Add additional image processing operators for Ascend NPU by utilizing DVPP #3608 The user base for [Ascend NPU](https://www.hiascend.com/en/) and programming with CANN is increasing rapidly, with a growing number of users joining each day. To facilitate the use of these users, this PR provides more support for Ascend backend operators. All operators this PR offers are using use DVPP as the computational unit. Digital Vision Pre-Processing (DVPP) is an image processing unit built into the Ascend AI processor. Its main functions include image and video encoding/decoding, as well as image cropping and scaling. The high-frequency operators with NPU as the backend and basic data structure AscendMat has been provided in #3552, while it still lacks many image processing operators. Moreover, only two interpolation algorithms for the resize operator are supported in #3552. In this PR, the bilinear interpolation algorithm and nearest neighbour interpolation algorithm are implemented for the resize operator, as well as the Ascend implementation of the copyMakeBorder operator. 
In addition, the serialization of image processing operations is widely used in the preprocessing and post-processing stages of computer vision deep learning methods. Therefore, providing integrated operators is very meaningful for improving the convenience of use for OpenCV and deep learning crossover users. For example, torchvision also provides similar operators: [RESIZED_CROP](https://pytorch.org/vision/stable/generated/torchvision.transforms.functional.resized_crop.html?highlight=resizedcrop). Thus, this PR also provides two serialization processing operators: cropResize and cropResizeMakeBorder. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [N/A] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. 
- [x] The feature is well documented and sample code can be built with the project CMake --- modules/cannops/include/opencv2/cann.hpp | 15 + .../include/opencv2/cann_interface.hpp | 165 ++++++++-- modules/cannops/include/opencv2/dvpp_call.hpp | 107 ++++++ .../cannops/misc/python/test/test_cannops.py | 45 +++ modules/cannops/perf/perf_core.cpp | 172 ++++++++++ modules/cannops/perf/perf_main.cpp | 14 +- modules/cannops/src/core.cpp | 209 ++++++++++-- modules/cannops/src/dvpp_call.cpp | 310 ++++++++++++++++++ modules/cannops/src/precomp.hpp | 1 + modules/cannops/test/test_core.cpp | 122 +++++++ modules/cannops/test/test_main.cpp | 14 +- .../ascend_npu_image_processing.markdown | 21 +- 12 files changed, 1126 insertions(+), 69 deletions(-) create mode 100644 modules/cannops/include/opencv2/dvpp_call.hpp create mode 100644 modules/cannops/src/dvpp_call.cpp diff --git a/modules/cannops/include/opencv2/cann.hpp b/modules/cannops/include/opencv2/cann.hpp index bd351481624..4f4f3a7d8d2 100644 --- a/modules/cannops/include/opencv2/cann.hpp +++ b/modules/cannops/include/opencv2/cann.hpp @@ -318,6 +318,21 @@ CV_EXPORTS_W void initAcl(); */ CV_EXPORTS_W void finalizeAcl(); +/** + * @brief init DVPP system. + * @note The DVPP interfaces used are all version V2. + * Supported devices: Atlas Inference Series products, Atlas 200/500 A2 Inference products and + * Atlas A2 Training Series products/Atlas 300I A2 Inference products + */ +CV_EXPORTS_W void initDvpp(); + +/** + * @brief finalize DVPP system. + * @note Supported devices: Atlas Inference Series products, Atlas 200/500 A2 Inference products and + * Atlas A2 Training Series products/Atlas 300I A2 Inference products + */ +CV_EXPORTS_W void finalizeDvpp(); + //! 
@} cann_init } // namespace cann diff --git a/modules/cannops/include/opencv2/cann_interface.hpp b/modules/cannops/include/opencv2/cann_interface.hpp index 6b13090f4f1..8d7c90a1cc1 100644 --- a/modules/cannops/include/opencv2/cann_interface.hpp +++ b/modules/cannops/include/opencv2/cann_interface.hpp @@ -111,6 +111,7 @@ CV_EXPORTS_W void subtract(const Scalar& src1, const AscendMat& src2, CV_OUT Asc * @param scale Optional scale factor. * @param dtype Optional depth of the output array. * @param stream AscendStream for the asynchronous version. + * @note when scale != 1, src must be one of the following types: float16, float32, int32 * @sa cv::multiply cuda::multiply */ CV_EXPORTS_W void multiply(const InputArray src1, const InputArray src2, OutputArray dst, @@ -145,6 +146,9 @@ CV_EXPORTS_W void multiply(const Scalar& src1, const AscendMat& src2, CV_OUT Asc * @param scale Optional scale factor. * @param dtype Optional depth of the output array. * @param stream AscendStream for the asynchronous version. + * @note when scale == 1, src must be one of the following types: float16, float32, double, uint16, + * int8, uint8, int16, int32, int64; when scale != 1, src must be one of the following types: + * int32, int16, float16, float32. * @sa cv::divide cuda::divide */ CV_EXPORTS_W void divide(const InputArray src1, const InputArray src2, OutputArray dst, @@ -178,6 +182,7 @@ CV_EXPORTS_W void divide(const Scalar& src1, const AscendMat& src2, CV_OUT Ascen * @param mask Optional operation mask, 8-bit single channel array, that specifies elements of the * destination array to be changed. The mask can be used only with single channel images. * @param stream AscendStream for the asynchronous version. 
+ * @note src must be one of the following types: int32, int16, uint16 * @sa cv::bitwise_and cuda::bitwise_and */ CV_EXPORTS_W void bitwise_and(const InputArray src1, const InputArray src2, OutputArray dst, @@ -211,6 +216,7 @@ CV_EXPORTS_W void bitwise_and(const Scalar& src1, const AscendMat& src2, CV_OUT * @param mask Optional operation mask, 8-bit single channel array, that specifies elements of the * destination array to be changed. The mask can be used only with single channel images. * @param stream AscendStream for the asynchronous version. + * @note src must be one of the following types: int32, int16, uint16 * @sa cv::bitwise_or cuda::bitwise_or */ CV_EXPORTS_W void bitwise_or(const InputArray src1, const InputArray src2, OutputArray dst, @@ -245,6 +251,7 @@ CV_EXPORTS_W void bitwise_or(const Scalar& src1, const AscendMat& src2, CV_OUT A * @param mask Optional operation mask, 8-bit single channel array, that specifies elements of the * destination array to be changed. The mask can be used only with single channel images. * @param stream AscendStream for the asynchronous version. + * @note src must be one of the following types: int32, int16, uint16 * @sa cv::bitwise_xor cuda::bitwise_xor */ CV_EXPORTS_W void bitwise_xor(const InputArray src1, const InputArray src2, OutputArray dst, @@ -277,6 +284,7 @@ CV_EXPORTS_W void bitwise_xor(const Scalar& src1, const AscendMat& src2, CV_OUT * @param mask Optional operation mask, 8-bit single channel array, that specifies elements of the * destination array to be changed. The mask can be used only with single channel images. * @param stream AscendStream for the asynchronous version. + * @note src must be one of the following types: int32, int16, uint16 * @sa cv::bitwise_not cuda::bitwise_not */ CV_EXPORTS_W void bitwise_not(const InputArray src, OutputArray dst, @@ -306,6 +314,7 @@ The function addWeighted calculates the weighted sum of two arrays as follows: where I is a multi-dimensional index of array elements. 
In case of multi-channel arrays, each channel is processed independently. +@note src must be one of the following types: int32, int16, float16, float32. @sa cv::addWeighted cv::cuda::addWeighted */ @@ -326,6 +335,7 @@ CV_EXPORTS_W void addWeighted(const AscendMat& src1, double alpha, const AscendM @param type Threshold type. For details, see threshold . The THRESH_MASK, THRESH_OTSU and THRESH_TRIANGLE threshold types are not supported. @param stream AscendStream for the asynchronous version. +@note src must be one of the following types: float16, float32. @sa cv::threshold cv::cuda::threshold */ @@ -346,11 +356,13 @@ CV_EXPORTS_W double threshold(const AscendMat& src, CV_OUT AscendMat& dst, doubl @param n Number of source matrices. @param dst Destination matrix. @param stream AscendStream for the asynchronous version. +@note src must be one of the following types: float16, float32, double, int32, int16, int8, int64, +uint8, uint16, uint32, uint64. @sa cv::merge cv::cuda::merge */ CV_EXPORTS_W void merge(const AscendMat* src, size_t n, CV_OUT AscendMat& dst, - AscendStream& stream = AscendStream::Null()); + AscendStream& stream = AscendStream::Null()); /** @overload */ CV_EXPORTS_W void merge(const std::vector& src, CV_OUT AscendMat& dst, AscendStream& stream = AscendStream::Null()); @@ -366,11 +378,13 @@ CV_EXPORTS_W void merge(const std::vector& src, OutputArray& dst, @param src Source matrix. @param dst Destination array/vector of single-channel matrices. @param stream AscendStream for the asynchronous version. 
+@note src must be one of the types:float16, float32, double, int64, int32, uint8, uint16, uint32, + uint64, int8, int16, bool @sa cv::split cv::cuda::split */ CV_EXPORTS_W void split(const AscendMat& src, AscendMat* dst, - AscendStream& stream = AscendStream::Null()); + AscendStream& stream = AscendStream::Null()); /** @overload */ CV_EXPORTS_W void split(const AscendMat& src, CV_OUT std::vector& dst, AscendStream& stream = AscendStream::Null()); @@ -386,6 +400,8 @@ CV_EXPORTS_W void split(const InputArray src, CV_OUT std::vector& dst @param src Source matrix. @param dst Destination matrix. @param stream AscendStream for the asynchronous version. +@note src must be one of the following types: +float16,float,int8,int16,int32,int64,uint8,uint16,uint32,uint64,bool @sa cv::transpose cv::cuda::transpose */ @@ -403,6 +419,7 @@ CV_EXPORTS_W void transpose(const AscendMat& src, CV_OUT AscendMat& dst, - \> 0 Flips around y-axis. - \< 0 Flips around both axes. @param stream AscendStream for the asynchronous version. +@note src must be one of the following types: float16,float,int64,int32,int16,uint16 @sa cv::flip cv::cuda::flip */ @@ -421,6 +438,7 @@ The function cv::rotate rotates the array in one of three different ways: and the rows and cols are switched for ROTATE_90_CLOCKWISE and ROTATE_90_COUNTERCLOCKWISE. @param rotateCode an enum to specify how to rotate the array; see the enum #RotateFlags @param stream AscendStream for the asynchronous version. +@note src must be one of the following types: float16,float,int64,int32,int16,uint16 @sa cv::rotate */ @@ -445,21 +463,6 @@ CV_EXPORTS_W AscendMat crop(InputArray src, const Rect& rect, /** @overload */ CV_EXPORTS_W AscendMat crop(const AscendMat& src, const Rect& rect, AscendStream& stream = AscendStream::Null()); -/** @brief Resizes an image src down to or up to the specified size. 
-@param src input image -@param dst output image; it has the size dsize (when it is non-zero) or the size computed from -src.size(), fx, and fy; the type of dst is the same as of src. -@param dsize output image size; if it equals zero, it is computed as: - \f[𝚍𝚜𝚒𝚣𝚎 = 𝚂𝚒𝚣𝚎(𝚛𝚘𝚞𝚗𝚍(𝚏𝚡*𝚜𝚛𝚌.𝚌𝚘𝚕𝚜), 𝚛𝚘𝚞𝚗𝚍(𝚏𝚢*𝚜𝚛𝚌.𝚛𝚘𝚠𝚜))\f] - Either dsize or both fx and fy must be non-zero. -@param fx scale factor along the horizontal axis; when it equals 0, it is computed as -\f[(𝚍𝚘𝚞𝚋𝚕𝚎)𝚍𝚜𝚒𝚣𝚎.𝚠𝚒𝚍𝚝𝚑/𝚜𝚛𝚌.𝚌𝚘𝚕𝚜\f] - -@param fy scale factor along the vertical axis; when it equals 0, it is computed as -\f[(𝚍𝚘𝚞𝚋𝚕𝚎)𝚍𝚜𝚒𝚣𝚎.𝚑𝚎𝚒𝚐𝚑𝚝/𝚜𝚛𝚌.𝚛𝚘𝚠𝚜\f] -@param interpolation interpolation method(see **cv.cann.InterpolationFlags**) -@sa cv::resize -*/ //! interpolation algorithm enum InterpolationFlags @@ -478,14 +481,121 @@ enum InterpolationFlags INTER_MAX = 7, }; -CV_EXPORTS_W void resize(InputArray _src, OutputArray _dst, Size dsize, double inv_scale_x, - double inv_scale_y, int interpolation, - AscendStream& stream = AscendStream::Null()); +/** @brief Resizes an image src down to or up to the specified size. +@param src input image +@param dst output image; it has the size dsize (when it is non-zero) or the size computed from +src.size(), fx, and fy; the type of dst is the same as of src. +@param dsize output image size; if it equals zero, it is computed as: + \f[𝚍𝚜𝚒𝚣𝚎 = 𝚂𝚒𝚣𝚎(𝚛𝚘𝚞𝚗𝚍(𝚏𝚡*𝚜𝚛𝚌.𝚌𝚘𝚕𝚜), 𝚛𝚘𝚞𝚗𝚍(𝚏𝚢*𝚜𝚛𝚌.𝚛𝚘𝚠𝚜))\f] + Either dsize or both fx and fy must be non-zero. +@param fx scale factor along the horizontal axis; when it equals 0, it is computed as +\f[(𝚍𝚘𝚞𝚋𝚕𝚎)𝚍𝚜𝚒𝚣𝚎.𝚠𝚒𝚍𝚝𝚑/𝚜𝚛𝚌.𝚌𝚘𝚕𝚜\f] + +@param fy scale factor along the vertical axis; when it equals 0, it is computed as +\f[(𝚍𝚘𝚞𝚋𝚕𝚎)𝚍𝚜𝚒𝚣𝚎.𝚑𝚎𝚒𝚐𝚑𝚝/𝚜𝚛𝚌.𝚛𝚘𝚠𝚜\f] +@param interpolation interpolation method(see **cv.cann.InterpolationFlags**) +@param stream AscendStream for the asynchronous version. 
+* @note There are some constraints for the input datatype: + * when resampling using + * nearest neighbor or bilinear interpolation: Input images must be uint8, and only GRAY and BGR + images are supported. The resolution of input and output images must in range of [10*6, +4096*4096]. + * bicubic interpolation: Input images can be of different types, output images must be + float or uint8. + * pixel area interpolation: Input images can be of different types but output images + are always float.\n + * Only the following devices are supported when resampling using nearest neighbor or bilinear + interpolation: Atlas Inference Series products, Atlas 200/500 A2 Inference products and + Atlas A2 Training Series products/Atlas 300I A2 Inference products +@sa cv::resize +*/ +CV_EXPORTS_W void resize(InputArray src, OutputArray dst, Size dsize, double fx, double fy, + int interpolation, AscendStream& stream = AscendStream::Null()); /** @overload */ -CV_EXPORTS_W void resize(const AscendMat& src, CV_OUT AscendMat& dst, Size dsize, double inv_scale_x, - double inv_scale_y, int interpolation, - AscendStream& stream = AscendStream::Null()); +CV_EXPORTS_W void resize(const AscendMat& src, CV_OUT AscendMat& dst, Size dsize, double fx, + double fy, int interpolation, AscendStream& stream = AscendStream::Null()); + +/** @brief crop a sub image from a big one, and resize it to certain size. +@param src input array. +@param dst output array. it has the size dsize (when it is non-zero) or the size computed from +src.size(), fx, and fy; the type of dst is the same as of src. +@param rect a rect to crop a array to +@param dsize output image size; if it equals zero, it is computed as cv::resize do. 
+@param fx scale factor along the horizontal axis; when it equals 0, it is computed as +\f[(𝚍𝚘𝚞𝚋𝚕𝚎)𝚍𝚜𝚒𝚣𝚎.𝚠𝚒𝚍𝚝𝚑/𝚜𝚛𝚌.𝚌𝚘𝚕𝚜\f] +@param fy scale factor along the vertical axis; when it equals 0, it is computed as +\f[(𝚍𝚘𝚞𝚋𝚕𝚎)𝚍𝚜𝚒𝚣𝚎.𝚑𝚎𝚒𝚐𝚑𝚝/𝚜𝚛𝚌.𝚛𝚘𝚠𝚜\f] +@param interpolation interpolation method, only support INTER_NEAREST and INTER_LINEAR here. + (see **cv.cann.InterpolationFlags**) +@note The input images must be uint8, and only GRAY and BGR images are supported. The resolution of +input and output images must in range of [10*6, 4096*4096]. +@note Only the following devices are supported: Atlas Inference Series products, Atlas 200/500 A2 +Inference products and Atlas A2 Training Series products/Atlas 300I A2 Inference products. +@sa cv::gapi::crop, cv::resize, cv::cann::resize +*/ +CV_EXPORTS_W void cropResize(const InputArray src, OutputArray dst, const Rect& rect, Size dsize, + double fx, double fy, int interpolation); +/** @overload */ +CV_EXPORTS_W void cropResize(const AscendMat& src, CV_OUT AscendMat& dst, const Rect& rect, + Size dsize, double fx, double fy, int interpolation); +/** @brief crop a sub image from a big one, resize it to certain size, and form the top/left border +and fills it with specified bordertype. +@param src input array. +@param dst output array; it has the size Size(dsize.height + top, dsize.width + left). +@param rect a rect to crop a array to +@param dsize resize size; +@param fx scale factor along the horizontal axis; +@param fy scale factor along the vertical axis; +@param interpolation interpolation method, only INTER_NEAREST and INTER_LINEAR are supported. + (see **cv.cann.InterpolationFlags**) +@param borderType border extrapolate method, only cv::BorderTypes::BORDER_CONSTANT and +cv::BorderTypes::BORDER_REPLICATE are supported. +@param value Border BGR or YUV value if borderType==BORDER_CONSTANT. 
+@param top Number of pixels for top padding +@param left Number of pixels for left padding +@note The input images must be uint8, and only GRAY and BGR images are supported. The resolution of +input and output images must in range of [10*6, 4096*4096]. +@note Only the following devices are supported: Atlas Inference Series products, Atlas 200/500 A2 +Inference products and Atlas A2 Training Series products/Atlas 300I A2 Inference products. +@sa cv::gapi::crop, cv::resize, cv::cann::resize, cv::BorderTypes +*/ + +CV_EXPORTS_W void cropResizeMakeBorder(const InputArray src, OutputArray dst, const Rect& rect, + Size dsize, double fx, double fy, int interpolation, int top, + int left, const int borderType, Scalar value = Scalar()); +/** @overload */ +CV_EXPORTS_W void cropResizeMakeBorder(const AscendMat& src, CV_OUT AscendMat& dst, + const Rect& rect, Size dsize, double fx, double fy, + int interpolation, int top, int left, const int borderType, + Scalar value = Scalar()); +/** @brief Forms a border and fills it with specified bordertype around the copy of input image. +@param src Source image. +@param dst Destination image of the same type as src and the size Size(src.cols+left+right, +src.rows+top+bottom). +@param top Number of pixels for top padding +@param bottom Number of pixels for bottom padding +@param left Number of pixels for left padding +@param right Number of pixels for right padding +Parameter specifying how many pixels in each direction from the source image rectangle to +extrapolate. For example, top=1, bottom=1, left=1, right=1 mean that 1 pixel-wide border needs to be +built. +@param borderType Border type. only cv::BorderTypes::BORDER_CONSTANT and +cv::BorderTypes::BORDER_REPLICATE are supported. +@param value Border BGR or YUV value if borderType==BORDER_CONSTANT. +@note The input images must be uint8, and only GRAY and BGR images are supported. The resolution of +input and output images must in range of [10*6, 4096*4096]. 
+@note Only the following devices are supported: Atlas Inference Series products, Atlas 200/500 A2 +Inference products and Atlas A2 Training Series products/Atlas 300I A2 Inference products. +@sa cv::copyMakeBorder, cv::borderInterpolate +*/ +CV_EXPORTS_W void copyMakeBorder(const InputArray src, OutputArray dst, int top, int bottom, + int left, int right, int borderType, + const Scalar& value = Scalar()); +/** @overload */ +CV_EXPORTS_W void copyMakeBorder(const AscendMat& src, CV_OUT AscendMat& dst, int top, int bottom, + int left, int right, int borderType, + const Scalar& value = Scalar()); //! @} cannops_core //! @addtogroup cannimgproc @@ -495,10 +605,17 @@ CV_EXPORTS_W void resize(const AscendMat& src, CV_OUT AscendMat& dst, Size dsize @param src Source image with CV_8U , CV_16U , or CV_32F depth and 1, 3, or 4 channels. @param dst Destination image. -@param code Color space conversion code. For details, see cvtColor . +@param code Color space conversion code. For details, see cv::ColorConversionCodes . @param dstCn Number of channels in the destination image. If the parameter is 0, the number of the channels is derived automatically from src and the code . @param stream AscendStream for the asynchronous version. +@note The supported conversion types are as follows: + { CV_BGR2BGRA, CV_BGRA2BGR, CV_BGR2RGBA, CV_RGBA2BGR, + CV_BGR2RGB, CV_BGRA2RGBA, CV_BGR2GRAY, CV_RGB2GRAY, + CV_GRAY2BGR, CV_GRAY2BGRA, CV_BGRA2GRAY, CV_RGBA2GRAY, + CV_BGR2XYZ, CV_RGB2XYZ, CV_XYZ2BGR, CV_XYZ2RGB, + CV_BGR2YCrCb, CV_RGB2YCrCb, CV_YCrCb2BGR, CV_YCrCb2RGB, + CV_BGR2YUV, CV_RGB2YUV, CV_YUV2BGR, CV_YUV2RGB } @sa cv::cvtColor cv::cuda::cvtColor */ diff --git a/modules/cannops/include/opencv2/dvpp_call.hpp b/modules/cannops/include/opencv2/dvpp_call.hpp new file mode 100644 index 00000000000..e70d56ea801 --- /dev/null +++ b/modules/cannops/include/opencv2/dvpp_call.hpp @@ -0,0 +1,107 @@ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef ENABLE_DVPP_INTERFACE + #define ENABLE_DVPP_INTERFACE +#endif // ENABLE_DVPP_INTERFACE + +#include +#include +#include +#include +#include +#include "acl/acl_op.h" +#include "cann_call.hpp" + +namespace cv +{ +namespace cann +{ +struct AscendPicDesc +{ + const char* name; + std::shared_ptr data; + std::vector batchNum; + + size_t widthAlignment = 16; + size_t heightAlignment = 1; + size_t sizeAlignment = 3; + size_t sizeNum = 3; + + hi_vpc_pic_info Pic; + AscendPicDesc& setMemAlign(); + AscendPicDesc& setPic(hi_pixel_format _picture_format); + std::shared_ptr allocate(); + AscendPicDesc(){}; + AscendPicDesc(const AscendMat& ascendMat, hi_pixel_format _picture_format); + AscendPicDesc(const Mat& mat, hi_pixel_format _picture_format); +}; + +/* + ***************************** hi_mpi_vpc warppers *************************** + The DVPP VPC interfaces here are all version v2. Only the following devices are supported: Atlas + Inference Series products, Atlas 200/500 A2 Inference products and Atlas A2 Training Series + products/Atlas 300I A2 Inference products. 
+*/ +inline void vpcResizeWarpper(hi_vpc_chn chnId, hi_vpc_pic_info& inPic, hi_vpc_pic_info& outPic, + int interpolation, uint32_t* taskID) +{ + uint32_t ret = hi_mpi_vpc_resize(chnId, &inPic, &outPic, 0, 0, interpolation, taskID, -1); + if (ret != HI_SUCCESS) + CV_Error(Error::StsBadFlag, "failed to resize image"); +} +void vpcCropResizeWarpper(hi_vpc_chn chnId, hi_vpc_pic_info& inPic, hi_vpc_pic_info& outPic, + int cnt, uint32_t* taskID, const Rect& rect, Size dsize, + int interpolation); + +void vpcCropResizeMakeBorderWarpper(hi_vpc_chn chnId, std::vector& inPicDesc, + std::vector& outPicDesc, int cnt, + uint32_t* taskID, const Rect& rect, Size dsize, + int interpolation, const int borderType, Scalar scalarV, + int top, int left); +void vpcCopyMakeBorderWarpper(hi_vpc_chn chnId, hi_vpc_pic_info& inPic, hi_vpc_pic_info& outPic, + uint32_t* taskID, int* offsets, int bordertype, Scalar value); +/*****************************************************************************/ + +/** + * @brief Interface for calling DVPP operator descriptors. + * The DVPP VPC interfaces here are all version v2. Supported devices: Atlas Inference Series + * products, Atlas 200/500 A2 Inference products and Atlas A2 Training Series products/Atlas 300I A2 + * Inference products. 
+ */ +class DvppOperatorDesc +{ +private: + DvppOperatorDesc& addInput(AscendPicDesc& picDesc); + DvppOperatorDesc& addOutput(AscendPicDesc& picDesc); + std::set> holder; + +public: + DvppOperatorDesc() + { + chnId = 0; + stChnAttr = {}; + createChannel(); + } + virtual ~DvppOperatorDesc() { reset(); } + DvppOperatorDesc& addInput(const AscendMat& mat); + DvppOperatorDesc& addOutput(AscendMat& mat); + DvppOperatorDesc& addInput(const Mat& mat); + DvppOperatorDesc& addOutput(Mat& mat); + + DvppOperatorDesc& getResult(Mat& dst, uint32_t& taskIDResult); + DvppOperatorDesc& getResult(AscendMat& dst, uint32_t& taskIDResult); + + DvppOperatorDesc& reset(); + DvppOperatorDesc& createChannel(); + + std::vector inputDesc_; + std::vector outputDesc_; + + hi_vpc_chn chnId; + hi_vpc_chn_attr stChnAttr; +}; + +} // namespace cann +} // namespace cv \ No newline at end of file diff --git a/modules/cannops/misc/python/test/test_cannops.py b/modules/cannops/misc/python/test/test_cannops.py index f1b53bc192c..48d4ff18d11 100644 --- a/modules/cannops/misc/python/test/test_cannops.py +++ b/modules/cannops/misc/python/test/test_cannops.py @@ -24,6 +24,7 @@ def genMask(mask, listx, listy): class cannop_test(NewOpenCVTests): def test_ascend(self): cv.cann.initAcl() + cv.cann.initDvpp() cv.cann.getDevice() cv.cann.setDevice(0) stream = cv.cann.AscendStream_Null() @@ -275,6 +276,50 @@ def test_imgproc(self): aclMat, 127, 255, tType) self.assertTrue(np.allclose(cvThresh, cannThresh.download())) self.assertTrue(np.allclose(cvRet, cannRet)) + + npMat = (np.random.random((1280, 1024, 3)) * 255).astype(np.uint8) + w_off, h_off, crop_w, crop_h = 0, 0, 512, 384 + roi = [w_off, h_off, crop_w, crop_h] + aclMat = cv.cann.AscendMat() + aclMat.upload(npMat) + + # resize + dstSize = np.array([crop_w, crop_h]) + self.assertTrue(np.allclose(cv.cann.resize(npMat, dstSize, 0, 0, 1), + cv.resize(npMat, dstSize, 0, 0, 1))) + self.assertTrue(np.allclose(cv.cann.resize(aclMat, dstSize, 0, 0, 1).download(), + 
cv.resize(npMat, dstSize, 0, 0, 1))) + # cropResize + self.assertTrue(np.allclose(cv.cann.cropResize(npMat, roi, dstSize, 0, 0, 1), + cv.resize(npMat[h_off:crop_h, w_off:crop_w], dstSize, 0, 0, 1)), 0) + self.assertTrue(np.allclose(cv.cann.cropResize(aclMat, roi, dstSize, 0, 0, 1).download(), + cv.resize(npMat[h_off:crop_h, w_off:crop_w], dstSize, 0, 0, 1)), 0) + + # cropResizeMakeBorder + # TODO cv.copyMakeBorder ignores borderColorValue param; find the reason and fix it + borderColorValue = (100, 0, 255) + top, bottom, left, right = 32, 0, 10, 0 + borderTypes = [0, 1] + + for borderType in borderTypes: + self.assertTrue(np.allclose(cv.cann.cropResizeMakeBorder(npMat, roi, dstSize, + 0, 0, 1, top, left, borderType), + cv.copyMakeBorder(cv.resize(npMat[h_off:crop_h, w_off:crop_w], + dstSize, 0, 0, 1), top, bottom, left, right, borderType), 1)) + self.assertTrue(np.allclose(cv.cann.cropResizeMakeBorder(aclMat, roi, dstSize, + 0, 0, 1, top, left, borderType).download(), + cv.copyMakeBorder(cv.resize(npMat[h_off:crop_h, w_off:crop_w], + dstSize, 0, 0, 1), top, bottom, left, right, borderType), 1)) + + # copyMakeBorder + for borderType in borderTypes: + self.assertTrue(np.allclose(cv.cann.copyMakeBorder(npMat, top, bottom, left, right, + borderType), + cv.copyMakeBorder(npMat, top, bottom, left, right, borderType))) + self.assertTrue(np.allclose(cv.cann.copyMakeBorder(aclMat, top, bottom, left, right, + borderType).download(), + cv.copyMakeBorder(npMat, top, bottom, left, right, borderType))) + cv.cann.resetDevice() if __name__ == '__main__': diff --git a/modules/cannops/perf/perf_core.cpp b/modules/cannops/perf/perf_core.cpp index a9d86fca881..914a122d287 100644 --- a/modules/cannops/perf/perf_core.cpp +++ b/modules/cannops/perf/perf_core.cpp @@ -11,6 +11,7 @@ namespace { #define TYPICAL_ASCEND_MAT_SIZES \ Values(::perf::sz1080p, ::perf::sz2K, ::perf::sz2160p, ::perf::sz4320p) +#define DVPP_ASCEND_MAT_SIZES Values(::perf::sz1080p, ::perf::sz2K, ::perf::sz2160p, 
::perf::sz5MP) #define DEF_PARAM_TEST(name, ...) \ typedef ::perf::TestBaseWithParam> name @@ -157,5 +158,176 @@ PERF_TEST_P(NPU, CROP_OVERLOAD, TYPICAL_ASCEND_MAT_SIZES) cv::cann::resetDevice(); SANITY_CHECK_NOTHING(); } + +PERF_TEST_P(CPU, RESIZE, DVPP_ASCEND_MAT_SIZES) +{ + Mat mat(GET_PARAM(0), CV_8UC3); + Mat dst; + declare.in(mat, WARMUP_RNG); + Size dsize = Size(256, 256); + TEST_CYCLE_N(10) { cv::resize(mat, dst, dsize, 0, 0, 1); } + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(NPU, RESIZE, DVPP_ASCEND_MAT_SIZES) +{ + Mat mat(GET_PARAM(0), CV_32FC3); + AscendMat dst; + AscendMat src; + src.upload(mat); + declare.in(mat, WARMUP_RNG); + Size dsize = Size(256, 256); + TEST_CYCLE_N(10) { cv::cann::resize(src, dst, dsize, 0, 0, 3); } + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(NPU, THRESHOLD, TYPICAL_ASCEND_MAT_SIZES) +{ + Mat mat(GET_PARAM(0), CV_32FC3); + AscendMat dst; + AscendMat src; + src.upload(mat); + declare.in(mat, WARMUP_RNG); + TEST_CYCLE_N(10) { cv::cann::threshold(src, dst, 100.0, 255.0, cv::THRESH_BINARY); } + SANITY_CHECK_NOTHING(); +} +PERF_TEST_P(CPU, THRESHOLD, TYPICAL_ASCEND_MAT_SIZES) +{ + Mat mat(GET_PARAM(0), CV_32FC3); + Mat dst; + declare.in(mat, WARMUP_RNG); + TEST_CYCLE_N(10) { cv::threshold(mat, dst, 100.0, 255.0, cv::THRESH_BINARY); } + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(NPU, RESIZE_INTER_NEAREST, DVPP_ASCEND_MAT_SIZES) +{ + Mat mat(GET_PARAM(0), CV_8UC3); + Mat dst; + declare.in(mat, WARMUP_RNG); + Size dsize = Size(256, 256); + TEST_CYCLE_N(10) { cv::cann::resize(mat, dst, dsize, 0, 0, 0); } + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(NPU, COPY_MAKE_BORDER, DVPP_ASCEND_MAT_SIZES) +{ + Mat resized_cv, checker, cpuOpRet, cpuMat(GET_PARAM(0), CV_8UC3); + declare.in(cpuMat, WARMUP_RNG); + int top, bottom, left, right; + top = (int)(20); + bottom = top; + left = (int)(20); + right = left; + int borderType = 1; + float scalarV[3] = {0, 0, 255}; + Scalar value = {scalarV[0], scalarV[1], scalarV[2]}; + + TEST_CYCLE_N(10) + { + 
cv::cann::copyMakeBorder(cpuMat, checker, top, bottom, left, right, borderType, value); + } + + SANITY_CHECK_NOTHING(); +} +PERF_TEST_P(CPU, COPY_MAKE_BORDER, DVPP_ASCEND_MAT_SIZES) +{ + Mat resized_cv, checker, cpuOpRet, cpuMat(GET_PARAM(0), CV_8UC3); + declare.in(cpuMat, WARMUP_RNG); + int top, bottom, left, right; + top = (int)(20); + bottom = top; + left = (int)(20); + right = left; + int borderType = 1; + float scalarV[3] = {0, 0, 255}; + Scalar value = {scalarV[0], scalarV[1], scalarV[2]}; + + TEST_CYCLE_N(10) + { + cv::copyMakeBorder(cpuMat, checker, top, bottom, left, right, borderType, value); + } + + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(NPU, CROP_RESIZE_MAKE_BORDER, DVPP_ASCEND_MAT_SIZES) +{ + Size size = GET_PARAM(0); + Mat resized_cv, checker, cpuOpRet, cpuMat(size, CV_8UC3); + declare.in(cpuMat, WARMUP_RNG); + + const Rect b(1, 0, size.width / 2, size.height); + Size dsize = Size(size.width / 4, size.height / 2); + int top, left; + top = (int)(20); + left = (int)(20); + int borderType = 0; + float scalarV[3] = {1, 1, 1}; + Scalar value = {scalarV[0], scalarV[1], scalarV[2]}; + + TEST_CYCLE_N(10) + { + cv::cann::cropResizeMakeBorder(cpuMat, checker, b, dsize, 0, 0, 1, top, left, borderType, + value); + } + + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(CPU, CROP_RESIZE_MAKE_BORDER, DVPP_ASCEND_MAT_SIZES) +{ + Size size = GET_PARAM(0); + Mat resized_cv, checker, cpuOpRet, cpuMat(size, CV_8UC3); + declare.in(cpuMat, WARMUP_RNG); + const Rect b(1, 0, size.width / 2, size.height); + Size dsize = Size(size.width / 4, size.height / 2); + int top, bottom, left, right; + top = (int)(20); + bottom = 0; + left = (int)(20); + right = 0; + int borderType = 0; + float scalarV[3] = {1, 1, 1}; + Scalar value = {scalarV[0], scalarV[1], scalarV[2]}; + + TEST_CYCLE_N(10) + { + Mat cropped_cv(cpuMat, b); + cv::resize(cropped_cv, resized_cv, dsize, 0, 0, 1); + cv::copyMakeBorder(resized_cv, cpuOpRet, top, bottom, left, right, borderType, value); + } + 
SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(NPU, CROP_RESIZE, DVPP_ASCEND_MAT_SIZES) +{ + Size size = GET_PARAM(0); + Mat resized_cv, checker, cpuOpRet, cpuMat(size, CV_8UC3); + declare.in(cpuMat, WARMUP_RNG); + const Rect b(1, 0, size.width / 2, size.height); + Size dsize = Size(size.width / 4, size.height / 2); + + TEST_CYCLE_N(10) { cv::cann::cropResize(cpuMat, checker, b, dsize, 0, 0, 1); } + + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P(CPU, CROP_RESIZE, DVPP_ASCEND_MAT_SIZES) +{ + Size size = GET_PARAM(0); + Mat resized_cv, checker, cpuOpRet, cpuMat(size, CV_8UC3); + declare.in(cpuMat, WARMUP_RNG); + const Rect b(1, 0, size.width / 2, size.height); + Size dsize = Size(size.width / 4, size.height / 2); + + TEST_CYCLE_N(10) + { + Mat cropped_cv(cpuMat, b); + cv::resize(cropped_cv, resized_cv, dsize, 0, 0, 1); + } + SANITY_CHECK_NOTHING(); +} + } // namespace } // namespace opencv_test diff --git a/modules/cannops/perf/perf_main.cpp b/modules/cannops/perf/perf_main.cpp index 33503ac4158..9e03d48904f 100644 --- a/modules/cannops/perf/perf_main.cpp +++ b/modules/cannops/perf/perf_main.cpp @@ -10,8 +10,18 @@ class CannEnvironment : public ::testing::Environment { public: virtual ~CannEnvironment() = default; - virtual void SetUp() CV_OVERRIDE { cv::cann::initAcl(); } - virtual void TearDown() CV_OVERRIDE { cv::cann::finalizeAcl(); } + virtual void SetUp() CV_OVERRIDE + { + initAcl(); + cv::cann::setDevice(DEVICE_ID); + initDvpp(); + } + virtual void TearDown() CV_OVERRIDE + { + finalizeAcl(); + cv::cann::resetDevice(); + finalizeDvpp(); + } }; static void initTests() diff --git a/modules/cannops/src/core.cpp b/modules/cannops/src/core.cpp index 7d328915ef9..027cd119f10 100644 --- a/modules/cannops/src/core.cpp +++ b/modules/cannops/src/core.cpp @@ -241,6 +241,56 @@ AscendMat crop(InputArray _src, const Rect& rect, AscendStream& stream) return crop(src, rect, stream); } +/************************** resize **************************/ +void checkResize(Size& ssize, Size& 
dsize, double inv_scale_x, double inv_scale_y, + int& interpolation) +{ + CV_Assert(!ssize.empty()); + float_t scaleX = (float_t)inv_scale_x; + float_t scaleY = (float_t)inv_scale_y; + // interpolation: resize mode, support bilinear/nearest neighbor/bicubic/pixel area relation. + CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST || + interpolation == INTER_CUBIC || interpolation == INTER_AREA); + switch (interpolation) + { + case INTER_LINEAR: + interpolation = INTER_NEAREST; + break; + case INTER_NEAREST: + interpolation = INTER_LINEAR; + break; + default: + break; + } + + if (dsize.empty()) + { + CV_Assert(scaleX > 0); + CV_Assert(scaleY > 0); + dsize = Size(saturate_cast(ssize.width * inv_scale_x), + saturate_cast(ssize.height * inv_scale_y)); + CV_Assert(!dsize.empty()); + } + else + { + scaleX = (float_t)dsize.width / ssize.width; + scaleY = (float_t)dsize.height / ssize.height; + CV_Assert(scaleX > 0); + CV_Assert(scaleY > 0); + } +} + +template +void resize(const inMat& src, outMat& dst, int interpolation) +{ + DvppOperatorDesc op; + op.addInput(src).addOutput(dst); + uint32_t taskID = 0; + vpcResizeWarpper(op.chnId, op.inputDesc_[0].Pic, op.outputDesc_[0].Pic, interpolation, &taskID); + + uint32_t taskIDResult = taskID; + op.getResult(dst, taskIDResult); +} void resize(const AscendMat& src, AscendMat& dst, int32_t* dstSize, int interpolation, AscendStream& stream) { @@ -258,7 +308,6 @@ void resize(const AscendMat& src, AscendMat& dst, int32_t* dstSize, int interpol default: break; } - runner.setOp(mode) .addInput(src, "images") .addInput(dstSize, dims, 1, ACL_INT32, "size") @@ -271,30 +320,18 @@ void resize(const AscendMat& src, AscendMat& dst, Size dsize, double inv_scale_x double inv_scale_y, int interpolation, AscendStream& stream) { Size ssize = src.size(); - CV_Assert(!ssize.empty()); - float_t scaleX = (float_t)inv_scale_x; - float_t scaleY = (float_t)inv_scale_y; - CV_Assert(interpolation == INTER_CUBIC || interpolation == 
INTER_AREA); + checkResize(ssize, dsize, inv_scale_x, inv_scale_y, interpolation); + int32_t dstSize[] = {dsize.height, dsize.width}; + dst.create(dstSize[0], dstSize[1], src.type()); - if (dsize.empty()) + if (interpolation == INTER_CUBIC || interpolation == INTER_AREA) { - CV_Assert(scaleX > 0); - CV_Assert(scaleY > 0); - dsize = Size(saturate_cast(ssize.width * inv_scale_x), - saturate_cast(ssize.height * inv_scale_y)); - CV_Assert(!dsize.empty()); + resize(src, dst, dstSize, interpolation, stream); } else { - scaleX = (float_t)dsize.width / ssize.width; - scaleY = (float_t)dsize.height / ssize.height; - CV_Assert(scaleX > 0); - CV_Assert(scaleY > 0); + resize(src, dst, interpolation); } - - int32_t dstSize[] = {dsize.width, dsize.height}; - dst.create(dstSize[0], dstSize[1], src.type()); - resize(src, dst, dstSize, interpolation, stream); } void resize(InputArray _src, OutputArray _dst, Size dsize, double inv_scale_x, double inv_scale_y, @@ -302,8 +339,138 @@ void resize(InputArray _src, OutputArray _dst, Size dsize, double inv_scale_x, d { AscendMat src, dst; src.upload(_src, stream); - resize(src, dst, dsize, inv_scale_x, inv_scale_y, interpolation, stream); - dst.download(_dst, stream); + if (interpolation == INTER_CUBIC || interpolation == INTER_AREA) + { + resize(src, dst, dsize, inv_scale_x, inv_scale_y, interpolation, stream); + dst.download(_dst, stream); + } + else + { + Mat srcCV = _src.getMat(); + Size ssize = srcCV.size(); + checkResize(ssize, dsize, inv_scale_x, inv_scale_y, interpolation); + _dst.create(dsize, srcCV.type()); + Mat dstCV = _dst.getMat(); + resize(srcCV, dstCV, interpolation); + } +} + +/************************** CropResize **************************/ +template +void cropResize(const inMat& src, outMat& dst, const Rect& rect, Size dsize, int interpolation) +{ + DvppOperatorDesc op; + op.addInput(src).addOutput(dst); + uint32_t taskID = 0; + int cnt = 1; + + vpcCropResizeWarpper(op.chnId, op.inputDesc_[0].Pic, op.outputDesc_[0].Pic, 
cnt, &taskID, rect, + dsize, interpolation); + + uint32_t taskIDResult = taskID; + op.getResult(dst, taskIDResult); +} + +void cropResize(const AscendMat& src, AscendMat& dst, const Rect& rect, Size dsize, + double inv_scale_x, double inv_scale_y, int interpolation) +{ + Size ssize = src.size(); + checkResize(ssize, dsize, inv_scale_x, inv_scale_y, interpolation); + dst.create(dsize.height, dsize.width, src.type()); + cropResize(src, dst, rect, dsize, interpolation); +} + +void cropResize(const InputArray _src, OutputArray _dst, const Rect& rect, Size dsize, + double inv_scale_x, double inv_scale_y, int interpolation) +{ + Size ssize = _src.size(); + checkResize(ssize, dsize, inv_scale_x, inv_scale_y, interpolation); + + Mat src = _src.getMat(); + _dst.create(dsize.height, dsize.width, src.type()); + Mat dst = _dst.getMat(); + + cropResize(src, dst, rect, dsize, interpolation); +} + +/************************** CopyMakeBorder **************************/ +template +void copyMakeBorder(const inMat& src, outMat& dst, int* offsets, int borderType, + const Scalar& value) +{ + DvppOperatorDesc op; + op.addInput(src).addOutput(dst); + uint32_t taskID = 0; + vpcCopyMakeBorderWarpper(op.chnId, op.inputDesc_[0].Pic, op.outputDesc_[0].Pic, &taskID, + offsets, borderType, value); + + uint32_t taskIDResult = taskID; + op.getResult(dst, taskIDResult); +} + +void copyMakeBorder(const AscendMat& src, AscendMat& dst, int top, int bottom, int left, int right, + int borderType, const Scalar& value) +{ + dst.create(src.rows + top + bottom, src.cols + left + right, src.type()); + int offsets[] = {top, bottom, left, right}; + copyMakeBorder(src, dst, offsets, borderType, value); +} + +void copyMakeBorder(const InputArray _src, OutputArray _dst, int top, int bottom, int left, + int right, int borderType, const Scalar& value) +{ + CV_Assert(borderType < 2); + Mat src = _src.getMat(); + _dst.create(src.rows + top + bottom, src.cols + left + right, src.type()); + Mat dst = _dst.getMat(); + 
int offsets[] = {top, bottom, left, right}; + + copyMakeBorder(src, dst, offsets, borderType, value); +} + +/************************** CropResizeMakeBorder **************************/ + +template +void cropResizeMakeBorder(const inMat& src, outMat& dst, const Rect& rect, Size dsize, + int interpolation, int top, int left, const int borderType, + Scalar scalarV) +{ + DvppOperatorDesc op; + op.addInput(src).addOutput(dst); + uint32_t taskID = 0; + int cnt = 1; + vpcCropResizeMakeBorderWarpper(op.chnId, op.inputDesc_, op.outputDesc_, cnt, &taskID, rect, + dsize, interpolation, borderType, scalarV, top, left); + + uint32_t taskIDResult = taskID; + op.getResult(dst, taskIDResult); +} + +void cropResizeMakeBorder(const AscendMat& src, AscendMat& dst, const Rect& rect, Size dsize, + double inv_scale_x, double inv_scale_y, int interpolation, int top, + int left, const int borderType, Scalar scalarV) +{ + CV_Assert(borderType < 2); + Size ssize = src.size(); + checkResize(ssize, dsize, inv_scale_x, inv_scale_y, interpolation); + dst.create(dsize.height + top, dsize.width + left, src.type()); + + cropResizeMakeBorder(src, dst, rect, dsize, interpolation, top, left, borderType, scalarV); +} + +void cropResizeMakeBorder(const InputArray _src, OutputArray _dst, const Rect& rect, Size dsize, + double inv_scale_x, double inv_scale_y, int interpolation, int top, + int left, const int borderType, Scalar scalarV) +{ + CV_Assert(borderType < 2); + Size ssize = _src.size(); + checkResize(ssize, dsize, inv_scale_x, inv_scale_y, interpolation); + + Mat src = _src.getMat(); + _dst.create(dsize.height + top, dsize.width + left, src.type()); + Mat dst = _dst.getMat(); + + cropResizeMakeBorder(src, dst, rect, dsize, interpolation, top, left, borderType, scalarV); } } // namespace cann diff --git a/modules/cannops/src/dvpp_call.cpp b/modules/cannops/src/dvpp_call.cpp new file mode 100644 index 00000000000..f81604dc258 --- /dev/null +++ b/modules/cannops/src/dvpp_call.cpp @@ -0,0 +1,310 @@ 
+// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include +#include +#include "opencv2/dvpp_call.hpp" +#include +#include +#include +#include + +#define unlikely(expr) __builtin_expect(!!(expr), 0) +#define likely(expr) __builtin_expect(!!(expr), 1) + +namespace cv +{ +namespace cann +{ + +/******************************AscendPicDesc****************************/ +AscendPicDesc& AscendPicDesc::setMemAlign() +{ + if (Pic.picture_format == HI_PIXEL_FORMAT_BGR_888 || + Pic.picture_format == HI_PIXEL_FORMAT_RGB_888 || + Pic.picture_format == HI_PIXEL_FORMAT_YUV_PACKED_444) + { + widthAlignment = 16; + heightAlignment = 1; + sizeAlignment = 3; + sizeNum = 3; + } + else if (Pic.picture_format == HI_PIXEL_FORMAT_YUV_400) + { + widthAlignment = 16; + heightAlignment = 1; + sizeAlignment = 1; + sizeNum = 1; + } + else if (Pic.picture_format == HI_PIXEL_FORMAT_ARGB_8888 || + Pic.picture_format == HI_PIXEL_FORMAT_ABGR_8888 || + Pic.picture_format == HI_PIXEL_FORMAT_RGBA_8888 || + Pic.picture_format == HI_PIXEL_FORMAT_BGRA_8888) + { + widthAlignment = 16; + heightAlignment = 1; + sizeAlignment = 4; + sizeNum = 4; + } + return *this; +} + +AscendPicDesc& AscendPicDesc::setPic(hi_pixel_format _picture_format) +{ + // set input + Pic.picture_format = _picture_format; + setMemAlign(); + Pic.picture_width_stride = ALIGN_UP(Pic.picture_width, widthAlignment) * sizeAlignment; + Pic.picture_height_stride = ALIGN_UP(Pic.picture_height, heightAlignment); + Pic.picture_buffer_size = + Pic.picture_width_stride * Pic.picture_height_stride * sizeAlignment / sizeNum; + return *this; +} + +std::shared_ptr AscendPicDesc::allocate() +{ + Pic.picture_address = nullptr; + uint32_t ret = hi_mpi_dvpp_malloc(0, &Pic.picture_address, Pic.picture_buffer_size); + if (ret != HI_SUCCESS) + CV_Error(Error::StsBadFlag, "failed to malloc mem on dvpp"); + + 
return std::shared_ptr(Pic.picture_address, [](void* ptr) { hi_mpi_dvpp_free(ptr); }); +} + +AscendPicDesc::AscendPicDesc(const AscendMat& ascendMat, hi_pixel_format _picture_format) +{ + Pic.picture_width = ascendMat.cols; + Pic.picture_height = ascendMat.rows; + setPic(_picture_format); + data = allocate(); +} + +AscendPicDesc::AscendPicDesc(const Mat& mat, hi_pixel_format _picture_format) +{ + Pic.picture_width = mat.cols; + Pic.picture_height = mat.rows; + setPic(_picture_format); + data = allocate(); +} + +/******************************hi_mpi_vpc warppers****************************/ +void vpcCropResizeWarpper(hi_vpc_chn chnId, hi_vpc_pic_info& inPic, hi_vpc_pic_info& outPic, + int cnt, uint32_t* taskID, const Rect& rect, Size dsize, + int interpolation) +{ + hi_vpc_crop_region cropRegion = {.top_offset = static_cast(rect.y), + .left_offset = static_cast(rect.x), + .crop_width = static_cast(rect.width), + .crop_height = static_cast(rect.height)}; + + hi_vpc_resize_info resize_info = {.resize_width = static_cast(dsize.width), + .resize_height = static_cast(dsize.height), + .interpolation = static_cast(interpolation)}; + hi_vpc_crop_resize_region crop_resize_info[1]; + crop_resize_info[0].dest_pic_info = outPic; + crop_resize_info[0].crop_region = cropRegion; + crop_resize_info[0].resize_info = resize_info; + uint32_t ret = hi_mpi_vpc_crop_resize(chnId, (const hi_vpc_pic_info*)&inPic, crop_resize_info, + cnt, taskID, -1); + if (ret != HI_SUCCESS) + CV_Error(Error::StsBadFlag, "failed to crop and resize image"); +} + +void vpcCopyMakeBorderWarpper(hi_vpc_chn chnId, hi_vpc_pic_info& inPic, hi_vpc_pic_info& outPic, + uint32_t* taskID, int* offsets, int bordertype, Scalar value) +{ + hi_vpc_make_border_info make_border_info; + make_border_info = {.top = static_cast(offsets[0]), + .bottom = static_cast(offsets[1]), + .left = static_cast(offsets[2]), + .right = static_cast(offsets[3]), + .border_type = saturate_cast(bordertype)}; + if (outPic.picture_format == 
HI_PIXEL_FORMAT_BGR_888) + { + make_border_info.scalar_value.val[0] = value[2]; + make_border_info.scalar_value.val[1] = value[1]; + make_border_info.scalar_value.val[2] = value[0]; + } + else if (outPic.picture_format == HI_PIXEL_FORMAT_YUV_400) + { + make_border_info.scalar_value.val[0] = value[0]; + make_border_info.scalar_value.val[1] = value[1]; + make_border_info.scalar_value.val[2] = value[2]; + } + make_border_info.scalar_value.val[3] = value[3]; + uint32_t ret = hi_mpi_vpc_copy_make_border(chnId, (const hi_vpc_pic_info*)&inPic, &outPic, + make_border_info, taskID, -1); + if (ret != HI_SUCCESS) + CV_Error(Error::StsBadFlag, "failed to crop and resize image"); +} + +void setBatchCropResizeMakeBorder(std::vector& outPicDesc, + hi_vpc_crop_resize_border_region crop_resize_make_border_info[], + const Rect& rect, Size dsize, int interpolation, + const int borderType, Scalar scalarV, int top, int left, + int batchSize) +{ + hi_vpc_crop_region cropRegion = {.top_offset = static_cast(rect.y), + .left_offset = static_cast(rect.x), + .crop_width = static_cast(rect.width), + .crop_height = static_cast(rect.height)}; + + hi_vpc_resize_info resize_info = {.resize_width = static_cast(dsize.width), + .resize_height = static_cast(dsize.height), + .interpolation = static_cast(interpolation)}; + for (int i = 0; i < batchSize; i++) + { + crop_resize_make_border_info[i].dest_pic_info = outPicDesc[i].Pic; + crop_resize_make_border_info[i].crop_region = cropRegion; + crop_resize_make_border_info[i].resize_info = resize_info; + crop_resize_make_border_info[i].dest_top_offset = top; + crop_resize_make_border_info[i].dest_left_offset = left; + crop_resize_make_border_info[i].border_type = static_cast(borderType); + if (crop_resize_make_border_info[i].dest_pic_info.picture_format == HI_PIXEL_FORMAT_BGR_888) + { + crop_resize_make_border_info[i].scalar_value.val[0] = scalarV[2]; + crop_resize_make_border_info[i].scalar_value.val[1] = scalarV[1]; + 
crop_resize_make_border_info[i].scalar_value.val[2] = scalarV[0]; + } + else if (crop_resize_make_border_info[i].dest_pic_info.picture_format == + HI_PIXEL_FORMAT_YUV_400) + { + crop_resize_make_border_info[i].scalar_value.val[0] = scalarV[0]; + crop_resize_make_border_info[i].scalar_value.val[1] = scalarV[1]; + crop_resize_make_border_info[i].scalar_value.val[2] = scalarV[2]; + } + crop_resize_make_border_info[i].scalar_value.val[3] = scalarV[3]; + } +} + +void vpcCropResizeMakeBorderWarpper(hi_vpc_chn chnId, std::vector& inPicDesc, + std::vector& outPicDesc, int cnt, + uint32_t* taskID, const Rect& rect, Size dsize, + int interpolation, const int borderType, Scalar scalarV, + int top, int left) +{ + hi_vpc_crop_resize_border_region crop_resize_make_border_info[1]; + + setBatchCropResizeMakeBorder(outPicDesc, crop_resize_make_border_info, rect, dsize, + interpolation, borderType, scalarV, top, left, 1); + uint32_t ret = + hi_mpi_vpc_crop_resize_make_border(chnId, (const hi_vpc_pic_info*)&inPicDesc[0].Pic, + crop_resize_make_border_info, cnt, taskID, -1); + if (ret != HI_SUCCESS) + CV_Error(Error::StsBadFlag, "failed to crop, resize and make border of image"); +} + +/******************************DvppOperatorDesc****************************/ +DvppOperatorDesc& DvppOperatorDesc::reset() +{ + uint32_t ret = hi_mpi_vpc_destroy_chn(chnId); + if (ret != HI_SUCCESS) + CV_Error(Error::StsBadFlag, "failed to destory DVPP vpc channel"); + inputDesc_.clear(); + outputDesc_.clear(); + holder.clear(); + return *this; +} +void initDvpp() { hi_mpi_sys_init(); } + +void finalizeDvpp() { hi_mpi_sys_exit(); } + +DvppOperatorDesc& DvppOperatorDesc::createChannel() +{ + uint32_t ret = hi_mpi_vpc_sys_create_chn(&chnId, &stChnAttr); + if (ret != HI_SUCCESS) + CV_Error(Error::StsBadFlag, "failed to create DVPP vpc channel"); + return *this; +} + +// copy input array to dvpp memory +DvppOperatorDesc& DvppOperatorDesc::addInput(AscendPicDesc& picDesc) +{ + inputDesc_.push_back(picDesc); + 
holder.insert(picDesc.data); + return *this; +} + +template +hi_pixel_format setPixelFormat(const inMat& mat) +{ + CV_Assert(mat.channels() == 3 || mat.channels() == 1); + hi_pixel_format _picture_format; + if (mat.channels() == 3) + { + _picture_format = HI_PIXEL_FORMAT_BGR_888; + } + else if (mat.channels() == 1) + { + _picture_format = HI_PIXEL_FORMAT_YUV_400; + } + return _picture_format; +} + +DvppOperatorDesc& DvppOperatorDesc::addInput(const AscendMat& mat) +{ + Mat matHost; + mat.download(matHost); + return addInput(matHost); +} + +DvppOperatorDesc& DvppOperatorDesc::addInput(const Mat& mat) +{ + hi_pixel_format _picture_format = setPixelFormat(mat); + + AscendPicDesc picDesc(mat, _picture_format); + aclrtMemcpy2d(picDesc.Pic.picture_address, picDesc.Pic.picture_width_stride, mat.data, + mat.step[0], mat.step[0], picDesc.Pic.picture_height, ACL_MEMCPY_HOST_TO_DEVICE); + + return addInput(picDesc); +} + +// malloc memory for output +DvppOperatorDesc& DvppOperatorDesc::addOutput(AscendPicDesc& picDesc) +{ + outputDesc_.push_back(picDesc); + holder.insert(picDesc.data); + return *this; +} + +DvppOperatorDesc& DvppOperatorDesc::addOutput(AscendMat& mat) +{ + hi_pixel_format _picture_format = setPixelFormat(mat); + AscendPicDesc picDesc(mat, _picture_format); + return addOutput(picDesc); +} + +DvppOperatorDesc& DvppOperatorDesc::addOutput(Mat& mat) +{ + hi_pixel_format _picture_format = setPixelFormat(mat); + AscendPicDesc picDesc(mat, _picture_format); + return addOutput(picDesc); +} + +// get process result and copy it to host/device +DvppOperatorDesc& DvppOperatorDesc::getResult(Mat& dst, uint32_t& taskIDResult) +{ + uint32_t ret = hi_mpi_vpc_get_process_result(chnId, taskIDResult, -1); + if (ret != HI_SUCCESS) + CV_Error(Error::StsBadFlag, "failed to get process result."); + const uint32_t esz = CV_ELEM_SIZE(dst.type()); + size_t step = esz * dst.cols; + + aclrtMemcpy2d(dst.data, dst.step[0], outputDesc_[0].Pic.picture_address, + 
outputDesc_[0].Pic.picture_width_stride, dst.step[0], + outputDesc_[0].Pic.picture_height, ACL_MEMCPY_DEVICE_TO_HOST); + return *this; +} + +DvppOperatorDesc& DvppOperatorDesc::getResult(AscendMat& dst, uint32_t& taskIDResult) +{ + Mat matHost; + matHost.create(dst.rows, dst.cols, dst.type()); + getResult(matHost, taskIDResult); + dst.upload(matHost); + return *this; +} + +} // namespace cann +} // namespace cv diff --git a/modules/cannops/src/precomp.hpp b/modules/cannops/src/precomp.hpp index 8aadaf4d8de..fe81c8a42cb 100644 --- a/modules/cannops/src/precomp.hpp +++ b/modules/cannops/src/precomp.hpp @@ -10,6 +10,7 @@ #include "opencv2/cann_call.hpp" #include "opencv2/cann_interface.hpp" #include "opencv2/cann_private.hpp" +#include "opencv2/dvpp_call.hpp" #include "opencv2/ascendc_kernels.hpp" #define ALIGN_UP(num, align) (((num) + (align) - 1) & ~((align) - 1)) diff --git a/modules/cannops/test/test_core.cpp b/modules/cannops/test/test_core.cpp index 6b63a8cf061..98d554335aa 100644 --- a/modules/cannops/test/test_core.cpp +++ b/modules/cannops/test/test_core.cpp @@ -212,6 +212,128 @@ TEST(CORE, RESIZE) cv::cann::resetDevice(); } +TEST(CORE, RESIZE_NEW) +{ + Mat resized_cv, checker; + Mat cpuMat = randomMat(1280, 1706, CV_8UC3, 100.0, 255.0); + Size dsize = Size(768, 832); + // add support for {0 INTER_NEAREST} and {1 INTER_LINEAR} + // only the resize result of INTER_LINEAR is close to CV's. 
+ int interpolation = 1; + cv::resize(cpuMat, resized_cv, dsize, 0, 0, interpolation); + cv::cann::resize(cpuMat, checker, dsize, 0, 0, interpolation); + EXPECT_MAT_NEAR(resized_cv, checker, 1); + + cv::resize(cpuMat, resized_cv, Size(), 0.5, 0.5, interpolation); + cv::cann::resize(cpuMat, checker, Size(), 0.5, 0.5, interpolation); + EXPECT_MAT_NEAR(resized_cv, checker, 1); + + AscendMat npuMat, npuChecker; + npuMat.upload(cpuMat); + cv::resize(cpuMat, resized_cv, dsize, 0, 0, interpolation); + cv::cann::resize(npuMat, npuChecker, dsize, 0, 0, interpolation); + npuChecker.download(checker); + EXPECT_MAT_NEAR(resized_cv, checker, 1); + + cv::resize(cpuMat, resized_cv, Size(), 0.5, 0.5, interpolation); + cv::cann::resize(npuMat, npuChecker, Size(), 0.5, 0.5, interpolation); + npuChecker.download(checker); + EXPECT_MAT_NEAR(resized_cv, checker, 1); +} + +TEST(CORE, CROP_RESIZE) +{ + Mat cpuMat = randomMat(1280, 1706, CV_8UC1, 100.0, 255.0); + Mat resized_cv, checker, cpuOpRet; + Size dsize = Size(496, 512); + const Rect b(300, 500, 224, 256); + + cv::cann::cropResize(cpuMat, checker, b, dsize, 0, 0, 1); + Mat cropped_cv(cpuMat, b); + cv::resize(cropped_cv, cpuOpRet, dsize, 0, 0, 1); + EXPECT_MAT_NEAR(checker, cpuOpRet, 1); + + AscendMat npuMat, npuChecker; + npuMat.upload(cpuMat); + cv::cann::cropResize(npuMat, npuChecker, b, dsize, 0, 0, 1); + npuChecker.download(checker); + EXPECT_MAT_NEAR(cpuOpRet, checker, 1); +} +TEST(CORE, CROP_RESIZE_MAKE_BORDER) +{ + Mat cpuMat = randomMat(1024, 896, CV_8UC1, 100.0, 255.0); + + Mat resized_cv, checker, cpuOpRet; + Size dsize = Size(320, 256); + const Rect b(300, 500, 496, 512); + RNG rng(12345); + float scalarV[3] = {0, 0, 255}; + int top, bottom, left, right; + top = 54; + bottom = 0; + left = 32; + right = 0; + int interpolation = 1; + + Scalar value = {scalarV[0], scalarV[1], scalarV[2], 0}; + for (int borderType = 0; borderType < 2; borderType++) + { + cv::cann::cropResizeMakeBorder(cpuMat, checker, b, dsize, 0, 0, 
interpolation, top, left, + borderType, value); + Mat cropped_cv(cpuMat, b); + cv::resize(cropped_cv, resized_cv, dsize, 0, 0, interpolation); + cv::copyMakeBorder(resized_cv, cpuOpRet, top, bottom, left, right, borderType, value); + EXPECT_MAT_NEAR(checker, cpuOpRet, 1e-10); + } + AscendMat npuMat, npuChecker; + npuMat.upload(cpuMat); + for (int borderType = 0; borderType < 2; borderType++) + { + cv::cann::cropResizeMakeBorder(npuMat, npuChecker, b, dsize, 0, 0, interpolation, top, left, + borderType, value); + npuChecker.download(checker); + Mat cropped_cv(cpuMat, b); + cv::resize(cropped_cv, resized_cv, dsize, 0, 0, interpolation); + cv::copyMakeBorder(resized_cv, cpuOpRet, top, bottom, left, right, borderType, value); + EXPECT_MAT_NEAR(checker, cpuOpRet, 1e-10); + } +} + +TEST(CORE, COPY_MAKE_BORDER) +{ + Mat cpuMat = randomMat(1280, 1706, CV_8UC3, 100, 255); + + Mat cpuOpRet, checker; + RNG rng(12345); + Scalar value = {static_cast<float>(rng.uniform(0, 255)), + static_cast<float>(rng.uniform(0, 255)), + static_cast<float>(rng.uniform(0, 255))}; + int top, bottom, left, right; + top = 20; + bottom = 30; + left = 30; + right = 20; + + int borderType = 0; + for (borderType = 0; borderType < 2; borderType++) + { + cv::cann::copyMakeBorder(cpuMat, checker, top, bottom, left, right, borderType, value); + + cv::copyMakeBorder(cpuMat, cpuOpRet, top, bottom, left, right, borderType, value); + EXPECT_MAT_NEAR(checker, cpuOpRet, 1e-10); + } + + AscendMat npuMat, npuChecker; + npuMat.upload(cpuMat); + for (borderType = 0; borderType < 2; borderType++) + { + cv::cann::copyMakeBorder(npuMat, npuChecker, top, bottom, left, right, borderType, value); + npuChecker.download(checker); + + cv::copyMakeBorder(cpuMat, cpuOpRet, top, bottom, left, right, borderType, value); + EXPECT_MAT_NEAR(checker, cpuOpRet, 1e-10); + } +} } // namespace } // namespace opencv_test diff --git a/modules/cannops/test/test_main.cpp b/modules/cannops/test/test_main.cpp index 202c6af27ee..d14f2a2869e 100644 ---
a/modules/cannops/test/test_main.cpp +++ b/modules/cannops/test/test_main.cpp @@ -8,8 +8,18 @@ class CannEnvironment : public ::testing::Environment { public: virtual ~CannEnvironment() = default; - virtual void SetUp() CV_OVERRIDE { initAcl(); } - virtual void TearDown() CV_OVERRIDE { finalizeAcl(); } + virtual void SetUp() CV_OVERRIDE + { + initAcl(); + cv::cann::setDevice(DEVICE_ID); + initDvpp(); + } + virtual void TearDown() CV_OVERRIDE + { + finalizeAcl(); + cv::cann::resetDevice(); + finalizeDvpp(); + } }; static void initTests() diff --git a/modules/cannops/tutorials/ascend_npu_image_processing.markdown b/modules/cannops/tutorials/ascend_npu_image_processing.markdown index ed905831d31..80e54b4cc23 100644 --- a/modules/cannops/tutorials/ascend_npu_image_processing.markdown +++ b/modules/cannops/tutorials/ascend_npu_image_processing.markdown @@ -108,23 +108,4 @@ Results 4. Upon applying the flip operation with a flip code of 0 (flipping around the x-axis), we achieve the final result: - ![puppy_processed_normalized](./puppy_processed.jpg) - - - -## Usage Limitations - -While Ascend supports most commonly used operators, there are still some limitations that need to be addressed. - -- There is no strict limit on the size of the input image used for encoding; however, it depends on the available RAM size of your device. -- Please note that not all data types (dtypes) are supported by every operator. The current dtype limitations are outlined in the following table. We are actively working on addressing these limitations through automatic dtype conversion in an upcoming commit. 
- - -| Operator | Supported Dtype | -| ---------------------- | ------------------------------------------------------------ | -| multiply (with scale) | float16,float32,int32 | -| divide (with scale) | float16,float,int32,int8,uint8 | -| bitwise add/or/xor/not | int32,int16,uint16 | -| flip | float16,float,int64,int32,int16,uint16 | -| transpose | float16,float,int64,int32,int16,int8,uint64,uint32,uint16,uint8,bool | -| rotate | float16,float,int64,int32,int16,uint16 | + ![puppy_processed_normalized](./puppy_processed.jpg) \ No newline at end of file